/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000
   Free Software Foundation, Inc.

This file is part of GNU CC.

GNU CC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GNU CC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GNU CC; see the file COPYING.  If not, write to
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.  */
#include "config.h"
#include "system.h"
#include "rtl.h"
#include "tree.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "real.h"
#include "insn-config.h"
#include "conditions.h"
#include "insn-flags.h"
#include "output.h"
#include "insn-attr.h"
#include "flags.h"
#include "function.h"
#include "recog.h"
#include "expr.h"
#include "toplev.h"
#include "basic-block.h"
#include "ggc.h"
#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT -1
#endif
/* Processor costs (relative to an add) */
struct processor_costs i386_cost = {	/* 386 specific costs */
  1,			/* cost of an add instruction */
  1,			/* cost of a lea instruction */
  3,			/* variable shift costs */
  2,			/* constant shift costs */
  6,			/* cost of starting a multiply */
  1,			/* cost of multiply per each bit set */
  23,			/* cost of a divide/mod */
  15,			/* "large" insn */
  3,			/* MOVE_RATIO */
  4,			/* cost for loading QImode using movzbl */
  {2, 4, 2},		/* cost of loading integer registers
			   in QImode, HImode and SImode.
			   Relative to reg-reg move (2).  */
  {2, 4, 2},		/* cost of storing integer registers */
  2,			/* cost of reg,reg fld/fst */
  {8, 8, 8},		/* cost of loading fp registers
			   in SFmode, DFmode and XFmode */
  {8, 8, 8}		/* cost of storing fp registers
			   in SFmode, DFmode and XFmode */
};
struct processor_costs i486_cost = {	/* 486 specific costs */
  1,			/* cost of an add instruction */
  1,			/* cost of a lea instruction */
  3,			/* variable shift costs */
  2,			/* constant shift costs */
  12,			/* cost of starting a multiply */
  1,			/* cost of multiply per each bit set */
  40,			/* cost of a divide/mod */
  15,			/* "large" insn */
  3,			/* MOVE_RATIO */
  4,			/* cost for loading QImode using movzbl */
  {2, 4, 2},		/* cost of loading integer registers
			   in QImode, HImode and SImode.
			   Relative to reg-reg move (2).  */
  {2, 4, 2},		/* cost of storing integer registers */
  2,			/* cost of reg,reg fld/fst */
  {8, 8, 8},		/* cost of loading fp registers
			   in SFmode, DFmode and XFmode */
  {8, 8, 8}		/* cost of storing fp registers
			   in SFmode, DFmode and XFmode */
};
struct processor_costs pentium_cost = {
  1,			/* cost of an add instruction */
  1,			/* cost of a lea instruction */
  4,			/* variable shift costs */
  1,			/* constant shift costs */
  11,			/* cost of starting a multiply */
  0,			/* cost of multiply per each bit set */
  25,			/* cost of a divide/mod */
  8,			/* "large" insn */
  6,			/* MOVE_RATIO */
  6,			/* cost for loading QImode using movzbl */
  {2, 4, 2},		/* cost of loading integer registers
			   in QImode, HImode and SImode.
			   Relative to reg-reg move (2).  */
  {2, 4, 2},		/* cost of storing integer registers */
  2,			/* cost of reg,reg fld/fst */
  {2, 2, 6},		/* cost of loading fp registers
			   in SFmode, DFmode and XFmode */
  {4, 4, 6}		/* cost of storing fp registers
			   in SFmode, DFmode and XFmode */
};
struct processor_costs pentiumpro_cost = {
  1,			/* cost of an add instruction */
  1,			/* cost of a lea instruction */
  1,			/* variable shift costs */
  1,			/* constant shift costs */
  4,			/* cost of starting a multiply */
  0,			/* cost of multiply per each bit set */
  17,			/* cost of a divide/mod */
  8,			/* "large" insn */
  6,			/* MOVE_RATIO */
  2,			/* cost for loading QImode using movzbl */
  {4, 4, 4},		/* cost of loading integer registers
			   in QImode, HImode and SImode.
			   Relative to reg-reg move (2).  */
  {2, 2, 2},		/* cost of storing integer registers */
  2,			/* cost of reg,reg fld/fst */
  {2, 2, 6},		/* cost of loading fp registers
			   in SFmode, DFmode and XFmode */
  {4, 4, 6}		/* cost of storing fp registers
			   in SFmode, DFmode and XFmode */
};
struct processor_costs k6_cost = {
  1,			/* cost of an add instruction */
  2,			/* cost of a lea instruction */
  1,			/* variable shift costs */
  1,			/* constant shift costs */
  3,			/* cost of starting a multiply */
  0,			/* cost of multiply per each bit set */
  18,			/* cost of a divide/mod */
  8,			/* "large" insn */
  6,			/* MOVE_RATIO */
  3,			/* cost for loading QImode using movzbl */
  {4, 5, 4},		/* cost of loading integer registers
			   in QImode, HImode and SImode.
			   Relative to reg-reg move (2).  */
  {2, 3, 2},		/* cost of storing integer registers */
  4,			/* cost of reg,reg fld/fst */
  {6, 6, 6},		/* cost of loading fp registers
			   in SFmode, DFmode and XFmode */
  {4, 4, 4}		/* cost of storing fp registers
			   in SFmode, DFmode and XFmode */
};
struct processor_costs athlon_cost = {
  1,			/* cost of an add instruction */
  2,			/* cost of a lea instruction */
  1,			/* variable shift costs */
  1,			/* constant shift costs */
  5,			/* cost of starting a multiply */
  0,			/* cost of multiply per each bit set */
  42,			/* cost of a divide/mod */
  8,			/* "large" insn */
  9,			/* MOVE_RATIO */
  4,			/* cost for loading QImode using movzbl */
  {4, 5, 4},		/* cost of loading integer registers
			   in QImode, HImode and SImode.
			   Relative to reg-reg move (2).  */
  {2, 3, 2},		/* cost of storing integer registers */
  4,			/* cost of reg,reg fld/fst */
  {6, 6, 20},		/* cost of loading fp registers
			   in SFmode, DFmode and XFmode */
  {4, 4, 16}		/* cost of storing fp registers
			   in SFmode, DFmode and XFmode */
};
struct processor_costs *ix86_cost = &pentium_cost;
/* Processor feature/optimization bitmasks.  */
#define m_386 (1<<PROCESSOR_I386)
#define m_486 (1<<PROCESSOR_I486)
#define m_PENT (1<<PROCESSOR_PENTIUM)
#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
#define m_K6  (1<<PROCESSOR_K6)
#define m_ATHLON  (1<<PROCESSOR_ATHLON)

const int x86_use_leave = m_386 | m_K6 | m_ATHLON;
const int x86_push_memory = m_386 | m_K6 | m_ATHLON;
const int x86_zero_extend_with_and = m_486 | m_PENT;
const int x86_movx = m_ATHLON | m_PPRO /* m_386 | m_K6 */;
const int x86_double_with_add = ~m_386;
const int x86_use_bit_test = m_386;
const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON | m_K6;
const int x86_use_q_reg = m_PENT | m_PPRO | m_K6;
const int x86_use_any_reg = m_486;
const int x86_cmove = m_PPRO | m_ATHLON;
const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON;
const int x86_use_sahf = m_PPRO | m_K6 | m_ATHLON;
const int x86_partial_reg_stall = m_PPRO;
const int x86_use_loop = m_K6;
const int x86_use_fiop = ~(m_PPRO | m_ATHLON | m_PENT);
const int x86_use_mov0 = m_K6;
const int x86_use_cltd = ~(m_PENT | m_K6);
const int x86_read_modify_write = ~m_PENT;
const int x86_read_modify = ~(m_PENT | m_PPRO);
const int x86_split_long_moves = m_PPRO;
const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486;
const int x86_single_stringop = m_386;
const int x86_qimode_math = ~(0);
const int x86_promote_qi_regs = 0;
const int x86_himode_math = ~(m_PPRO);
const int x86_promote_hi_regs = m_PPRO;
const int x86_sub_esp_4 = m_ATHLON | m_PPRO;
const int x86_sub_esp_8 = m_ATHLON | m_PPRO | m_386 | m_486;
const int x86_add_esp_4 = m_ATHLON | m_K6;
const int x86_add_esp_8 = m_ATHLON | m_PPRO | m_K6 | m_386 | m_486;
const int x86_integer_DFmode_moves = ~m_ATHLON;
const int x86_partial_reg_dependency = m_ATHLON;
const int x86_memory_mismatch_stall = m_ATHLON;
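
/* Illustrative sketch (an assumption, not the verbatim i386.h
   definitions): each TARGET_* tuning macro tests the bit belonging to
   the CPU currently being scheduled for, along the lines of

     #define TARGET_USE_LEAVE (x86_use_leave & (1 << (int) ix86_cpu))

   so `const int x86_use_leave = m_386 | m_K6 | m_ATHLON' enables the
   optimization exactly on the 386, K6 and Athlon.  */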
#define AT_BP(mode) (gen_rtx_MEM ((mode), hard_frame_pointer_rtx))

const char * const hi_reg_name[] = HI_REGISTER_NAMES;
const char * const qi_reg_name[] = QI_REGISTER_NAMES;
const char * const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
/* Array of the smallest class containing reg number REGNO, indexed by
   REGNO.  Used by REGNO_REG_CLASS in i386.h.  */

enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
{
  /* ax, dx, cx, bx */
  AREG, DREG, CREG, BREG,
  /* si, di, bp, sp */
  SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
  /* FP registers */
  FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
  FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
  /* arg pointer */
  NON_Q_REGS,
  /* flags, fpsr, dirflag, frame */
  NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  SSE_REGS, SSE_REGS,
  MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
  MMX_REGS, MMX_REGS,
};
/* The "default" register map.  */

int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 2, 1, 3, 6, 7, 4, 5,		/* general regs */
  12, 13, 14, 15, 16, 17, 18, 19,	/* fp regs */
  -1, -1, -1, -1,			/* arg, flags, fpsr, dir */
  21, 22, 23, 24, 25, 26, 27, 28,	/* SSE */
  29, 30, 31, 32, 33, 34, 35, 36,	/* MMX */
};
/* Define the register numbers to be used in Dwarf debugging information.
   The SVR4 reference port C compiler uses the following register numbers
   in its Dwarf output code:
	0 for %eax (gcc regno = 0)
	1 for %ecx (gcc regno = 2)
	2 for %edx (gcc regno = 1)
	3 for %ebx (gcc regno = 3)
	4 for %esp (gcc regno = 7)
	5 for %ebp (gcc regno = 6)
	6 for %esi (gcc regno = 4)
	7 for %edi (gcc regno = 5)
   The following three DWARF register numbers are never generated by
   the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
   believes these numbers have these meanings.
	8  for %eip    (no gcc equivalent)
	9  for %eflags (gcc regno = 17)
	10 for %trapno (no gcc equivalent)
   It is not at all clear how we should number the FP stack registers
   for the x86 architecture.  If the version of SDB on x86/svr4 were
   a bit less brain dead with respect to floating-point then we would
   have a precedent to follow with respect to DWARF register numbers
   for x86 FP registers, but the SDB on x86/svr4 is so completely
   broken with respect to FP registers that it is hardly worth thinking
   of it as something to strive for compatibility with.
   The version of x86/svr4 SDB I have at the moment does (partially)
   seem to believe that DWARF register number 11 is associated with
   the x86 register %st(0), but that's about all.  Higher DWARF
   register numbers don't seem to be associated with anything in
   particular, and even for DWARF regno 11, SDB only seems to under-
   stand that it should say that a variable lives in %st(0) (when
   asked via an `=' command) if we said it was in DWARF regno 11,
   but SDB still prints garbage when asked for the value of the
   variable in question (via a `/' command).
   (Also note that the labels SDB prints for various FP stack regs
   when doing an `x' command are all wrong.)
   Note that these problems generally don't affect the native SVR4
   C compiler because it doesn't allow the use of -O with -g and
   because when it is *not* optimizing, it allocates a memory
   location for each floating-point variable, and the memory
   location is what gets described in the DWARF AT_location
   attribute for the variable in question.
   Regardless of the severe mental illness of the x86/svr4 SDB, we
   do something sensible here and we use the following DWARF
   register numbers.  Note that these are all stack-top-relative
   numbers.
	11 for %st(0) (gcc regno = 8)
	12 for %st(1) (gcc regno = 9)
	13 for %st(2) (gcc regno = 10)
	14 for %st(3) (gcc regno = 11)
	15 for %st(4) (gcc regno = 12)
	16 for %st(5) (gcc regno = 13)
	17 for %st(6) (gcc regno = 14)
	18 for %st(7) (gcc regno = 15)
   */
int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 2, 1, 3, 6, 7, 5, 4,		/* general regs */
  11, 12, 13, 14, 15, 16, 17, 18,	/* fp regs */
  -1, 9, -1, -1,			/* arg, flags, fpsr, dir */
  21, 22, 23, 24, 25, 26, 27, 28,	/* SSE registers */
  29, 30, 31, 32, 33, 34, 35, 36,	/* MMX registers */
};
/* Test and compare insns in i386.md store the information needed to
   generate branch and scc insns here.  */

struct rtx_def *ix86_compare_op0 = NULL_RTX;
struct rtx_def *ix86_compare_op1 = NULL_RTX;
#define MAX_386_STACK_LOCALS 2

/* Define the structure for the machine field in struct function.  */
struct machine_function
{
  rtx stack_locals[(int) MAX_MACHINE_MODE][MAX_386_STACK_LOCALS];
};

#define ix86_stack_locals (cfun->machine->stack_locals)
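
/* Illustrative sketch (an assumption, based only on how
   ix86_stack_locals is indexed in this file): a cached stack slot
   would typically be handed out lazily, along the lines of

     rtx
     assign_386_stack_local (mode, n)
	  enum machine_mode mode;
	  int n;
     {
       if (n < 0 || n >= MAX_386_STACK_LOCALS)
	 abort ();

       if (ix86_stack_locals[(int) mode][n] == NULL_RTX)
	 ix86_stack_locals[(int) mode][n]
	   = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);

       return ix86_stack_locals[(int) mode][n];
     }
*/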
/* which cpu are we scheduling for */
enum processor_type ix86_cpu;

/* which instruction set architecture to use.  */
enum processor_type ix86_arch;

/* Strings to hold which cpu and instruction set architecture to use.  */
const char *ix86_cpu_string;		/* for -mcpu=<xxx> */
const char *ix86_arch_string;		/* for -march=<xxx> */
/* Register allocation order */
const char *ix86_reg_alloc_order;
static char regs_allocated[FIRST_PSEUDO_REGISTER];

/* # of registers to use to pass arguments.  */
const char *ix86_regparm_string;

/* ix86_regparm_string as a number */
int ix86_regparm;
/* Alignment to use for loops and jumps:  */

/* Power of two alignment for loops.  */
const char *ix86_align_loops_string;

/* Power of two alignment for non-loop jumps.  */
const char *ix86_align_jumps_string;

/* Power of two alignment for stack boundary in bytes.  */
const char *ix86_preferred_stack_boundary_string;

/* Preferred alignment for stack boundary in bits.  */
int ix86_preferred_stack_boundary;

/* Values 1-5: see jump.c */
int ix86_branch_cost;
const char *ix86_branch_cost_string;

/* Power of two alignment for functions.  */
int ix86_align_funcs;
const char *ix86_align_funcs_string;

/* Power of two alignment for loops.  */
int ix86_align_loops;

/* Power of two alignment for non-loop jumps.  */
int ix86_align_jumps;
static void output_pic_addr_const PARAMS ((FILE *, rtx, int));
static void put_condition_code PARAMS ((enum rtx_code, enum machine_mode,
					int, int, FILE *));
static enum rtx_code unsigned_comparison PARAMS ((enum rtx_code code));
static rtx ix86_expand_int_compare PARAMS ((enum rtx_code, rtx, rtx));
static enum rtx_code ix86_prepare_fp_compare_args PARAMS ((enum rtx_code,
							   rtx *, rtx *));
static rtx gen_push PARAMS ((rtx));
static int memory_address_length PARAMS ((rtx addr));
static int ix86_flags_dependant PARAMS ((rtx, rtx, enum attr_type));
static int ix86_agi_dependant PARAMS ((rtx, rtx, enum attr_type));
static int ix86_safe_length PARAMS ((rtx));
static enum attr_memory ix86_safe_memory PARAMS ((rtx));
static enum attr_pent_pair ix86_safe_pent_pair PARAMS ((rtx));
static enum attr_ppro_uops ix86_safe_ppro_uops PARAMS ((rtx));
static void ix86_dump_ppro_packet PARAMS ((FILE *));
static void ix86_reorder_insn PARAMS ((rtx *, rtx *));
static rtx * ix86_pent_find_pair PARAMS ((rtx *, rtx *, enum attr_pent_pair,
					  rtx));
static void ix86_init_machine_status PARAMS ((struct function *));
static void ix86_mark_machine_status PARAMS ((struct function *));
static void ix86_split_to_parts PARAMS ((rtx, rtx *, enum machine_mode));
static int ix86_safe_length_prefix PARAMS ((rtx));
static HOST_WIDE_INT ix86_compute_frame_size PARAMS((HOST_WIDE_INT,
						     int *, int *, int *));
static int ix86_nsaved_regs PARAMS((void));
static void ix86_emit_save_regs PARAMS((void));
static void ix86_emit_restore_regs_using_mov PARAMS ((rtx, int));
static void ix86_emit_epilogue_esp_adjustment PARAMS((int));
static void ix86_sched_reorder_pentium PARAMS((rtx *, rtx *));
static void ix86_sched_reorder_ppro PARAMS((rtx *, rtx *));
static HOST_WIDE_INT ix86_GOT_alias_set PARAMS ((void));

struct ix86_address
{
  rtx base, index, disp;
  HOST_WIDE_INT scale;
};

static int ix86_decompose_address PARAMS ((rtx, struct ix86_address *));

struct builtin_description;
static rtx ix86_expand_sse_comi PARAMS ((struct builtin_description *, tree,
					 rtx));
static rtx ix86_expand_sse_compare PARAMS ((struct builtin_description *, tree,
					    rtx));
static rtx ix86_expand_unop1_builtin PARAMS ((enum insn_code, tree, rtx));
static rtx ix86_expand_unop_builtin PARAMS ((enum insn_code, tree, rtx, int));
static rtx ix86_expand_binop_builtin PARAMS ((enum insn_code, tree, rtx));
static rtx ix86_expand_store_builtin PARAMS ((enum insn_code, tree, int));
static rtx safe_vector_operand PARAMS ((rtx, enum machine_mode));
/* Sometimes certain combinations of command options do not make
   sense on a particular target machine.  You can define a macro
   `OVERRIDE_OPTIONS' to take account of this.  This macro, if
   defined, is executed once just after all the command options have
   been parsed.

   Don't use this macro to turn on various extra optimizations for
   `-O'.  That is what `OPTIMIZATION_OPTIONS' is for.  */

void
override_options ()
{
  /* Comes from final.c -- no real reason to change it.  */
#define MAX_CODE_ALIGN 16
  static struct ptt
    {
      struct processor_costs *cost;	/* Processor costs */
      int target_enable;		/* Target flags to enable.  */
      int target_disable;		/* Target flags to disable.  */
      int align_loop;			/* Default alignments.  */
      int align_jump;
      int align_func;
      int branch_cost;
    }
  const processor_target_table[PROCESSOR_max] =
    {
      {&i386_cost, 0, 0, 2, 2, 2, 1},
      {&i486_cost, 0, 0, 4, 4, 4, 1},
      {&pentium_cost, 0, 0, -4, -4, -4, 1},
      {&pentiumpro_cost, 0, 0, 4, -4, 4, 1},
      {&k6_cost, 0, 0, -5, -5, 4, 1},
      {&athlon_cost, 0, 0, 4, -4, 4, 1}
    };
  static struct pta
    {
      const char *name;		/* processor name or nickname.  */
      enum processor_type processor;
    }
  const processor_alias_table[] =
    {
      {"i386", PROCESSOR_I386},
      {"i486", PROCESSOR_I486},
      {"i586", PROCESSOR_PENTIUM},
      {"pentium", PROCESSOR_PENTIUM},
      {"i686", PROCESSOR_PENTIUMPRO},
      {"pentiumpro", PROCESSOR_PENTIUMPRO},
      {"k6", PROCESSOR_K6},
      {"athlon", PROCESSOR_ATHLON},
    };

  int const pta_size = sizeof (processor_alias_table) / sizeof (struct pta);
#ifdef SUBTARGET_OVERRIDE_OPTIONS
  SUBTARGET_OVERRIDE_OPTIONS;
#endif

  ix86_arch = PROCESSOR_I386;
  ix86_cpu = (enum processor_type) TARGET_CPU_DEFAULT;

  if (ix86_arch_string != 0)
    {
      int i;

      for (i = 0; i < pta_size; i++)
	if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
	  {
	    ix86_arch = processor_alias_table[i].processor;
	    /* Default cpu tuning to the architecture.  */
	    ix86_cpu = ix86_arch;
	    break;
	  }

      if (i == pta_size)
	error ("bad value (%s) for -march= switch", ix86_arch_string);
    }

  if (ix86_cpu_string != 0)
    {
      int i;

      for (i = 0; i < pta_size; i++)
	if (! strcmp (ix86_cpu_string, processor_alias_table[i].name))
	  {
	    ix86_cpu = processor_alias_table[i].processor;
	    break;
	  }

      if (i == pta_size)
	error ("bad value (%s) for -mcpu= switch", ix86_cpu_string);
    }

  ix86_cost = processor_target_table[ix86_cpu].cost;
  target_flags |= processor_target_table[ix86_cpu].target_enable;
  target_flags &= ~processor_target_table[ix86_cpu].target_disable;
  /* Arrange to set up i386_stack_locals for all functions.  */
  init_machine_status = ix86_init_machine_status;
  mark_machine_status = ix86_mark_machine_status;

  /* Validate registers in register allocation order.  */
  if (ix86_reg_alloc_order)
    {
      int i, ch;

      for (i = 0; (ch = ix86_reg_alloc_order[i]) != '\0'; i++)
	{
	  int regno = 0;

	  switch (ch)
	    {
	    case 'a':	regno = 0;	break;
	    case 'd':	regno = 1;	break;
	    case 'c':	regno = 2;	break;
	    case 'b':	regno = 3;	break;
	    case 'S':	regno = 4;	break;
	    case 'D':	regno = 5;	break;
	    case 'B':	regno = 6;	break;

	    default:	fatal ("Register '%c' is unknown", ch);
	    }

	  if (regs_allocated[regno])
	    fatal ("Register '%c' already specified in allocation order", ch);

	  regs_allocated[regno] = 1;
	}
    }
  /* Validate -mregparm= value.  */
  if (ix86_regparm_string)
    {
      ix86_regparm = atoi (ix86_regparm_string);
      if (ix86_regparm < 0 || ix86_regparm > REGPARM_MAX)
	fatal ("-mregparm=%d is not between 0 and %d",
	       ix86_regparm, REGPARM_MAX);
    }
  /* Validate -malign-loops= value, or provide default.  */
  ix86_align_loops = processor_target_table[ix86_cpu].align_loop;
  if (ix86_align_loops_string)
    {
      ix86_align_loops = atoi (ix86_align_loops_string);
      if (ix86_align_loops < 0 || ix86_align_loops > MAX_CODE_ALIGN)
	fatal ("-malign-loops=%d is not between 0 and %d",
	       ix86_align_loops, MAX_CODE_ALIGN);
    }

  /* Validate -malign-jumps= value, or provide default.  */
  ix86_align_jumps = processor_target_table[ix86_cpu].align_jump;
  if (ix86_align_jumps_string)
    {
      ix86_align_jumps = atoi (ix86_align_jumps_string);
      if (ix86_align_jumps < 0 || ix86_align_jumps > MAX_CODE_ALIGN)
	fatal ("-malign-jumps=%d is not between 0 and %d",
	       ix86_align_jumps, MAX_CODE_ALIGN);
    }

  /* Validate -malign-functions= value, or provide default.  */
  ix86_align_funcs = processor_target_table[ix86_cpu].align_func;
  if (ix86_align_funcs_string)
    {
      ix86_align_funcs = atoi (ix86_align_funcs_string);
      if (ix86_align_funcs < 0 || ix86_align_funcs > MAX_CODE_ALIGN)
	fatal ("-malign-functions=%d is not between 0 and %d",
	       ix86_align_funcs, MAX_CODE_ALIGN);
    }
  /* Validate -mpreferred-stack-boundary= value, or provide default.
     The default of 128 bits is for Pentium III's SSE __m128.  */
  ix86_preferred_stack_boundary = 128;
  if (ix86_preferred_stack_boundary_string)
    {
      int i = atoi (ix86_preferred_stack_boundary_string);
      if (i < 2 || i > 31)
	fatal ("-mpreferred-stack-boundary=%d is not between 2 and 31", i);
      ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
    }
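
  /* Worked example (illustrative): -mpreferred-stack-boundary=4 yields
     (1 << 4) * BITS_PER_UNIT = 16 * 8 = 128 bits, i.e. the 16-byte
     alignment needed to keep SSE __m128 values naturally aligned.  */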
  /* Validate -mbranch-cost= value, or provide default.  */
  ix86_branch_cost = processor_target_table[ix86_cpu].branch_cost;
  if (ix86_branch_cost_string)
    {
      ix86_branch_cost = atoi (ix86_branch_cost_string);
      if (ix86_branch_cost < 0 || ix86_branch_cost > 5)
	fatal ("-mbranch-cost=%d is not between 0 and 5",
	       ix86_branch_cost);
    }
  /* Keep nonleaf frame pointers.  */
  if (TARGET_OMIT_LEAF_FRAME_POINTER)
    flag_omit_frame_pointer = 1;

  /* If we're doing fast math, we don't care about comparison order
     wrt NaNs.  This lets us use a shorter comparison sequence.  */
  if (flag_fast_math)
    target_flags &= ~MASK_IEEE_FP;

  /* It makes no sense to ask for just SSE builtins, so MMX is also turned
     on.  */
  if (TARGET_SSE)
    target_flags |= MASK_MMX;
}
/* A C statement (sans semicolon) to choose the order in which to
   allocate hard registers for pseudo-registers local to a basic
   block.

   Store the desired register order in the array `reg_alloc_order'.
   Element 0 should be the register to allocate first; element 1, the
   next register; and so on.

   The macro body should not assume anything about the contents of
   `reg_alloc_order' before execution of the macro.

   On most machines, it is not necessary to define this macro.  */
void
order_regs_for_local_alloc ()
{
  int i, ch, order;

  /* User specified the register allocation order.  */

  if (ix86_reg_alloc_order)
    {
      for (i = order = 0; (ch = ix86_reg_alloc_order[i]) != '\0'; i++)
	{
	  int regno = 0;

	  switch (ch)
	    {
	    case 'a':	regno = 0;	break;
	    case 'd':	regno = 1;	break;
	    case 'c':	regno = 2;	break;
	    case 'b':	regno = 3;	break;
	    case 'S':	regno = 4;	break;
	    case 'D':	regno = 5;	break;
	    case 'B':	regno = 6;	break;
	    }

	  reg_alloc_order[order++] = regno;
	}

      for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
	{
	  if (! regs_allocated[i])
	    reg_alloc_order[order++] = i;
	}
    }

  /* If user did not specify a register allocation order, use natural order.  */
  else
    {
      for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
	reg_alloc_order[i] = i;
    }
}
void
optimization_options (level, size)
     int level;
     int size ATTRIBUTE_UNUSED;
{
  /* For -O2 and beyond, turn off -fschedule-insns by default.  It tends to
     make the problem with not enough registers even worse.  */
#ifdef INSN_SCHEDULING
  if (level > 1)
    flag_schedule_insns = 0;
#endif
}
/* Return nonzero if IDENTIFIER with arguments ARGS is a valid machine specific
   attribute for DECL.  The attributes in ATTRIBUTES have previously been
   assigned to DECL.  */

int
ix86_valid_decl_attribute_p (decl, attributes, identifier, args)
     tree decl ATTRIBUTE_UNUSED;
     tree attributes ATTRIBUTE_UNUSED;
     tree identifier ATTRIBUTE_UNUSED;
     tree args ATTRIBUTE_UNUSED;
{
  return 0;
}
/* Return nonzero if IDENTIFIER with arguments ARGS is a valid machine specific
   attribute for TYPE.  The attributes in ATTRIBUTES have previously been
   assigned to TYPE.  */

int
ix86_valid_type_attribute_p (type, attributes, identifier, args)
     tree type;
     tree attributes ATTRIBUTE_UNUSED;
     tree identifier;
     tree args;
{
  if (TREE_CODE (type) != FUNCTION_TYPE
      && TREE_CODE (type) != METHOD_TYPE
      && TREE_CODE (type) != FIELD_DECL
      && TREE_CODE (type) != TYPE_DECL)
    return 0;

  /* Stdcall attribute says callee is responsible for popping arguments
     if they are not variable.  */
  if (is_attribute_p ("stdcall", identifier))
    return (args == NULL_TREE);

  /* Cdecl attribute says the callee is a normal C declaration.  */
  if (is_attribute_p ("cdecl", identifier))
    return (args == NULL_TREE);

  /* Regparm attribute specifies how many integer arguments are to be
     passed in registers.  */
  if (is_attribute_p ("regparm", identifier))
    {
      tree cst;

      if (! args || TREE_CODE (args) != TREE_LIST
	  || TREE_CHAIN (args) != NULL_TREE
	  || TREE_VALUE (args) == NULL_TREE)
	return 0;

      cst = TREE_VALUE (args);
      if (TREE_CODE (cst) != INTEGER_CST)
	return 0;

      if (compare_tree_int (cst, REGPARM_MAX) > 0)
	return 0;

      return 1;
    }

  return 0;
}
/* Return 0 if the attributes for two types are incompatible, 1 if they
   are compatible, and 2 if they are nearly compatible (which causes a
   warning to be generated).  */

int
ix86_comp_type_attributes (type1, type2)
     tree type1;
     tree type2;
{
  /* Check for mismatch of non-default calling convention.  */
  const char *rtdstr = TARGET_RTD ? "cdecl" : "stdcall";

  if (TREE_CODE (type1) != FUNCTION_TYPE)
    return 1;

  /* Check for mismatched return types (cdecl vs stdcall).  */
  if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
      != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
    return 0;
  return 1;
}
/* Value is the number of bytes of arguments automatically
   popped when returning from a subroutine call.
   FUNDECL is the declaration node of the function (as a tree),
   FUNTYPE is the data type of the function (as a tree),
   or for a library call it is an identifier node for the subroutine name.
   SIZE is the number of bytes of arguments passed on the stack.

   On the 80386, the RTD insn may be used to pop them if the number
   of args is fixed, but if the number is variable then the caller
   must pop them all.  RTD can't be used for library calls now
   because the library is compiled with the Unix compiler.
   Use of RTD is a selectable option, since it is incompatible with
   standard Unix calling sequences.  If the option is not selected,
   the caller must always pop the args.

   The attribute stdcall is equivalent to RTD on a per module basis.  */

int
ix86_return_pops_args (fundecl, funtype, size)
     tree fundecl;
     tree funtype;
     int size;
{
  int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);

  /* Cdecl functions override -mrtd, and never pop the stack.  */
  if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype)))
    {
      /* Stdcall functions will pop the stack if not variable args.  */
      if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype)))
	rtd = 1;

      if (rtd
	  && (TYPE_ARG_TYPES (funtype) == NULL_TREE
	      || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
		  == void_type_node)))
	return size;
    }

  /* Lose any fake structure return argument.  */
  if (aggregate_value_p (TREE_TYPE (funtype)))
    return GET_MODE_SIZE (Pmode);

  return 0;
}
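
/* Illustrative example (an assumption, not from the original sources):
   given

     int __attribute__ ((stdcall)) f (int a, int b);

   the fixed 8 bytes of arguments are popped by the callee, so the
   function above returns 8 and the epilogue emits `ret $8'.  A cdecl
   or varargs function instead returns 0 and the caller pops.  */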
/* Argument support functions.  */

/* Initialize a variable CUM of type CUMULATIVE_ARGS
   for a call to a function whose data type is FNTYPE.
   For a library call, FNTYPE is 0.  */

void
init_cumulative_args (cum, fntype, libname)
     CUMULATIVE_ARGS *cum;	/* Argument info to initialize */
     tree fntype;		/* tree ptr for function decl */
     rtx libname;		/* SYMBOL_REF of library name or 0 */
{
  static CUMULATIVE_ARGS zero_cum;
  tree param, next_param;

  if (TARGET_DEBUG_ARG)
    {
      fprintf (stderr, "\ninit_cumulative_args (");
      if (fntype)
	fprintf (stderr, "fntype code = %s, ret code = %s",
		 tree_code_name[(int) TREE_CODE (fntype)],
		 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
      else
	fprintf (stderr, "no fntype");

      if (libname)
	fprintf (stderr, ", libname = %s", XSTR (libname, 0));
    }

  *cum = zero_cum;

  /* Set up the number of registers to use for passing arguments.  */
  cum->nregs = ix86_regparm;
  if (fntype)
    {
      tree attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (fntype));

      if (attr)
	cum->nregs = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
    }

  /* Determine if this function has variable arguments.  This is
     indicated by the last argument being 'void_type_mode' if there
     are no variable arguments.  If there are variable arguments, then
     we won't pass anything in registers */

  if (cum->nregs)
    {
      for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
	   param != 0; param = next_param)
	{
	  next_param = TREE_CHAIN (param);
	  if (next_param == 0 && TREE_VALUE (param) != void_type_node)
	    cum->nregs = 0;
	}
    }

  if (TARGET_DEBUG_ARG)
    fprintf (stderr, ", nregs=%d )\n", cum->nregs);

  return;
}
/* Update the data in CUM to advance over an argument
   of mode MODE and data type TYPE.
   (TYPE is null for libcalls where that information may not be available.)  */

void
function_arg_advance (cum, mode, type, named)
     CUMULATIVE_ARGS *cum;	/* current arg information */
     enum machine_mode mode;	/* current arg mode */
     tree type;			/* type of the argument or 0 if lib support */
     int named;			/* whether or not the argument was named */
{
  int bytes =
    (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
  int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  if (TARGET_DEBUG_ARG)
    fprintf (stderr,
	     "function_adv (sz=%d, wds=%2d, nregs=%d, mode=%s, named=%d)\n\n",
	     words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);

  cum->words += words;
  cum->nregs -= words;
  cum->regno += words;

  if (cum->nregs <= 0)
    {
      cum->nregs = 0;
      cum->regno = 0;
    }

  return;
}
/* Define where to put the arguments to a function.
   Value is zero to push the argument on the stack,
   or a hard register in which to store the argument.

   MODE is the argument's machine mode.
   TYPE is the data type of the argument (as a tree).
    This is null for libcalls where that information may
    not be available.
   CUM is a variable of type CUMULATIVE_ARGS which gives info about
    the preceding args and about the function being called.
   NAMED is nonzero if this argument is a named parameter
    (otherwise it is an extra parameter matching an ellipsis).  */

rtx
function_arg (cum, mode, type, named)
     CUMULATIVE_ARGS *cum;	/* current arg information */
     enum machine_mode mode;	/* current arg mode */
     tree type;			/* type of the argument or 0 if lib support */
     int named;			/* != 0 for normal args, == 0 for ... args */
{
  rtx ret = NULL_RTX;
  int bytes =
    (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
  int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  switch (mode)
    {
      /* For now, pass fp/complex values on the stack.  */
    default:
      break;

    case BLKmode:
    case DImode:
    case SImode:
    case HImode:
    case QImode:
      if (words <= cum->nregs)
	ret = gen_rtx_REG (mode, cum->regno);
      break;
    }

  if (TARGET_DEBUG_ARG)
    {
      fprintf (stderr,
	       "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d",
	       words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);

      if (ret)
	fprintf (stderr, ", reg=%%e%s", reg_names[ REGNO (ret) ]);
      else
	fprintf (stderr, ", stack");

      fprintf (stderr, " )\n");
    }

  return ret;
}
/* Return nonzero if OP is (const_int 1), else return zero.  */

int
const_int_1_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return (GET_CODE (op) == CONST_INT && INTVAL (op) == 1);
}
/* Returns 1 if OP is either a symbol reference or a sum of a symbol
   reference and a constant.  */

int
symbolic_operand (op, mode)
     register rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  switch (GET_CODE (op))
    {
    case SYMBOL_REF:
    case LABEL_REF:
      return 1;

    case CONST:
      op = XEXP (op, 0);
      if (GET_CODE (op) == SYMBOL_REF
	  || GET_CODE (op) == LABEL_REF
	  || (GET_CODE (op) == UNSPEC
	      && XINT (op, 1) >= 6
	      && XINT (op, 1) <= 7))
	return 1;
      if (GET_CODE (op) != PLUS
	  || GET_CODE (XEXP (op, 1)) != CONST_INT)
	return 0;

      op = XEXP (op, 0);
      if (GET_CODE (op) == SYMBOL_REF
	  || GET_CODE (op) == LABEL_REF)
	return 1;
      /* Only @GOTOFF gets offsets.  */
      if (GET_CODE (op) != UNSPEC
	  || XINT (op, 1) != 7)
	return 0;

      op = XVECEXP (op, 0, 0);
      if (GET_CODE (op) == SYMBOL_REF
	  || GET_CODE (op) == LABEL_REF)
	return 1;
      return 0;

    default:
      return 0;
    }
}
/* Return true if the operand contains a @GOT or @GOTOFF reference.  */

int
pic_symbolic_operand (op, mode)
     register rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  if (GET_CODE (op) == CONST)
    {
      op = XEXP (op, 0);
      if (GET_CODE (op) == UNSPEC)
	return 1;
      if (GET_CODE (op) != PLUS
	  || GET_CODE (XEXP (op, 1)) != CONST_INT)
	return 0;
      op = XEXP (op, 0);
      if (GET_CODE (op) == UNSPEC)
	return 1;
    }
  return 0;
}
/* Test for a valid operand for a call instruction.  Don't allow the
   arg pointer register or virtual regs since they may decay into
   reg + const, which the patterns can't handle.  */

int
call_insn_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  /* Disallow indirect through a virtual register.  This leads to
     compiler aborts when trying to eliminate them.  */
  if (GET_CODE (op) == REG
      && (op == arg_pointer_rtx
	  || op == frame_pointer_rtx
	  || (REGNO (op) >= FIRST_PSEUDO_REGISTER
	      && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
    return 0;

  /* Disallow `call 1234'.  Due to varying assembler lameness this
     gets either rejected or translated to `call .+1234'.  */
  if (GET_CODE (op) == CONST_INT)
    return 0;

  /* Explicitly allow SYMBOL_REF even if pic.  */
  if (GET_CODE (op) == SYMBOL_REF)
    return 1;

  /* Half-pic doesn't allow anything but registers and constants.
     We've just taken care of the latter.  */
  if (HALF_PIC_P ())
    return register_operand (op, Pmode);

  /* Otherwise we can allow any general_operand in the address.  */
  return general_operand (op, Pmode);
}
int
constant_call_address_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return GET_CODE (op) == SYMBOL_REF;
}

/* Match exactly zero and one.  */

int
const0_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  return op == CONST0_RTX (mode);
}

int
const1_operand (op, mode)
     register rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return op == const1_rtx;
}
/* Match 2, 4, or 8.  Used for leal multiplicands.  */

int
const248_operand (op, mode)
     register rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return (GET_CODE (op) == CONST_INT
	  && (INTVAL (op) == 2 || INTVAL (op) == 4 || INTVAL (op) == 8));
}
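
/* Illustrative note (an assumption, not from the original sources):
   these constants are the scale factors encodable in an x86 SIB byte,
   e.g.

     leal (%eax,%ebx,4), %ecx		; ecx = eax + ebx*4

   so const248_operand accepts exactly the multiplicands that `leal'
   can encode.  */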
/* True if this is a constant appropriate for an increment or decrement.  */

int
incdec_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  if (op == const1_rtx || op == constm1_rtx)
    return 1;
  if (GET_CODE (op) != CONST_INT)
    return 0;
  if (mode == SImode && INTVAL (op) == (HOST_WIDE_INT) 0xffffffff)
    return 1;
  if (mode == HImode && INTVAL (op) == (HOST_WIDE_INT) 0xffff)
    return 1;
  if (mode == QImode && INTVAL (op) == (HOST_WIDE_INT) 0xff)
    return 1;
  return 0;
}
/* Return false if this is the stack pointer, or any other fake
   register eliminable to the stack pointer.  Otherwise, this is
   a register operand.

   This is used to prevent esp from being used as an index reg.
   Which would only happen in pathological cases.  */

int
reg_no_sp_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  rtx t = op;
  if (GET_CODE (t) == SUBREG)
    t = SUBREG_REG (t);
  if (t == stack_pointer_rtx || t == arg_pointer_rtx || t == frame_pointer_rtx)
    return 0;

  return register_operand (op, mode);
}

int
mmx_reg_operand (op, mode)
     register rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return MMX_REG_P (op);
}
/* Return false if this is any eliminable register.  Otherwise
   general_operand.  */

int
general_no_elim_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  rtx t = op;
  if (GET_CODE (t) == SUBREG)
    t = SUBREG_REG (t);
  if (t == arg_pointer_rtx || t == frame_pointer_rtx
      || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
      || t == virtual_stack_dynamic_rtx)
    return 0;

  return general_operand (op, mode);
}

/* Return false if this is any eliminable register.  Otherwise
   register_operand or const_int.  */

int
nonmemory_no_elim_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  rtx t = op;
  if (GET_CODE (t) == SUBREG)
    t = SUBREG_REG (t);
  if (t == arg_pointer_rtx || t == frame_pointer_rtx
      || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
      || t == virtual_stack_dynamic_rtx)
    return 0;

  return GET_CODE (op) == CONST_INT || register_operand (op, mode);
}
/* Return true if op is a Q_REGS class register.  */

int
q_regs_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  if (mode != VOIDmode && GET_MODE (op) != mode)
    return 0;
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  return QI_REG_P (op);
}

/* Return true if op is a NON_Q_REGS class register.  */

int
non_q_regs_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  if (mode != VOIDmode && GET_MODE (op) != mode)
    return 0;
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  return NON_QI_REG_P (op);
}
/* Return 1 if OP is a comparison operator that can use the condition code
   generated by a logical operation, which characteristically does not set
   overflow or carry.  To be used with CCNOmode.  */

int
no_comparison_operator (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  if (mode != VOIDmode && GET_MODE (op) != mode)
    return 0;

  switch (GET_CODE (op))
    {
    case EQ: case NE:
    case LT: case GE:
    case LEU: case LTU: case GEU: case GTU:
      return 1;

    default:
      return 0;
    }
}

/* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS
   insns.  */

int
sse_comparison_operator (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  enum rtx_code code = GET_CODE (op);
  return code == EQ || code == LT || code == LE || code == UNORDERED;
}
/* Return 1 if OP is a valid comparison operator in valid mode.  */

int
ix86_comparison_operator (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  enum machine_mode inmode;
  if (mode != VOIDmode && GET_MODE (op) != mode)
    return 0;
  switch (GET_CODE (op))
    {
    case EQ: case NE:
      return 1;
    case LT: case GE:
      inmode = GET_MODE (XEXP (op, 0));
      if (inmode == CCmode || inmode == CCGCmode
	  || inmode == CCGOCmode || inmode == CCNOmode)
	return 1;
      return 0;
    case LTU: case GTU: case LEU: case ORDERED: case UNORDERED: case GEU:
      inmode = GET_MODE (XEXP (op, 0));
      if (inmode == CCmode)
	return 1;
      return 0;
    case GT: case LE:
      inmode = GET_MODE (XEXP (op, 0));
      if (inmode == CCmode || inmode == CCGCmode || inmode == CCNOmode)
	return 1;
      return 0;
    default:
      return 0;
    }
}
/* Return 1 if OP is a comparison operator that can be issued by fcmov.  */

int
fcmov_comparison_operator (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  enum machine_mode inmode = GET_MODE (XEXP (op, 0));
  if (mode != VOIDmode && GET_MODE (op) != mode)
    return 0;
  switch (GET_CODE (op))
    {
    case EQ: case NE:
      return 1;
    case LTU: case GTU: case LEU: case ORDERED: case UNORDERED: case GEU:
      if (inmode == CCFPmode || inmode == CCFPUmode)
	return 1;
      return 0;
    default:
      return 0;
    }
}
/* Return 1 if OP is a binary operator that can be promoted to wider mode.  */

int
promotable_binary_operator (op, mode)
     register rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  switch (GET_CODE (op))
    {
    case MULT:
      /* Modern CPUs have same latency for HImode and SImode multiply,
	 but 386 and 486 do HImode multiply faster.  */
      return ix86_cpu > PROCESSOR_I486;
    case PLUS:
    case AND:
    case IOR:
    case XOR:
    case ASHIFT:
      return 1;
    default:
      return 0;
    }
}
/* Nearly general operand, but accept any const_double, since we wish
   to be able to drop them into memory rather than have them get pulled
   into registers.  */

int
cmp_fp_expander_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  if (mode != VOIDmode && mode != GET_MODE (op))
    return 0;
  if (GET_CODE (op) == CONST_DOUBLE)
    return 1;
  return general_operand (op, mode);
}
/* Match an SI or HImode register for a zero_extract.  */

int
ext_register_operand (op, mode)
     register rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  if (GET_MODE (op) != SImode && GET_MODE (op) != HImode)
    return 0;
  return register_operand (op, VOIDmode);
}
/* Return 1 if this is a valid binary floating-point operation.
   OP is the expression matched, and MODE is its mode.  */

int
binary_fp_operator (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  if (mode != VOIDmode && mode != GET_MODE (op))
    return 0;

  switch (GET_CODE (op))
    {
    case PLUS:
    case MINUS:
    case MULT:
    case DIV:
      return GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT;

    default:
      return 0;
    }
}
int
mult_operator (op, mode)
     register rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return GET_CODE (op) == MULT;
}

int
div_operator (op, mode)
     register rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return GET_CODE (op) == DIV;
}

int
arith_or_logical_operator (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((mode == VOIDmode || GET_MODE (op) == mode)
	  && (GET_RTX_CLASS (GET_CODE (op)) == 'c'
	      || GET_RTX_CLASS (GET_CODE (op)) == '2'));
}
/* Returns 1 if OP is memory operand with a displacement.  */

int
memory_displacement_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  struct ix86_address parts;

  if (! memory_operand (op, mode))
    return 0;

  if (! ix86_decompose_address (XEXP (op, 0), &parts))
    abort ();

  return parts.disp != NULL_RTX;
}
/* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
   re-recognize the operand to avoid a copy_to_mode_reg that will fail.

   ??? It seems likely that this will only work because cmpsi is an
   expander, and no actual insns use this.  */

int
cmpsi_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (general_operand (op, mode))
    return 1;

  if (GET_CODE (op) == AND
      && GET_MODE (op) == SImode
      && GET_CODE (XEXP (op, 0)) == ZERO_EXTRACT
      && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
      && GET_CODE (XEXP (XEXP (op, 0), 2)) == CONST_INT
      && INTVAL (XEXP (XEXP (op, 0), 1)) == 8
      && INTVAL (XEXP (XEXP (op, 0), 2)) == 8
      && GET_CODE (XEXP (op, 1)) == CONST_INT)
    return 1;

  return 0;
}
/* Returns 1 if OP is memory operand that can not be represented by the
   modRM array.  */

int
long_memory_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  if (! memory_operand (op, mode))
    return 0;

  return memory_address_length (op) != 0;
}
/* Return nonzero if the rtx is known aligned.  */

int
aligned_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  struct ix86_address parts;

  if (!general_operand (op, mode))
    return 0;

  /* Registers and immediate operands are always "aligned".  */
  if (GET_CODE (op) != MEM)
    return 1;

  /* Don't even try to do any aligned optimizations with volatiles.  */
  if (MEM_VOLATILE_P (op))
    return 0;

  op = XEXP (op, 0);

  /* Pushes and pops are only valid on the stack pointer.  */
  if (GET_CODE (op) == PRE_DEC
      || GET_CODE (op) == POST_INC)
    return 1;

  /* Decode the address.  */
  if (! ix86_decompose_address (op, &parts))
    abort ();

  /* Look for some component that isn't known to be aligned.  */
  if (parts.index)
    {
      if (parts.scale < 4
	  && REGNO_POINTER_ALIGN (REGNO (parts.index)) < 32)
	return 0;
    }
  if (parts.base)
    {
      if (REGNO_POINTER_ALIGN (REGNO (parts.base)) < 32)
	return 0;
    }
  if (parts.disp)
    {
      if (GET_CODE (parts.disp) != CONST_INT
	  || (INTVAL (parts.disp) & 3) != 0)
	return 0;
    }

  /* Didn't find one -- this must be an aligned address.  */
  return 1;
}
/* Return true if the constant is something that can be loaded with
   a special instruction.  Only handle 0.0 and 1.0; others are less
   worthwhile.  */

int
standard_80387_constant_p (x)
     rtx x;
{
  if (GET_CODE (x) != CONST_DOUBLE)
    return -1;

#if ! defined (REAL_IS_NOT_DOUBLE) || defined (REAL_ARITHMETIC)
  {
    REAL_VALUE_TYPE d;
    jmp_buf handler;
    int is0, is1;

    if (setjmp (handler))
      return 0;

    set_float_handler (handler);
    REAL_VALUE_FROM_CONST_DOUBLE (d, x);
    is0 = REAL_VALUES_EQUAL (d, dconst0) && !REAL_VALUE_MINUS_ZERO (d);
    is1 = REAL_VALUES_EQUAL (d, dconst1);
    set_float_handler (NULL_PTR);

    if (is0)
      return 1;

    if (is1)
      return 2;

    /* Note that on the 80387, other constants, such as pi,
       are much slower to load as standard constants
       than to load from doubles in memory!  */
    /* ??? Not true on K6: all constants are equal cost.  */
  }
#endif

  return 0;
}
/* Returns 1 if OP contains a symbol reference */

int
symbolic_reference_mentioned_p (op)
     rtx op;
{
  register const char *fmt;
  register int i;

  if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
    return 1;

  fmt = GET_RTX_FORMAT (GET_CODE (op));
  for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
    {
      if (fmt[i] == 'E')
	{
	  register int j;

	  for (j = XVECLEN (op, i) - 1; j >= 0; j--)
	    if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
	      return 1;
	}

      else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
	return 1;
    }

  return 0;
}
/* Return 1 if it is appropriate to emit `ret' instructions in the
   body of a function.  Do this only if the epilogue is simple, needing a
   couple of insns.  Prior to reloading, we can't tell how many registers
   must be saved, so return 0 then.  Return 0 if there is no frame
   marker to de-allocate.

   If NON_SAVING_SETJMP is defined and true, then it is not possible
   for the epilogue to be simple, so return 0.  This is a special case
   since NON_SAVING_SETJMP will not cause regs_ever_live to change
   until final, but jump_optimize may need to know sooner if a
   `return' is OK.  */

int
ix86_can_use_return_insn_p ()
{
  HOST_WIDE_INT tsize;
  int nregs;

#ifdef NON_SAVING_SETJMP
  if (NON_SAVING_SETJMP && current_function_calls_setjmp)
    return 0;
#endif
#ifdef FUNCTION_BLOCK_PROFILER_EXIT
  if (profile_block_flag == 2)
    return 0;
#endif

  if (! reload_completed || frame_pointer_needed)
    return 0;

  /* Don't allow more than 32K pop, since that's all we can do
     with one instruction.  */
  if (current_function_pops_args
      && current_function_args_size >= 32768)
    return 0;

  tsize = ix86_compute_frame_size (get_frame_size (), &nregs, NULL, NULL);
  return tsize == 0 && nregs == 0;
}
static const char *pic_label_name;
static int pic_label_output;
static const char *global_offset_table_name;

/* This function generates code for -fpic that loads %ebx with
   the return address of the caller and then returns.  */

void
asm_output_function_prefix (file, name)
     FILE *file;
     const char *name ATTRIBUTE_UNUSED;
{
  rtx xops[2];
  int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
				  || current_function_uses_const_pool);
  xops[0] = pic_offset_table_rtx;
  xops[1] = stack_pointer_rtx;

  /* Deep branch prediction favors having a return for every call.  */
  if (pic_reg_used && TARGET_DEEP_BRANCH_PREDICTION)
    {
      if (!pic_label_output)
	{
	  /* This used to call ASM_DECLARE_FUNCTION_NAME() but since it's an
	     internal (non-global) label that's being emitted, it didn't make
	     sense to have .type information for local labels.  This caused
	     the SCO OpenServer 5.0.4 ELF assembler grief (why are you giving
	     me debug info for a label that you're declaring non-global?) this
	     was changed to call ASM_OUTPUT_LABEL() instead.  */

	  ASM_OUTPUT_LABEL (file, pic_label_name);

	  xops[1] = gen_rtx_MEM (SImode, xops[1]);
	  output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
	  output_asm_insn ("ret", xops);

	  pic_label_output = 1;
	}
    }
}
void
load_pic_register ()
{
  rtx gotsym, pclab;

  if (global_offset_table_name == NULL)
    {
      global_offset_table_name =
	ggc_alloc_string ("_GLOBAL_OFFSET_TABLE_", 21);
      ggc_add_string_root (&global_offset_table_name, 1);
    }
  gotsym = gen_rtx_SYMBOL_REF (Pmode, global_offset_table_name);

  if (TARGET_DEEP_BRANCH_PREDICTION)
    {
      if (pic_label_name == NULL)
	{
	  char buf[32];
	  ASM_GENERATE_INTERNAL_LABEL (buf, "LPR", 0);
	  pic_label_name = ggc_alloc_string (buf, -1);
	  ggc_add_string_root (&pic_label_name, 1);
	}
      pclab = gen_rtx_MEM (QImode, gen_rtx_SYMBOL_REF (Pmode, pic_label_name));
    }
  else
    {
      pclab = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
    }

  emit_insn (gen_prologue_get_pc (pic_offset_table_rtx, pclab));

  if (! TARGET_DEEP_BRANCH_PREDICTION)
    emit_insn (gen_popsi1 (pic_offset_table_rtx));

  emit_insn (gen_prologue_set_got (pic_offset_table_rtx, gotsym, pclab));
}
/* Generate an SImode "push" pattern for input ARG.  */

static rtx
gen_push (arg)
     rtx arg;
{
  return gen_rtx_SET (VOIDmode,
		      gen_rtx_MEM (SImode,
				   gen_rtx_PRE_DEC (SImode,
						    stack_pointer_rtx)),
		      arg);
}
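
/* Illustrative note (not from the original sources): for ARG = %ebp
   the pattern above reads

     (set (mem:SI (pre_dec:SI (reg:SI esp))) (reg:SI ebp))

   which the push patterns in i386.md emit as `pushl %ebp'.  */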
/* Return number of registers to be saved on the stack.  */

static int
ix86_nsaved_regs ()
{
  int nregs = 0;
  int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
				  || current_function_uses_const_pool);
  int limit = (frame_pointer_needed
	       ? HARD_FRAME_POINTER_REGNUM : STACK_POINTER_REGNUM);
  int regno;

  for (regno = limit - 1; regno >= 0; regno--)
    if ((regs_ever_live[regno] && ! call_used_regs[regno])
	|| (regno == PIC_OFFSET_TABLE_REGNUM && pic_reg_used))
      nregs++;
  return nregs;
}
/* Return the offset between two registers, one to be eliminated, and the other
   its replacement, at the start of a routine.  */

HOST_WIDE_INT
ix86_initial_elimination_offset (from, to)
     int from;
     int to;
{
  int padding1;
  int nregs;

  /* Stack grows downward:

     [arguments]
					<- ARG_POINTER
     saved pc

     saved frame pointer if frame_pointer_needed
					<- HARD_FRAME_POINTER
     [saved regs]

     [padding1]
					<- FRAME_POINTER
     [frame]

     [padding2]
					<- STACK_POINTER
   */

  if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
    /* Skip saved PC and previous frame pointer.
       Executed only when frame_pointer_needed.  */
    return 8;
  else if (from == FRAME_POINTER_REGNUM
	   && to == HARD_FRAME_POINTER_REGNUM)
    {
      ix86_compute_frame_size (get_frame_size (), &nregs, &padding1, (int *) 0);
      padding1 += nregs * UNITS_PER_WORD;
      return -padding1;
    }
  else
    {
      /* ARG_POINTER or FRAME_POINTER to STACK_POINTER elimination.  */
      int frame_size = frame_pointer_needed ? 8 : 4;
      HOST_WIDE_INT tsize = ix86_compute_frame_size (get_frame_size (),
						     &nregs, &padding1, (int *) 0);

      if (to != STACK_POINTER_REGNUM)
	abort ();
      else if (from == ARG_POINTER_REGNUM)
	return tsize + nregs * UNITS_PER_WORD + frame_size;
      else if (from != FRAME_POINTER_REGNUM)
	abort ();
      else
	return tsize - padding1;
    }
}
/* Compute the size of local storage taking into consideration the
   desired stack alignment which is to be maintained.  Also determine
   the number of registers saved below the local storage.

   PADDING1 returns padding before stack frame and PADDING2 returns
   padding after stack frame;  */

static HOST_WIDE_INT
ix86_compute_frame_size (size, nregs_on_stack, rpadding1, rpadding2)
     HOST_WIDE_INT size;
     int *nregs_on_stack;
     int *rpadding1;
     int *rpadding2;
{
  int nregs;
  int padding1 = 0;
  int padding2 = 0;
  HOST_WIDE_INT total_size;
  int stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
  int offset;
  int preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;

  nregs = ix86_nsaved_regs ();
  total_size = size;

  offset = frame_pointer_needed ? 8 : 4;

  /* Do some sanity checking of stack_alignment_needed and preferred_alignment,
     since the i386 port is the only one using those features that may break
     easily.  */

  if (size && !stack_alignment_needed)
    abort ();
  if (!size && stack_alignment_needed != STACK_BOUNDARY / BITS_PER_UNIT)
    abort ();
  if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
    abort ();
  if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
    abort ();
  if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
    abort ();

  if (stack_alignment_needed < 4)
    stack_alignment_needed = 4;

  offset += nregs * UNITS_PER_WORD;

  if (ACCUMULATE_OUTGOING_ARGS)
    total_size += current_function_outgoing_args_size;

  total_size += offset;

  /* Align start of frame for local function.  */
  padding1 = ((offset + stack_alignment_needed - 1)
	      & -stack_alignment_needed) - offset;
  total_size += padding1;

  /* Align stack boundary.  */
  padding2 = ((total_size + preferred_alignment - 1)
	      & -preferred_alignment) - total_size;

  if (ACCUMULATE_OUTGOING_ARGS)
    padding2 += current_function_outgoing_args_size;

  if (nregs_on_stack)
    *nregs_on_stack = nregs;
  if (rpadding1)
    *rpadding1 = padding1;
  if (rpadding2)
    *rpadding2 = padding2;

  return size + padding1 + padding2;
}
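
/* Worked example (illustrative): with a frame pointer, two saved
   registers and a 16-byte stack_alignment_needed, offset is
   8 + 2*4 = 16, so padding1 = ((16 + 15) & -16) - 16 = 0 and the local
   frame starts 16-byte aligned.  The expression (x + a - 1) & -a
   rounds x up to the next multiple of the power of two a.  */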
/* Emit code to save registers in the prologue.  */

static void
ix86_emit_save_regs ()
{
  register int regno;
  register rtx insn;
  int limit;
  int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
				  || current_function_uses_const_pool);
  limit = (frame_pointer_needed
	   ? HARD_FRAME_POINTER_REGNUM : STACK_POINTER_REGNUM);

  for (regno = limit - 1; regno >= 0; regno--)
    if ((regs_ever_live[regno] && !call_used_regs[regno])
	|| (regno == PIC_OFFSET_TABLE_REGNUM && pic_reg_used))
      {
	insn = emit_insn (gen_push (gen_rtx_REG (SImode, regno)));
	RTX_FRAME_RELATED_P (insn) = 1;
      }
}
/* Expand the prologue into a bunch of separate insns.  */

void
ix86_expand_prologue ()
{
  HOST_WIDE_INT tsize = ix86_compute_frame_size (get_frame_size (), (int *) 0, (int *) 0,
						 (int *) 0);
  rtx insn;
  int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
				  || current_function_uses_const_pool);

  /* Note: AT&T enter does NOT have reversed args.  Enter is probably
     slower on all targets.  Also sdb doesn't like it.  */

  if (frame_pointer_needed)
    {
      insn = emit_insn (gen_push (hard_frame_pointer_rtx));
      RTX_FRAME_RELATED_P (insn) = 1;

      insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  ix86_emit_save_regs ();

  if (tsize == 0)
    ;
  else if (! TARGET_STACK_PROBE || tsize < CHECK_STACK_LIMIT)
    {
      if (frame_pointer_needed)
	insn = emit_insn (gen_pro_epilogue_adjust_stack
			  (stack_pointer_rtx, stack_pointer_rtx,
			   GEN_INT (-tsize), hard_frame_pointer_rtx));
      else
	insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
				      GEN_INT (-tsize)));
      RTX_FRAME_RELATED_P (insn) = 1;
    }
  else
    {
      /* ??? Is this only valid for Win32?  */
      rtx arg0, sym;

      arg0 = gen_rtx_REG (SImode, 0);
      emit_move_insn (arg0, GEN_INT (tsize));

      sym = gen_rtx_MEM (FUNCTION_MODE,
			 gen_rtx_SYMBOL_REF (Pmode, "_alloca"));
      insn = emit_call_insn (gen_call (sym, const0_rtx));

      CALL_INSN_FUNCTION_USAGE (insn)
	= gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_USE (VOIDmode, arg0),
			     CALL_INSN_FUNCTION_USAGE (insn));
    }

#ifdef SUBTARGET_PROLOGUE
  SUBTARGET_PROLOGUE;
#endif

  if (pic_reg_used)
    load_pic_register ();

  /* If we are profiling, make sure no instructions are scheduled before
     the call to mcount.  However, if -fpic, the above call will have
     done that.  */
  if ((profile_flag || profile_block_flag) && ! pic_reg_used)
    emit_insn (gen_blockage ());
}
/* Emit code to add TSIZE to esp value.  Use POP instruction when
   profitable.  */

static void
ix86_emit_epilogue_esp_adjustment (tsize)
     int tsize;
{
  /* If a frame pointer is present, we must be sure to tie the sp
     to the fp so that we don't mis-schedule.  */
  if (frame_pointer_needed)
    emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
					      stack_pointer_rtx,
					      GEN_INT (tsize),
					      hard_frame_pointer_rtx));
  else
    emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
			   GEN_INT (tsize)));
}
/* Emit code to restore saved registers using MOV insns.  First register
   is restored from POINTER + OFFSET.  */
static void
ix86_emit_restore_regs_using_mov (pointer, offset)
     rtx pointer;
     int offset;
{
  int regno;
  int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
				  || current_function_uses_const_pool);
  int limit = (frame_pointer_needed
	       ? HARD_FRAME_POINTER_REGNUM : STACK_POINTER_REGNUM);

  for (regno = 0; regno < limit; regno++)
    if ((regs_ever_live[regno] && !call_used_regs[regno])
	|| (regno == PIC_OFFSET_TABLE_REGNUM && pic_reg_used))
      {
	emit_move_insn (gen_rtx_REG (SImode, regno),
			adj_offsettable_operand (gen_rtx_MEM (SImode,
							      pointer),
						 offset));
	offset += 4;
      }
}
/* Restore function stack, frame, and registers.  */

void
ix86_expand_epilogue (emit_return)
     int emit_return;
{
  int nregs;
  int regno;

  int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
				  || current_function_uses_const_pool);
  int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
  HOST_WIDE_INT offset;
  HOST_WIDE_INT tsize = ix86_compute_frame_size (get_frame_size (), &nregs,
						 (int *) 0, (int *) 0);

  /* Calculate start of saved registers relative to ebp.  */
  offset = -nregs * UNITS_PER_WORD;

#ifdef FUNCTION_BLOCK_PROFILER_EXIT
  if (profile_block_flag == 2)
    {
      FUNCTION_BLOCK_PROFILER_EXIT;
    }
#endif

  /* If we're only restoring one register and sp is not valid then
     use a move instruction to restore the register, since it's
     less work than reloading sp and popping the register.

     The default code results in stack adjustment using an add/lea
     instruction, while this code results in a LEAVE instruction (or
     discrete equivalent), so it is profitable in some other cases as
     well.  Especially when there are no registers to restore.  We also
     use this code when TARGET_USE_LEAVE and there is exactly one
     register to pop.  This heuristic may need some tuning in the
     future.  */
  if ((!sp_valid && nregs <= 1)
      || (frame_pointer_needed && !nregs && tsize)
      || (frame_pointer_needed && TARGET_USE_LEAVE && !optimize_size
	  && nregs == 1))
    {
      /* Restore registers.  We can use ebp or esp to address the memory
	 locations.  If both are available, default to ebp, since offsets
	 are known to be small.  Only exception is esp pointing directly to the
	 end of block of saved registers, where we may simplify addressing
	 mode.  */

      if (!frame_pointer_needed || (sp_valid && !tsize))
	ix86_emit_restore_regs_using_mov (stack_pointer_rtx, tsize);
      else
	ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx, offset);

      if (!frame_pointer_needed)
	ix86_emit_epilogue_esp_adjustment (tsize + nregs * UNITS_PER_WORD);
      /* If not an i386, mov & pop is faster than "leave".  */
      else if (TARGET_USE_LEAVE || optimize_size)
	emit_insn (gen_leave ());
      else
	{
	  emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
						    hard_frame_pointer_rtx,
						    const0_rtx,
						    hard_frame_pointer_rtx));
	  emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
	}
    }
  else
    {
      /* First step is to deallocate the stack frame so that we can
	 pop the registers.  */
      if (!sp_valid)
	{
	  if (!frame_pointer_needed)
	    abort ();
	  emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
						    hard_frame_pointer_rtx,
						    GEN_INT (offset),
						    hard_frame_pointer_rtx));
	}
      else if (tsize)
	ix86_emit_epilogue_esp_adjustment (tsize);

      for (regno = 0; regno < STACK_POINTER_REGNUM; regno++)
	if ((regs_ever_live[regno] && !call_used_regs[regno])
	    || (regno == PIC_OFFSET_TABLE_REGNUM && pic_reg_used))
	  emit_insn (gen_popsi1 (gen_rtx_REG (SImode, regno)));
    }

  /* Sibcall epilogues don't want a return instruction.  */
  if (! emit_return)
    return;

  if (current_function_pops_args && current_function_args_size)
    {
      rtx popc = GEN_INT (current_function_pops_args);

      /* i386 can only pop 64K bytes.  If asked to pop more, pop
	 return address, do explicit add, and jump indirectly to the
	 caller.  */
      if (current_function_pops_args >= 65536)
	{
	  rtx ecx = gen_rtx_REG (SImode, 2);

	  emit_insn (gen_popsi1 (ecx));
	  emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
	  emit_jump_insn (gen_return_indirect_internal (ecx));
	}
      else
	emit_jump_insn (gen_return_pop_internal (popc));
    }
  else
    emit_jump_insn (gen_return_internal ());
}
2172 /* Extract the parts of an RTL expression that is a valid memory address
2173 for an instruction. Return false if the structure of the address is grossly off.
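/* A worked example (illustrative): the address
     (plus (plus (mult (reg %eax) (const_int 4)) (reg %ebx)) (const_int 8))
   decomposes into index = %eax, scale = 4, base = %ebx, disp = 8,
   i.e. the AT&T operand "8(%ebx,%eax,4)".  */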
2177 ix86_decompose_address (addr, out)
2179 struct ix86_address *out;
2181 rtx base = NULL_RTX;
2182 rtx index = NULL_RTX;
2183 rtx disp = NULL_RTX;
2184 HOST_WIDE_INT scale = 1;
2185 rtx scale_rtx = NULL_RTX;
2187 if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG)
2189 else if (GET_CODE (addr) == PLUS)
2191 rtx op0 = XEXP (addr, 0);
2192 rtx op1 = XEXP (addr, 1);
2193 enum rtx_code code0 = GET_CODE (op0);
2194 enum rtx_code code1 = GET_CODE (op1);
2196 if (code0 == REG || code0 == SUBREG)
2198 if (code1 == REG || code1 == SUBREG)
2199 index = op0, base = op1; /* index + base */
2201 base = op0, disp = op1; /* base + displacement */
2203 else if (code0 == MULT)
2205 index = XEXP (op0, 0);
2206 scale_rtx = XEXP (op0, 1);
2207 if (code1 == REG || code1 == SUBREG)
2208 base = op1; /* index*scale + base */
2210 disp = op1; /* index*scale + disp */
2212 else if (code0 == PLUS && GET_CODE (XEXP (op0, 0)) == MULT)
2214 index = XEXP (XEXP (op0, 0), 0); /* index*scale + base + disp */
2215 scale_rtx = XEXP (XEXP (op0, 0), 1);
2216 base = XEXP (op0, 1);
2219 else if (code0 == PLUS)
2221 index = XEXP (op0, 0); /* index + base + disp */
2222 base = XEXP (op0, 1);
2228 else if (GET_CODE (addr) == MULT)
2230 index = XEXP (addr, 0); /* index*scale */
2231 scale_rtx = XEXP (addr, 1);
2233 else if (GET_CODE (addr) == ASHIFT)
2237 /* We're called for lea too, which implements ashift on occasion. */
2238 index = XEXP (addr, 0);
2239 tmp = XEXP (addr, 1);
2240 if (GET_CODE (tmp) != CONST_INT)
2242 scale = INTVAL (tmp);
2243 if ((unsigned HOST_WIDE_INT) scale > 3)
2248 disp = addr; /* displacement */
2250 /* Extract the integral value of scale. */
2253 if (GET_CODE (scale_rtx) != CONST_INT)
2255 scale = INTVAL (scale_rtx);
2258 /* Allow arg pointer and stack pointer as index if there is no scaling. */
2259 if (base && index && scale == 1
2260 && (index == arg_pointer_rtx || index == frame_pointer_rtx
2261 || index == stack_pointer_rtx))
2268 /* Special case: %ebp cannot be encoded as a base without a displacement. */
2269 if ((base == hard_frame_pointer_rtx
2270 || base == frame_pointer_rtx
2271 || base == arg_pointer_rtx) && !disp)
2274 /* Special case: on K6, [%esi] causes the instruction to be vector decoded.
2275 Avoid this by transforming it to [%esi+0]. */
2276 if (ix86_cpu == PROCESSOR_K6 && !optimize_size
2277 && base && !index && !disp
2279 && REGNO_REG_CLASS (REGNO (base)) == SIREG)
2282 /* Special case: encode reg+reg instead of reg*2. */
2283 if (!base && index && scale && scale == 2)
2284 base = index, scale = 1;
2286 /* Special case: scaling cannot be encoded without base or displacement. */
2287 if (!base && !disp && index && scale != 1)
2298 /* Return the cost of the memory address x.
2299 For i386, it is better to use a complex address than to let gcc copy
2300 the address into a reg and make a new pseudo. But not if the address
2301 requires two regs; that would mean more pseudos with longer lifetimes. */
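/* E.g. (illustrative) "4(%ebx)" is preferred over first computing
   %ebx + 4 into a fresh pseudo, while "(%ebx,%ecx)" already ties up
   two registers and is costed higher by the tests below.  */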
2304 ix86_address_cost (x)
2307 struct ix86_address parts;
2310 if (!ix86_decompose_address (x, &parts))
2313 /* More complex memory references are better. */
2314 if (parts.disp && parts.disp != const0_rtx)
2317 /* Attempt to minimize number of registers in the address. */
2319 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
2321 && (!REG_P (parts.index)
2322 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
2326 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
2328 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
2329 && parts.base != parts.index)
2332 /* The AMD-K6 doesn't like addresses with the ModR/M byte set to 00_xxx_100b,
2333 since its predecode logic can't detect the length of such instructions
2334 and they degenerate to vector decoding. Increase the cost of such
2335 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
2336 to split such addresses or even refuse them entirely.
2338 The following addressing modes are affected:
2343 The first and last cases may be avoidable by explicitly coding the zero in
2344 the memory address, but I don't have an AMD-K6 machine handy to check this theory.
2348 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
2349 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
2350 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
2356 /* If X is a machine specific address (i.e. a symbol or label being
2357 referenced as a displacement from the GOT implemented using an
2358 UNSPEC), then return the base term. Otherwise return X. */
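/* E.g. (illustrative) for
     (plus pic_offset_table_rtx
           (const (plus (unspec [(symbol_ref "x")] 7) (const_int 4))))
   the base term returned is (symbol_ref "x").  */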
2361 ix86_find_base_term (x)
2366 if (GET_CODE (x) != PLUS
2367 || XEXP (x, 0) != pic_offset_table_rtx
2368 || GET_CODE (XEXP (x, 1)) != CONST)
2371 term = XEXP (XEXP (x, 1), 0);
2373 if (GET_CODE (term) == PLUS && GET_CODE (XEXP (term, 1)) == CONST_INT)
2374 term = XEXP (term, 0);
2376 if (GET_CODE (term) != UNSPEC
2377 || XVECLEN (term, 0) != 1
2378 || XINT (term, 1) != 7)
2381 term = XVECEXP (term, 0, 0);
2383 if (GET_CODE (term) != SYMBOL_REF
2384 && GET_CODE (term) != LABEL_REF)
2390 /* Determine if a given CONST RTX is a valid memory displacement in PIC mode. */
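/* Accepted shapes look like (illustrative):
     (const (unspec [(symbol_ref "x")] 6))                      x@GOT
     (const (plus (unspec [(symbol_ref "x")] 7) (const_int 4))) x@GOTOFF+4  */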
2394 legitimate_pic_address_disp_p (disp)
2397 if (GET_CODE (disp) != CONST)
2399 disp = XEXP (disp, 0);
2401 if (GET_CODE (disp) == PLUS)
2403 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
2405 disp = XEXP (disp, 0);
2408 if (GET_CODE (disp) != UNSPEC
2409 || XVECLEN (disp, 0) != 1)
2412 /* Must be @GOT or @GOTOFF. */
2413 if (XINT (disp, 1) != 6
2414 && XINT (disp, 1) != 7)
2417 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
2418 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
2424 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
2425 memory address for an instruction. The MODE argument is the machine mode
2426 for the MEM expression that wants to use this address.
2428 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
2429 convert common non-canonical forms to canonical form so that they will be recognized. */
2433 legitimate_address_p (mode, addr, strict)
2434 enum machine_mode mode;
2438 struct ix86_address parts;
2439 rtx base, index, disp;
2440 HOST_WIDE_INT scale;
2441 const char *reason = NULL;
2442 rtx reason_rtx = NULL_RTX;
2444 if (TARGET_DEBUG_ADDR)
2447 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
2448 GET_MODE_NAME (mode), strict);
2452 if (! ix86_decompose_address (addr, &parts))
2454 reason = "decomposition failed";
2459 index = parts.index;
2461 scale = parts.scale;
2463 /* Validate base register.
2465 Don't allow SUBREGs here; they can lead to spill failures when the base
2466 is one word out of a two word structure, which is represented internally as a DImode int. */
2473 if (GET_CODE (base) != REG)
2475 reason = "base is not a register";
2479 if (GET_MODE (base) != Pmode)
2481 reason = "base is not in Pmode";
2485 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (base))
2486 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (base)))
2488 reason = "base is not valid";
2493 /* Validate index register.
2495 Don't allow SUBREGs here; they can lead to spill failures when the index
2496 is one word out of a two word structure, which is represented internally as a DImode int. */
2503 if (GET_CODE (index) != REG)
2505 reason = "index is not a register";
2509 if (GET_MODE (index) != Pmode)
2511 reason = "index is not in Pmode";
2515 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (index))
2516 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (index)))
2518 reason = "index is not valid";
2523 /* Validate scale factor. */
2526 reason_rtx = GEN_INT (scale);
2529 reason = "scale without index";
2533 if (scale != 2 && scale != 4 && scale != 8)
2535 reason = "scale is not a valid multiplier";
2540 /* Validate displacement. */
2545 if (!CONSTANT_ADDRESS_P (disp))
2547 reason = "displacement is not constant";
2551 if (GET_CODE (disp) == CONST_DOUBLE)
2553 reason = "displacement is a const_double";
2557 if (flag_pic && SYMBOLIC_CONST (disp))
2559 if (! legitimate_pic_address_disp_p (disp))
2561 reason = "displacement is an invalid pic construct";
2565 /* This code used to verify that a symbolic pic displacement
2566 includes the pic_offset_table_rtx register.
2568 While this is a good idea, unfortunately these constructs may
2569 be created by the "adds using lea" optimization for incorrect code.
2578 Such code is nonsensical, but results in addressing the
2579 GOT table with a pic_offset_table_rtx base. We can't
2580 just refuse it easily, since it gets matched by the
2581 "addsi3" pattern, which later gets split to lea in the
2582 case the output register differs from the input. While this
2583 could be handled by a separate addsi pattern for this case
2584 that never results in lea, disabling this test seems to be the
2585 easier and correct fix for the crash. */
2587 else if (HALF_PIC_P ())
2589 if (! HALF_PIC_ADDRESS_P (disp)
2590 || (base != NULL_RTX || index != NULL_RTX))
2592 reason = "displacement is an invalid half-pic reference";
2598 /* Everything looks valid. */
2599 if (TARGET_DEBUG_ADDR)
2600 fprintf (stderr, "Success.\n");
2604 if (TARGET_DEBUG_ADDR)
2606 fprintf (stderr, "Error: %s\n", reason);
2607 debug_rtx (reason_rtx);
2612 /* Return a unique alias set for the GOT. */
2614 static HOST_WIDE_INT
2615 ix86_GOT_alias_set ()
2617 static HOST_WIDE_INT set = -1;
2619 set = new_alias_set ();
2623 /* Return a legitimate reference for ORIG (an address) using the
2624 register REG. If REG is 0, a new pseudo is generated.
2626 There are two types of references that must be handled:
2628 1. Global data references must load the address from the GOT, via
2629 the PIC reg. An insn is emitted to do this load, and the reg is returned.
2632 2. Static data references, constant pool addresses, and code labels
2633 compute the address as an offset from the GOT, whose base is in
2634 the PIC reg. Static data objects have SYMBOL_REF_FLAG set to
2635 differentiate them from global data objects. The returned
2636 address is the PIC reg + an unspec constant.
2638 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
2639 reg also appears in the address. */
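/* A sketch of the two results (illustrative RTL): a static datum "s"
   yields (plus pic_offset_table_rtx (const (unspec [(symbol_ref "s")] 7))),
   i.e. s@GOTOFF, while a global "g" yields the load
   (mem (plus pic_offset_table_rtx (const (unspec [(symbol_ref "g")] 6)))),
   i.e. a fetch of g's address from the GOT.  */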
2642 legitimize_pic_address (orig, reg)
2650 if (GET_CODE (addr) == LABEL_REF
2651 || (GET_CODE (addr) == SYMBOL_REF
2652 && (CONSTANT_POOL_ADDRESS_P (addr)
2653 || SYMBOL_REF_FLAG (addr))))
2655 /* This symbol may be referenced via a displacement from the PIC
2656 base address (@GOTOFF). */
2658 current_function_uses_pic_offset_table = 1;
2659 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), 7);
2660 new = gen_rtx_CONST (Pmode, new);
2661 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
2665 emit_move_insn (reg, new);
2669 else if (GET_CODE (addr) == SYMBOL_REF)
2671 /* This symbol must be referenced via a load from the
2672 Global Offset Table (@GOT). */
2674 current_function_uses_pic_offset_table = 1;
2675 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), 6);
2676 new = gen_rtx_CONST (Pmode, new);
2677 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
2678 new = gen_rtx_MEM (Pmode, new);
2679 RTX_UNCHANGING_P (new) = 1;
2680 MEM_ALIAS_SET (new) = ix86_GOT_alias_set ();
2683 reg = gen_reg_rtx (Pmode);
2684 emit_move_insn (reg, new);
2689 if (GET_CODE (addr) == CONST)
2691 addr = XEXP (addr, 0);
2692 if (GET_CODE (addr) == UNSPEC)
2694 /* Check that the unspec is one of the ones we generate? */
2696 else if (GET_CODE (addr) != PLUS)
2699 if (GET_CODE (addr) == PLUS)
2701 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
2703 /* Check first to see if this is a constant offset from a @GOTOFF
2704 symbol reference. */
2705 if ((GET_CODE (op0) == LABEL_REF
2706 || (GET_CODE (op0) == SYMBOL_REF
2707 && (CONSTANT_POOL_ADDRESS_P (op0)
2708 || SYMBOL_REF_FLAG (op0))))
2709 && GET_CODE (op1) == CONST_INT)
2711 current_function_uses_pic_offset_table = 1;
2712 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0), 7);
2713 new = gen_rtx_PLUS (Pmode, new, op1);
2714 new = gen_rtx_CONST (Pmode, new);
2715 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
2719 emit_move_insn (reg, new);
2725 base = legitimize_pic_address (XEXP (addr, 0), reg);
2726 new = legitimize_pic_address (XEXP (addr, 1),
2727 base == reg ? NULL_RTX : reg);
2729 if (GET_CODE (new) == CONST_INT)
2730 new = plus_constant (base, INTVAL (new));
2733 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
2735 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
2736 new = XEXP (new, 1);
2738 new = gen_rtx_PLUS (Pmode, base, new);
2746 /* Try machine-dependent ways of modifying an illegitimate address
2747 to be legitimate. If we find one, return the new, valid address.
2748 This macro is used in only one place: `memory_address' in explow.c.
2750 OLDX is the address as it was before break_out_memory_refs was called.
2751 In some cases it is useful to look at this to decide what needs to be done.
2753 MODE and WIN are passed so that this macro can use
2754 GO_IF_LEGITIMATE_ADDRESS.
2756 It is always safe for this macro to do nothing. It exists to recognize
2757 opportunities to optimize the output.
2759 For the 80386, we handle X+REG by loading X into a register R and
2760 using R+REG. R will go in a general reg and indexing will be used.
2761 However, if REG is a broken-out memory address or multiplication,
2762 nothing needs to be done because REG can certainly go in a general reg.
2764 When -fpic is used, special handling is needed for symbolic references.
2765 See comments by legitimize_pic_address in i386.c for details. */
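/* E.g. (illustrative) x = (plus (symbol_ref "a") (reg %ebx)) without -fpic
   is handled below by forcing the symbol into a fresh register R, giving
   the directly encodable (plus R (reg %ebx)).  */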
2768 legitimize_address (x, oldx, mode)
2770 register rtx oldx ATTRIBUTE_UNUSED;
2771 enum machine_mode mode;
2776 if (TARGET_DEBUG_ADDR)
2778 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
2779 GET_MODE_NAME (mode));
2783 if (flag_pic && SYMBOLIC_CONST (x))
2784 return legitimize_pic_address (x, 0);
2786 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
2787 if (GET_CODE (x) == ASHIFT
2788 && GET_CODE (XEXP (x, 1)) == CONST_INT
2789 && (log = (unsigned)exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
2792 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
2793 GEN_INT (1 << log));
2796 if (GET_CODE (x) == PLUS)
2798 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
2800 if (GET_CODE (XEXP (x, 0)) == ASHIFT
2801 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
2802 && (log = (unsigned)exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
2805 XEXP (x, 0) = gen_rtx_MULT (Pmode,
2806 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
2807 GEN_INT (1 << log));
2810 if (GET_CODE (XEXP (x, 1)) == ASHIFT
2811 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
2812 && (log = (unsigned)exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
2815 XEXP (x, 1) = gen_rtx_MULT (Pmode,
2816 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
2817 GEN_INT (1 << log));
2820 /* Put multiply first if it isn't already. */
2821 if (GET_CODE (XEXP (x, 1)) == MULT)
2823 rtx tmp = XEXP (x, 0);
2824 XEXP (x, 0) = XEXP (x, 1);
2829 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
2830 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
2831 created by virtual register instantiation, register elimination, and
2832 similar optimizations. */
2833 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
2836 x = gen_rtx_PLUS (Pmode,
2837 gen_rtx_PLUS (Pmode, XEXP (x, 0),
2838 XEXP (XEXP (x, 1), 0)),
2839 XEXP (XEXP (x, 1), 1));
2843 /* Canonicalize (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
2844 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
2845 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
2846 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
2847 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
2848 && CONSTANT_P (XEXP (x, 1)))
2851 rtx other = NULL_RTX;
2853 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
2855 constant = XEXP (x, 1);
2856 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
2858 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
2860 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
2861 other = XEXP (x, 1);
2869 x = gen_rtx_PLUS (Pmode,
2870 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
2871 XEXP (XEXP (XEXP (x, 0), 1), 0)),
2872 plus_constant (other, INTVAL (constant)));
2876 if (changed && legitimate_address_p (mode, x, FALSE))
2879 if (GET_CODE (XEXP (x, 0)) == MULT)
2882 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
2885 if (GET_CODE (XEXP (x, 1)) == MULT)
2888 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
2892 && GET_CODE (XEXP (x, 1)) == REG
2893 && GET_CODE (XEXP (x, 0)) == REG)
2896 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
2899 x = legitimize_pic_address (x, 0);
2902 if (changed && legitimate_address_p (mode, x, FALSE))
2905 if (GET_CODE (XEXP (x, 0)) == REG)
2907 register rtx temp = gen_reg_rtx (Pmode);
2908 register rtx val = force_operand (XEXP (x, 1), temp);
2910 emit_move_insn (temp, val);
2916 else if (GET_CODE (XEXP (x, 1)) == REG)
2918 register rtx temp = gen_reg_rtx (Pmode);
2919 register rtx val = force_operand (XEXP (x, 0), temp);
2921 emit_move_insn (temp, val);
2931 /* Print an integer constant expression in assembler syntax. Addition
2932 and subtraction are the only arithmetic that may appear in these
2933 expressions. FILE is the stdio stream to write to, X is the rtx, and
2934 CODE is the operand print code from the output string. */
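/* E.g. (illustrative) (const (plus (symbol_ref "f") (const_int 4))) prints
   as "f+4", and with print code 'P' a symbol lacking SYMBOL_REF_FLAG
   prints as "f@PLT".  */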
2937 output_pic_addr_const (file, x, code)
2944 switch (GET_CODE (x))
2954 assemble_name (file, XSTR (x, 0));
2955 if (code == 'P' && ! SYMBOL_REF_FLAG (x))
2956 fputs ("@PLT", file);
2963 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
2964 assemble_name (asm_out_file, buf);
2968 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
2972 /* This used to output parentheses around the expression,
2973 but that does not work on the 386 (either ATT or BSD assembler). */
2974 output_pic_addr_const (file, XEXP (x, 0), code);
2978 if (GET_MODE (x) == VOIDmode)
2980 /* We can use %d if the number is <32 bits and positive. */
2981 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
2982 fprintf (file, "0x%lx%08lx",
2983 (unsigned long) CONST_DOUBLE_HIGH (x),
2984 (unsigned long) CONST_DOUBLE_LOW (x));
2986 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
2989 /* We can't handle floating point constants;
2990 PRINT_OPERAND must handle them. */
2991 output_operand_lossage ("floating constant misused");
2995 /* Some assemblers need integer constants to appear first. */
2996 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
2998 output_pic_addr_const (file, XEXP (x, 0), code);
3000 output_pic_addr_const (file, XEXP (x, 1), code);
3002 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
3004 output_pic_addr_const (file, XEXP (x, 1), code);
3006 output_pic_addr_const (file, XEXP (x, 0), code);
3013 putc (ASSEMBLER_DIALECT ? '(' : '[', file);
3014 output_pic_addr_const (file, XEXP (x, 0), code);
3016 output_pic_addr_const (file, XEXP (x, 1), code);
3017 putc (ASSEMBLER_DIALECT ? ')' : ']', file);
3021 if (XVECLEN (x, 0) != 1)
3023 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
3024 switch (XINT (x, 1))
3027 fputs ("@GOT", file);
3030 fputs ("@GOTOFF", file);
3033 fputs ("@PLT", file);
3036 output_operand_lossage ("invalid UNSPEC as operand");
3042 output_operand_lossage ("invalid expression as operand");
3046 /* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
3047 We need to handle our special PIC relocations. */
3050 i386_dwarf_output_addr_const (file, x)
3054 fprintf (file, "%s", INT_ASM_OP);
3056 output_pic_addr_const (file, x, '\0');
3058 output_addr_const (file, x);
3062 /* In the name of slightly smaller debug output, and to cater to
3063 general assembler lossage, recognize PIC+GOTOFF and turn it back
3064 into a direct symbol reference. */
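/* E.g. (illustrative)
     (plus (reg pic) (const (unspec [(symbol_ref "x")] 7)))
   simplifies back to plain (symbol_ref "x") for the debug info.  */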
3067 i386_simplify_dwarf_addr (orig_x)
3072 if (GET_CODE (x) != PLUS
3073 || GET_CODE (XEXP (x, 0)) != REG
3074 || GET_CODE (XEXP (x, 1)) != CONST)
3077 x = XEXP (XEXP (x, 1), 0);
3078 if (GET_CODE (x) == UNSPEC
3079 && XINT (x, 1) == 7)
3080 return XVECEXP (x, 0, 0);
3082 if (GET_CODE (x) == PLUS
3083 && GET_CODE (XEXP (x, 0)) == UNSPEC
3084 && GET_CODE (XEXP (x, 1)) == CONST_INT
3085 && XINT (XEXP (x, 0), 1) == 7)
3086 return gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
3092 put_condition_code (code, mode, reverse, fp, file)
3094 enum machine_mode mode;
3101 code = reverse_condition (code);
3112 if (mode != CCmode && mode != CCNOmode && mode != CCGCmode)
3117 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
3118 Those same assemblers have the same but opposite lossage on cmov. */
3121 suffix = fp ? "nbe" : "a";
3124 if (mode == CCNOmode || mode == CCGOCmode)
3126 else if (mode == CCmode || mode == CCGCmode)
3137 if (mode == CCNOmode || mode == CCGOCmode)
3139 else if (mode == CCmode || mode == CCGCmode)
3148 suffix = fp ? "nb" : "ae";
3151 if (mode != CCmode && mode != CCGCmode && mode != CCNOmode)
3169 fputs (suffix, file);
3173 print_reg (x, code, file)
3178 if (REGNO (x) == ARG_POINTER_REGNUM
3179 || REGNO (x) == FRAME_POINTER_REGNUM
3180 || REGNO (x) == FLAGS_REG
3181 || REGNO (x) == FPSR_REG)
3184 if (ASSEMBLER_DIALECT == 0 || USER_LABEL_PREFIX[0] == 0)
3189 else if (code == 'b')
3191 else if (code == 'k')
3193 else if (code == 'y')
3195 else if (code == 'h')
3197 else if (code == 'm' || MMX_REG_P (x))
3200 code = GET_MODE_SIZE (GET_MODE (x));
3205 fputs (hi_reg_name[REGNO (x)], file);
3208 if (STACK_TOP_P (x))
3210 fputs ("st(0)", file);
3222 fputs (hi_reg_name[REGNO (x)], file);
3225 fputs (qi_reg_name[REGNO (x)], file);
3228 fputs (qi_high_reg_name[REGNO (x)], file);
3236 L,W,B,Q,S,T -- print the opcode suffix for the specified size of operand.
3237 C -- print opcode suffix for set/cmov insn.
3238 c -- like C, but print reversed condition
3239 R -- print the prefix for register names.
3240 z -- print the opcode suffix for the size of the current operand.
3241 * -- print a star (in certain assembler syntax)
3242 A -- print an absolute memory reference.
3243 w -- print the operand as if it's a "word" (HImode) even if it isn't.
3244 s -- print a shift double count, followed by the assembler's argument delimiter.
3246 b -- print the QImode name of the register for the indicated operand.
3247 %b0 would print %al if operands[0] is reg 0.
3248 w -- likewise, print the HImode name of the register.
3249 k -- likewise, print the SImode name of the register.
3250 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
3251 y -- print "st(0)" instead of "st" as a register.
3252 m -- print "st(n)" as an mmx register. */
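/* For instance (illustrative, AT&T dialect): with operands[0] = (reg:SI 0),
   "%k0" prints "%eax", "%w0" prints "%ax", "%b0" prints "%al" and
   "%h0" prints "%ah".  */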
3255 print_operand (file, x, code)
3265 if (ASSEMBLER_DIALECT == 0)
3270 if (ASSEMBLER_DIALECT == 0)
3272 else if (ASSEMBLER_DIALECT == 1)
3274 /* Intel syntax. For absolute addresses, registers should not
3275 be surrounded by braces. */
3276 if (GET_CODE (x) != REG)
3279 PRINT_OPERAND (file, x, 0);
3285 PRINT_OPERAND (file, x, 0);
3290 if (ASSEMBLER_DIALECT == 0)
3295 if (ASSEMBLER_DIALECT == 0)
3300 if (ASSEMBLER_DIALECT == 0)
3305 if (ASSEMBLER_DIALECT == 0)
3310 if (ASSEMBLER_DIALECT == 0)
3315 if (ASSEMBLER_DIALECT == 0)
3320 /* 387 opcodes don't get size suffixes if the operands are registers. */
3323 if (STACK_REG_P (x))
3326 /* Derive the opcode suffix from the size of the operand. */
3327 switch (GET_MODE_SIZE (GET_MODE (x)))
3330 #ifdef HAVE_GAS_FILDS_FISTS
3336 if (GET_MODE (x) == SFmode)
3350 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
3352 #ifdef GAS_MNEMONICS
3378 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
3380 PRINT_OPERAND (file, x, 0);
3386 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
3389 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
3392 /* Like above, but reverse condition */
3394 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
3397 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
3403 sprintf (str, "invalid operand code `%c'", code);
3404 output_operand_lossage (str);
3409 if (GET_CODE (x) == REG)
3411 PRINT_REG (x, code, file);
3414 else if (GET_CODE (x) == MEM)
3416 /* No `byte ptr' prefix for call instructions. */
3417 if (ASSEMBLER_DIALECT != 0 && code != 'X' && code != 'P')
3420 switch (GET_MODE_SIZE (GET_MODE (x)))
3422 case 1: size = "BYTE"; break;
3423 case 2: size = "WORD"; break;
3424 case 4: size = "DWORD"; break;
3425 case 8: size = "QWORD"; break;
3426 case 12: size = "XWORD"; break;
3427 case 16: size = "XMMWORD"; break;
3432 /* Check for explicit size override (codes 'b', 'w' and 'k') */
3435 else if (code == 'w')
3437 else if (code == 'k')
3441 fputs (" PTR ", file);
3445 if (flag_pic && CONSTANT_ADDRESS_P (x))
3446 output_pic_addr_const (file, x, code);
3451 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
3456 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3457 REAL_VALUE_TO_TARGET_SINGLE (r, l);
3459 if (ASSEMBLER_DIALECT == 0)
3461 fprintf (file, "0x%lx", l);
3464 /* These float cases don't actually occur as immediate operands. */
3465 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
3470 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3471 REAL_VALUE_TO_DECIMAL (r, "%.22e", dstr);
3472 fprintf (file, "%s", dstr);
3475 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == XFmode)
3480 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3481 REAL_VALUE_TO_DECIMAL (r, "%.22e", dstr);
3482 fprintf (file, "%s", dstr);
3488 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
3490 if (ASSEMBLER_DIALECT == 0)
3493 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
3494 || GET_CODE (x) == LABEL_REF)
3496 if (ASSEMBLER_DIALECT == 0)
3499 fputs ("OFFSET FLAT:", file);
3502 if (GET_CODE (x) == CONST_INT)
3503 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
3505 output_pic_addr_const (file, x, code);
3507 output_addr_const (file, x);
3511 /* Print a memory operand whose address is ADDR. */
3514 print_operand_address (file, addr)
3518 struct ix86_address parts;
3519 rtx base, index, disp;
3522 if (! ix86_decompose_address (addr, &parts))
3526 index = parts.index;
3528 scale = parts.scale;
3530 if (!base && !index)
3532 /* A displacement-only address requires special attention. */
3534 if (GET_CODE (disp) == CONST_INT)
3536 if (ASSEMBLER_DIALECT != 0)
3538 if (USER_LABEL_PREFIX[0] == 0)
3540 fputs ("ds:", file);
3542 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (addr));
3545 output_pic_addr_const (file, addr, 0);
3547 output_addr_const (file, addr);
3551 if (ASSEMBLER_DIALECT == 0)
3556 output_pic_addr_const (file, disp, 0);
3557 else if (GET_CODE (disp) == LABEL_REF)
3558 output_asm_label (disp);
3560 output_addr_const (file, disp);
3565 PRINT_REG (base, 0, file);
3569 PRINT_REG (index, 0, file);
3571 fprintf (file, ",%d", scale);
3577 rtx offset = NULL_RTX;
3581 /* Pull out the offset of a symbol; print any symbol itself. */
3582 if (GET_CODE (disp) == CONST
3583 && GET_CODE (XEXP (disp, 0)) == PLUS
3584 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
3586 offset = XEXP (XEXP (disp, 0), 1);
3587 disp = gen_rtx_CONST (VOIDmode,
3588 XEXP (XEXP (disp, 0), 0));
3592 output_pic_addr_const (file, disp, 0);
3593 else if (GET_CODE (disp) == LABEL_REF)
3594 output_asm_label (disp);
3595 else if (GET_CODE (disp) == CONST_INT)
3598 output_addr_const (file, disp);
3604 PRINT_REG (base, 0, file);
3607 if (INTVAL (offset) >= 0)
3609 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
3613 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
3620 PRINT_REG (index, 0, file);
3622 fprintf (file, "*%d", scale);
3629 /* Split one or more DImode RTL references into pairs of SImode
3630 references. The RTL can be REG, offsettable MEM, integer constant, or
3631 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
3632 split and "num" is its length. lo_half and hi_half are output arrays
3633 that parallel "operands". */
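/* Typical use (a hedged sketch, mirroring output_fix_trunc below):
     split_di (operands + 0, 1, xops + 0, xops + 1);
   after which xops[0] and xops[1] are the low and high SImode halves
   of operands[0].  */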
3636 split_di (operands, num, lo_half, hi_half)
3639 rtx lo_half[], hi_half[];
3643 rtx op = operands[num];
3644 if (CONSTANT_P (op))
3645 split_double (op, &lo_half[num], &hi_half[num]);
3646 else if (! reload_completed)
3648 lo_half[num] = gen_lowpart (SImode, op);
3649 hi_half[num] = gen_highpart (SImode, op);
3651 else if (GET_CODE (op) == REG)
3653 lo_half[num] = gen_rtx_REG (SImode, REGNO (op));
3654 hi_half[num] = gen_rtx_REG (SImode, REGNO (op) + 1);
3656 else if (offsettable_memref_p (op))
3658 rtx lo_addr = XEXP (op, 0);
3659 rtx hi_addr = XEXP (adj_offsettable_operand (op, 4), 0);
3660 lo_half[num] = change_address (op, SImode, lo_addr);
3661 hi_half[num] = change_address (op, SImode, hi_addr);
3668 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
3669 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
3670 is the expression of the binary operation. The output may either be
3671 emitted here, or returned to the caller, like all output_* functions.
3673 There is no guarantee that the operands are the same mode, as they
3674 might be within FLOAT or FLOAT_EXTEND expressions. */
3676 #ifndef SYSV386_COMPAT
3677 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
3678 wants to fix the assemblers because that causes incompatibility
3679 with gcc. No-one wants to fix gcc because that causes
3680 incompatibility with assemblers... You can use the option of
3681 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
3682 #define SYSV386_COMPAT 1
3686 output_387_binary_op (insn, operands)
3690 static char buf[30];
3693 #ifdef ENABLE_CHECKING
3694 /* Even if we do not want to check the inputs, this documents the input
3695 constraints, which helps in understanding the following code. */
3696 if (STACK_REG_P (operands[0])
3697 && ((REG_P (operands[1])
3698 && REGNO (operands[0]) == REGNO (operands[1])
3699 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
3700 || (REG_P (operands[2])
3701 && REGNO (operands[0]) == REGNO (operands[2])
3702 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
3703 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
3709 switch (GET_CODE (operands[3]))
3712 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
3713 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
3720 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
3721 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
3728 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
3729 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
3736 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
3737 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
3749 switch (GET_CODE (operands[3]))
3753 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
3755 rtx temp = operands[2];
3756 operands[2] = operands[1];
3760 /* We know operands[0] == operands[1]. */
3762 if (GET_CODE (operands[2]) == MEM)
3768 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
3770 if (STACK_TOP_P (operands[0]))
3771 /* How is it that we are storing to a dead operand[2]?
3772 Well, presumably operands[1] is dead too. We can't
3773 store the result to st(0) as st(0) gets popped on this
3774 instruction. Instead store to operands[2] (which I
3775 think has to be st(1)). st(1) will be popped later.
3776 gcc <= 2.8.1 didn't have this check and generated
3777 assembly code that the Unixware assembler rejected. */
3778 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
3780 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
3784 if (STACK_TOP_P (operands[0]))
3785 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
3787 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
3792 if (GET_CODE (operands[1]) == MEM)
3798 if (GET_CODE (operands[2]) == MEM)
3804 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
3807 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
3808 derived assemblers, confusingly reverse the direction of
3809 the operation for fsub{r} and fdiv{r} when the
3810 destination register is not st(0). The Intel assembler
3811 doesn't have this brain damage. Read !SYSV386_COMPAT to
3812 figure out what the hardware really does. */
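/* Illustrative: for st(1) = st(1) - st(0) the two dialects disagree on
   whether the mnemonic needs the "r" suffix, which is why the {att|intel}
   template alternatives below are spelled differently on each side.  */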
3813 if (STACK_TOP_P (operands[0]))
3814 p = "{p\t%0, %2|rp\t%2, %0}";
3816 p = "{rp\t%2, %0|p\t%0, %2}";
3818 if (STACK_TOP_P (operands[0]))
3819 /* As above for fmul/fadd, we can't store to st(0). */
3820 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
3822 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
3827 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
3830 if (STACK_TOP_P (operands[0]))
3831 p = "{rp\t%0, %1|p\t%1, %0}";
3833 p = "{p\t%1, %0|rp\t%0, %1}";
3835 if (STACK_TOP_P (operands[0]))
3836 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
3838 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
3843 if (STACK_TOP_P (operands[0]))
3845 if (STACK_TOP_P (operands[1]))
3846 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
3848 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
3851 else if (STACK_TOP_P (operands[1]))
3854 p = "{\t%1, %0|r\t%0, %1}";
3856 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
3862 p = "{r\t%2, %0|\t%0, %2}";
3864 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
3877 /* Output code for INSN to convert a float to a signed int. OPERANDS
3878 are the insn operands. The output may be [HSD]Imode and the input
3879 operand may be [SDX]Fmode. */
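/* The emitted sequence is roughly (a hedged paraphrase of the SImode case;
   "cw", "tmp" and "dest" stand for operands 2, 4 and 0):
       fnstcw  cw          ; save the current control word
       movl    cw, tmp
       movb    $12, cw+1   ; set rounding control to truncate
       fldcw   cw
       movl    tmp, cw     ; stash the original for the final reload
       fistpl  dest        ; store the truncated integer
       fldcw   cw          ; restore the original rounding mode  */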
3882 output_fix_trunc (insn, operands)
3886 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
3887 int dimode_p = GET_MODE (operands[0]) == DImode;
3890 /* Jump through a hoop or two for DImode, since the hardware has no
3891 non-popping instruction. We used to do this a different way, but
3892 that was somewhat fragile and broke with post-reload splitters. */
3893 if (dimode_p && !stack_top_dies)
3894 output_asm_insn ("fld\t%y1", operands);
3896 if (! STACK_TOP_P (operands[1]))
3899 xops[0] = GEN_INT (12);
3900 xops[1] = adj_offsettable_operand (operands[2], 1);
3901 xops[1] = change_address (xops[1], QImode, NULL_RTX);
3903 xops[2] = operands[0];
3904 if (GET_CODE (operands[0]) != MEM)
3905 xops[2] = operands[3];
3907 output_asm_insn ("fnstcw\t%2", operands);
3908 output_asm_insn ("mov{l}\t{%2, %4|%4, %2}", operands);
3909 output_asm_insn ("mov{b}\t{%0, %1|%1, %0}", xops);
3910 output_asm_insn ("fldcw\t%2", operands);
3911 output_asm_insn ("mov{l}\t{%4, %2|%2, %4}", operands);
3913 if (stack_top_dies || dimode_p)
3914 output_asm_insn ("fistp%z2\t%2", xops);
3916 output_asm_insn ("fist%z2\t%2", xops);
3918 output_asm_insn ("fldcw\t%2", operands);
3920 if (GET_CODE (operands[0]) != MEM)
3924 split_di (operands+0, 1, xops+0, xops+1);
3925 split_di (operands+3, 1, xops+2, xops+3);
3926 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
3927 output_asm_insn ("mov{l}\t{%3, %1|%1, %3}", xops);
3929 else if (GET_MODE (operands[0]) == SImode)
3930 output_asm_insn ("mov{l}\t{%3, %0|%0, %3}", operands);
3932 output_asm_insn ("mov{w}\t{%3, %0|%0, %3}", operands);
3938 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
3939 should be used and 2 when fnstsw should be used. UNORDERED_P is true
3940 when fucom should be used. */
3943 output_fp_compare (insn, operands, eflags_p, unordered_p)
3946 int eflags_p, unordered_p;
3949 rtx cmp_op0 = operands[0];
3950 rtx cmp_op1 = operands[1];
3955 cmp_op1 = operands[2];
3958 if (! STACK_TOP_P (cmp_op0))
3961 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
3963 if (STACK_REG_P (cmp_op1)
3965 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
3966 && REGNO (cmp_op1) != FIRST_STACK_REG)
3968 /* If the top of the 387 stack dies, and the other operand
3969 is also a stack register that dies, then this must be an
3970 `fcompp' float compare. */
3974 /* There is no double popping fcomi variant. Fortunately,
3975 eflags is immune from the fstp's cc clobbering. */
3977 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
3979 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
3987 return "fucompp\n\tfnstsw\t%0";
3989 return "fcompp\n\tfnstsw\t%0";
4002 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
4004 static const char * const alt[24] =
4016 "fcomi\t{%y1, %0|%0, %y1}",
4017 "fcomip\t{%y1, %0|%0, %y1}",
4018 "fucomi\t{%y1, %0|%0, %y1}",
4019 "fucomip\t{%y1, %0|%0, %y1}",
4026 "fcom%z2\t%y2\n\tfnstsw\t%0",
4027 "fcomp%z2\t%y2\n\tfnstsw\t%0",
4028 "fucom%z2\t%y2\n\tfnstsw\t%0",
4029 "fucomp%z2\t%y2\n\tfnstsw\t%0",
4031 "ficom%z2\t%y2\n\tfnstsw\t%0",
4032 "ficomp%z2\t%y2\n\tfnstsw\t%0",
4040 mask = eflags_p << 3;
4041 mask |= (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT) << 2;
4042 mask |= unordered_p << 1;
4043 mask |= stack_top_dies;
4055 /* Output assembler code to FILE to initialize basic-block profiling.
4057 If profile_block_flag == 2
4059 Output code to call the subroutine `__bb_init_trace_func'
4060 and pass two parameters to it. The first parameter is
4061 the address of a block allocated in the object module.
4062 The second parameter is the number of the first basic block
4065 The name of the block is a local symbol made with this statement:
4067 ASM_GENERATE_INTERNAL_LABEL (BUFFER, "LPBX", 0);
4069 Of course, since you are writing the definition of
4070 `ASM_GENERATE_INTERNAL_LABEL' as well as that of this macro, you
4071 can take a short cut in the definition of this macro and use the
4072 name that you know will result.
4074 The number of the first basic block of the function is
4075 passed to the macro in BLOCK_OR_LABEL.
4077 If described in a virtual assembler language, the code to be output looks like:
4081 parameter2 <- BLOCK_OR_LABEL
4082 call __bb_init_trace_func
4084 else if profile_block_flag != 0
4086 Output code to call the subroutine `__bb_init_func'
4087 and pass a single parameter to it, which is the same
4088 as the first parameter to `__bb_init_trace_func'.
4090 The first word of this parameter is a flag which will be nonzero if
4091 the object module has already been initialized. So test this word
4092 first, and do not call `__bb_init_func' if the flag is nonzero.
4093 Note: When profile_block_flag == 2 the test need not be done
4094 but `__bb_init_trace_func' *must* be called.
4096 BLOCK_OR_LABEL may be used to generate a label number as a
4097 branch destination in case `__bb_init_func' will not be called.
4099 If described in a virtual assembler language, the code to be output looks like:
4110 ix86_output_function_block_profiler (file, block_or_label)
4114 static int num_func = 0;
4116 char block_table[80], false_label[80];
4118 ASM_GENERATE_INTERNAL_LABEL (block_table, "LPBX", 0);
4120 xops[1] = gen_rtx_SYMBOL_REF (VOIDmode, block_table);
4121 xops[5] = stack_pointer_rtx;
4122 xops[7] = gen_rtx_REG (Pmode, 0); /* eax */
4124 CONSTANT_POOL_ADDRESS_P (xops[1]) = TRUE;
4126 switch (profile_block_flag)
4129 xops[2] = GEN_INT (block_or_label);
4130 xops[3] = gen_rtx_MEM (Pmode,
4131 gen_rtx_SYMBOL_REF (VOIDmode, "__bb_init_trace_func"));
4132 xops[6] = GEN_INT (8);
4134 output_asm_insn ("push{l}\t%2", xops);
4136 output_asm_insn ("push{l}\t%1", xops);
4139 output_asm_insn ("lea{l}\t{%a1, %7|%7, %a1}", xops);
4140 output_asm_insn ("push{l}\t%7", xops);
4142 output_asm_insn ("call\t%P3", xops);
4143 output_asm_insn ("add{l}\t{%6, %5|%5, %6}", xops);
4147 ASM_GENERATE_INTERNAL_LABEL (false_label, "LPBZ", num_func);
4149 xops[0] = const0_rtx;
4150 xops[2] = gen_rtx_MEM (Pmode,
4151 gen_rtx_SYMBOL_REF (VOIDmode, false_label));
4152 xops[3] = gen_rtx_MEM (Pmode,
4153 gen_rtx_SYMBOL_REF (VOIDmode, "__bb_init_func"));
4154 xops[4] = gen_rtx_MEM (Pmode, xops[1]);
4155 xops[6] = GEN_INT (4);
4157 CONSTANT_POOL_ADDRESS_P (xops[2]) = TRUE;
4159 output_asm_insn ("cmp{l}\t{%0, %4|%4, %0}", xops);
4160 output_asm_insn ("jne\t%2", xops);
4163 output_asm_insn ("push{l}\t%1", xops);
4166 output_asm_insn ("lea{l}\t{%a1, %7|%7, %a1}", xops);
4167 output_asm_insn ("push{l}\t%7", xops);
4169 output_asm_insn ("call\t%P3", xops);
4170 output_asm_insn ("add{l}\t{%6, %5|%5, %6}", xops);
4171 ASM_OUTPUT_INTERNAL_LABEL (file, "LPBZ", num_func);
4177 /* Output assembler code to FILE to increment a counter associated
4178 with basic block number BLOCKNO.
4180 If profile_block_flag == 2
4182 Output code to initialize the global structure `__bb' and
4183 call the function `__bb_trace_func' which will increment the
4186 `__bb' consists of two words. In the first word the number
4187 of the basic block has to be stored. In the second word
4188 the address of a block allocated in the object module
4191 The basic block number is given by BLOCKNO.
4193 The address of the block is given by the label created with
4195 ASM_GENERATE_INTERNAL_LABEL (BUFFER, "LPBX", 0);
4197 by FUNCTION_BLOCK_PROFILER.
4199 Of course, since you are writing the definition of
4200 `ASM_GENERATE_INTERNAL_LABEL' as well as that of this macro, you
4201 can take a short cut in the definition of this macro and use the
4202 name that you know will result.
4204 If described in a virtual assembler language, the code to be output looks like:
4207 move BLOCKNO -> (__bb)
4208 move LPBX0 -> (__bb+4)
4209 call __bb_trace_func
4211 Note that function `__bb_trace_func' must not change the
4212 machine state, especially the flag register. To guarantee
4213 this, you must output code to save and restore registers
4214 either in this macro or in the macros MACHINE_STATE_SAVE
4215 and MACHINE_STATE_RESTORE. The last two macros will be
4216 used in the function `__bb_trace_func', so you must make
4217 sure that the function prologue does not change any
4218 register prior to saving it with MACHINE_STATE_SAVE.
4220 else if profile_block_flag != 0
4222 Output code to increment the counter directly.
4223 Basic blocks are numbered separately from zero within each
4224 compiled object module. The count associated with block number
4225 BLOCKNO is at index BLOCKNO in an array of words; the name of
4226 this array is a local symbol made with this statement:
4228 ASM_GENERATE_INTERNAL_LABEL (BUFFER, "LPBX", 2);
4230 Of course, since you are writing the definition of
4231 `ASM_GENERATE_INTERNAL_LABEL' as well as that of this macro, you
4232 can take a short cut in the definition of this macro and use the
4233 name that you know will result.
4235 If described in a virtual assembler language, the code to be output looks like:
4238 inc (LPBX2+4*BLOCKNO)
4242 ix86_output_block_profiler (file, blockno)
4243 FILE *file ATTRIBUTE_UNUSED;
4246 rtx xops[8], cnt_rtx;
4248 char *block_table = counts;
4250 switch (profile_block_flag)
4253 ASM_GENERATE_INTERNAL_LABEL (block_table, "LPBX", 0);
4255 xops[1] = gen_rtx_SYMBOL_REF (VOIDmode, block_table);
4256 xops[2] = GEN_INT (blockno);
4257 xops[3] = gen_rtx_MEM (Pmode,
4258 gen_rtx_SYMBOL_REF (VOIDmode, "__bb_trace_func"));
4259 xops[4] = gen_rtx_SYMBOL_REF (VOIDmode, "__bb");
4260 xops[5] = plus_constant (xops[4], 4);
4261 xops[0] = gen_rtx_MEM (SImode, xops[4]);
4262 xops[6] = gen_rtx_MEM (SImode, xops[5]);
4264 CONSTANT_POOL_ADDRESS_P (xops[1]) = TRUE;
4266 output_asm_insn ("pushf", xops);
4267 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
4270 xops[7] = gen_rtx_REG (Pmode, 0); /* eax */
4271 output_asm_insn ("push{l}\t%7", xops);
4272 output_asm_insn ("lea{l}\t{%a1, %7|%7, %a1}", xops);
4273 output_asm_insn ("mov{l}\t{%7, %6|%6, %7}", xops);
4274 output_asm_insn ("pop{l}\t%7", xops);
4277 output_asm_insn ("mov{l}\t{%1, %6|%6, %1}", xops);
4278 output_asm_insn ("call\t%P3", xops);
4279 output_asm_insn ("popf", xops);
4284 ASM_GENERATE_INTERNAL_LABEL (counts, "LPBX", 2);
4285 cnt_rtx = gen_rtx_SYMBOL_REF (VOIDmode, counts);
4286 SYMBOL_REF_FLAG (cnt_rtx) = TRUE;
4289 cnt_rtx = plus_constant (cnt_rtx, blockno*4);
4292 cnt_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, cnt_rtx);
4294 xops[0] = gen_rtx_MEM (SImode, cnt_rtx);
4295 output_asm_insn ("inc{l}\t%0", xops);
4302 ix86_expand_move (mode, operands)
4303 enum machine_mode mode;
4306 int strict = (reload_in_progress || reload_completed);
4309 if (flag_pic && mode == Pmode && symbolic_operand (operands[1], Pmode))
4311 /* Emit insns to move operands[1] into operands[0]. */
4313 if (GET_CODE (operands[0]) == MEM)
4314 operands[1] = force_reg (Pmode, operands[1]);
4317 rtx temp = operands[0];
4318 if (GET_CODE (temp) != REG)
4319 temp = gen_reg_rtx (Pmode);
4320 temp = legitimize_pic_address (operands[1], temp);
4321 if (temp == operands[0])
4328 if (GET_CODE (operands[0]) == MEM
4329 && (GET_MODE (operands[0]) == QImode
4330 || !push_operand (operands[0], mode))
4331 && GET_CODE (operands[1]) == MEM)
4332 operands[1] = force_reg (mode, operands[1]);
4334 if (push_operand (operands[0], mode)
4335 && ! general_no_elim_operand (operands[1], mode))
4336 operands[1] = copy_to_mode_reg (mode, operands[1]);
4338 if (FLOAT_MODE_P (mode))
4340 /* If we are loading a floating point constant to a register,
4341 force the value to memory now, since we'll get better code
4342 out the back end. */
4346 else if (GET_CODE (operands[1]) == CONST_DOUBLE
4347 && register_operand (operands[0], mode))
4348 operands[1] = validize_mem (force_const_mem (mode, operands[1]));
4352 insn = gen_rtx_SET (VOIDmode, operands[0], operands[1]);
4357 /* Attempt to expand a binary operator. Make the expansion closer to the
4358 actual machine than just general_operand, which would allow 3 separate
4359 memory references (one output, two input) in a single insn. */
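/* E.g. (illustrative): expanding "m1 = m2 + r" first copies m2 into a
   register so the final insn has at most one memory reference, while
   "m = m + r" keeps the matching memory operand in place.  */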
4362 ix86_expand_binary_operator (code, mode, operands)
4364 enum machine_mode mode;
4367 int matching_memory;
4368 rtx src1, src2, dst, op, clob;
4374 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
4375 if (GET_RTX_CLASS (code) == 'c'
4376 && (rtx_equal_p (dst, src2)
4377 || immediate_operand (src1, mode)))
4384 /* If the destination is memory, and we do not have matching source
4385 operands, do things in registers. */
4386 matching_memory = 0;
4387 if (GET_CODE (dst) == MEM)
4389 if (rtx_equal_p (dst, src1))
4390 matching_memory = 1;
4391 else if (GET_RTX_CLASS (code) == 'c'
4392 && rtx_equal_p (dst, src2))
4393 matching_memory = 2;
4395 dst = gen_reg_rtx (mode);
4398 /* The two source operands cannot both be in memory. */
4399 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
4401 if (matching_memory != 2)
4402 src2 = force_reg (mode, src2);
4404 src1 = force_reg (mode, src1);
4407 /* If the operation is not commutative, source 1 cannot be a constant
4408 or non-matching memory. */
4409 if ((CONSTANT_P (src1)
4410 || (!matching_memory && GET_CODE (src1) == MEM))
4411 && GET_RTX_CLASS (code) != 'c')
4412 src1 = force_reg (mode, src1);
4414 /* If optimizing, copy to regs to improve CSE */
4415 if (optimize && ! no_new_pseudos)
4417 if (GET_CODE (dst) == MEM)
4418 dst = gen_reg_rtx (mode);
4419 if (GET_CODE (src1) == MEM)
4420 src1 = force_reg (mode, src1);
4421 if (GET_CODE (src2) == MEM)
4422 src2 = force_reg (mode, src2);
4425 /* Emit the instruction. */
4427 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
4428 if (reload_in_progress)
4430 /* Reload doesn't know about the flags register, and doesn't know that
4431 it doesn't want to clobber it. We can only do this with PLUS. */
4438 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
4439 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
4442 /* Fix up the destination if needed. */
4443 if (dst != operands[0])
4444 emit_move_insn (operands[0], dst);
4447 /* Return TRUE or FALSE depending on whether the binary operator meets the
4448 appropriate constraints. */
4451 ix86_binary_operator_ok (code, mode, operands)
4453 enum machine_mode mode ATTRIBUTE_UNUSED;
4456 /* The two source operands cannot both be in memory. */
4457 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
4459 /* If the operation is not commutative, source 1 cannot be a constant. */
4460 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != 'c')
4462 /* If the destination is memory, we must have a matching source operand. */
4463 if (GET_CODE (operands[0]) == MEM
4464 && ! (rtx_equal_p (operands[0], operands[1])
4465 || (GET_RTX_CLASS (code) == 'c'
4466 && rtx_equal_p (operands[0], operands[2]))))
4468 /* If the operation is not commutative and source 1 is memory, we must
4469 have a matching destination. */
4470 if (GET_CODE (operands[1]) == MEM
4471 && GET_RTX_CLASS (code) != 'c'
4472 && ! rtx_equal_p (operands[0], operands[1]))
4477 /* Attempt to expand a unary operator. Make the expansion closer to the
4478 actual machine than just general_operand, which would allow 2 separate
4479 memory references (one output, one input) in a single insn. */
4482 ix86_expand_unary_operator (code, mode, operands)
4484 enum machine_mode mode;
4487 int matching_memory;
4488 rtx src, dst, op, clob;
4493 /* If the destination is memory, and we do not have matching source
4494 operands, do things in registers. */
4495 matching_memory = 0;
4496 if (GET_CODE (dst) == MEM)
4498 if (rtx_equal_p (dst, src))
4499 matching_memory = 1;
4501 dst = gen_reg_rtx (mode);
4504 /* When the source operand is in memory, the destination must match. */
4505 if (!matching_memory && GET_CODE (src) == MEM)
4506 src = force_reg (mode, src);
4508 /* If optimizing, copy to regs to improve CSE */
4509 if (optimize && ! no_new_pseudos)
4511 if (GET_CODE (dst) == MEM)
4512 dst = gen_reg_rtx (mode);
4513 if (GET_CODE (src) == MEM)
4514 src = force_reg (mode, src);
4517 /* Emit the instruction. */
4519 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
4520 if (reload_in_progress || code == NOT)
4522 /* Reload doesn't know about the flags register, and doesn't know that
4523 it doesn't want to clobber it. */
4530 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
4531 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
4534 /* Fix up the destination if needed. */
4535 if (dst != operands[0])
4536 emit_move_insn (operands[0], dst);
4539 /* Return TRUE or FALSE depending on whether the unary operator meets the
4540 appropriate constraints. */
4543 ix86_unary_operator_ok (code, mode, operands)
4544 enum rtx_code code ATTRIBUTE_UNUSED;
4545 enum machine_mode mode ATTRIBUTE_UNUSED;
4546 rtx operands[2] ATTRIBUTE_UNUSED;
4548 /* If one of the operands is in memory, source and destination must match. */
4549 if ((GET_CODE (operands[0]) == MEM
4550 || GET_CODE (operands[1]) == MEM)
4551 && ! rtx_equal_p (operands[0], operands[1]))
4556 /* Return TRUE or FALSE depending on whether the first SET in INSN
4557 has source and destination with matching CC modes, and that the
4558 CC mode is at least as constrained as REQ_MODE. */
4561 ix86_match_ccmode (insn, req_mode)
4563 enum machine_mode req_mode;
4566 enum machine_mode set_mode;
4568 set = PATTERN (insn);
4569 if (GET_CODE (set) == PARALLEL)
4570 set = XVECEXP (set, 0, 0);
4571 if (GET_CODE (set) != SET)
4573 if (GET_CODE (SET_SRC (set)) != COMPARE)
4576 set_mode = GET_MODE (SET_DEST (set));
4580 if (req_mode != CCNOmode
4581 && (req_mode != CCmode
4582 || XEXP (SET_SRC (set), 1) != const0_rtx))
4586 if (req_mode == CCGCmode)
4590 if (req_mode == CCGOCmode || req_mode == CCNOmode)
4594 if (req_mode == CCZmode)
4604 return (GET_MODE (SET_SRC (set)) == set_mode);
4607 /* Produce an unsigned comparison for a given signed comparison. */
4609 static enum rtx_code
4610 unsigned_comparison (code)
4642 /* Generate insn patterns to do an integer compare of OPERANDS. */
4645 ix86_expand_int_compare (code, op0, op1)
4649 enum machine_mode cmpmode;
4652 cmpmode = SELECT_CC_MODE (code, op0, op1);
4653 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
4655 /* This is very simple, but making the interface the same as in the
4656 FP case makes the rest of the code easier. */
4657 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
4658 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
4660 /* Return the test that should be put into the flags user, i.e.
4661 the bcc, scc, or cmov instruction. */
4662 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
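/* A caller typically wraps the returned test in an IF_THEN_ELSE around
   the pc, e.g. (illustrative RTL)
     (set (pc) (if_then_else (lt (reg flags) (const_int 0))
                             (label_ref L) (pc)))
   as ix86_expand_branch does below.  */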
4665 /* Figure out whether to use ordered or unordered fp comparisons.
4666 Return the appropriate mode to use. */
4669 ix86_fp_compare_mode (code)
4677 /* When not doing IEEE compliant compares, fault on NaNs. */
4678 unordered = (TARGET_IEEE_FP != 0);
4681 case LT: case LE: case GT: case GE:
4685 case UNORDERED: case ORDERED:
4686 case UNEQ: case UNGE: case UNGT: case UNLE: case UNLT: case LTGT:
4694 /* ??? If we knew whether invalid-operand exceptions were masked,
4695 we could rely on fcom to raise an exception and take care of
4696 NaNs. But we don't. We could know this from C99 math pragmas. */
4700 return unordered ? CCFPUmode : CCFPmode;
4704 ix86_cc_mode (code, op0, op1)
4708 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
4709 return ix86_fp_compare_mode (code);
4712 /* Only zero flag is needed. */
4714 case NE: /* ZF!=0 */
4716 /* Codes needing carry flag. */
4717 case GEU: /* CF=0 */
4718 case GTU: /* CF=0 & ZF=0 */
4719 case LTU: /* CF=1 */
4720 case LEU: /* CF=1 | ZF=1 */
4722 /* Codes possibly doable only with the sign flag when
4723 comparing against zero. */
4724 case GE: /* SF=OF or SF=0 */
4725 case LT: /* SF<>OF or SF=1 */
4726 if (op1 == const0_rtx)
4729 /* For other cases the carry flag is not required. */
4731 /* Codes doable only with the sign flag when comparing
4732 against zero, but we lack a jump instruction for them,
4733 so we need to use relational tests against overflow,
4734 which thus needs to be zero. */
4735 case GT: /* ZF=0 & SF=OF */
4736 case LE: /* ZF=1 | SF<>OF */
4737 if (op1 == const0_rtx)
4746 /* Return true if we should use an FCOMI instruction for this fp comparison. */
4749 ix86_use_fcomi_compare (code)
4752 return (TARGET_CMOVE
4753 && (code == ORDERED || code == UNORDERED
4754 /* All other unordered compares require checking
4755 multiple sets of bits. */
4756 || ix86_fp_compare_mode (code) == CCFPmode));
4759 /* Swap, force into registers, or otherwise massage the two operands
4760 to a fp comparison. The operands are updated in place; the new
4761 comparison code is returned. */
4763 static enum rtx_code
4764 ix86_prepare_fp_compare_args (code, pop0, pop1)
4768 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
4769 rtx op0 = *pop0, op1 = *pop1;
4770 enum machine_mode op_mode = GET_MODE (op0);
4772 /* All of the unordered compare instructions only work on registers.
4773 The same is true of the XFmode compare instructions. The same is
4774 true of the fcomi compare instructions. */
4776 if (fpcmp_mode == CCFPUmode
4777 || op_mode == XFmode
4778 || ix86_use_fcomi_compare (code))
4780 op0 = force_reg (op_mode, op0);
4781 op1 = force_reg (op_mode, op1);
4785 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
4786 things around if they appear profitable, otherwise force op0 into a register. */
4789 if (standard_80387_constant_p (op0) == 0
4790 || (GET_CODE (op0) == MEM
4791 && ! (standard_80387_constant_p (op1) == 0
4792 || GET_CODE (op1) == MEM)))
4795 tmp = op0, op0 = op1, op1 = tmp;
4796 code = swap_condition (code);
4799 if (GET_CODE (op0) != REG)
4800 op0 = force_reg (op_mode, op0);
4802 if (CONSTANT_P (op1))
4804 if (standard_80387_constant_p (op1))
4805 op1 = force_reg (op_mode, op1);
4807 op1 = validize_mem (force_const_mem (op_mode, op1));
4816 /* Generate insn patterns to do a floating point compare of OPERANDS. */
4819 ix86_expand_fp_compare (code, op0, op1, scratch)
4821 rtx op0, op1, scratch;
4823 enum machine_mode fpcmp_mode, intcmp_mode;
4826 fpcmp_mode = ix86_fp_compare_mode (code);
4827 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
4829 /* %%% fcomi is probably always faster, even when dealing with memory,
4830 since compare-and-branch would be three insns instead of four. */
4831 if (ix86_use_fcomi_compare (code))
4833 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
4834 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG), tmp);
4837 /* The FP codes work out to act like unsigned. */
4838 code = unsigned_comparison (code);
4839 intcmp_mode = CCmode;
4843 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
4846 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
4847 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), 9);
4848 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
4850 if (fpcmp_mode == CCFPmode
4852 || code == UNORDERED)
4854 /* We have two options here -- use sahf, or test bits of ah
4855 directly. On PPRO, they are equivalent, sahf being one byte
4856 smaller. On Pentium, sahf is non-pairable while test is UV pairable. */
4859 if (TARGET_USE_SAHF || optimize_size)
4862 emit_insn (gen_x86_sahf_1 (scratch));
4864 /* The FP codes work out to act like unsigned. */
4865 code = unsigned_comparison (code);
4866 intcmp_mode = CCmode;
4871 * The numbers below correspond to the bits of the FPSW in AH.
4872 * C3, C2, and C0 are in bits 0x40, 0x4, and 0x01 respectively.
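 * For example (illustrative), an equality-style test below becomes
 * "testb $0x45, %ah" (C3|C2|C0 = 0x40|0x04|0x01), while other orderings
 * mask with "andb $0x45, %ah" and then compare against a fixed pattern
 * such as 0x40.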
4894 /* We'd have to use `xorb 1,ah; andb 0x41,ah', so it's
4895 faster in all cases to just fall back on sahf. */
4922 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (mask)));
4923 intcmp_mode = CCNOmode;
4928 /* In the unordered case, we have to check C2 for NaNs, which
4929 doesn't happen to work out to anything nice combination-wise.
4930 So do some bit twiddling on the value we've got in AH to come
4931 up with an appropriate set of condition codes. */
4933 intcmp_mode = CCNOmode;
4937 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
4941 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
4942 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
4943 intcmp_mode = CCmode;
4947 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
4951 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
4952 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
4953 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
4954 intcmp_mode = CCmode;
4958 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
4959 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
4960 intcmp_mode = CCmode;
4964 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
4965 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, GEN_INT (0x40)));
4970 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
4974 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
4978 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
4982 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
4983 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, GEN_INT (0x01)));
4987 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
4988 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
4989 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
4990 intcmp_mode = CCmode;
4994 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
4998 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
5002 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
5012 /* Return the test that should be put into the flags user, i.e.
5013 the bcc, scc, or cmov instruction. */
5014 return gen_rtx_fmt_ee (code, VOIDmode,
5015 gen_rtx_REG (intcmp_mode, FLAGS_REG),
5020 ix86_expand_compare (code)
5024 op0 = ix86_compare_op0;
5025 op1 = ix86_compare_op1;
5027 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
5028 ret = ix86_expand_fp_compare (code, op0, op1, gen_reg_rtx (HImode));
5030 ret = ix86_expand_int_compare (code, op0, op1);
5036 ix86_expand_branch (code, label)
5042 switch (GET_MODE (ix86_compare_op0))
5047 tmp = ix86_expand_compare (code);
5048 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
5049 gen_rtx_LABEL_REF (VOIDmode, label),
5051 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
5057 /* Don't expand the comparison early, so that we get better code
5058 when jump or whoever decides to reverse the comparison. */
5063 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
5066 tmp = gen_rtx_fmt_ee (code, VOIDmode,
5067 ix86_compare_op0, ix86_compare_op1);
5068 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
5069 gen_rtx_LABEL_REF (VOIDmode, label),
5071 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
5073 use_fcomi = ix86_use_fcomi_compare (code);
5074 vec = rtvec_alloc (3 + !use_fcomi);
5075 RTVEC_ELT (vec, 0) = tmp;
5077 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
5079 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
5082 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
5084 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
5089 /* Expand DImode branch into multiple compare+branch. */
5091 rtx lo[2], hi[2], label2;
5092 enum rtx_code code1, code2, code3;
5094 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
5096 tmp = ix86_compare_op0;
5097 ix86_compare_op0 = ix86_compare_op1;
5098 ix86_compare_op1 = tmp;
5099 code = swap_condition (code);
5101 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
5102 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
5104 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
5105 avoid two branches. This costs one extra insn, so disable when
5106 optimizing for size. */
5108 if ((code == EQ || code == NE)
5110 || hi[1] == const0_rtx || lo[1] == const0_rtx))
5115 if (hi[1] != const0_rtx)
5116 xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
5117 NULL_RTX, 0, OPTAB_WIDEN);
5120 if (lo[1] != const0_rtx)
5121 xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
5122 NULL_RTX, 0, OPTAB_WIDEN);
5124 tmp = expand_binop (SImode, ior_optab, xor1, xor0,
5125 NULL_RTX, 0, OPTAB_WIDEN);
5127 ix86_compare_op0 = tmp;
5128 ix86_compare_op1 = const0_rtx;
5129 ix86_expand_branch (code, label);
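/* Sketch of the identity used above (illustrative C, not compiler code):

     static int
     sketch_di_equal (unsigned int hi0, unsigned int lo0,
                      unsigned int hi1, unsigned int lo1)
     {
       return ((hi0 ^ hi1) | (lo0 ^ lo1)) == 0;
     }

   The OR of the two XORs is zero exactly when both halves match, so a
   single compare against zero replaces two conditional branches.  */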
5133 /* Otherwise, if we are doing less-than or greater-or-equal,
5134 op1 is a constant, and the low word is zero, then we can just
5135 examine the high word. */
5137 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
5140 case LT: case LTU: case GE: case GEU:
5141 ix86_compare_op0 = hi[0];
5142 ix86_compare_op1 = hi[1];
5143 ix86_expand_branch (code, label);
5149 /* Otherwise, we need two or three jumps. */
5151 label2 = gen_label_rtx ();
5154 code2 = swap_condition (code);
5155 code3 = unsigned_condition (code);
5159 case LT: case GT: case LTU: case GTU:
5162 case LE: code1 = LT; code2 = GT; break;
5163 case GE: code1 = GT; code2 = LT; break;
5164 case LEU: code1 = LTU; code2 = GTU; break;
5165 case GEU: code1 = GTU; code2 = LTU; break;
5167 case EQ: code1 = NIL; code2 = NE; break;
5168 case NE: code2 = NIL; break;
5176 * if (hi(a) < hi(b)) goto true;
5177 * if (hi(a) > hi(b)) goto false;
5178 * if (lo(a) < lo(b)) goto true;
5182 ix86_compare_op0 = hi[0];
5183 ix86_compare_op1 = hi[1];
5186 ix86_expand_branch (code1, label);
5188 ix86_expand_branch (code2, label2);
5190 ix86_compare_op0 = lo[0];
5191 ix86_compare_op1 = lo[1];
5192 ix86_expand_branch (code3, label);
5195 emit_label (label2);
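/* C model of the branch structure emitted above for, say, a signed
   `a < b' on a double-word value (a sketch; the helper is hypothetical):

     static int
     sketch_di_less (int hi0, unsigned int lo0, int hi1, unsigned int lo1)
     {
       if (hi0 < hi1) return 1;    -- code1: high words decide true
       if (hi0 > hi1) return 0;    -- code2: high words decide false
       return lo0 < lo1;           -- code3: unsigned low-word compare
     }
 */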
5205 ix86_expand_setcc (code, dest)
5212 if (GET_MODE (ix86_compare_op0) == DImode)
5213 return 0; /* FAIL */
5215 /* Three modes of generation:
5216 0 -- destination does not overlap compare sources:
5217 clear dest first, emit strict_low_part setcc.
5218 1 -- destination does overlap compare sources:
5219 emit subreg setcc, zero extend.
5220 2 -- destination is in QImode: emit setcc only. */
5226 if (GET_MODE (dest) == QImode)
5228 else if (reg_overlap_mentioned_p (dest, ix86_compare_op0)
5229 || reg_overlap_mentioned_p (dest, ix86_compare_op1))
5233 emit_move_insn (dest, const0_rtx);
5235 ret = ix86_expand_compare (code);
5236 PUT_MODE (ret, QImode);
5241 tmp = gen_lowpart (QImode, dest);
5242 tmp = gen_rtx_STRICT_LOW_PART (VOIDmode, tmp);
5246 if (!cse_not_expected)
5247 tmp = gen_reg_rtx (QImode);
5249 tmp = gen_lowpart (QImode, dest);
5252 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
5258 tmp = gen_rtx_ZERO_EXTEND (GET_MODE (dest), tmp);
5259 tmp = gen_rtx_SET (VOIDmode, dest, tmp);
5260 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
5261 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
5265 return 1; /* DONE */
5269 ix86_expand_int_movcc (operands)
5272 enum rtx_code code = GET_CODE (operands[1]), compare_code;
5273 rtx compare_seq, compare_op;
5275 /* When the compare code is not LTU or GEU, we cannot use the sbbl idiom.
5276 If the comparison is done with an immediate operand, we can convert it
5277 to LTU or GEU by adjusting the integer. */
5279 if ((code == LEU || code == GTU)
5280 && GET_CODE (ix86_compare_op1) == CONST_INT
5281 && GET_MODE (operands[0]) != HImode
5282 && (unsigned int)INTVAL (ix86_compare_op1) != 0xffffffff
5283 && GET_CODE (operands[2]) == CONST_INT
5284 && GET_CODE (operands[3]) == CONST_INT)
5290 ix86_compare_op1 = GEN_INT (INTVAL (ix86_compare_op1) + 1);
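/* The adjustment above relies on an unsigned identity; in C (sketch,
   hypothetical helper):

     static int
     sketch_leu_as_ltu (unsigned int x, unsigned int c)
     {
       return x < c + 1;   -- same truth value as x <= c, because the
     }                     -- 0xffffffff case was excluded, so c + 1
                           -- cannot wrap around to zero

   and symmetrically GTU against c becomes GEU against c + 1.  */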
5294 compare_op = ix86_expand_compare (code);
5295 compare_seq = gen_sequence ();
5298 compare_code = GET_CODE (compare_op);
5300 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
5301 HImode insns, we'd be swallowed in word prefix ops. */
5303 if (GET_MODE (operands[0]) != HImode
5304 && GET_CODE (operands[2]) == CONST_INT
5305 && GET_CODE (operands[3]) == CONST_INT)
5307 rtx out = operands[0];
5308 HOST_WIDE_INT ct = INTVAL (operands[2]);
5309 HOST_WIDE_INT cf = INTVAL (operands[3]);
5312 if (compare_code == LTU || compare_code == GEU)
5315 /* Detect overlap between destination and compare sources. */
5318 /* To simplify the rest of the code, restrict to the GEU case. */
5319 if (compare_code == LTU)
5324 compare_code = reverse_condition (compare_code);
5325 code = reverse_condition (code);
5329 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
5330 || reg_overlap_mentioned_p (out, ix86_compare_op1))
5331 tmp = gen_reg_rtx (SImode);
5333 emit_insn (compare_seq);
5334 emit_insn (gen_x86_movsicc_0_m1 (tmp));
5346 emit_insn (gen_addsi3 (out, out, GEN_INT (ct)));
5357 emit_insn (gen_iorsi3 (out, out, GEN_INT (ct)));
5359 else if (diff == -1 && ct)
5369 emit_insn (gen_one_cmplsi2 (tmp, tmp));
5371 emit_insn (gen_addsi3 (out, out, GEN_INT (cf)));
5378 * andl cf - ct, dest
5383 emit_insn (gen_andsi3 (out, out, GEN_INT (cf - ct)));
5385 emit_insn (gen_addsi3 (out, out, GEN_INT (ct)));
5389 emit_move_insn (out, tmp);
5391 return 1; /* DONE */
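/* C model of the sbb idiom behind the sequences above (a sketch showing
   one of the emitted variants; the helper name is hypothetical):

     static int
     sketch_sbb_select (int carry, int a, int b)
     {
       int mask = carry ? -1 : 0;      -- sbbl out,out after the compare
       return (mask & (a - b)) + b;    -- carry ? a : b, with and/add
     }

   This selects between two constants without a branch; since the code
   above normalizes the condition to GEU and may swap ct and cf, either
   constant can end up in either role, and the special cases shrink the
   and/add pair to a single or, inc, or neg.  */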
5398 tmp = ct, ct = cf, cf = tmp;
5400 compare_code = reverse_condition (compare_code);
5401 code = reverse_condition (code);
5403 if (diff == 1 || diff == 2 || diff == 4 || diff == 8
5404 || diff == 3 || diff == 5 || diff == 9)
5410 * lea cf(dest*(ct-cf)),dest
5414 * This also catches the degenerate setcc-only case.
5420 out = emit_store_flag (out, code, ix86_compare_op0,
5421 ix86_compare_op1, VOIDmode, 0, 1);
5428 tmp = gen_rtx_MULT (SImode, out, GEN_INT (diff & ~1));
5432 tmp = gen_rtx_PLUS (SImode, tmp, out);
5438 tmp = gen_rtx_PLUS (SImode, tmp, GEN_INT (cf));
5444 emit_move_insn (out, tmp);
5449 clob = gen_rtx_REG (CCmode, FLAGS_REG);
5450 clob = gen_rtx_CLOBBER (VOIDmode, clob);
5452 tmp = gen_rtx_SET (VOIDmode, out, tmp);
5453 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
5457 emit_insn (gen_rtx_SET (VOIDmode, out, tmp));
5459 if (out != operands[0])
5460 emit_move_insn (operands[0], out);
5462 return 1; /* DONE */
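/* Illustrative C form of the lea-based select above (sketch):

     static int
     sketch_lea_select (int cond, int ct, int cf)
     {
       int t = cond ? 1 : 0;        -- setcc
       return cf + t * (ct - cf);   -- multiply by 1,2,3,4,5,8 or 9
     }                              -- folds into one lea address mode

   e.g. for diff == 5 the select becomes `leal cf(%eax,%eax,4), %eax'.  */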
5466 * General case:             Jumpful:
5467 *   xorl dest,dest            cmpl op1, op2
5468 *   cmpl op1, op2             movl ct, dest
5469 *   setcc dest                jcc 1f
5470 *   decl dest                 movl cf, dest
5471 *   andl (cf-ct),dest         1:
5476 * This is reasonably steep, but branch mispredict costs are
5477 * high on modern CPUs, so consider failing only if optimizing for size.
5480 * %%% Parameterize branch_cost on the tuning architecture, then
5481 * use that. The 80386 couldn't care less about mispredicts.
5484 if (!optimize_size && !TARGET_CMOVE)
5490 compare_code = reverse_condition (compare_code);
5491 code = reverse_condition (code);
5494 out = emit_store_flag (out, code, ix86_compare_op0,
5495 ix86_compare_op1, VOIDmode, 0, 1);
5497 emit_insn (gen_addsi3 (out, out, constm1_rtx));
5498 emit_insn (gen_andsi3 (out, out, GEN_INT (cf-ct)));
5500 emit_insn (gen_addsi3 (out, out, GEN_INT (ct)));
5501 if (out != operands[0])
5502 emit_move_insn (operands[0], out);
5504 return 1; /* DONE */
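/* C model of the branchless sequence in the comment above (sketch):

     static int
     sketch_branchless_select (int cond, int ct, int cf)
     {
       int out = cond ? 1 : 0;    -- setcc
       out -= 1;                  -- decl: 1 -> 0, 0 -> -1
       out &= cf - ct;            -- 0 or cf - ct
       return out + ct;           -- ct when cond holds, else cf
     }
 */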
5510 /* Try a few more things with specific constants and a variable. */
5513 rtx var, orig_out, out, tmp;
5516 return 0; /* FAIL */
5518 /* If one of the two operands is an interesting constant, load a 0 or -1
5519 mask with the code above and combine it with the variable using a logical operation. */
5521 if (GET_CODE (operands[2]) == CONST_INT)
5524 if (INTVAL (operands[2]) == 0)
5525 operands[3] = constm1_rtx, op = and_optab;
5526 else if (INTVAL (operands[2]) == -1)
5527 operands[3] = const0_rtx, op = ior_optab;
5529 return 0; /* FAIL */
5531 else if (GET_CODE (operands[3]) == CONST_INT)
5534 if (INTVAL (operands[3]) == 0)
5535 operands[2] = constm1_rtx, op = and_optab;
5536 else if (INTVAL (operands[3]) == -1)
5537 operands[2] = const0_rtx, op = ior_optab;
5539 return 0; /* FAIL */
5542 return 0; /* FAIL */
5544 orig_out = operands[0];
5545 tmp = gen_reg_rtx (GET_MODE (orig_out));
5548 /* Recurse to get the constant loaded. */
5549 if (ix86_expand_int_movcc (operands) == 0)
5550 return 0; /* FAIL */
5552 /* Mask in the interesting variable. */
5553 out = expand_binop (GET_MODE (orig_out), op, var, tmp, orig_out, 0,
5555 if (out != orig_out)
5556 emit_move_insn (orig_out, out);
5558 return 1; /* DONE */
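/* Sketch of the masking trick above: once one arm of the select is the
   constant 0 (or -1), the conditional move collapses to a logical op.
   In illustrative C (hypothetical helper):

     static int
     sketch_zero_arm_select (int cond, int var)
     {
       int mask = cond ? 0 : -1;   -- recursive movcc on the two constants
       return mask & var;          -- == (cond ? 0 : var); `|' covers -1
     }
 */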
5562 * For comparison with above,
5572 if (! nonimmediate_operand (operands[2], GET_MODE (operands[0])))
5573 operands[2] = force_reg (GET_MODE (operands[0]), operands[2]);
5574 if (! nonimmediate_operand (operands[3], GET_MODE (operands[0])))
5575 operands[3] = force_reg (GET_MODE (operands[0]), operands[3]);
5577 emit_insn (compare_seq);
5578 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
5579 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
5580 compare_op, operands[2],
5583 return 1; /* DONE */
5587 ix86_expand_fp_movcc (operands)
5591 enum machine_mode mode;
5594 /* The floating point conditional move instructions don't directly
5595 support conditions resulting from a signed integer comparison. */
5597 code = GET_CODE (operands[1]);
5610 tmp = gen_reg_rtx (QImode);
5611 ix86_expand_setcc (code, tmp);
5613 ix86_compare_op0 = tmp;
5614 ix86_compare_op1 = const0_rtx;
5621 mode = SELECT_CC_MODE (code, ix86_compare_op0, ix86_compare_op1);
5622 emit_insn (gen_rtx_SET (VOIDmode, gen_rtx_REG (mode, FLAGS_REG),
5623 gen_rtx_COMPARE (mode,
5625 ix86_compare_op1)));
5626 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
5627 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
5628 gen_rtx_fmt_ee (code, VOIDmode,
5629 gen_rtx_REG (mode, FLAGS_REG),
5637 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
5638 works for floating point parameters and non-offsettable memories.
5639 For pushes, it returns just stack offsets; the values will be saved
5640 in the right order. At most three parts are generated. */
5643 ix86_split_to_parts (operand, parts, mode)
5646 enum machine_mode mode;
5648 int size = GET_MODE_SIZE (mode) / 4;
5650 if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
5652 if (size < 2 || size > 3)
5655 /* Optimize constant pool references to immediates. This is used by fp moves,
5656 which force all constants to memory to allow combining. */
5658 if (GET_CODE (operand) == MEM
5659 && GET_CODE (XEXP (operand, 0)) == SYMBOL_REF
5660 && CONSTANT_POOL_ADDRESS_P (XEXP (operand, 0)))
5661 operand = get_pool_constant (XEXP (operand, 0));
5663 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
5665 /* The only non-offsettable memories we handle are pushes. */
5666 if (! push_operand (operand, VOIDmode))
5669 PUT_MODE (operand, SImode);
5670 parts[0] = parts[1] = parts[2] = operand;
5675 split_di (&operand, 1, &parts[0], &parts[1]);
5678 if (REG_P (operand))
5680 if (!reload_completed)
5682 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
5683 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
5685 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
5687 else if (offsettable_memref_p (operand))
5689 PUT_MODE (operand, SImode);
5691 parts[1] = adj_offsettable_operand (operand, 4);
5693 parts[2] = adj_offsettable_operand (operand, 8);
5695 else if (GET_CODE (operand) == CONST_DOUBLE)
5700 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
5704 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
5705 parts[2] = GEN_INT (l[2]);
5708 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
5713 parts[1] = GEN_INT (l[1]);
5714 parts[0] = GEN_INT (l[0]);
5724 /* Emit insns to perform a move or push of DI, DF, and XF values.
5725 Return false when normal moves are needed; true when all required
5726 insns have been emitted. Operands 2-4 contain the input values
5727 in the correct order; operands 5-7 contain the output values. */
5730 ix86_split_long_move (operands1)
5735 int size = GET_MODE_SIZE (GET_MODE (operands1[0])) / 4;
5739 /* Make our own copy to avoid clobbering the operands. */
5740 operands[0] = copy_rtx (operands1[0]);
5741 operands[1] = copy_rtx (operands1[1]);
5743 if (size < 2 || size > 3)
5746 /* The only non-offsettable memory we handle is push. */
5747 if (push_operand (operands[0], VOIDmode))
5749 else if (GET_CODE (operands[0]) == MEM
5750 && ! offsettable_memref_p (operands[0]))
5753 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands1[0]));
5754 ix86_split_to_parts (operands[1], part[1], GET_MODE (operands1[0]));
5756 /* When emitting a push, watch for source operands that live on the stack. */
5757 if (push && GET_CODE (operands[1]) == MEM
5758 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
5761 part[1][1] = part[1][2];
5762 part[1][0] = part[1][1];
5765 /* We need to do the copy in the right order in case an address register
5766 of the source overlaps the destination. */
5767 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
5769 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
5771 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
5774 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
5777 /* Collision in the middle part can be handled by reordering. */
5778 if (collisions == 1 && size == 3
5779 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
5782 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
5783 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
5786 /* If there are more collisions, we can't handle it by reordering.
5787 Do an lea to the last part and use only one colliding move. */
5788 else if (collisions > 1)
5791 emit_insn (gen_rtx_SET (VOIDmode, part[0][size - 1],
5792 XEXP (part[1][0], 0)));
5793 part[1][0] = change_address (part[1][0], SImode, part[0][size - 1]);
5794 part[1][1] = adj_offsettable_operand (part[1][0], 4);
5796 part[1][2] = adj_offsettable_operand (part[1][0], 8);
5803 emit_insn (gen_push (part[1][2]));
5804 emit_insn (gen_push (part[1][1]));
5805 emit_insn (gen_push (part[1][0]));
5809 /* Choose the correct order so we do not overwrite the source before it is copied. */
5810 if ((REG_P (part[0][0])
5811 && REG_P (part[1][1])
5812 && (REGNO (part[0][0]) == REGNO (part[1][1])
5814 && REGNO (part[0][0]) == REGNO (part[1][2]))))
5816 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
5820 operands1[2] = part[0][2];
5821 operands1[3] = part[0][1];
5822 operands1[4] = part[0][0];
5823 operands1[5] = part[1][2];
5824 operands1[6] = part[1][1];
5825 operands1[7] = part[1][0];
5829 operands1[2] = part[0][1];
5830 operands1[3] = part[0][0];
5831 operands1[5] = part[1][1];
5832 operands1[6] = part[1][0];
5839 operands1[2] = part[0][0];
5840 operands1[3] = part[0][1];
5841 operands1[4] = part[0][2];
5842 operands1[5] = part[1][0];
5843 operands1[6] = part[1][1];
5844 operands1[7] = part[1][2];
5848 operands1[2] = part[0][0];
5849 operands1[3] = part[0][1];
5850 operands1[5] = part[1][0];
5851 operands1[6] = part[1][1];
5859 ix86_split_ashldi (operands, scratch)
5860 rtx *operands, scratch;
5862 rtx low[2], high[2];
5865 if (GET_CODE (operands[2]) == CONST_INT)
5867 split_di (operands, 2, low, high);
5868 count = INTVAL (operands[2]) & 63;
5872 emit_move_insn (high[0], low[1]);
5873 emit_move_insn (low[0], const0_rtx);
5876 emit_insn (gen_ashlsi3 (high[0], high[0], GEN_INT (count - 32)));
5880 if (!rtx_equal_p (operands[0], operands[1]))
5881 emit_move_insn (operands[0], operands[1]);
5882 emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
5883 emit_insn (gen_ashlsi3 (low[0], low[0], GEN_INT (count)));
5888 if (!rtx_equal_p (operands[0], operands[1]))
5889 emit_move_insn (operands[0], operands[1]);
5891 split_di (operands, 1, low, high);
5893 emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
5894 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
5896 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
5898 if (! no_new_pseudos)
5899 scratch = force_reg (SImode, const0_rtx);
5901 emit_move_insn (scratch, const0_rtx);
5903 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2],
5907 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
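/* C model of the DImode left shift lowered above (a sketch; the helper
   name is hypothetical):

     static void
     sketch_shl64 (unsigned int *hi, unsigned int *lo, int count)
     {
       count &= 63;
       if (count >= 32)
         {
           *hi = *lo << (count - 32);   -- high word gets the low word
           *lo = 0;
         }
       else if (count > 0)
         {
           *hi = (*hi << count) | (*lo >> (32 - count));   -- shld
           *lo <<= count;
         }
     }

   The non-constant path emits the same dataflow plus a runtime
   adjustment for the count >= 32 half.  */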
5912 ix86_split_ashrdi (operands, scratch)
5913 rtx *operands, scratch;
5915 rtx low[2], high[2];
5918 if (GET_CODE (operands[2]) == CONST_INT)
5920 split_di (operands, 2, low, high);
5921 count = INTVAL (operands[2]) & 63;
5925 emit_move_insn (low[0], high[1]);
5927 if (! reload_completed)
5928 emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31)));
5931 emit_move_insn (high[0], low[0]);
5932 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
5936 emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
5940 if (!rtx_equal_p (operands[0], operands[1]))
5941 emit_move_insn (operands[0], operands[1]);
5942 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
5943 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
5948 if (!rtx_equal_p (operands[0], operands[1]))
5949 emit_move_insn (operands[0], operands[1]);
5951 split_di (operands, 1, low, high);
5953 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
5954 emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));
5956 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
5958 if (! no_new_pseudos)
5959 scratch = gen_reg_rtx (SImode);
5960 emit_move_insn (scratch, high[0]);
5961 emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
5962 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
5966 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
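/* Corresponding C model for the arithmetic right shift above (sketch;
   arithmetic shift of the signed high word assumed):

     static void
     sketch_sar64 (int *hi, unsigned int *lo, int count)
     {
       count &= 63;
       if (count >= 32)
         {
           *lo = (unsigned int) (*hi >> (count - 32));
           *hi >>= 31;                  -- high word becomes the sign mask
         }
       else if (count > 0)
         {
           *lo = (*lo >> count) | ((unsigned int) *hi << (32 - count));
           *hi >>= count;               -- shrd plus arithmetic shift
         }
     }
 */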
5971 ix86_split_lshrdi (operands, scratch)
5972 rtx *operands, scratch;
5974 rtx low[2], high[2];
5977 if (GET_CODE (operands[2]) == CONST_INT)
5979 split_di (operands, 2, low, high);
5980 count = INTVAL (operands[2]) & 63;
5984 emit_move_insn (low[0], high[1]);
5985 emit_move_insn (high[0], const0_rtx);
5988 emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
5992 if (!rtx_equal_p (operands[0], operands[1]))
5993 emit_move_insn (operands[0], operands[1]);
5994 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
5995 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
6000 if (!rtx_equal_p (operands[0], operands[1]))
6001 emit_move_insn (operands[0], operands[1]);
6003 split_di (operands, 1, low, high);
6005 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
6006 emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));
6008 /* Heh. By reversing the arguments, we can reuse this pattern. */
6009 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
6011 if (! no_new_pseudos)
6012 scratch = force_reg (SImode, const0_rtx);
6014 emit_move_insn (scratch, const0_rtx);
6016 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
6020 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
6024 /* Expand the appropriate insns for doing strlen if not just doing repnz; scasb.
6027 out = result, initialized with the start address
6028 align_rtx = alignment of the address.
6029 scratch = scratch register, initialized with the start address when
6030 not aligned, otherwise undefined
6032 This is just the body. It needs the initializations mentioned above and
6033 some address computation at the end. These things are done in i386.md. */
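/* Rough C model of the code this function emits (a sketch; the real
   initializations and final address arithmetic live in i386.md, and the
   helper name is hypothetical):

     static const char *
     sketch_scan_for_zero (const char *p)
     {
       unsigned int v;
       while (((unsigned long) p & 3) != 0)    -- at most three byte probes
         {
           if (*p == 0)
             return p;
           p++;
         }
       do                                      -- then four bytes at a time
         {
           v = *(const unsigned int *) p;
           p += 4;
         }
       while (((v - 0x01010101u) & ~v & 0x80808080u) == 0);
       return p;    -- still 1..4 bytes past the zero; fixed up below
     }
 */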
6036 ix86_expand_strlensi_unroll_1 (out, align_rtx, scratch)
6037 rtx out, align_rtx, scratch;
6041 rtx align_2_label = NULL_RTX;
6042 rtx align_3_label = NULL_RTX;
6043 rtx align_4_label = gen_label_rtx ();
6044 rtx end_0_label = gen_label_rtx ();
6046 rtx tmpreg = gen_reg_rtx (SImode);
6049 if (GET_CODE (align_rtx) == CONST_INT)
6050 align = INTVAL (align_rtx);
6052 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
6054 /* Is there a known alignment and is it less than 4? */
6057 /* Is there a known alignment and is it not 2? */
6060 align_3_label = gen_label_rtx (); /* Label when aligned to a 3-byte boundary */
6061 align_2_label = gen_label_rtx (); /* Label when aligned to a 2-byte boundary */
6063 /* Leave just the two lower bits (the mask is 3). */
6064 align_rtx = expand_binop (SImode, and_optab, scratch, GEN_INT (3),
6065 NULL_RTX, 0, OPTAB_WIDEN);
6067 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
6068 SImode, 1, 0, align_4_label);
6069 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), EQ, NULL,
6070 SImode, 1, 0, align_2_label);
6071 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), GTU, NULL,
6072 SImode, 1, 0, align_3_label);
6076 /* Since the alignment is 2, we have to check 2 or 0 bytes;
6077 check whether the pointer is already 4-byte aligned. */
6079 align_rtx = expand_binop (SImode, and_optab, scratch, GEN_INT (2),
6080 NULL_RTX, 0, OPTAB_WIDEN);
6082 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
6083 SImode, 1, 0, align_4_label);
6086 mem = gen_rtx_MEM (QImode, out);
6088 /* Now compare the bytes. */
6090 /* Compare the first n unaligned bytes on a byte-by-byte basis. */
6091 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
6092 QImode, 1, 0, end_0_label);
6094 /* Increment the address. */
6095 emit_insn (gen_addsi3 (out, out, const1_rtx));
6097 /* Not needed with an alignment of 2 */
6100 emit_label (align_2_label);
6102 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
6103 QImode, 1, 0, end_0_label);
6105 emit_insn (gen_addsi3 (out, out, const1_rtx));
6107 emit_label (align_3_label);
6110 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
6111 QImode, 1, 0, end_0_label);
6113 emit_insn (gen_addsi3 (out, out, const1_rtx));
6116 /* Generate loop to check 4 bytes at a time. It is not a good idea to
6117 align this loop: it only makes programs bigger without making them faster. */
6119 emit_label (align_4_label);
6121 mem = gen_rtx_MEM (SImode, out);
6122 emit_move_insn (scratch, mem);
6123 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
6125 /* This formula yields a nonzero result iff one of the bytes is zero.
6126 This saves three branches inside the loop and many cycles. */
6128 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
6129 emit_insn (gen_one_cmplsi2 (scratch, scratch));
6130 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
6131 emit_insn (gen_andsi3 (tmpreg, tmpreg, GEN_INT (0x80808080)));
6132 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0,
6133 SImode, 1, 0, align_4_label);
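/* The loop test above is the classic zero-byte trick; in C (sketch):

     static int
     sketch_has_zero_byte (unsigned int v)
     {
       return ((v - 0x01010101u) & ~v & 0x80808080u) != 0;
     }

   The subtraction borrows out of any zero byte, and masking with ~v
   filters out bytes whose own high bit was already set, so the result
   is nonzero exactly when some byte of v is zero.  */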
6137 rtx reg = gen_reg_rtx (SImode);
6138 emit_move_insn (reg, tmpreg);
6139 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
6141 /* If zero is not in the first two bytes, move two bytes forward. */
6142 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
6143 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
6144 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
6145 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
6146 gen_rtx_IF_THEN_ELSE (SImode, tmp,
6149 /* Emit lea manually to avoid clobbering the flags. */
6150 emit_insn (gen_rtx_SET (SImode, reg,
6151 gen_rtx_PLUS (SImode, out, GEN_INT (2))));
6153 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
6154 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
6155 emit_insn (gen_rtx_SET (VOIDmode, out,
6156 gen_rtx_IF_THEN_ELSE (SImode, tmp,
6163 rtx end_2_label = gen_label_rtx ();
6164 /* Is zero in the first two bytes? */
6166 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
6167 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
6168 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
6169 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
6170 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
6172 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
6173 JUMP_LABEL (tmp) = end_2_label;
6175 /* Not in the first two. Move two bytes forward. */
6176 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
6177 emit_insn (gen_addsi3 (out, out, GEN_INT (2)));
6179 emit_label (end_2_label);
6183 /* Avoid a branch when fixing up the final byte position. */
6184 tmpreg = gen_lowpart (QImode, tmpreg);
6185 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
6186 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3)));
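/* The two insns above implement a branch-free fixup; modeled in C
   (sketch, hypothetical helper):

     static unsigned int
     sketch_byte_fixup (unsigned int out, unsigned char tmp)
     {
       int carry = (tmp & 0x80) != 0;   -- addb tmp,tmp copies bit 7 to CF
       return out - 3 - carry;          -- sbb-style subtract folds it in
     }

   so out drops back by 4 when the zero was in the first byte of the
   remaining pair, and by 3 when it was in the second.  */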
6188 emit_label (end_0_label);
6191 /* Clear stack slot assignments remembered from previous functions.
6192 This is called from INIT_EXPANDERS once before RTL is emitted for each function. */
6196 ix86_init_machine_status (p)
6199 enum machine_mode mode;
6202 = (struct machine_function *) xmalloc (sizeof (struct machine_function));
6204 for (mode = VOIDmode; (int) mode < (int) MAX_MACHINE_MODE;
6205 mode = (enum machine_mode) ((int) mode + 1))
6206 for (n = 0; n < MAX_386_STACK_LOCALS; n++)
6207 ix86_stack_locals[(int) mode][n] = NULL_RTX;
6210 /* Mark machine specific bits of P for GC. */
6212 ix86_mark_machine_status (p)
6215 enum machine_mode mode;
6218 for (mode = VOIDmode; (int) mode < (int) MAX_MACHINE_MODE;
6219 mode = (enum machine_mode) ((int) mode + 1))
6220 for (n = 0; n < MAX_386_STACK_LOCALS; n++)
6221 ggc_mark_rtx (p->machine->stack_locals[(int) mode][n]);
6224 /* Return a MEM corresponding to a stack slot with mode MODE.
6225 Allocate a new slot if necessary.
6227 The RTL for a function can have several slots available: N is
6228 which slot to use. */
6231 assign_386_stack_local (mode, n)
6232 enum machine_mode mode;
6235 if (n < 0 || n >= MAX_386_STACK_LOCALS)
6238 if (ix86_stack_locals[(int) mode][n] == NULL_RTX)
6239 ix86_stack_locals[(int) mode][n]
6240 = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
6242 return ix86_stack_locals[(int) mode][n];
6245 /* Calculate the length of the memory address in the instruction
6246 encoding. Does not include the one-byte modrm, opcode, or prefix. */
6249 memory_address_length (addr)
6252 struct ix86_address parts;
6253 rtx base, index, disp;
6256 if (GET_CODE (addr) == PRE_DEC
6257 || GET_CODE (addr) == POST_INC)
6260 if (! ix86_decompose_address (addr, &parts))
6264 index = parts.index;
6268 /* Register Indirect. */
6269 if (base && !index && !disp)
6271 /* Special cases: ebp and esp need the two-byte modrm form. */
6272 if (addr == stack_pointer_rtx
6273 || addr == arg_pointer_rtx
6274 || addr == frame_pointer_rtx
6275 || addr == hard_frame_pointer_rtx)
6279 /* Direct Addressing. */
6280 else if (disp && !base && !index)
6285 /* Find the length of the displacement constant. */
6288 if (GET_CODE (disp) == CONST_INT
6289 && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K'))
6295 /* An index requires the two-byte modrm form. */
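/* The 'K' tests in this file correspond to the choice between the two
   displacement encodings; assuming 'K' is the i386 signed 8-bit constant
   constraint, the decision is, in C terms (sketch):

     len += (disp >= -128 && disp <= 127) ? 1 : 4;

   i.e. a signed 8-bit displacement encodes as one byte (disp8), any
   other as four bytes (disp32).  */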
6303 /* Compute the default value for the "length_immediate" attribute. When
6304 SHORTFORM is set, expect that the insn has an 8-bit immediate alternative. */
6306 ix86_attr_length_immediate_default (insn, shortform)
6312 extract_insn_cached (insn);
6313 for (i = recog_data.n_operands - 1; i >= 0; --i)
6314 if (CONSTANT_P (recog_data.operand[i]))
6319 && GET_CODE (recog_data.operand[i]) == CONST_INT
6320 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
6324 switch (get_attr_mode (insn))
6336 fatal_insn ("Unknown insn mode", insn);
6342 /* Compute default value for "length_address" attribute. */
6344 ix86_attr_length_address_default (insn)
6348 extract_insn_cached (insn);
6349 for (i = recog_data.n_operands - 1; i >= 0; --i)
6350 if (GET_CODE (recog_data.operand[i]) == MEM)
6352 return memory_address_length (XEXP (recog_data.operand[i], 0));
6358 /* Return the maximum number of instructions a CPU can issue. */
6365 case PROCESSOR_PENTIUM:
6369 case PROCESSOR_PENTIUMPRO:
6377 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
6378 by DEP_INSN, and nothing else set by DEP_INSN. */
6381 ix86_flags_dependant (insn, dep_insn, insn_type)
6383 enum attr_type insn_type;
6387 /* Simplify the test for uninteresting insns. */
6388 if (insn_type != TYPE_SETCC
6389 && insn_type != TYPE_ICMOV
6390 && insn_type != TYPE_FCMOV
6391 && insn_type != TYPE_IBR)
6394 if ((set = single_set (dep_insn)) != 0)
6396 set = SET_DEST (set);
6399 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
6400 && XVECLEN (PATTERN (dep_insn), 0) == 2
6401 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
6402 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
6404 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
6405 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
6410 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
6413 /* This test is true if the dependent insn reads the flags but
6414 not any other potentially set register. */
6415 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
6418 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
6424 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
6425 address whose operands are set by DEP_INSN. */
6428 ix86_agi_dependant (insn, dep_insn, insn_type)
6430 enum attr_type insn_type;
6434 if (insn_type == TYPE_LEA)
6436 addr = PATTERN (insn);
6437 if (GET_CODE (addr) == SET)
6439 else if (GET_CODE (addr) == PARALLEL
6440 && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
6441 addr = XVECEXP (addr, 0, 0);
6444 addr = SET_SRC (addr);
6449 extract_insn_cached (insn);
6450 for (i = recog_data.n_operands - 1; i >= 0; --i)
6451 if (GET_CODE (recog_data.operand[i]) == MEM)
6453 addr = XEXP (recog_data.operand[i], 0);
6460 return modified_in_p (addr, dep_insn);
6464 ix86_adjust_cost (insn, link, dep_insn, cost)
6465 rtx insn, link, dep_insn;
6468 enum attr_type insn_type, dep_insn_type;
6469 enum attr_memory memory;
6471 int dep_insn_code_number;
6473 /* Anti and output dependencies have zero cost on all CPUs. */
6474 if (REG_NOTE_KIND (link) != 0)
6477 dep_insn_code_number = recog_memoized (dep_insn);
6479 /* If we can't recognize the insns, we can't really do anything. */
6480 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
6483 insn_type = get_attr_type (insn);
6484 dep_insn_type = get_attr_type (dep_insn);
6486 /* Prologue and epilogue allocators can have a false dependency on ebp.
6487 This results in one cycle extra stall on Pentium prologue scheduling,
6488 so handle this important case manually. */
6489 if (dep_insn_code_number == CODE_FOR_pro_epilogue_adjust_stack
6490 && dep_insn_type == TYPE_ALU
6491 && !reg_mentioned_p (stack_pointer_rtx, insn))
6496 case PROCESSOR_PENTIUM:
6497 /* Address Generation Interlock adds a cycle of latency. */
6498 if (ix86_agi_dependant (insn, dep_insn, insn_type))
6501 /* ??? Compares pair with jump/setcc. */
6502 if (ix86_flags_dependant (insn, dep_insn, insn_type))
6505 /* Floating point stores require the value to be ready one cycle earlier. */
6506 if (insn_type == TYPE_FMOV
6507 && get_attr_memory (insn) == MEMORY_STORE
6508 && !ix86_agi_dependant (insn, dep_insn, insn_type))
6512 case PROCESSOR_PENTIUMPRO:
6513 /* Since we can't represent delayed latencies of load+operation,
6514 increase the cost here for non-imov insns. */
6515 if (dep_insn_type != TYPE_IMOV
6516 && dep_insn_type != TYPE_FMOV
6517 && ((memory = get_attr_memory (dep_insn)) == MEMORY_LOAD
6518 || memory == MEMORY_BOTH))
6521 /* INT->FP conversion is expensive. */
6522 if (get_attr_fp_int_src (dep_insn))
6525 /* There is one extra cycle of latency between an FP op and a store. */
6526 if (insn_type == TYPE_FMOV
6527 && (set = single_set (dep_insn)) != NULL_RTX
6528 && (set2 = single_set (insn)) != NULL_RTX
6529 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
6530 && GET_CODE (SET_DEST (set2)) == MEM)
6535 /* The esp dependency is resolved before the instruction is really finished. */
6537 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
6538 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
6541 /* Since we can't represent delayed latencies of load+operation,
6542 increase the cost here for non-imov insns. */
6543 if ((memory = get_attr_memory (dep_insn)) == MEMORY_LOAD
6544 || memory == MEMORY_BOTH)
6545 cost += (dep_insn_type != TYPE_IMOV) ? 2 : 1;
6547 /* INT->FP conversion is expensive. */
6548 if (get_attr_fp_int_src (dep_insn))
6552 case PROCESSOR_ATHLON:
6553 if ((memory = get_attr_memory (dep_insn)) == MEMORY_LOAD
6554 || memory == MEMORY_BOTH)
6556 if (dep_insn_type == TYPE_IMOV || dep_insn_type == TYPE_FMOV)
6571 struct ppro_sched_data
6574 int issued_this_cycle;
6579 ix86_safe_length (insn)
6582 if (recog_memoized (insn) >= 0)
6583 return get_attr_length(insn);
6589 ix86_safe_length_prefix (insn)
6592 if (recog_memoized (insn) >= 0)
6593 return get_attr_length(insn);
6598 static enum attr_memory
6599 ix86_safe_memory (insn)
6602 if (recog_memoized (insn) >= 0)
6603 return get_attr_memory(insn);
6605 return MEMORY_UNKNOWN;
6608 static enum attr_pent_pair
6609 ix86_safe_pent_pair (insn)
6612 if (recog_memoized (insn) >= 0)
6613 return get_attr_pent_pair(insn);
6615 return PENT_PAIR_NP;
6618 static enum attr_ppro_uops
6619 ix86_safe_ppro_uops (insn)
6622 if (recog_memoized (insn) >= 0)
6623 return get_attr_ppro_uops (insn);
6625 return PPRO_UOPS_MANY;
6629 ix86_dump_ppro_packet (dump)
6632 if (ix86_sched_data.ppro.decode[0])
6634 fprintf (dump, "PPRO packet: %d",
6635 INSN_UID (ix86_sched_data.ppro.decode[0]));
6636 if (ix86_sched_data.ppro.decode[1])
6637 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[1]));
6638 if (ix86_sched_data.ppro.decode[2])
6639 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[2]));
6644 /* We're beginning a new block. Initialize data structures as necessary. */
6647 ix86_sched_init (dump, sched_verbose)
6648 FILE *dump ATTRIBUTE_UNUSED;
6649 int sched_verbose ATTRIBUTE_UNUSED;
6651 memset (&ix86_sched_data, 0, sizeof (ix86_sched_data));
6654 /* Shift INSN to SLOT, and shift everything else down. */
6657 ix86_reorder_insn (insnp, slot)
6664 insnp[0] = insnp[1];
6665 while (++insnp != slot);
6670 /* Find an instruction with the given pairability and the minimal number of
6671 cycles lost to the fact that the CPU waits for both pipelines to finish
6672 before reading the next instructions. Also take care that the two
6673 instructions together do not exceed 7 bytes. */
6676 ix86_pent_find_pair (e_ready, ready, type, first)
6679 enum attr_pent_pair type;
6682 int mincycles, cycles;
6683 enum attr_pent_pair tmp;
6684 enum attr_memory memory;
6685 rtx *insnp, *bestinsnp = NULL;
6687 if (ix86_safe_length (first) > 7 + ix86_safe_length_prefix (first))
6690 memory = ix86_safe_memory (first);
6691 cycles = result_ready_cost (first);
6692 mincycles = INT_MAX;
6694 for (insnp = e_ready; insnp >= ready && mincycles; --insnp)
6695 if ((tmp = ix86_safe_pent_pair (*insnp)) == type
6696 && ix86_safe_length (*insnp) <= 7 + ix86_safe_length_prefix (*insnp))
6698 enum attr_memory second_memory;
6699 int secondcycles, currentcycles;
6701 second_memory = ix86_safe_memory (*insnp);
6702 secondcycles = result_ready_cost (*insnp);
6703 currentcycles = abs (cycles - secondcycles);
6705 if (secondcycles >= 1 && cycles >= 1)
6707 /* Two read/modify/write instructions together take two cycles longer. */
6709 if (memory == MEMORY_BOTH && second_memory == MEMORY_BOTH)
6712 /* A read/modify/write instruction followed by a read/modify
6713 instruction takes one cycle longer. */
6714 if (memory == MEMORY_BOTH && second_memory == MEMORY_LOAD
6715 && tmp != PENT_PAIR_UV
6716 && ix86_safe_pent_pair (first) != PENT_PAIR_UV)
6719 if (currentcycles < mincycles)
6720 bestinsnp = insnp, mincycles = currentcycles;
6726 /* Subroutines of ix86_sched_reorder. */
6729 ix86_sched_reorder_pentium (ready, e_ready)
6733 enum attr_pent_pair pair1, pair2;
6736 /* This wouldn't be necessary if Haifa knew that static insn ordering
6737 matters for which pipe an insn is issued to. So we have to make
6738 some minor rearrangements. */
6740 pair1 = ix86_safe_pent_pair (*e_ready);
6742 /* If the first insn is non-pairable, let it be. */
6743 if (pair1 == PENT_PAIR_NP)
6746 pair2 = PENT_PAIR_NP;
6749 /* If the first insn is UV or PV pairable, search for a PU insn to pair with it. */
6751 if (pair1 == PENT_PAIR_UV || pair1 == PENT_PAIR_PV)
6753 insnp = ix86_pent_find_pair (e_ready-1, ready,
6754 PENT_PAIR_PU, *e_ready);
6756 pair2 = PENT_PAIR_PU;
6759 /* If the first insn is PU or UV pairable, search for a PV insn to pair with it. */
6761 if (pair2 == PENT_PAIR_NP
6762 && (pair1 == PENT_PAIR_PU || pair1 == PENT_PAIR_UV))
6764 insnp = ix86_pent_find_pair (e_ready-1, ready,
6765 PENT_PAIR_PV, *e_ready);
6767 pair2 = PENT_PAIR_PV;
6770 /* If the first insn is pairable, search for a UV insn to pair with it. */
6772 if (pair2 == PENT_PAIR_NP)
6774 insnp = ix86_pent_find_pair (e_ready-1, ready,
6775 PENT_PAIR_UV, *e_ready);
6777 pair2 = PENT_PAIR_UV;
6780 if (pair2 == PENT_PAIR_NP)
6783 /* Found something! Decide if we need to swap the order. */
6784 if (pair1 == PENT_PAIR_PV || pair2 == PENT_PAIR_PU
6785 || (pair1 == PENT_PAIR_UV && pair2 == PENT_PAIR_UV
6786 && ix86_safe_memory (*e_ready) == MEMORY_BOTH
6787 && ix86_safe_memory (*insnp) == MEMORY_LOAD))
6788 ix86_reorder_insn (insnp, e_ready);
6790 ix86_reorder_insn (insnp, e_ready - 1);
6794 ix86_sched_reorder_ppro (ready, e_ready)
6799 enum attr_ppro_uops cur_uops;
6800 int issued_this_cycle;
6804 /* At this point .ppro.decode contains the state of the three
6805 decoders from last "cycle". That is, those insns that were
6806 actually independent. But here we're scheduling for the
6807 decoder, and we may find things that are decodable in the same cycle. */
6810 memcpy (decode, ix86_sched_data.ppro.decode, sizeof (decode));
6811 issued_this_cycle = 0;
6814 cur_uops = ix86_safe_ppro_uops (*insnp);
6816 /* If the decoders are empty, and we have a complex insn at the
6817 head of the priority queue, let it issue without complaint. */
6818 if (decode[0] == NULL)
6820 if (cur_uops == PPRO_UOPS_MANY)
6826 /* Otherwise, search for a 2-4 uop insn to issue. */
6827 while (cur_uops != PPRO_UOPS_FEW)
6831 cur_uops = ix86_safe_ppro_uops (*--insnp);
6834 /* If so, move it to the head of the line. */
6835 if (cur_uops == PPRO_UOPS_FEW)
6836 ix86_reorder_insn (insnp, e_ready);
6838 /* Issue the head of the queue. */
6839 issued_this_cycle = 1;
6840 decode[0] = *e_ready--;
6843 /* Look for simple insns to fill in the other two slots. */
6844 for (i = 1; i < 3; ++i)
6845 if (decode[i] == NULL)
6847 if (ready >= e_ready)
6851 cur_uops = ix86_safe_ppro_uops (*insnp);
6852 while (cur_uops != PPRO_UOPS_ONE)
6856 cur_uops = ix86_safe_ppro_uops (*--insnp);
6859 /* Found one. Move it to the head of the queue and issue it. */
6860 if (cur_uops == PPRO_UOPS_ONE)
6862 ix86_reorder_insn (insnp, e_ready);
6863 decode[i] = *e_ready--;
6864 issued_this_cycle++;
6868 /* ??? Didn't find one. Ideally, here we would do a lazy split
6869 of 2-uop insns, issue one and queue the other. */
6873 if (issued_this_cycle == 0)
6874 issued_this_cycle = 1;
6875 ix86_sched_data.ppro.issued_this_cycle = issued_this_cycle;
6878 /* We are about to begin issuing insns for this clock cycle.
6879 Override the default sort algorithm to better slot instructions. */
6881 ix86_sched_reorder (dump, sched_verbose, ready, n_ready, clock_var)
6882 FILE *dump ATTRIBUTE_UNUSED;
6883 int sched_verbose ATTRIBUTE_UNUSED;
6886 int clock_var ATTRIBUTE_UNUSED;
6888 rtx *e_ready = ready + n_ready - 1;
6898 case PROCESSOR_PENTIUM:
6899 ix86_sched_reorder_pentium (ready, e_ready);
6902 case PROCESSOR_PENTIUMPRO:
6903 ix86_sched_reorder_ppro (ready, e_ready);
6908 return ix86_issue_rate ();
6911 /* We are about to issue INSN. Return the number of insns left on the
6912 ready queue that can be issued this cycle. */
6915 ix86_variable_issue (dump, sched_verbose, insn, can_issue_more)
6925 return can_issue_more - 1;
6927 case PROCESSOR_PENTIUMPRO:
6929 enum attr_ppro_uops uops = ix86_safe_ppro_uops (insn);
6931 if (uops == PPRO_UOPS_MANY)
6934 ix86_dump_ppro_packet (dump);
6935 ix86_sched_data.ppro.decode[0] = insn;
6936 ix86_sched_data.ppro.decode[1] = NULL;
6937 ix86_sched_data.ppro.decode[2] = NULL;
6939 ix86_dump_ppro_packet (dump);
6940 ix86_sched_data.ppro.decode[0] = NULL;
6942 else if (uops == PPRO_UOPS_FEW)
6945 ix86_dump_ppro_packet (dump);
6946 ix86_sched_data.ppro.decode[0] = insn;
6947 ix86_sched_data.ppro.decode[1] = NULL;
6948 ix86_sched_data.ppro.decode[2] = NULL;
6952 for (i = 0; i < 3; ++i)
6953 if (ix86_sched_data.ppro.decode[i] == NULL)
6955 ix86_sched_data.ppro.decode[i] = insn;
6963 ix86_dump_ppro_packet (dump);
6964 ix86_sched_data.ppro.decode[0] = NULL;
6965 ix86_sched_data.ppro.decode[1] = NULL;
6966 ix86_sched_data.ppro.decode[2] = NULL;
6970 return --ix86_sched_data.ppro.issued_this_cycle;
6974 /* Compute the alignment given to a constant that is being placed in memory.
6975 EXP is the constant and ALIGN is the alignment that the object would
6976 ordinarily have. The value of this function is used instead of that
6977 alignment to align the object. */
6981 ix86_constant_alignment (exp, align)
6985 if (TREE_CODE (exp) == REAL_CST)
6987 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
6989 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
6992 else if (TREE_CODE (exp) == STRING_CST && TREE_STRING_LENGTH (exp) >= 31
6999 /* Compute the alignment for a static variable.
7000 TYPE is the data type, and ALIGN is the alignment that
7001 the object would ordinarily have. The value of this function is used
7002 instead of that alignment to align the object. */
7005 ix86_data_alignment (type, align)
7009 if (AGGREGATE_TYPE_P (type)
7011 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
7012 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
7013 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
7016 if (TREE_CODE (type) == ARRAY_TYPE)
7018 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
7020 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
7023 else if (TREE_CODE (type) == COMPLEX_TYPE)
7026 if (TYPE_MODE (type) == DCmode && align < 64)
7028 if (TYPE_MODE (type) == XCmode && align < 128)
7031 else if ((TREE_CODE (type) == RECORD_TYPE
7032 || TREE_CODE (type) == UNION_TYPE
7033 || TREE_CODE (type) == QUAL_UNION_TYPE)
7034 && TYPE_FIELDS (type))
7036 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
7038 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
7041 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
7042 || TREE_CODE (type) == INTEGER_TYPE)
7044 if (TYPE_MODE (type) == DFmode && align < 64)
7046 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
7053 /* Compute the alignment for a local variable.
7054 TYPE is the data type, and ALIGN is the alignment that
7055 the object would ordinarily have. The value of this macro is used
7056 instead of that alignment to align the object. */
7059 ix86_local_alignment (type, align)
7063 if (TREE_CODE (type) == ARRAY_TYPE)
7065 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
7067 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
7070 else if (TREE_CODE (type) == COMPLEX_TYPE)
7072 if (TYPE_MODE (type) == DCmode && align < 64)
7074 if (TYPE_MODE (type) == XCmode && align < 128)
7077 else if ((TREE_CODE (type) == RECORD_TYPE
7078 || TREE_CODE (type) == UNION_TYPE
7079 || TREE_CODE (type) == QUAL_UNION_TYPE)
7080 && TYPE_FIELDS (type))
7082 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
7084 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
7087 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
7088 || TREE_CODE (type) == INTEGER_TYPE)
7091 if (TYPE_MODE (type) == DFmode && align < 64)
7093 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
7099 #define def_builtin(NAME, TYPE, CODE) \
7100 builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, NULL_PTR)
7101 struct builtin_description
7103 enum insn_code icode;
7105 enum ix86_builtins code;
7106 enum rtx_code comparison;
7110 static struct builtin_description bdesc_comi[] =
7112 { CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, EQ, 0 },
7113 { CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, LT, 0 },
7114 { CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, LE, 0 },
7115 { CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, LT, 1 },
7116 { CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, LE, 1 },
7117 { CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, NE, 0 },
7118 { CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, EQ, 0 },
7119 { CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, LT, 0 },
7120 { CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, LE, 0 },
7121 { CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, LT, 1 },
7122 { CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, LE, 1 },
7123 { CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, NE, 0 }
7126 static struct builtin_description bdesc_2arg[] =
7129 { CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
7130 { CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
7131 { CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
7132 { CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
7133 { CODE_FOR_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
7134 { CODE_FOR_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
7135 { CODE_FOR_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
7136 { CODE_FOR_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
7138 { CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
7139 { CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
7140 { CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
7141 { CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 },
7142 { CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 },
7143 { CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
7144 { CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 },
7145 { CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 },
7146 { CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 },
7147 { CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 },
7148 { CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 },
7149 { CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 },
7150 { CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
7151 { CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
7152 { CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
7153 { CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpgtss", IX86_BUILTIN_CMPGTSS, LT, 1 },
7154 { CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpgess", IX86_BUILTIN_CMPGESS, LE, 1 },
7155 { CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
7156 { CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 },
7157 { CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 },
7158 { CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 },
7159 { CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, LT, 1 },
7160 { CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, LE, 1 },
7161 { CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
7163 { CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
7164 { CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
7165 { CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
7166 { CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
7168 { CODE_FOR_sse_andti3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
7169 { CODE_FOR_sse_nandti3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
7170 { CODE_FOR_sse_iorti3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
7171 { CODE_FOR_sse_xorti3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
7173 { CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
7174 { CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
7175 { CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
7176 { CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
7177 { CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
7180 { CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
7181 { CODE_FOR_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
7182 { CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
7183 { CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
7184 { CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
7185 { CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
7187 { CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
7188 { CODE_FOR_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
7189 { CODE_FOR_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
7190 { CODE_FOR_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
7191 { CODE_FOR_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
7192 { CODE_FOR_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
7193 { CODE_FOR_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
7194 { CODE_FOR_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
7196 { CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
7197 { CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
7198 { CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
7200 { CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
7201 { CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
7202 { CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
7203 { CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
7205 { CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
7206 { CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
7208 { CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
7209 { CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
7210 { CODE_FOR_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
7211 { CODE_FOR_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
7212 { CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
7213 { CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
7215 { CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
7216 { CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
7217 { CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
7218 { CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
7220 { CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
7221 { CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
7222 { CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
7223 { CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
7224 { CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
7225 { CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
7228 { CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
7229 { CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
7230 { CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
7232 { CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
7233 { CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
7235 { CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
7236 { CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
7237 { CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
7238 { CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
7239 { CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
7240 { CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
7242 { CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
7243 { CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
7244 { CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
7245 { CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
7246 { CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
7247 { CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
7249 { CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
7250 { CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
7251 { CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
7252 { CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
7254 { CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
7255 { CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 }
7259 static struct builtin_description bdesc_1arg[] =
7261 { CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
7262 { CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
7264 { CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
7265 { CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
7266 { CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
7268 { CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
7269 { CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
7270 { CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
7271 { CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 }
7275 /* Set up all the target-specific builtins. This is not called if TARGET_MMX
7276 is zero. Otherwise, if TARGET_SSE is not set, only the MMX builtins are set up. */
7279 ix86_init_builtins ()
7281 struct builtin_description * d;
7283 tree endlink = tree_cons (NULL_TREE, void_type_node, NULL_TREE);
7285 tree pchar_type_node = build_pointer_type (char_type_node);
7286 tree pfloat_type_node = build_pointer_type (float_type_node);
7287 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
7288 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
  /* Comparisons.  */
  tree int_ftype_v4sf_v4sf
    = build_function_type (integer_type_node,
                           tree_cons (NULL_TREE, V4SF_type_node,
                                      tree_cons (NULL_TREE,
                                                 V4SF_type_node,
                                                 endlink)));
  tree v4si_ftype_v4sf_v4sf
    = build_function_type (V4SI_type_node,
                           tree_cons (NULL_TREE, V4SF_type_node,
                                      tree_cons (NULL_TREE,
                                                 V4SF_type_node,
                                                 endlink)));
  /* MMX/SSE/integer conversions.  */
  tree int_ftype_v4sf_int
    = build_function_type (integer_type_node,
                           tree_cons (NULL_TREE, V4SF_type_node,
                                      tree_cons (NULL_TREE,
                                                 integer_type_node,
                                                 endlink)));
  tree int_ftype_v4sf
    = build_function_type (integer_type_node,
                           tree_cons (NULL_TREE, V4SF_type_node,
                                      endlink));
  tree int_ftype_v8qi
    = build_function_type (integer_type_node,
                           tree_cons (NULL_TREE, V8QI_type_node,
                                      endlink));
  tree int_ftype_v2si
    = build_function_type (integer_type_node,
                           tree_cons (NULL_TREE, V2SI_type_node,
                                      endlink));
  tree v2si_ftype_int
    = build_function_type (V2SI_type_node,
                           tree_cons (NULL_TREE, integer_type_node,
                                      endlink));
  tree v4sf_ftype_v4sf_int
    = build_function_type (V4SF_type_node,
                           tree_cons (NULL_TREE, V4SF_type_node,
                                      tree_cons (NULL_TREE, integer_type_node,
                                                 endlink)));
  tree v4sf_ftype_v4sf_v2si
    = build_function_type (V4SF_type_node,
                           tree_cons (NULL_TREE, V4SF_type_node,
                                      tree_cons (NULL_TREE, V2SI_type_node,
                                                 endlink)));
  tree int_ftype_v4hi_int
    = build_function_type (integer_type_node,
                           tree_cons (NULL_TREE, V4HI_type_node,
                                      tree_cons (NULL_TREE, integer_type_node,
                                                 endlink)));
  tree v4hi_ftype_v4hi_int_int
    = build_function_type (V4HI_type_node,
                           tree_cons (NULL_TREE, V4HI_type_node,
                                      tree_cons (NULL_TREE, integer_type_node,
                                                 tree_cons (NULL_TREE,
                                                            integer_type_node,
                                                            endlink))));
  /* Miscellaneous.  */
  tree v8qi_ftype_v4hi_v4hi
    = build_function_type (V8QI_type_node,
                           tree_cons (NULL_TREE, V4HI_type_node,
                                      tree_cons (NULL_TREE, V4HI_type_node,
                                                 endlink)));
  tree v4hi_ftype_v2si_v2si
    = build_function_type (V4HI_type_node,
                           tree_cons (NULL_TREE, V2SI_type_node,
                                      tree_cons (NULL_TREE, V2SI_type_node,
                                                 endlink)));
  tree v4sf_ftype_v4sf_v4sf_int
    = build_function_type (V4SF_type_node,
                           tree_cons (NULL_TREE, V4SF_type_node,
                                      tree_cons (NULL_TREE, V4SF_type_node,
                                                 tree_cons (NULL_TREE,
                                                            integer_type_node,
                                                            endlink))));
  tree v4hi_ftype_v8qi_v8qi
    = build_function_type (V4HI_type_node,
                           tree_cons (NULL_TREE, V8QI_type_node,
                                      tree_cons (NULL_TREE, V8QI_type_node,
                                                 endlink)));
  tree v2si_ftype_v4hi_v4hi
    = build_function_type (V2SI_type_node,
                           tree_cons (NULL_TREE, V4HI_type_node,
                                      tree_cons (NULL_TREE, V4HI_type_node,
                                                 endlink)));
  tree v4hi_ftype_v4hi_int
    = build_function_type (V4HI_type_node,
                           tree_cons (NULL_TREE, V4HI_type_node,
                                      tree_cons (NULL_TREE, integer_type_node,
                                                 endlink)));
  tree di_ftype_di_int
    = build_function_type (long_long_unsigned_type_node,
                           tree_cons (NULL_TREE, long_long_unsigned_type_node,
                                      tree_cons (NULL_TREE, integer_type_node,
                                                 endlink)));
  tree v8qi_ftype_v8qi_di
    = build_function_type (V8QI_type_node,
                           tree_cons (NULL_TREE, V8QI_type_node,
                                      tree_cons (NULL_TREE,
                                                 long_long_integer_type_node,
                                                 endlink)));
  tree v4hi_ftype_v4hi_di
    = build_function_type (V4HI_type_node,
                           tree_cons (NULL_TREE, V4HI_type_node,
                                      tree_cons (NULL_TREE,
                                                 long_long_integer_type_node,
                                                 endlink)));
  tree v2si_ftype_v2si_di
    = build_function_type (V2SI_type_node,
                           tree_cons (NULL_TREE, V2SI_type_node,
                                      tree_cons (NULL_TREE,
                                                 long_long_integer_type_node,
                                                 endlink)));
  tree void_ftype_void
    = build_function_type (void_type_node, endlink);
  tree void_ftype_pchar_int
    = build_function_type (void_type_node,
                           tree_cons (NULL_TREE, pchar_type_node,
                                      tree_cons (NULL_TREE, integer_type_node,
                                                 endlink)));
  tree void_ftype_unsigned
    = build_function_type (void_type_node,
                           tree_cons (NULL_TREE, unsigned_type_node,
                                      endlink));
  tree unsigned_ftype_void
    = build_function_type (unsigned_type_node, endlink);
  tree di_ftype_void
    = build_function_type (long_long_unsigned_type_node, endlink);
  tree ti_ftype_void
    = build_function_type (intTI_type_node, endlink);
  tree v2si_ftype_v4sf
    = build_function_type (V2SI_type_node,
                           tree_cons (NULL_TREE, V4SF_type_node,
                                      endlink));
  /* Loads/stores.  */
  tree maskmovq_args = tree_cons (NULL_TREE, V8QI_type_node,
                                  tree_cons (NULL_TREE, V8QI_type_node,
                                             tree_cons (NULL_TREE,
                                                        pchar_type_node,
                                                        endlink)));
  tree void_ftype_v8qi_v8qi_pchar
    = build_function_type (void_type_node, maskmovq_args);
  tree v4sf_ftype_pfloat
    = build_function_type (V4SF_type_node,
                           tree_cons (NULL_TREE, pfloat_type_node,
                                      endlink));
  tree v4sf_ftype_float
    = build_function_type (V4SF_type_node,
                           tree_cons (NULL_TREE, float_type_node,
                                      endlink));
  tree v4sf_ftype_float_float_float_float
    = build_function_type (V4SF_type_node,
                           tree_cons (NULL_TREE, float_type_node,
                                      tree_cons (NULL_TREE, float_type_node,
                                                 tree_cons (NULL_TREE,
                                                            float_type_node,
                                                            tree_cons (NULL_TREE,
                                                                       float_type_node,
                                                                       endlink)))));
  /* @@@ the type is bogus */
  tree v4sf_ftype_v4sf_pv2si
    = build_function_type (V4SF_type_node,
                           tree_cons (NULL_TREE, V4SF_type_node,
                                      tree_cons (NULL_TREE, pv2si_type_node,
                                                 endlink)));
  tree v4sf_ftype_pv2si_v4sf
    = build_function_type (V4SF_type_node,
                           tree_cons (NULL_TREE, pv2si_type_node,
                                      tree_cons (NULL_TREE, V4SF_type_node,
                                                 endlink)));
  tree void_ftype_pfloat_v4sf
    = build_function_type (void_type_node,
                           tree_cons (NULL_TREE, pfloat_type_node,
                                      tree_cons (NULL_TREE, V4SF_type_node,
                                                 endlink)));
  tree void_ftype_pdi_di
    = build_function_type (void_type_node,
                           tree_cons (NULL_TREE, pdi_type_node,
                                      tree_cons (NULL_TREE,
                                                 long_long_unsigned_type_node,
                                                 endlink)));
  /* Normal vector unops.  */
  tree v4sf_ftype_v4sf
    = build_function_type (V4SF_type_node,
                           tree_cons (NULL_TREE, V4SF_type_node,
                                      endlink));

  /* Normal vector binops.  */
  tree v4sf_ftype_v4sf_v4sf
    = build_function_type (V4SF_type_node,
                           tree_cons (NULL_TREE, V4SF_type_node,
                                      tree_cons (NULL_TREE, V4SF_type_node,
                                                 endlink)));
  tree v8qi_ftype_v8qi_v8qi
    = build_function_type (V8QI_type_node,
                           tree_cons (NULL_TREE, V8QI_type_node,
                                      tree_cons (NULL_TREE, V8QI_type_node,
                                                 endlink)));
  tree v4hi_ftype_v4hi_v4hi
    = build_function_type (V4HI_type_node,
                           tree_cons (NULL_TREE, V4HI_type_node,
                                      tree_cons (NULL_TREE, V4HI_type_node,
                                                 endlink)));
  tree v2si_ftype_v2si_v2si
    = build_function_type (V2SI_type_node,
                           tree_cons (NULL_TREE, V2SI_type_node,
                                      tree_cons (NULL_TREE, V2SI_type_node,
                                                 endlink)));
  tree ti_ftype_ti_ti
    = build_function_type (intTI_type_node,
                           tree_cons (NULL_TREE, intTI_type_node,
                                      tree_cons (NULL_TREE, intTI_type_node,
                                                 endlink)));
  tree di_ftype_di_di
    = build_function_type (long_long_unsigned_type_node,
                           tree_cons (NULL_TREE, long_long_unsigned_type_node,
                                      tree_cons (NULL_TREE,
                                                 long_long_unsigned_type_node,
                                                 endlink)));
  /* Add all builtins that are more or less simple operations on two
     operands.  */
  for (i = 0, d = bdesc_2arg; i < sizeof (bdesc_2arg) / sizeof *d; i++, d++)
    {
      /* Use one of the operands; the target can have a different mode for
         mask-generating compares.  */
      enum machine_mode mode;
      tree type;

      if (d->name == 0)
        continue;
      mode = insn_data[d->icode].operand[1].mode;

      if (! TARGET_SSE && ! VALID_MMX_REG_MODE (mode))
        continue;

      switch (mode)
        {
        case V4SFmode:
          type = v4sf_ftype_v4sf_v4sf;
          break;
        case V8QImode:
          type = v8qi_ftype_v8qi_v8qi;
          break;
        case V4HImode:
          type = v4hi_ftype_v4hi_v4hi;
          break;
        case V2SImode:
          type = v2si_ftype_v2si_v2si;
          break;
        case TImode:
          type = ti_ftype_ti_ti;
          break;
        case DImode:
          type = di_ftype_di_di;
          break;

        default:
          abort ();
        }

      /* Override for comparisons.  */
      if (d->icode == CODE_FOR_maskcmpv4sf3
          || d->icode == CODE_FOR_maskncmpv4sf3
          || d->icode == CODE_FOR_vmmaskcmpv4sf3
          || d->icode == CODE_FOR_vmmaskncmpv4sf3)
        type = v4si_ftype_v4sf_v4sf;

      def_builtin (d->name, type, d->code);
    }
  /* Add the remaining MMX insns with somewhat more complicated types.  */
  def_builtin ("__builtin_ia32_m_from_int", v2si_ftype_int, IX86_BUILTIN_M_FROM_INT);
  def_builtin ("__builtin_ia32_m_to_int", int_ftype_v2si, IX86_BUILTIN_M_TO_INT);
  def_builtin ("__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO);
  def_builtin ("__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
  def_builtin ("__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
  def_builtin ("__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
  def_builtin ("__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
  def_builtin ("__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
  def_builtin ("__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);

  def_builtin ("__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
  def_builtin ("__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
  def_builtin ("__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);

  def_builtin ("__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
  def_builtin ("__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);

  def_builtin ("__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
  def_builtin ("__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
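
  /* Illustrative sketch, not part of the original file: the non-immediate
     shift builtins defined above take their count as a DImode value (an
     MMX register operand in hardware), hence the *_di types.  Guarded
     out; the typedef spelling is an assumption.  */
#if 0
  typedef int __v4hi __attribute__ ((mode (V4HI)));

  static __v4hi
  example_psllw (__v4hi a, long long count)
  {
    /* v4hi_ftype_v4hi_di: the count is a full 64-bit value, not an
       immediate; IX86_BUILTIN_PSLLWI covers the immediate form.  */
    return __builtin_ia32_psllw (a, count);
  }
#endif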
  /* Everything beyond this point is SSE only.  */
  if (! TARGET_SSE)
    return;

  /* comi/ucomi insns.  */
  for (i = 0, d = bdesc_comi; i < sizeof (bdesc_comi) / sizeof *d; i++, d++)
    def_builtin (d->name, int_ftype_v4sf_v4sf, d->code);
  def_builtin ("__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
  def_builtin ("__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
  def_builtin ("__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);

  def_builtin ("__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
  def_builtin ("__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
  def_builtin ("__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
  def_builtin ("__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
  def_builtin ("__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
  def_builtin ("__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);

  def_builtin ("__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
  def_builtin ("__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);

  def_builtin ("__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);

  def_builtin ("__builtin_ia32_loadaps", v4sf_ftype_pfloat, IX86_BUILTIN_LOADAPS);
  def_builtin ("__builtin_ia32_loadups", v4sf_ftype_pfloat, IX86_BUILTIN_LOADUPS);
  def_builtin ("__builtin_ia32_loadss", v4sf_ftype_pfloat, IX86_BUILTIN_LOADSS);
  def_builtin ("__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS);
  def_builtin ("__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
  def_builtin ("__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS);

  def_builtin ("__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
  def_builtin ("__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
  def_builtin ("__builtin_ia32_storehps", v4sf_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
  def_builtin ("__builtin_ia32_storelps", v4sf_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);

  def_builtin ("__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
  def_builtin ("__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
  def_builtin ("__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
  def_builtin ("__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);

  def_builtin ("__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
  def_builtin ("__builtin_ia32_prefetch", void_ftype_pchar_int, IX86_BUILTIN_PREFETCH);

  def_builtin ("__builtin_ia32_psadbw", v4hi_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);

  def_builtin ("__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
  def_builtin ("__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
  def_builtin ("__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
  def_builtin ("__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
  def_builtin ("__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
  def_builtin ("__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);

  def_builtin ("__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);

  /* Composite intrinsics.  */
  def_builtin ("__builtin_ia32_setps1", v4sf_ftype_float, IX86_BUILTIN_SETPS1);
  def_builtin ("__builtin_ia32_setps", v4sf_ftype_float_float_float_float, IX86_BUILTIN_SETPS);
  def_builtin ("__builtin_ia32_setzerops", ti_ftype_void, IX86_BUILTIN_CLRPS);
  def_builtin ("__builtin_ia32_loadps1", v4sf_ftype_pfloat, IX86_BUILTIN_LOADPS1);
  def_builtin ("__builtin_ia32_loadrps", v4sf_ftype_pfloat, IX86_BUILTIN_LOADRPS);
  def_builtin ("__builtin_ia32_storeps1", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREPS1);
  def_builtin ("__builtin_ia32_storerps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORERPS);
}
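
/* Illustrative sketch, not part of the original file: the "composite"
   intrinsics deliberately have no single-insn pattern; setps1, for
   instance, is open-coded in ix86_expand_builtin below as a loadss
   followed by a shufps with mask 0.  Guarded out; the typedef spelling
   is an assumption.  */
#if 0
typedef int __v4sf __attribute__ ((mode (V4SF)));

static __v4sf
example_setps1 (float f)
{
  /* Broadcasts F to all four elements; see the IX86_BUILTIN_SETPS1
     case below.  */
  return __builtin_ia32_setps1 (f);
}
#endif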
/* Errors in the source file can cause expand_expr to return const0_rtx
   where we expect a vector.  To avoid crashing, use one of the vector
   clear instructions.  */

static rtx
safe_vector_operand (x, mode)
     rtx x;
     enum machine_mode mode;
{
  if (x != const0_rtx)
    return x;
  x = gen_reg_rtx (mode);

  if (VALID_MMX_REG_MODE (mode))
    emit_insn (gen_mmx_clrdi (mode == DImode ? x
                              : gen_rtx_SUBREG (DImode, x, 0)));
  else
    emit_insn (gen_sse_clrti (mode == TImode ? x
                              : gen_rtx_SUBREG (TImode, x, 0)));
  return x;
}
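
/* Illustrative sketch, not part of the original file: the sort of
   already-diagnosed translation unit that reaches this routine with
   const0_rtx.  Deliberately invalid code, guarded out.  */
#if 0
static __v4sf
example_bad (void)
{
  /* UNDECLARED is an error; error recovery still expands the builtin,
     and its operand arrives here as const0_rtx, for which
     safe_vector_operand substitutes a cleared vector register.  */
  return __builtin_ia32_rcpps (UNDECLARED);
}
#endif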
/* Subroutine of ix86_expand_builtin to take care of binop insns.  */

static rtx
ix86_expand_binop_builtin (icode, arglist, target)
     enum insn_code icode;
     tree arglist;
     rtx target;
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[icode].operand[2].mode;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  if (! target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  /* In case the insn wants input operands in modes different from
     the result, abort.  */
  if (GET_MODE (op0) != mode0 || GET_MODE (op1) != mode1)
    abort ();

  if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
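
/* Illustrative sketch, not part of the original file: one two-operand
   builtin traced through this routine, using a bdesc_2arg entry shown
   above.  Guarded out; the typedef spelling is an assumption.  */
#if 0
typedef int __v8qi __attribute__ ((mode (V8QI)));

static __v8qi
example_pavgb (__v8qi a, __v8qi b)
{
  /* bdesc_2arg maps IX86_BUILTIN_PAVGB to CODE_FOR_mmx_uavgv8qi3;
     ix86_expand_binop_builtin checks both operand predicates and then
     emits that single insn.  */
  return __builtin_ia32_pavgb (a, b);
}
#endif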
/* Subroutine of ix86_expand_builtin to take care of stores.  */

static rtx
ix86_expand_store_builtin (icode, arglist, shuffle)
     enum insn_code icode;
     tree arglist;
     int shuffle;
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
  enum machine_mode mode0 = insn_data[icode].operand[0].mode;
  enum machine_mode mode1 = insn_data[icode].operand[1].mode;

  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
  if (shuffle >= 0 || ! (*insn_data[icode].operand[1].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);
  if (shuffle >= 0)
    emit_insn (gen_sse_shufps (op1, op1, op1, GEN_INT (shuffle)));
  pat = GEN_FCN (icode) (op0, op1);
  if (pat)
    emit_insn (pat);
  return 0;
}
/* Subroutine of ix86_expand_builtin to take care of unop insns.  */

static rtx
ix86_expand_unop_builtin (icode, arglist, target, do_load)
     enum insn_code icode;
     tree arglist;
     rtx target;
     int do_load;
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;

  if (! target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);
  if (do_load)
    op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
  else
    {
      if (VECTOR_MODE_P (mode0))
        op0 = safe_vector_operand (op0, mode0);

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
        op0 = copy_to_mode_reg (mode0, op0);
    }

  pat = GEN_FCN (icode) (target, op0);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
/* Subroutine of ix86_expand_builtin to take care of three special unop insns:
   sqrtss, rsqrtss, rcpss.  */

static rtx
ix86_expand_unop1_builtin (icode, arglist, target)
     enum insn_code icode;
     tree arglist;
     rtx target;
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;

  if (! target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);

  if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);

  pat = GEN_FCN (icode) (target, op0, op0);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
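
/* Explanatory note, not original text: op0 is passed twice above because
   the vm* scalar patterns have a second input whose upper three elements
   are merged into the result.  A scalar model of the rsqrtss case, under
   that assumption, guarded out:  */
#if 0
static void
model_rsqrtss (float dest[4], const float a[4], const float b[4])
{
  /* GEN_FCN (icode) (target, op0, op0) corresponds to a == b here.  */
  dest[0] = 1.0f / sqrtf (b[0]);  /* approximated in hardware */
  dest[1] = a[1];
  dest[2] = a[2];
  dest[3] = a[3];
}
#endif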
/* Subroutine of ix86_expand_builtin to take care of comparison insns.  */

static rtx
ix86_expand_sse_compare (d, arglist, target)
     struct builtin_description *d;
     tree arglist;
     rtx target;
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
  rtx op2;
  enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
  enum rtx_code comparison = d->comparison;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  /* Swap operands if we have a comparison that isn't available in
     hardware.  */
  if (d->flag)
    {
      target = gen_reg_rtx (tmode);
      emit_move_insn (target, op1);
      op1 = op0;
      op0 = target;
      comparison = swap_condition (comparison);
    }

  if (! target
      || GET_MODE (target) != tmode
      || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  if (! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
  pat = GEN_FCN (d->icode) (target, op0, op1, op2);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
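
/* Illustrative sketch, not part of the original file: the d->flag swap
   above is how comparisons without a direct hardware form are
   synthesized; a packed greater-than, for example, can be emitted as
   cmpltps with the operands exchanged.  It is assumed here that the
   cmpgt entry earlier in bdesc_2arg carries that flag.  Guarded out;
   the typedef spellings are assumptions.  */
#if 0
typedef int __v4sf __attribute__ ((mode (V4SF)));
typedef int __v4si __attribute__ ((mode (V4SI)));

static __v4si
example_cmpgtps (__v4sf a, __v4sf b)
{
  /* Expanded as b < a via the operand swap plus swap_condition.  */
  return __builtin_ia32_cmpgtps (a, b);
}
#endif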
/* Subroutine of ix86_expand_builtin to take care of comi insns.  */

static rtx
ix86_expand_sse_comi (d, arglist, target)
     struct builtin_description *d;
     tree arglist;
     rtx target;
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
  rtx op2;
  enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
  enum rtx_code comparison = d->comparison;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  /* Swap operands if we have a comparison that isn't available in
     hardware.  */
  if (d->flag)
    {
      rtx tmp = op1;
      op1 = op0;
      op0 = tmp;
      comparison = swap_condition (comparison);
    }

  target = gen_reg_rtx (SImode);
  emit_move_insn (target, const0_rtx);
  target = gen_rtx_SUBREG (QImode, target, 0);

  if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
  pat = GEN_FCN (d->icode) (op0, op1, op2);
  if (! pat)
    return 0;
  emit_insn (pat);
  emit_insn (gen_setcc_2 (target, op2));

  return SUBREG_REG (target);
}
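
/* Illustrative sketch, not part of the original file: the comi builtins
   registered from bdesc_comi all share int_ftype_v4sf_v4sf, and this
   routine materializes the flags result with a setcc into a QImode
   subreg.  The builtin name below is an assumption.  Guarded out.  */
#if 0
typedef int __v4sf __attribute__ ((mode (V4SF)));

static int
example_comilt (__v4sf a, __v4sf b)
{
  /* A comiss followed by a setcc; yields a zero/nonzero int.  */
  return __builtin_ia32_comilt (a, b);
}
#endif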
/* Expand an expression EXP that calls a built-in function,
   with result going to TARGET if that's convenient
   (and in mode MODE if that's convenient).
   SUBTARGET may be used as the target for computing one of EXP's operands.
   IGNORE is nonzero if the value is to be ignored.  */

rtx
ix86_expand_builtin (exp, target, subtarget, mode, ignore)
     tree exp;
     rtx target;
     rtx subtarget ATTRIBUTE_UNUSED;
     enum machine_mode mode ATTRIBUTE_UNUSED;
     int ignore ATTRIBUTE_UNUSED;
{
  struct builtin_description *d;
  size_t i;
  enum insn_code icode;
  tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
  tree arglist = TREE_OPERAND (exp, 1);
  tree arg0, arg1, arg2, arg3;
  rtx op0, op1, op2, pat;
  enum machine_mode tmode, mode0, mode1, mode2;
  int fcode = DECL_FUNCTION_CODE (fndecl);

  switch (fcode)
    {
    case IX86_BUILTIN_EMMS:
      emit_insn (gen_emms ());
      return 0;

    case IX86_BUILTIN_SFENCE:
      emit_insn (gen_sfence ());
      return 0;

    case IX86_BUILTIN_M_FROM_INT:
      target = gen_reg_rtx (DImode);
      op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
      emit_move_insn (gen_rtx_SUBREG (SImode, target, 0), op0);
      return target;

    case IX86_BUILTIN_M_TO_INT:
      op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
      op0 = copy_to_mode_reg (DImode, op0);
      target = gen_reg_rtx (SImode);
      emit_move_insn (target, gen_rtx_SUBREG (SImode, op0, 0));
      return target;
    case IX86_BUILTIN_PEXTRW:
      icode = CODE_FOR_mmx_pextrw;
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
        op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
        {
          /* @@@ better error message */
          error ("selector must be an immediate");
          return const0_rtx;
        }
      if (target == 0
          || GET_MODE (target) != tmode
          || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
        target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
      if (! pat)
        return 0;
      emit_insn (pat);
      return target;
    case IX86_BUILTIN_PINSRW:
      icode = CODE_FOR_mmx_pinsrw;
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;
      mode2 = insn_data[icode].operand[3].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
        op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
        op1 = copy_to_mode_reg (mode1, op1);
      if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
        {
          /* @@@ better error message */
          error ("selector must be an immediate");
          return const0_rtx;
        }
      if (target == 0
          || GET_MODE (target) != tmode
          || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
        target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1, op2);
      if (! pat)
        return 0;
      emit_insn (pat);
      return target;
    case IX86_BUILTIN_MASKMOVQ:
      icode = CODE_FOR_mmx_maskmovq;
      /* Note the arg order is different from the operand order.  */
      arg1 = TREE_VALUE (arglist);
      arg2 = TREE_VALUE (TREE_CHAIN (arglist));
      arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
      mode0 = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
        op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
        op1 = copy_to_mode_reg (mode1, op1);
      if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
        op2 = copy_to_mode_reg (mode2, op2);
      pat = GEN_FCN (icode) (op0, op1, op2);
      if (! pat)
        return 0;
      emit_insn (pat);
      return 0;
    case IX86_BUILTIN_SQRTSS:
      return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2, arglist, target);
    case IX86_BUILTIN_RSQRTSS:
      return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2, arglist, target);
    case IX86_BUILTIN_RCPSS:
      return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target);

    case IX86_BUILTIN_LOADAPS:
      return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);

    case IX86_BUILTIN_LOADUPS:
      return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);

    case IX86_BUILTIN_STOREAPS:
      return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist, -1);
    case IX86_BUILTIN_STOREUPS:
      return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist, -1);

    case IX86_BUILTIN_LOADSS:
      return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1);

    case IX86_BUILTIN_STORESS:
      return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist, -1);
    case IX86_BUILTIN_LOADHPS:
    case IX86_BUILTIN_LOADLPS:
      icode = (fcode == IX86_BUILTIN_LOADHPS
               ? CODE_FOR_sse_movhps : CODE_FOR_sse_movlps);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
        op0 = copy_to_mode_reg (mode0, op0);
      op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
      if (target == 0
          || GET_MODE (target) != tmode
          || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
        target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
      if (! pat)
        return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_STOREHPS:
    case IX86_BUILTIN_STORELPS:
      icode = (fcode == IX86_BUILTIN_STOREHPS
               ? CODE_FOR_sse_movhps : CODE_FOR_sse_movlps);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;

      op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
        op1 = copy_to_mode_reg (mode1, op1);

      pat = GEN_FCN (icode) (op0, op0, op1);
      if (! pat)
        return 0;
      emit_insn (pat);
      return 0;
    case IX86_BUILTIN_MOVNTPS:
      return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist, -1);
    case IX86_BUILTIN_MOVNTQ:
      return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist, -1);

    case IX86_BUILTIN_LDMXCSR:
      op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
      target = assign_386_stack_local (SImode, 0);
      emit_move_insn (target, op0);
      emit_insn (gen_ldmxcsr (target));
      return 0;

    case IX86_BUILTIN_STMXCSR:
      target = assign_386_stack_local (SImode, 0);
      emit_insn (gen_stmxcsr (target));
      return copy_to_mode_reg (SImode, target);

    case IX86_BUILTIN_PREFETCH:
      icode = CODE_FOR_prefetch;
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      mode0 = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
        {
          /* @@@ better error message */
          error ("selector must be an immediate");
          return const0_rtx;
        }

      op0 = copy_to_mode_reg (Pmode, op0);
      pat = GEN_FCN (icode) (op0, op1);
      if (! pat)
        return 0;
      emit_insn (pat);
      return target;
    case IX86_BUILTIN_SHUFPS:
      icode = CODE_FOR_sse_shufps;
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;
      mode2 = insn_data[icode].operand[3].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
        op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
        op1 = copy_to_mode_reg (mode1, op1);
      if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
        {
          /* @@@ better error message */
          error ("mask must be an immediate");
          return const0_rtx;
        }
      if (target == 0
          || GET_MODE (target) != tmode
          || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
        target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1, op2);
      if (! pat)
        return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_PSHUFW:
      icode = CODE_FOR_mmx_pshufw;
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[2].mode;
      mode1 = insn_data[icode].operand[3].mode;

      if (! (*insn_data[icode].operand[2].predicate) (op0, mode0))
        op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[3].predicate) (op1, mode1))
        {
          /* @@@ better error message */
          error ("mask must be an immediate");
          return const0_rtx;
        }
      if (target == 0
          || GET_MODE (target) != tmode
          || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
        target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, target, op0, op1);
      if (! pat)
        return 0;
      emit_insn (pat);
      return target;
      /* Composite intrinsics.  */
    case IX86_BUILTIN_SETPS1:
      target = assign_386_stack_local (SFmode, 0);
      arg0 = TREE_VALUE (arglist);
      emit_move_insn (change_address (target, SFmode, XEXP (target, 0)),
                      expand_expr (arg0, NULL_RTX, VOIDmode, 0));
      op0 = gen_reg_rtx (V4SFmode);
      emit_insn (gen_sse_loadss (op0, change_address (target, V4SFmode,
                                                      XEXP (target, 0))));
      emit_insn (gen_sse_shufps (op0, op0, op0, GEN_INT (0)));
      return op0;

    case IX86_BUILTIN_SETPS:
      target = assign_386_stack_local (V4SFmode, 0);
      op0 = change_address (target, SFmode, XEXP (target, 0));
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
      arg3 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist))));
      emit_move_insn (op0,
                      expand_expr (arg0, NULL_RTX, VOIDmode, 0));
      emit_move_insn (adj_offsettable_operand (op0, 4),
                      expand_expr (arg1, NULL_RTX, VOIDmode, 0));
      emit_move_insn (adj_offsettable_operand (op0, 8),
                      expand_expr (arg2, NULL_RTX, VOIDmode, 0));
      emit_move_insn (adj_offsettable_operand (op0, 12),
                      expand_expr (arg3, NULL_RTX, VOIDmode, 0));
      op0 = gen_reg_rtx (V4SFmode);
      emit_insn (gen_sse_movaps (op0, target));
      return op0;
    case IX86_BUILTIN_CLRPS:
      target = gen_reg_rtx (TImode);
      emit_insn (gen_sse_clrti (target));
      return target;

    case IX86_BUILTIN_LOADRPS:
      target = ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist,
                                         gen_reg_rtx (V4SFmode), 1);
      emit_insn (gen_sse_shufps (target, target, target, GEN_INT (0x1b)));
      return target;

    case IX86_BUILTIN_LOADPS1:
      target = ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist,
                                         gen_reg_rtx (V4SFmode), 1);
      emit_insn (gen_sse_shufps (target, target, target, const0_rtx));
      return target;

    case IX86_BUILTIN_STOREPS1:
      return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist, 0);
    case IX86_BUILTIN_STORERPS:
      return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist, 0x1B);

    case IX86_BUILTIN_MMX_ZERO:
      target = gen_reg_rtx (DImode);
      emit_insn (gen_mmx_clrdi (target));
      return target;

    default:
      break;
    }
  for (i = 0, d = bdesc_2arg; i < sizeof (bdesc_2arg) / sizeof *d; i++, d++)
    if (d->code == fcode)
      {
        /* Compares are treated specially.  */
        if (d->icode == CODE_FOR_maskcmpv4sf3
            || d->icode == CODE_FOR_vmmaskcmpv4sf3
            || d->icode == CODE_FOR_maskncmpv4sf3
            || d->icode == CODE_FOR_vmmaskncmpv4sf3)
          return ix86_expand_sse_compare (d, arglist, target);

        return ix86_expand_binop_builtin (d->icode, arglist, target);
      }

  for (i = 0, d = bdesc_1arg; i < sizeof (bdesc_1arg) / sizeof *d; i++, d++)
    if (d->code == fcode)
      return ix86_expand_unop_builtin (d->icode, arglist, target, 0);

  for (i = 0, d = bdesc_comi; i < sizeof (bdesc_comi) / sizeof *d; i++, d++)
    if (d->code == fcode)
      return ix86_expand_sse_comi (d, arglist, target);

  /* @@@ Should really do something sensible here.  */
  abort ();
}