1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001
3 Free Software Foundation, Inc.
5 This file is part of GNU CC.
7 GNU CC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
12 GNU CC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GNU CC; see the file COPYING. If not, write to
19 the Free Software Foundation, 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
28 #include "hard-reg-set.h"
30 #include "insn-config.h"
31 #include "conditions.h"
33 #include "insn-attr.h"
41 #include "basic-block.h"
44 #include "target-def.h"
46 #ifndef CHECK_STACK_LIMIT
47 #define CHECK_STACK_LIMIT -1
50 /* Processor costs (relative to an add) */
51 struct processor_costs i386_cost = { /* 386 specific costs */
52 1, /* cost of an add instruction */
53 1, /* cost of a lea instruction */
54 3, /* variable shift costs */
55 2, /* constant shift costs */
56 6, /* cost of starting a multiply */
57 1, /* cost of multiply per each bit set */
58 23, /* cost of a divide/mod */
59 15, /* "large" insn */
61 4, /* cost for loading QImode using movzbl */
62 {2, 4, 2}, /* cost of loading integer registers
63 in QImode, HImode and SImode.
64 Relative to reg-reg move (2). */
65 {2, 4, 2}, /* cost of storing integer registers */
66 2, /* cost of reg,reg fld/fst */
67 {8, 8, 8}, /* cost of loading fp registers
68 in SFmode, DFmode and XFmode */
69 {8, 8, 8}, /* cost of loading integer registers */
70 2, /* cost of moving MMX register */
71 {4, 8}, /* cost of loading MMX registers
72 in SImode and DImode */
73 {4, 8}, /* cost of storing MMX registers
74 in SImode and DImode */
75 2, /* cost of moving SSE register */
76 {4, 8, 16}, /* cost of loading SSE registers
77 in SImode, DImode and TImode */
78 {4, 8, 16}, /* cost of storing SSE registers
79 in SImode, DImode and TImode */
80 3, /* MMX or SSE register to integer */
83 struct processor_costs i486_cost = { /* 486 specific costs */
84 1, /* cost of an add instruction */
85 1, /* cost of a lea instruction */
86 3, /* variable shift costs */
87 2, /* constant shift costs */
88 12, /* cost of starting a multiply */
89 1, /* cost of multiply per each bit set */
90 40, /* cost of a divide/mod */
91 15, /* "large" insn */
93 4, /* cost for loading QImode using movzbl */
94 {2, 4, 2}, /* cost of loading integer registers
95 in QImode, HImode and SImode.
96 Relative to reg-reg move (2). */
97 {2, 4, 2}, /* cost of storing integer registers */
98 2, /* cost of reg,reg fld/fst */
99 {8, 8, 8}, /* cost of loading fp registers
100 in SFmode, DFmode and XFmode */
101 {8, 8, 8}, /* cost of loading integer registers */
102 2, /* cost of moving MMX register */
103 {4, 8}, /* cost of loading MMX registers
104 in SImode and DImode */
105 {4, 8}, /* cost of storing MMX registers
106 in SImode and DImode */
107 2, /* cost of moving SSE register */
108 {4, 8, 16}, /* cost of loading SSE registers
109 in SImode, DImode and TImode */
110 {4, 8, 16}, /* cost of storing SSE registers
111 in SImode, DImode and TImode */
112 3 /* MMX or SSE register to integer */
115 struct processor_costs pentium_cost = {
116 1, /* cost of an add instruction */
117 1, /* cost of a lea instruction */
118 4, /* variable shift costs */
119 1, /* constant shift costs */
120 11, /* cost of starting a multiply */
121 0, /* cost of multiply per each bit set */
122 25, /* cost of a divide/mod */
123 8, /* "large" insn */
125 6, /* cost for loading QImode using movzbl */
126 {2, 4, 2}, /* cost of loading integer registers
127 in QImode, HImode and SImode.
128 Relative to reg-reg move (2). */
129 {2, 4, 2}, /* cost of storing integer registers */
130 2, /* cost of reg,reg fld/fst */
131 {2, 2, 6}, /* cost of loading fp registers
132 in SFmode, DFmode and XFmode */
133 {4, 4, 6}, /* cost of loading integer registers */
134 8, /* cost of moving MMX register */
135 {8, 8}, /* cost of loading MMX registers
136 in SImode and DImode */
137 {8, 8}, /* cost of storing MMX registers
138 in SImode and DImode */
139 2, /* cost of moving SSE register */
140 {4, 8, 16}, /* cost of loading SSE registers
141 in SImode, DImode and TImode */
142 {4, 8, 16}, /* cost of storing SSE registers
143 in SImode, DImode and TImode */
144 3 /* MMX or SSE register to integer */
147 struct processor_costs pentiumpro_cost = {
148 1, /* cost of an add instruction */
149 1, /* cost of a lea instruction */
150 1, /* variable shift costs */
151 1, /* constant shift costs */
152 4, /* cost of starting a multiply */
153 0, /* cost of multiply per each bit set */
154 17, /* cost of a divide/mod */
155 8, /* "large" insn */
157 2, /* cost for loading QImode using movzbl */
158 {4, 4, 4}, /* cost of loading integer registers
159 in QImode, HImode and SImode.
160 Relative to reg-reg move (2). */
161 {2, 2, 2}, /* cost of storing integer registers */
162 2, /* cost of reg,reg fld/fst */
163 {2, 2, 6}, /* cost of loading fp registers
164 in SFmode, DFmode and XFmode */
165 {4, 4, 6}, /* cost of loading integer registers */
166 2, /* cost of moving MMX register */
167 {2, 2}, /* cost of loading MMX registers
168 in SImode and DImode */
169 {2, 2}, /* cost of storing MMX registers
170 in SImode and DImode */
171 2, /* cost of moving SSE register */
172 {2, 2, 8}, /* cost of loading SSE registers
173 in SImode, DImode and TImode */
174 {2, 2, 8}, /* cost of storing SSE registers
175 in SImode, DImode and TImode */
176 3 /* MMX or SSE register to integer */
179 struct processor_costs k6_cost = {
180 1, /* cost of an add instruction */
181 2, /* cost of a lea instruction */
182 1, /* variable shift costs */
183 1, /* constant shift costs */
184 3, /* cost of starting a multiply */
185 0, /* cost of multiply per each bit set */
186 18, /* cost of a divide/mod */
187 8, /* "large" insn */
189 3, /* cost for loading QImode using movzbl */
190 {4, 5, 4}, /* cost of loading integer registers
191 in QImode, HImode and SImode.
192 Relative to reg-reg move (2). */
193 {2, 3, 2}, /* cost of storing integer registers */
194 4, /* cost of reg,reg fld/fst */
195 {6, 6, 6}, /* cost of loading fp registers
196 in SFmode, DFmode and XFmode */
197 {4, 4, 4}, /* cost of loading integer registers */
198 2, /* cost of moving MMX register */
199 {2, 2}, /* cost of loading MMX registers
200 in SImode and DImode */
201 {2, 2}, /* cost of storing MMX registers
202 in SImode and DImode */
203 2, /* cost of moving SSE register */
204 {2, 2, 8}, /* cost of loading SSE registers
205 in SImode, DImode and TImode */
206 {2, 2, 8}, /* cost of storing SSE registers
207 in SImode, DImode and TImode */
208 6 /* MMX or SSE register to integer */
211 struct processor_costs athlon_cost = {
212 1, /* cost of an add instruction */
213 2, /* cost of a lea instruction */
214 1, /* variable shift costs */
215 1, /* constant shift costs */
216 5, /* cost of starting a multiply */
217 0, /* cost of multiply per each bit set */
218 42, /* cost of a divide/mod */
219 8, /* "large" insn */
221 4, /* cost for loading QImode using movzbl */
222 {4, 5, 4}, /* cost of loading integer registers
223 in QImode, HImode and SImode.
224 Relative to reg-reg move (2). */
225 {2, 3, 2}, /* cost of storing integer registers */
226 4, /* cost of reg,reg fld/fst */
227 {6, 6, 20}, /* cost of loading fp registers
228 in SFmode, DFmode and XFmode */
229 {4, 4, 16}, /* cost of loading integer registers */
230 2, /* cost of moving MMX register */
231 {2, 2}, /* cost of loading MMX registers
232 in SImode and DImode */
233 {2, 2}, /* cost of storing MMX registers
234 in SImode and DImode */
235 2, /* cost of moving SSE register */
236 {2, 2, 8}, /* cost of loading SSE registers
237 in SImode, DImode and TImode */
238 {2, 2, 8}, /* cost of storing SSE registers
239 in SImode, DImode and TImode */
240 6 /* MMX or SSE register to integer */
243 struct processor_costs pentium4_cost = {
244 1, /* cost of an add instruction */
245 1, /* cost of a lea instruction */
246 8, /* variable shift costs */
247 8, /* constant shift costs */
248 30, /* cost of starting a multiply */
249 0, /* cost of multiply per each bit set */
250 112, /* cost of a divide/mod */
251 16, /* "large" insn */
253 2, /* cost for loading QImode using movzbl */
254 {4, 5, 4}, /* cost of loading integer registers
255 in QImode, HImode and SImode.
256 Relative to reg-reg move (2). */
257 {2, 3, 2}, /* cost of storing integer registers */
258 2, /* cost of reg,reg fld/fst */
259 {2, 2, 6}, /* cost of loading fp registers
260 in SFmode, DFmode and XFmode */
261 {4, 4, 6}, /* cost of loading integer registers */
262 2, /* cost of moving MMX register */
263 {2, 2}, /* cost of loading MMX registers
264 in SImode and DImode */
265 {2, 2}, /* cost of storing MMX registers
266 in SImode and DImode */
267 12, /* cost of moving SSE register */
268 {12, 12, 12}, /* cost of loading SSE registers
269 in SImode, DImode and TImode */
270 {2, 2, 8}, /* cost of storing SSE registers
271 in SImode, DImode and TImode */
272 10, /* MMX or SSE register to integer */
275 struct processor_costs *ix86_cost = &pentium_cost;
277 /* Processor feature/optimization bitmasks. */
278 #define m_386 (1<<PROCESSOR_I386)
279 #define m_486 (1<<PROCESSOR_I486)
280 #define m_PENT (1<<PROCESSOR_PENTIUM)
281 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
282 #define m_K6 (1<<PROCESSOR_K6)
283 #define m_ATHLON (1<<PROCESSOR_ATHLON)
284 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
286 const int x86_use_leave = m_386 | m_K6 | m_ATHLON;
287 const int x86_push_memory = m_386 | m_K6 | m_ATHLON | m_PENT4;
288 const int x86_zero_extend_with_and = m_486 | m_PENT;
289 const int x86_movx = m_ATHLON | m_PPRO | m_PENT4 /* m_386 | m_K6 */;
290 const int x86_double_with_add = ~m_386;
291 const int x86_use_bit_test = m_386;
292 const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON | m_K6;
293 const int x86_cmove = m_PPRO | m_ATHLON | m_PENT4;
294 const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON | m_PENT4;
295 const int x86_branch_hints = m_PENT4;
296 const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4;
297 const int x86_partial_reg_stall = m_PPRO;
298 const int x86_use_loop = m_K6;
299 const int x86_use_fiop = ~(m_PPRO | m_ATHLON | m_PENT);
300 const int x86_use_mov0 = m_K6;
301 const int x86_use_cltd = ~(m_PENT | m_K6);
302 const int x86_read_modify_write = ~m_PENT;
303 const int x86_read_modify = ~(m_PENT | m_PPRO);
304 const int x86_split_long_moves = m_PPRO;
305 const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486;
306 const int x86_single_stringop = m_386 | m_PENT4;
307 const int x86_qimode_math = ~(0);
308 const int x86_promote_qi_regs = 0;
309 const int x86_himode_math = ~(m_PPRO);
310 const int x86_promote_hi_regs = m_PPRO;
311 const int x86_sub_esp_4 = m_ATHLON | m_PPRO | m_PENT4;
312 const int x86_sub_esp_8 = m_ATHLON | m_PPRO | m_386 | m_486 | m_PENT4;
313 const int x86_add_esp_4 = m_ATHLON | m_K6 | m_PENT4;
314 const int x86_add_esp_8 = m_ATHLON | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4;
315 const int x86_integer_DFmode_moves = ~(m_ATHLON | m_PENT4);
316 const int x86_partial_reg_dependency = m_ATHLON | m_PENT4;
317 const int x86_memory_mismatch_stall = m_ATHLON | m_PENT4;
318 const int x86_accumulate_outgoing_args = m_ATHLON | m_PENT4 | m_PPRO;
319 const int x86_prologue_using_move = m_ATHLON | m_PENT4 | m_PPRO;
320 const int x86_epilogue_using_move = m_ATHLON | m_PENT4 | m_PPRO;
322 #define AT_BP(mode) (gen_rtx_MEM ((mode), hard_frame_pointer_rtx))
324 const char * const hi_reg_name[] = HI_REGISTER_NAMES;
325 const char * const qi_reg_name[] = QI_REGISTER_NAMES;
326 const char * const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
328 /* Array of the smallest class containing reg number REGNO, indexed by
329 REGNO. Used by REGNO_REG_CLASS in i386.h. */
331 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
334 AREG, DREG, CREG, BREG,
336 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
338 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
339 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
342 /* flags, fpsr, dirflag, frame */
343 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
344 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
346 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
348 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
349 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
350 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
354 /* The "default" register map used in 32bit mode. */
356 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
358 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
359 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
360 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
361 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
362 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
363 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
364 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
367 /* The "default" register map used in 64bit mode. */
368 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
370 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
371 33, 34, 35, 36, 37, 38, 39, 40 /* fp regs */
372 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
373 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
374 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
375 8,9,10,11,12,13,14,15, /* extended integer registers */
376 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
379 /* Define the register numbers to be used in Dwarf debugging information.
380 The SVR4 reference port C compiler uses the following register numbers
381 in its Dwarf output code:
382 0 for %eax (gcc regno = 0)
383 1 for %ecx (gcc regno = 2)
384 2 for %edx (gcc regno = 1)
385 3 for %ebx (gcc regno = 3)
386 4 for %esp (gcc regno = 7)
387 5 for %ebp (gcc regno = 6)
388 6 for %esi (gcc regno = 4)
389 7 for %edi (gcc regno = 5)
390 The following three DWARF register numbers are never generated by
391 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
392 believes these numbers have these meanings.
393 8 for %eip (no gcc equivalent)
394 9 for %eflags (gcc regno = 17)
395 10 for %trapno (no gcc equivalent)
396 It is not at all clear how we should number the FP stack registers
397 for the x86 architecture. If the version of SDB on x86/svr4 were
398 a bit less brain dead with respect to floating-point then we would
399 have a precedent to follow with respect to DWARF register numbers
400 for x86 FP registers, but the SDB on x86/svr4 is so completely
401 broken with respect to FP registers that it is hardly worth thinking
402 of it as something to strive for compatibility with.
403 The version of x86/svr4 SDB I have at the moment does (partially)
404 seem to believe that DWARF register number 11 is associated with
405 the x86 register %st(0), but that's about all. Higher DWARF
406 register numbers don't seem to be associated with anything in
407 particular, and even for DWARF regno 11, SDB only seems to under-
408 stand that it should say that a variable lives in %st(0) (when
409 asked via an `=' command) if we said it was in DWARF regno 11,
410 but SDB still prints garbage when asked for the value of the
411 variable in question (via a `/' command).
412 (Also note that the labels SDB prints for various FP stack regs
413 when doing an `x' command are all wrong.)
414 Note that these problems generally don't affect the native SVR4
415 C compiler because it doesn't allow the use of -O with -g and
416 because when it is *not* optimizing, it allocates a memory
417 location for each floating-point variable, and the memory
418 location is what gets described in the DWARF AT_location
419 attribute for the variable in question.
420 Regardless of the severe mental illness of the x86/svr4 SDB, we
421 do something sensible here and we use the following DWARF
422 register numbers. Note that these are all stack-top-relative
424 11 for %st(0) (gcc regno = 8)
425 12 for %st(1) (gcc regno = 9)
426 13 for %st(2) (gcc regno = 10)
427 14 for %st(3) (gcc regno = 11)
428 15 for %st(4) (gcc regno = 12)
429 16 for %st(5) (gcc regno = 13)
430 17 for %st(6) (gcc regno = 14)
431 18 for %st(7) (gcc regno = 15)
433 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
435 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
436 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
437 -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
438 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
439 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
440 -1, -1, -1, -1, -1, -1, -1, -1, /* extemded integer registers */
441 -1, -1, -1, -1, -1, -1, -1, -1, /* extemded SSE registers */
444 /* Test and compare insns in i386.md store the information needed to
445 generate branch and scc insns here. */
447 struct rtx_def *ix86_compare_op0 = NULL_RTX;
448 struct rtx_def *ix86_compare_op1 = NULL_RTX;
450 #define MAX_386_STACK_LOCALS 3
451 /* Size of the register save area. */
452 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
454 /* Define the structure for the machine field in struct function. */
455 struct machine_function
457 rtx stack_locals[(int) MAX_MACHINE_MODE][MAX_386_STACK_LOCALS];
458 int save_varrargs_registers;
459 int accesses_prev_frame;
462 #define ix86_stack_locals (cfun->machine->stack_locals)
463 #define ix86_save_varrargs_registers (cfun->machine->save_varrargs_registers)
465 /* Structure describing stack frame layout.
466 Stack grows downward:
472 saved frame pointer if frame_pointer_needed
473 <- HARD_FRAME_POINTER
479 > to_allocate <- FRAME_POINTER
491 int outgoing_arguments_size;
494 HOST_WIDE_INT to_allocate;
495 /* The offsets relative to ARG_POINTER. */
496 HOST_WIDE_INT frame_pointer_offset;
497 HOST_WIDE_INT hard_frame_pointer_offset;
498 HOST_WIDE_INT stack_pointer_offset;
501 /* Code model option as passed by user. */
502 const char *ix86_cmodel_string;
504 enum cmodel ix86_cmodel;
506 /* which cpu are we scheduling for */
507 enum processor_type ix86_cpu;
509 /* which instruction set architecture to use. */
512 /* Strings to hold which cpu and instruction set architecture to use. */
513 const char *ix86_cpu_string; /* for -mcpu=<xxx> */
514 const char *ix86_arch_string; /* for -march=<xxx> */
516 /* # of registers to use to pass arguments. */
517 const char *ix86_regparm_string;
519 /* ix86_regparm_string as a number */
522 /* Alignment to use for loops and jumps: */
524 /* Power of two alignment for loops. */
525 const char *ix86_align_loops_string;
527 /* Power of two alignment for non-loop jumps. */
528 const char *ix86_align_jumps_string;
530 /* Power of two alignment for stack boundary in bytes. */
531 const char *ix86_preferred_stack_boundary_string;
533 /* Preferred alignment for stack boundary in bits. */
534 int ix86_preferred_stack_boundary;
536 /* Values 1-5: see jump.c */
537 int ix86_branch_cost;
538 const char *ix86_branch_cost_string;
540 /* Power of two alignment for functions. */
541 const char *ix86_align_funcs_string;
543 static void output_pic_addr_const PARAMS ((FILE *, rtx, int));
544 static void put_condition_code PARAMS ((enum rtx_code, enum machine_mode,
546 static rtx ix86_expand_int_compare PARAMS ((enum rtx_code, rtx, rtx));
547 static enum rtx_code ix86_prepare_fp_compare_args PARAMS ((enum rtx_code,
549 static rtx gen_push PARAMS ((rtx));
550 static int memory_address_length PARAMS ((rtx addr));
551 static int ix86_flags_dependant PARAMS ((rtx, rtx, enum attr_type));
552 static int ix86_agi_dependant PARAMS ((rtx, rtx, enum attr_type));
553 static int ix86_safe_length PARAMS ((rtx));
554 static enum attr_memory ix86_safe_memory PARAMS ((rtx));
555 static enum attr_pent_pair ix86_safe_pent_pair PARAMS ((rtx));
556 static enum attr_ppro_uops ix86_safe_ppro_uops PARAMS ((rtx));
557 static void ix86_dump_ppro_packet PARAMS ((FILE *));
558 static void ix86_reorder_insn PARAMS ((rtx *, rtx *));
559 static rtx * ix86_pent_find_pair PARAMS ((rtx *, rtx *, enum attr_pent_pair,
561 static void ix86_init_machine_status PARAMS ((struct function *));
562 static void ix86_mark_machine_status PARAMS ((struct function *));
563 static void ix86_free_machine_status PARAMS ((struct function *));
564 static int ix86_split_to_parts PARAMS ((rtx, rtx *, enum machine_mode));
565 static int ix86_safe_length_prefix PARAMS ((rtx));
566 static int ix86_nsaved_regs PARAMS((void));
567 static void ix86_emit_save_regs PARAMS((void));
568 static void ix86_emit_save_regs_using_mov PARAMS ((rtx, HOST_WIDE_INT));
569 static void ix86_emit_restore_regs_using_mov PARAMS ((rtx, int, int));
570 static void ix86_set_move_mem_attrs_1 PARAMS ((rtx, rtx, rtx, rtx, rtx));
571 static void ix86_sched_reorder_pentium PARAMS((rtx *, rtx *));
572 static void ix86_sched_reorder_ppro PARAMS((rtx *, rtx *));
573 static HOST_WIDE_INT ix86_GOT_alias_set PARAMS ((void));
574 static void ix86_adjust_counter PARAMS ((rtx, HOST_WIDE_INT));
575 static rtx ix86_expand_aligntest PARAMS ((rtx, int));
576 static void ix86_expand_strlensi_unroll_1 PARAMS ((rtx, rtx));
580 rtx base, index, disp;
584 static int ix86_decompose_address PARAMS ((rtx, struct ix86_address *));
586 struct builtin_description;
587 static rtx ix86_expand_sse_comi PARAMS ((struct builtin_description *, tree,
589 static rtx ix86_expand_sse_compare PARAMS ((struct builtin_description *, tree,
591 static rtx ix86_expand_unop1_builtin PARAMS ((enum insn_code, tree, rtx));
592 static rtx ix86_expand_unop_builtin PARAMS ((enum insn_code, tree, rtx, int));
593 static rtx ix86_expand_binop_builtin PARAMS ((enum insn_code, tree, rtx));
594 static rtx ix86_expand_store_builtin PARAMS ((enum insn_code, tree, int));
595 static rtx safe_vector_operand PARAMS ((rtx, enum machine_mode));
596 static enum rtx_code ix86_fp_compare_code_to_integer PARAMS ((enum rtx_code));
597 static void ix86_fp_comparison_codes PARAMS ((enum rtx_code code,
601 static rtx ix86_expand_fp_compare PARAMS ((enum rtx_code, rtx, rtx, rtx,
603 static int ix86_fp_comparison_arithmetics_cost PARAMS ((enum rtx_code code));
604 static int ix86_fp_comparison_fcomi_cost PARAMS ((enum rtx_code code));
605 static int ix86_fp_comparison_sahf_cost PARAMS ((enum rtx_code code));
606 static int ix86_fp_comparison_cost PARAMS ((enum rtx_code code));
607 static int ix86_save_reg PARAMS ((int, int));
608 static void ix86_compute_frame_layout PARAMS ((struct ix86_frame *));
609 static int ix86_comp_type_attributes PARAMS ((tree, tree));
611 #ifdef DO_GLOBAL_CTORS_BODY
612 static void ix86_svr3_asm_out_constructor PARAMS ((rtx, int));
614 #if defined(TARGET_ELF) && defined(TARGET_COFF)
615 static void sco_asm_named_section PARAMS ((const char *, unsigned int));
616 static void sco_asm_out_constructor PARAMS ((rtx, int));
619 /* Initialize the GCC target structure. */
620 #undef TARGET_VALID_TYPE_ATTRIBUTE
621 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
622 # define TARGET_VALID_TYPE_ATTRIBUTE i386_pe_valid_type_attribute_p
623 # undef TARGET_VALID_DECL_ATTRIBUTE
624 # define TARGET_VALID_DECL_ATTRIBUTE i386_pe_valid_decl_attribute_p
625 # undef TARGET_MERGE_DECL_ATTRIBUTES
626 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
628 # define TARGET_VALID_TYPE_ATTRIBUTE ix86_valid_type_attribute_p
631 #undef TARGET_COMP_TYPE_ATTRIBUTES
632 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
634 #undef TARGET_INIT_BUILTINS
635 #define TARGET_INIT_BUILTINS ix86_init_builtins
637 #undef TARGET_EXPAND_BUILTIN
638 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
640 #if defined (OSF_OS) || defined (TARGET_OSF1ELF)
641 static void ix86_osf_output_function_prologue PARAMS ((FILE *,
643 # undef TARGET_ASM_FUNCTION_PROLOGUE
644 # define TARGET_ASM_FUNCTION_PROLOGUE ix86_osf_output_function_prologue
647 #undef TARGET_ASM_OPEN_PAREN
648 #define TARGET_ASM_OPEN_PAREN ""
649 #undef TARGET_ASM_CLOSE_PAREN
650 #define TARGET_ASM_CLOSE_PAREN ""
652 struct gcc_target targetm = TARGET_INITIALIZER;
654 /* Sometimes certain combinations of command options do not make
655 sense on a particular target machine. You can define a macro
656 `OVERRIDE_OPTIONS' to take account of this. This macro, if
657 defined, is executed once just after all the command options have
660 Don't use this macro to turn on various extra optimizations for
661 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
667 /* Comes from final.c -- no real reason to change it. */
668 #define MAX_CODE_ALIGN 16
672 struct processor_costs *cost; /* Processor costs */
673 int target_enable; /* Target flags to enable. */
674 int target_disable; /* Target flags to disable. */
675 int align_loop; /* Default alignments. */
680 const processor_target_table[PROCESSOR_max] =
682 {&i386_cost, 0, 0, 2, 2, 2, 1},
683 {&i486_cost, 0, 0, 4, 4, 4, 1},
684 {&pentium_cost, 0, 0, -4, -4, -4, 1},
685 {&pentiumpro_cost, 0, 0, 4, -4, 4, 1},
686 {&k6_cost, 0, 0, -5, -5, 4, 1},
687 {&athlon_cost, 0, 0, 4, -4, 4, 1},
688 {&pentium4_cost, 0, 0, 2, 2, 2, 1}
693 const char *name; /* processor name or nickname. */
694 enum processor_type processor;
696 const processor_alias_table[] =
698 {"i386", PROCESSOR_I386},
699 {"i486", PROCESSOR_I486},
700 {"i586", PROCESSOR_PENTIUM},
701 {"pentium", PROCESSOR_PENTIUM},
702 {"i686", PROCESSOR_PENTIUMPRO},
703 {"pentiumpro", PROCESSOR_PENTIUMPRO},
704 {"k6", PROCESSOR_K6},
705 {"athlon", PROCESSOR_ATHLON},
706 {"pentium4", PROCESSOR_PENTIUM4},
709 int const pta_size = sizeof (processor_alias_table) / sizeof (struct pta);
711 #ifdef SUBTARGET_OVERRIDE_OPTIONS
712 SUBTARGET_OVERRIDE_OPTIONS;
715 ix86_arch = PROCESSOR_I386;
716 ix86_cpu = (enum processor_type) TARGET_CPU_DEFAULT;
718 if (ix86_cmodel_string != 0)
720 if (!strcmp (ix86_cmodel_string, "small"))
721 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
723 sorry ("Code model %s not supported in PIC mode", ix86_cmodel_string);
724 else if (!strcmp (ix86_cmodel_string, "32"))
726 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
727 ix86_cmodel = CM_KERNEL;
728 else if (!strcmp (ix86_cmodel_string, "medium") && !flag_pic)
729 ix86_cmodel = CM_MEDIUM;
730 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
731 ix86_cmodel = CM_LARGE;
733 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
739 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
741 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
742 error ("Code model `%s' not supported in the %s bit mode.",
743 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
744 if (ix86_cmodel == CM_LARGE)
745 sorry ("Code model `large' not supported yet.");
746 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
747 sorry ("%i-bit mode not compiled in.",
748 (target_flags & MASK_64BIT) ? 64 : 32);
750 if (ix86_arch_string != 0)
752 for (i = 0; i < pta_size; i++)
753 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
755 ix86_arch = processor_alias_table[i].processor;
756 /* Default cpu tuning to the architecture. */
757 ix86_cpu = ix86_arch;
762 error ("bad value (%s) for -march= switch", ix86_arch_string);
765 if (ix86_cpu_string != 0)
767 for (i = 0; i < pta_size; i++)
768 if (! strcmp (ix86_cpu_string, processor_alias_table[i].name))
770 ix86_cpu = processor_alias_table[i].processor;
774 error ("bad value (%s) for -mcpu= switch", ix86_cpu_string);
777 ix86_cost = processor_target_table[ix86_cpu].cost;
778 target_flags |= processor_target_table[ix86_cpu].target_enable;
779 target_flags &= ~processor_target_table[ix86_cpu].target_disable;
781 /* Arrange to set up i386_stack_locals for all functions. */
782 init_machine_status = ix86_init_machine_status;
783 mark_machine_status = ix86_mark_machine_status;
784 free_machine_status = ix86_free_machine_status;
786 /* Validate -mregparm= value. */
787 if (ix86_regparm_string)
789 i = atoi (ix86_regparm_string);
790 if (i < 0 || i > REGPARM_MAX)
791 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
797 ix86_regparm = REGPARM_MAX;
799 /* If the user has provided any of the -malign-* options,
800 warn and use that value only if -falign-* is not set.
801 Remove this code in GCC 3.2 or later. */
802 if (ix86_align_loops_string)
804 warning ("-malign-loops is obsolete, use -falign-loops");
805 if (align_loops == 0)
807 i = atoi (ix86_align_loops_string);
808 if (i < 0 || i > MAX_CODE_ALIGN)
809 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
811 align_loops = 1 << i;
815 if (ix86_align_jumps_string)
817 warning ("-malign-jumps is obsolete, use -falign-jumps");
818 if (align_jumps == 0)
820 i = atoi (ix86_align_jumps_string);
821 if (i < 0 || i > MAX_CODE_ALIGN)
822 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
824 align_jumps = 1 << i;
828 if (ix86_align_funcs_string)
830 warning ("-malign-functions is obsolete, use -falign-functions");
831 if (align_functions == 0)
833 i = atoi (ix86_align_funcs_string);
834 if (i < 0 || i > MAX_CODE_ALIGN)
835 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
837 align_functions = 1 << i;
841 /* Default align_* from the processor table. */
842 #define abs(n) (n < 0 ? -n : n)
843 if (align_loops == 0)
844 align_loops = 1 << abs (processor_target_table[ix86_cpu].align_loop);
845 if (align_jumps == 0)
846 align_jumps = 1 << abs (processor_target_table[ix86_cpu].align_jump);
847 if (align_functions == 0)
848 align_functions = 1 << abs (processor_target_table[ix86_cpu].align_func);
850 /* Validate -mpreferred-stack-boundary= value, or provide default.
851 The default of 128 bits is for Pentium III's SSE __m128. */
852 ix86_preferred_stack_boundary = 128;
853 if (ix86_preferred_stack_boundary_string)
855 i = atoi (ix86_preferred_stack_boundary_string);
856 if (i < (TARGET_64BIT ? 3 : 2) || i > 31)
857 error ("-mpreferred-stack-boundary=%d is not between %d and 31", i,
858 TARGET_64BIT ? 3 : 2);
860 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
863 /* Validate -mbranch-cost= value, or provide default. */
864 ix86_branch_cost = processor_target_table[ix86_cpu].branch_cost;
865 if (ix86_branch_cost_string)
867 i = atoi (ix86_branch_cost_string);
869 error ("-mbranch-cost=%d is not between 0 and 5", i);
871 ix86_branch_cost = i;
874 /* Keep nonleaf frame pointers. */
875 if (TARGET_OMIT_LEAF_FRAME_POINTER)
876 flag_omit_frame_pointer = 1;
878 /* If we're doing fast math, we don't care about comparison order
879 wrt NaNs. This lets us use a shorter comparison sequence. */
880 if (flag_unsafe_math_optimizations)
881 target_flags &= ~MASK_IEEE_FP;
883 /* It makes no sense to ask for just SSE builtins, so MMX is also turned
886 target_flags |= MASK_MMX;
888 if ((x86_accumulate_outgoing_args & CPUMASK)
889 && !(target_flags & MASK_NO_ACCUMULATE_OUTGOING_ARGS)
891 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
895 optimization_options (level, size)
897 int size ATTRIBUTE_UNUSED;
899 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
900 make the problem with not enough registers even worse. */
901 #ifdef INSN_SCHEDULING
903 flag_schedule_insns = 0;
907 /* Return nonzero if IDENTIFIER with arguments ARGS is a valid machine specific
908 attribute for TYPE. The attributes in ATTRIBUTES have previously been
912 ix86_valid_type_attribute_p (type, attributes, identifier, args)
914 tree attributes ATTRIBUTE_UNUSED;
918 if (TREE_CODE (type) != FUNCTION_TYPE
919 && TREE_CODE (type) != METHOD_TYPE
920 && TREE_CODE (type) != FIELD_DECL
921 && TREE_CODE (type) != TYPE_DECL)
924 /* Stdcall attribute says callee is responsible for popping arguments
925 if they are not variable. */
926 if (is_attribute_p ("stdcall", identifier)
928 return (args == NULL_TREE);
930 /* Cdecl attribute says the callee is a normal C declaration. */
931 if (is_attribute_p ("cdecl", identifier)
933 return (args == NULL_TREE);
935 /* Regparm attribute specifies how many integer arguments are to be
936 passed in registers. */
937 if (is_attribute_p ("regparm", identifier))
941 if (! args || TREE_CODE (args) != TREE_LIST
942 || TREE_CHAIN (args) != NULL_TREE
943 || TREE_VALUE (args) == NULL_TREE)
946 cst = TREE_VALUE (args);
947 if (TREE_CODE (cst) != INTEGER_CST)
950 if (compare_tree_int (cst, REGPARM_MAX) > 0)
959 #if defined (OSF_OS) || defined (TARGET_OSF1ELF)
961 /* Generate the assembly code for function entry. FILE is a stdio
962 stream to output the code to. SIZE is an int: how many units of
963 temporary storage to allocate.
965 Refer to the array `regs_ever_live' to determine which registers to
966 save; `regs_ever_live[I]' is nonzero if register number I is ever
967 used in the function. This function is responsible for knowing
968 which registers should not be saved even if used.
970 We override it here to allow for the new profiling code to go before
971 the prologue and the old mcount code to go after the prologue (and
972 after %ebx has been set up for ELF shared library support). */
975 ix86_osf_output_function_prologue (file, size)
980 char *lprefix = LPREFIX;
981 int labelno = profile_label_no;
985 if (TARGET_UNDERSCORES)
988 if (profile_flag && OSF_PROFILE_BEFORE_PROLOGUE)
990 if (!flag_pic && !HALF_PIC_P ())
992 fprintf (file, "\tmovl $%sP%d,%%edx\n", lprefix, labelno);
993 fprintf (file, "\tcall *%s_mcount_ptr\n", prefix);
996 else if (HALF_PIC_P ())
1000 HALF_PIC_EXTERNAL ("_mcount_ptr");
1001 symref = HALF_PIC_PTR (gen_rtx_SYMBOL_REF (Pmode,
1004 fprintf (file, "\tmovl $%sP%d,%%edx\n", lprefix, labelno);
1005 fprintf (file, "\tmovl %s%s,%%eax\n", prefix,
1007 fprintf (file, "\tcall *(%%eax)\n");
1012 static int call_no = 0;
1014 fprintf (file, "\tcall %sPc%d\n", lprefix, call_no);
1015 fprintf (file, "%sPc%d:\tpopl %%eax\n", lprefix, call_no);
1016 fprintf (file, "\taddl $_GLOBAL_OFFSET_TABLE_+[.-%sPc%d],%%eax\n",
1017 lprefix, call_no++);
1018 fprintf (file, "\tleal %sP%d@GOTOFF(%%eax),%%edx\n",
1020 fprintf (file, "\tmovl %s_mcount_ptr@GOT(%%eax),%%eax\n",
1022 fprintf (file, "\tcall *(%%eax)\n");
1028 if (profile_flag && OSF_PROFILE_BEFORE_PROLOGUE)
1032 fprintf (file, "\tmovl $%sP%d,%%edx\n", lprefix, labelno);
1033 fprintf (file, "\tcall *%s_mcount_ptr\n", prefix);
1038 static int call_no = 0;
1040 fprintf (file, "\tcall %sPc%d\n", lprefix, call_no);
1041 fprintf (file, "%sPc%d:\tpopl %%eax\n", lprefix, call_no);
1042 fprintf (file, "\taddl $_GLOBAL_OFFSET_TABLE_+[.-%sPc%d],%%eax\n",
1043 lprefix, call_no++);
1044 fprintf (file, "\tleal %sP%d@GOTOFF(%%eax),%%edx\n",
1046 fprintf (file, "\tmovl %s_mcount_ptr@GOT(%%eax),%%eax\n",
1048 fprintf (file, "\tcall *(%%eax)\n");
1051 #endif /* !OSF_OS */
1053 function_prologue (file, size);
1056 #endif /* OSF_OS || TARGET_OSF1ELF */
1058 /* Return 0 if the attributes for two types are incompatible, 1 if they
1059 are compatible, and 2 if they are nearly compatible (which causes a
1060 warning to be generated). */
1063 ix86_comp_type_attributes (type1, type2)
1067 /* Check for mismatch of non-default calling convention. */
1068 const char *rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
1070 if (TREE_CODE (type1) != FUNCTION_TYPE)
1073 /* Check for mismatched return types (cdecl vs stdcall). */
1074 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
1075 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
1080 /* Value is the number of bytes of arguments automatically
1081 popped when returning from a subroutine call.
1082 FUNDECL is the declaration node of the function (as a tree),
1083 FUNTYPE is the data type of the function (as a tree),
1084 or for a library call it is an identifier node for the subroutine name.
1085 SIZE is the number of bytes of arguments passed on the stack.
1087 On the 80386, the RTD insn may be used to pop them if the number
1088 of args is fixed, but if the number is variable then the caller
1089 must pop them all. RTD can't be used for library calls now
1090 because the library is compiled with the Unix compiler.
1091 Use of RTD is a selectable option, since it is incompatible with
1092 standard Unix calling sequences. If the option is not selected,
1093 the caller must always pop the args.
1095 The attribute stdcall is equivalent to RTD on a per module basis. */
1098 ix86_return_pops_args (fundecl, funtype, size)
1103 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
1105 /* Cdecl functions override -mrtd, and never pop the stack. */
1106 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
1108 /* Stdcall functions will pop the stack if not variable args. */
1109 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype)))
1113 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
1114 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
1115 == void_type_node)))
1119 /* Lose any fake structure return argument. */
1120 if (aggregate_value_p (TREE_TYPE (funtype))
1122 return GET_MODE_SIZE (Pmode);
1127 /* Argument support functions. */
1129 /* Initialize a variable CUM of type CUMULATIVE_ARGS
1130 for a call to a function whose data type is FNTYPE.
1131 For a library call, FNTYPE is 0. */
1134 init_cumulative_args (cum, fntype, libname)
1135 CUMULATIVE_ARGS *cum; /* Argument info to initialize */
1136 tree fntype; /* tree ptr for function decl */
1137 rtx libname; /* SYMBOL_REF of library name or 0 */
1139 static CUMULATIVE_ARGS zero_cum;
1140 tree param, next_param;
1142 if (TARGET_DEBUG_ARG)
1144 fprintf (stderr, "\ninit_cumulative_args (");
1146 fprintf (stderr, "fntype code = %s, ret code = %s",
1147 tree_code_name[(int) TREE_CODE (fntype)],
1148 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
1150 fprintf (stderr, "no fntype");
1153 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
1158 /* Set up the number of registers to use for passing arguments. */
1159 cum->nregs = ix86_regparm;
1162 tree attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (fntype));
1165 cum->nregs = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1168 /* Determine if this function has variable arguments. This is
1169 indicated by the last argument being 'void_type_mode' if there
1170 are no variable arguments. If there are variable arguments, then
1171 we won't pass anything in registers */
1175 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
1176 param != 0; param = next_param)
1178 next_param = TREE_CHAIN (param);
1179 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
1184 if (TARGET_DEBUG_ARG)
1185 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
1190 /* Update the data in CUM to advance over an argument
1191 of mode MODE and data type TYPE.
1192 (TYPE is null for libcalls where that information may not be available.) */
1195 function_arg_advance (cum, mode, type, named)
1196 CUMULATIVE_ARGS *cum; /* current arg information */
1197 enum machine_mode mode; /* current arg mode */
1198 tree type; /* type of the argument or 0 if lib support */
1199 int named; /* whether or not the argument was named */
1202 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1203 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1205 if (TARGET_DEBUG_ARG)
1207 "function_adv (sz=%d, wds=%2d, nregs=%d, mode=%s, named=%d)\n\n",
1208 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
1209 if (TARGET_SSE && mode == TImode)
1211 cum->sse_words += words;
1212 cum->sse_nregs -= 1;
1213 cum->sse_regno += 1;
1214 if (cum->sse_nregs <= 0)
1222 cum->words += words;
1223 cum->nregs -= words;
1224 cum->regno += words;
1226 if (cum->nregs <= 0)
1235 /* Define where to put the arguments to a function.
1236 Value is zero to push the argument on the stack,
1237 or a hard register in which to store the argument.
1239 MODE is the argument's machine mode.
1240 TYPE is the data type of the argument (as a tree).
1241 This is null for libcalls where that information may
1243 CUM is a variable of type CUMULATIVE_ARGS which gives info about
1244 the preceding args and about the function being called.
1245 NAMED is nonzero if this argument is a named parameter
1246 (otherwise it is an extra parameter matching an ellipsis). */
1249 function_arg (cum, mode, type, named)
1250 CUMULATIVE_ARGS *cum; /* current arg information */
1251 enum machine_mode mode; /* current arg mode */
1252 tree type; /* type of the argument or 0 if lib support */
1253 int named; /* != 0 for normal args, == 0 for ... args */
1257 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1258 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1260 if (mode == VOIDmode)
1265 /* For now, pass fp/complex values on the stack. */
1274 if (words <= cum->nregs)
1275 ret = gen_rtx_REG (mode, cum->regno);
1279 ret = gen_rtx_REG (mode, cum->sse_regno);
1283 if (TARGET_DEBUG_ARG)
1286 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d",
1287 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
1290 fprintf (stderr, ", reg=%%e%s", reg_names[ REGNO(ret) ]);
1292 fprintf (stderr, ", stack");
1294 fprintf (stderr, " )\n");
1301 /* Return nonzero if OP is general operand representable on x86_64. */
1304 x86_64_general_operand (op, mode)
1306 enum machine_mode mode;
1309 return general_operand (op, mode);
1310 if (nonimmediate_operand (op, mode))
1312 return x86_64_sign_extended_value (op);
1315 /* Return nonzero if OP is general operand representable on x86_64
1316 as eighter sign extended or zero extended constant. */
1319 x86_64_szext_general_operand (op, mode)
1321 enum machine_mode mode;
1324 return general_operand (op, mode);
1325 if (nonimmediate_operand (op, mode))
1327 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
1330 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
1333 x86_64_nonmemory_operand (op, mode)
1335 enum machine_mode mode;
1338 return nonmemory_operand (op, mode);
1339 if (register_operand (op, mode))
1341 return x86_64_sign_extended_value (op);
1344 /* Return nonzero if OP is nonmemory operand acceptable by movabs patterns. */
1347 x86_64_movabs_operand (op, mode)
1349 enum machine_mode mode;
1351 if (!TARGET_64BIT || !flag_pic)
1352 return nonmemory_operand (op, mode);
1353 if (register_operand (op, mode) || x86_64_sign_extended_value (op))
1355 if (CONSTANT_P (op) && !symbolic_reference_mentioned_p (op))
1360 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
1363 x86_64_szext_nonmemory_operand (op, mode)
1365 enum machine_mode mode;
1368 return nonmemory_operand (op, mode);
1369 if (register_operand (op, mode))
1371 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
1374 /* Return nonzero if OP is immediate operand representable on x86_64. */
1377 x86_64_immediate_operand (op, mode)
1379 enum machine_mode mode;
1382 return immediate_operand (op, mode);
1383 return x86_64_sign_extended_value (op);
1386 /* Return nonzero if OP is immediate operand representable on x86_64. */
1389 x86_64_zext_immediate_operand (op, mode)
1391 enum machine_mode mode ATTRIBUTE_UNUSED;
1393 return x86_64_zero_extended_value (op);
1396 /* Return nonzero if OP is (const_int 1), else return zero. */
1399 const_int_1_operand (op, mode)
1401 enum machine_mode mode ATTRIBUTE_UNUSED;
1403 return (GET_CODE (op) == CONST_INT && INTVAL (op) == 1);
1406 /* Returns 1 if OP is either a symbol reference or a sum of a symbol
1407 reference and a constant. */
1410 symbolic_operand (op, mode)
1412 enum machine_mode mode ATTRIBUTE_UNUSED;
1414 switch (GET_CODE (op))
1422 if (GET_CODE (op) == SYMBOL_REF
1423 || GET_CODE (op) == LABEL_REF
1424 || (GET_CODE (op) == UNSPEC
1425 && XINT (op, 1) >= 6
1426 && XINT (op, 1) <= 7))
1428 if (GET_CODE (op) != PLUS
1429 || GET_CODE (XEXP (op, 1)) != CONST_INT)
1433 if (GET_CODE (op) == SYMBOL_REF
1434 || GET_CODE (op) == LABEL_REF)
1436 /* Only @GOTOFF gets offsets. */
1437 if (GET_CODE (op) != UNSPEC
1438 || XINT (op, 1) != 7)
1441 op = XVECEXP (op, 0, 0);
1442 if (GET_CODE (op) == SYMBOL_REF
1443 || GET_CODE (op) == LABEL_REF)
1452 /* Return true if the operand contains a @GOT or @GOTOFF reference. */
1455 pic_symbolic_operand (op, mode)
1457 enum machine_mode mode ATTRIBUTE_UNUSED;
1459 if (GET_CODE (op) == CONST)
1462 if (GET_CODE (op) == UNSPEC)
1464 if (GET_CODE (op) != PLUS
1465 || GET_CODE (XEXP (op, 1)) != CONST_INT)
1468 if (GET_CODE (op) == UNSPEC)
1474 /* Test for a valid operand for a call instruction. Don't allow the
1475 arg pointer register or virtual regs since they may decay into
1476 reg + const, which the patterns can't handle. */
1479 call_insn_operand (op, mode)
1481 enum machine_mode mode ATTRIBUTE_UNUSED;
1483 /* Disallow indirect through a virtual register. This leads to
1484 compiler aborts when trying to eliminate them. */
1485 if (GET_CODE (op) == REG
1486 && (op == arg_pointer_rtx
1487 || op == frame_pointer_rtx
1488 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
1489 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
1492 /* Disallow `call 1234'. Due to varying assembler lameness this
1493 gets either rejected or translated to `call .+1234'. */
1494 if (GET_CODE (op) == CONST_INT)
1497 /* Explicitly allow SYMBOL_REF even if pic. */
1498 if (GET_CODE (op) == SYMBOL_REF)
1501 /* Half-pic doesn't allow anything but registers and constants.
1502 We've just taken care of the later. */
1504 return register_operand (op, Pmode);
1506 /* Otherwise we can allow any general_operand in the address. */
1507 return general_operand (op, Pmode);
1511 constant_call_address_operand (op, mode)
1513 enum machine_mode mode ATTRIBUTE_UNUSED;
1515 if (GET_CODE (op) == CONST
1516 && GET_CODE (XEXP (op, 0)) == PLUS
1517 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
1518 op = XEXP (XEXP (op, 0), 0);
1519 return GET_CODE (op) == SYMBOL_REF;
1522 /* Match exactly zero and one. */
1525 const0_operand (op, mode)
1527 enum machine_mode mode;
1529 return op == CONST0_RTX (mode);
1533 const1_operand (op, mode)
1535 enum machine_mode mode ATTRIBUTE_UNUSED;
1537 return op == const1_rtx;
1540 /* Match 2, 4, or 8. Used for leal multiplicands. */
1543 const248_operand (op, mode)
1545 enum machine_mode mode ATTRIBUTE_UNUSED;
1547 return (GET_CODE (op) == CONST_INT
1548 && (INTVAL (op) == 2 || INTVAL (op) == 4 || INTVAL (op) == 8));
1551 /* True if this is a constant appropriate for an increment or decremenmt. */
1554 incdec_operand (op, mode)
1556 enum machine_mode mode ATTRIBUTE_UNUSED;
1558 /* On Pentium4, the inc and dec operations causes extra dependancy on flag
1559 registers, since carry flag is not set. */
1560 if (TARGET_PENTIUM4 && !optimize_size)
1562 return op == const1_rtx || op == constm1_rtx;
1565 /* Return nonzero if OP is acceptable as operand of DImode shift
1569 shiftdi_operand (op, mode)
1571 enum machine_mode mode ATTRIBUTE_UNUSED;
1574 return nonimmediate_operand (op, mode);
1576 return register_operand (op, mode);
1579 /* Return false if this is the stack pointer, or any other fake
1580 register eliminable to the stack pointer. Otherwise, this is
1583 This is used to prevent esp from being used as an index reg.
1584 Which would only happen in pathological cases. */
1587 reg_no_sp_operand (op, mode)
1589 enum machine_mode mode;
1592 if (GET_CODE (t) == SUBREG)
1594 if (t == stack_pointer_rtx || t == arg_pointer_rtx || t == frame_pointer_rtx)
1597 return register_operand (op, mode);
1601 mmx_reg_operand (op, mode)
1603 enum machine_mode mode ATTRIBUTE_UNUSED;
1605 return MMX_REG_P (op);
1608 /* Return false if this is any eliminable register. Otherwise
1612 general_no_elim_operand (op, mode)
1614 enum machine_mode mode;
1617 if (GET_CODE (t) == SUBREG)
1619 if (t == arg_pointer_rtx || t == frame_pointer_rtx
1620 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
1621 || t == virtual_stack_dynamic_rtx)
1624 && REGNO (t) >= FIRST_VIRTUAL_REGISTER
1625 && REGNO (t) <= LAST_VIRTUAL_REGISTER)
1628 return general_operand (op, mode);
1631 /* Return false if this is any eliminable register. Otherwise
1632 register_operand or const_int. */
1635 nonmemory_no_elim_operand (op, mode)
1637 enum machine_mode mode;
1640 if (GET_CODE (t) == SUBREG)
1642 if (t == arg_pointer_rtx || t == frame_pointer_rtx
1643 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
1644 || t == virtual_stack_dynamic_rtx)
1647 return GET_CODE (op) == CONST_INT || register_operand (op, mode);
1650 /* Return true if op is a Q_REGS class register. */
1653 q_regs_operand (op, mode)
1655 enum machine_mode mode;
1657 if (mode != VOIDmode && GET_MODE (op) != mode)
1659 if (GET_CODE (op) == SUBREG)
1660 op = SUBREG_REG (op);
1661 return QI_REG_P (op);
1664 /* Return true if op is a NON_Q_REGS class register. */
1667 non_q_regs_operand (op, mode)
1669 enum machine_mode mode;
1671 if (mode != VOIDmode && GET_MODE (op) != mode)
1673 if (GET_CODE (op) == SUBREG)
1674 op = SUBREG_REG (op);
1675 return NON_QI_REG_P (op);
1678 /* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS
1681 sse_comparison_operator (op, mode)
1683 enum machine_mode mode ATTRIBUTE_UNUSED;
1685 enum rtx_code code = GET_CODE (op);
1688 /* Operations supported directly. */
1698 /* These are equivalent to ones above in non-IEEE comparisons. */
1705 return !TARGET_IEEE_FP;
1710 /* Return 1 if OP is a valid comparison operator in valid mode. */
1712 ix86_comparison_operator (op, mode)
1714 enum machine_mode mode;
1716 enum machine_mode inmode;
1717 enum rtx_code code = GET_CODE (op);
1718 if (mode != VOIDmode && GET_MODE (op) != mode)
1720 if (GET_RTX_CLASS (code) != '<')
1722 inmode = GET_MODE (XEXP (op, 0));
1724 if (inmode == CCFPmode || inmode == CCFPUmode)
1726 enum rtx_code second_code, bypass_code;
1727 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
1728 return (bypass_code == NIL && second_code == NIL);
1735 if (inmode == CCmode || inmode == CCGCmode
1736 || inmode == CCGOCmode || inmode == CCNOmode)
1739 case LTU: case GTU: case LEU: case ORDERED: case UNORDERED: case GEU:
1740 if (inmode == CCmode)
1744 if (inmode == CCmode || inmode == CCGCmode || inmode == CCNOmode)
1752 /* Return 1 if OP is a comparison operator that can be issued by fcmov. */
1755 fcmov_comparison_operator (op, mode)
1757 enum machine_mode mode;
1759 enum machine_mode inmode;
1760 enum rtx_code code = GET_CODE (op);
1761 if (mode != VOIDmode && GET_MODE (op) != mode)
1763 if (GET_RTX_CLASS (code) != '<')
1765 inmode = GET_MODE (XEXP (op, 0));
1766 if (inmode == CCFPmode || inmode == CCFPUmode)
1768 enum rtx_code second_code, bypass_code;
1769 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
1770 if (bypass_code != NIL || second_code != NIL)
1772 code = ix86_fp_compare_code_to_integer (code);
1774 /* i387 supports just limited amount of conditional codes. */
1777 case LTU: case GTU: case LEU: case GEU:
1778 if (inmode == CCmode || inmode == CCFPmode || inmode == CCFPUmode)
1781 case ORDERED: case UNORDERED:
1789 /* Return 1 if OP is a binary operator that can be promoted to wider mode. */
1792 promotable_binary_operator (op, mode)
1794 enum machine_mode mode ATTRIBUTE_UNUSED;
1796 switch (GET_CODE (op))
1799 /* Modern CPUs have same latency for HImode and SImode multiply,
1800 but 386 and 486 do HImode multiply faster. */
1801 return ix86_cpu > PROCESSOR_I486;
1813 /* Nearly general operand, but accept any const_double, since we wish
1814 to be able to drop them into memory rather than have them get pulled
1818 cmp_fp_expander_operand (op, mode)
1820 enum machine_mode mode;
1822 if (mode != VOIDmode && mode != GET_MODE (op))
1824 if (GET_CODE (op) == CONST_DOUBLE)
1826 return general_operand (op, mode);
1829 /* Match an SI or HImode register for a zero_extract. */
1832 ext_register_operand (op, mode)
1834 enum machine_mode mode ATTRIBUTE_UNUSED;
1837 if ((!TARGET_64BIT || GET_MODE (op) != DImode)
1838 && GET_MODE (op) != SImode && GET_MODE (op) != HImode)
1841 if (!register_operand (op, VOIDmode))
1844 /* Be curefull to accept only registers having upper parts. */
1845 regno = REG_P (op) ? REGNO (op) : REGNO (SUBREG_REG (op));
1846 return (regno > LAST_VIRTUAL_REGISTER || regno < 4);
1849 /* Return 1 if this is a valid binary floating-point operation.
1850 OP is the expression matched, and MODE is its mode. */
1853 binary_fp_operator (op, mode)
1855 enum machine_mode mode;
1857 if (mode != VOIDmode && mode != GET_MODE (op))
1860 switch (GET_CODE (op))
1866 return GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT;
1874 mult_operator(op, mode)
1876 enum machine_mode mode ATTRIBUTE_UNUSED;
1878 return GET_CODE (op) == MULT;
1882 div_operator(op, mode)
1884 enum machine_mode mode ATTRIBUTE_UNUSED;
1886 return GET_CODE (op) == DIV;
1890 arith_or_logical_operator (op, mode)
1892 enum machine_mode mode;
1894 return ((mode == VOIDmode || GET_MODE (op) == mode)
1895 && (GET_RTX_CLASS (GET_CODE (op)) == 'c'
1896 || GET_RTX_CLASS (GET_CODE (op)) == '2'));
1899 /* Returns 1 if OP is memory operand with a displacement. */
1902 memory_displacement_operand (op, mode)
1904 enum machine_mode mode;
1906 struct ix86_address parts;
1908 if (! memory_operand (op, mode))
1911 if (! ix86_decompose_address (XEXP (op, 0), &parts))
1914 return parts.disp != NULL_RTX;
1917 /* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
1918 re-recognize the operand to avoid a copy_to_mode_reg that will fail.
1920 ??? It seems likely that this will only work because cmpsi is an
1921 expander, and no actual insns use this. */
1924 cmpsi_operand (op, mode)
1926 enum machine_mode mode;
1928 if (nonimmediate_operand (op, mode))
1931 if (GET_CODE (op) == AND
1932 && GET_MODE (op) == SImode
1933 && GET_CODE (XEXP (op, 0)) == ZERO_EXTRACT
1934 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
1935 && GET_CODE (XEXP (XEXP (op, 0), 2)) == CONST_INT
1936 && INTVAL (XEXP (XEXP (op, 0), 1)) == 8
1937 && INTVAL (XEXP (XEXP (op, 0), 2)) == 8
1938 && GET_CODE (XEXP (op, 1)) == CONST_INT)
1944 /* Returns 1 if OP is memory operand that can not be represented by the
1948 long_memory_operand (op, mode)
1950 enum machine_mode mode;
1952 if (! memory_operand (op, mode))
1955 return memory_address_length (op) != 0;
1958 /* Return nonzero if the rtx is known aligned. */
1961 aligned_operand (op, mode)
1963 enum machine_mode mode;
1965 struct ix86_address parts;
1967 if (!general_operand (op, mode))
1970 /* Registers and immediate operands are always "aligned". */
1971 if (GET_CODE (op) != MEM)
1974 /* Don't even try to do any aligned optimizations with volatiles. */
1975 if (MEM_VOLATILE_P (op))
1980 /* Pushes and pops are only valid on the stack pointer. */
1981 if (GET_CODE (op) == PRE_DEC
1982 || GET_CODE (op) == POST_INC)
1985 /* Decode the address. */
1986 if (! ix86_decompose_address (op, &parts))
1989 /* Look for some component that isn't known to be aligned. */
1993 && REGNO_POINTER_ALIGN (REGNO (parts.index)) < 32)
1998 if (REGNO_POINTER_ALIGN (REGNO (parts.base)) < 32)
2003 if (GET_CODE (parts.disp) != CONST_INT
2004 || (INTVAL (parts.disp) & 3) != 0)
2008 /* Didn't find one -- this must be an aligned address. */
2012 /* Return true if the constant is something that can be loaded with
2013 a special instruction. Only handle 0.0 and 1.0; others are less
2017 standard_80387_constant_p (x)
2020 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
2022 /* Note that on the 80387, other constants, such as pi, that we should support
2023 too. On some machines, these are much slower to load as standard constant,
2024 than to load from doubles in memory. */
2025 if (x == CONST0_RTX (GET_MODE (x)))
2027 if (x == CONST1_RTX (GET_MODE (x)))
2032 /* Return 1 if X is FP constant we can load to SSE register w/o using memory.
2035 standard_sse_constant_p (x)
2038 if (GET_CODE (x) != CONST_DOUBLE)
2040 return (x == CONST0_RTX (GET_MODE (x)));
2043 /* Returns 1 if OP contains a symbol reference */
2046 symbolic_reference_mentioned_p (op)
2049 register const char *fmt;
2052 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
2055 fmt = GET_RTX_FORMAT (GET_CODE (op));
2056 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
2062 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
2063 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
2067 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
2074 /* Return 1 if it is appropriate to emit `ret' instructions in the
2075 body of a function. Do this only if the epilogue is simple, needing a
2076 couple of insns. Prior to reloading, we can't tell how many registers
2077 must be saved, so return 0 then. Return 0 if there is no frame
2078 marker to de-allocate.
2080 If NON_SAVING_SETJMP is defined and true, then it is not possible
2081 for the epilogue to be simple, so return 0. This is a special case
2082 since NON_SAVING_SETJMP will not cause regs_ever_live to change
2083 until final, but jump_optimize may need to know sooner if a
2087 ix86_can_use_return_insn_p ()
2089 struct ix86_frame frame;
2091 #ifdef NON_SAVING_SETJMP
2092 if (NON_SAVING_SETJMP && current_function_calls_setjmp)
2095 #ifdef FUNCTION_BLOCK_PROFILER_EXIT
2096 if (profile_block_flag == 2)
2100 if (! reload_completed || frame_pointer_needed)
2103 /* Don't allow more than 32 pop, since that's all we can do
2104 with one instruction. */
2105 if (current_function_pops_args
2106 && current_function_args_size >= 32768)
2109 ix86_compute_frame_layout (&frame);
2110 return frame.to_allocate == 0 && frame.nregs == 0;
2113 /* Return 1 if VALUE can be stored in the sign extended immediate field. */
2115 x86_64_sign_extended_value (value)
2118 switch (GET_CODE (value))
2120 /* CONST_DOUBLES never match, since HOST_BITS_PER_WIDE_INT is known
2121 to be at least 32 and this all acceptable constants are
2122 represented as CONST_INT. */
2124 if (HOST_BITS_PER_WIDE_INT == 32)
2128 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (value), DImode);
2129 return trunc_int_for_mode (val, SImode) == val;
2133 /* For certain code models, the symbolic references are known to fit. */
2135 return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_KERNEL;
2137 /* For certain code models, the code is near as well. */
2139 return ix86_cmodel != CM_LARGE && ix86_cmodel != CM_SMALL_PIC;
2141 /* We also may accept the offsetted memory references in certain special
2144 if (GET_CODE (XEXP (value, 0)) == UNSPEC
2145 && XVECLEN (XEXP (value, 0), 0) == 1
2146 && XINT (XEXP (value, 0), 1) == 15)
2148 else if (GET_CODE (XEXP (value, 0)) == PLUS)
2150 rtx op1 = XEXP (XEXP (value, 0), 0);
2151 rtx op2 = XEXP (XEXP (value, 0), 1);
2152 HOST_WIDE_INT offset;
2154 if (ix86_cmodel == CM_LARGE)
2156 if (GET_CODE (op2) != CONST_INT)
2158 offset = trunc_int_for_mode (INTVAL (op2), DImode);
2159 switch (GET_CODE (op1))
2162 /* For CM_SMALL assume that latest object is 1MB before
2163 end of 31bits boundary. We may also accept pretty
2164 large negative constants knowing that all objects are
2165 in the positive half of address space. */
2166 if (ix86_cmodel == CM_SMALL
2167 && offset < 1024*1024*1024
2168 && trunc_int_for_mode (offset, SImode) == offset)
2170 /* For CM_KERNEL we know that all object resist in the
2171 negative half of 32bits address space. We may not
2172 accept negative offsets, since they may be just off
2173 and we may accept pretty large possitive ones. */
2174 if (ix86_cmodel == CM_KERNEL
2176 && trunc_int_for_mode (offset, SImode) == offset)
2180 /* These conditions are similar to SYMBOL_REF ones, just the
2181 constraints for code models differ. */
2182 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
2183 && offset < 1024*1024*1024
2184 && trunc_int_for_mode (offset, SImode) == offset)
2186 if (ix86_cmodel == CM_KERNEL
2188 && trunc_int_for_mode (offset, SImode) == offset)
2201 /* Return 1 if VALUE can be stored in the zero extended immediate field. */
2203 x86_64_zero_extended_value (value)
2206 switch (GET_CODE (value))
2209 if (HOST_BITS_PER_WIDE_INT == 32)
2210 return (GET_MODE (value) == VOIDmode
2211 && !CONST_DOUBLE_HIGH (value));
2215 if (HOST_BITS_PER_WIDE_INT == 32)
2216 return INTVAL (value) >= 0;
2218 return !(INTVAL (value) & ~(HOST_WIDE_INT)0xffffffff);
2221 /* For certain code models, the symbolic references are known to fit. */
2223 return ix86_cmodel == CM_SMALL;
2225 /* For certain code models, the code is near as well. */
2227 return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM;
2229 /* We also may accept the offsetted memory references in certain special
2232 if (GET_CODE (XEXP (value, 0)) == PLUS)
2234 rtx op1 = XEXP (XEXP (value, 0), 0);
2235 rtx op2 = XEXP (XEXP (value, 0), 1);
2237 if (ix86_cmodel == CM_LARGE)
2239 switch (GET_CODE (op1))
2243 /* For small code model we may accept pretty large possitive
2244 offsets, since one bit is available for free. Negative
2245 offsets are limited by the size of NULL pointer area
2246 specified by the ABI. */
2247 if (ix86_cmodel == CM_SMALL
2248 && GET_CODE (op2) == CONST_INT
2249 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
2250 && (trunc_int_for_mode (INTVAL (op2), SImode)
2253 /* ??? For the kernel, we may accept adjustment of
2254 -0x10000000, since we know that it will just convert
2255 negative address space to possitive, but perhaps this
2256 is not worthwhile. */
2259 /* These conditions are similar to SYMBOL_REF ones, just the
2260 constraints for code models differ. */
2261 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
2262 && GET_CODE (op2) == CONST_INT
2263 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
2264 && (trunc_int_for_mode (INTVAL (op2), SImode)
2278 /* Value should be nonzero if functions must have frame pointers.
2279 Zero means the frame pointer need not be set up (and parms may
2280 be accessed via the stack pointer) in functions that seem suitable. */
2283 ix86_frame_pointer_required ()
2285 /* If we accessed previous frames, then the generated code expects
2286 to be able to access the saved ebp value in our frame. */
2287 if (cfun->machine->accesses_prev_frame)
2290 /* Several x86 os'es need a frame pointer for other reasons,
2291 usually pertaining to setjmp. */
2292 if (SUBTARGET_FRAME_POINTER_REQUIRED)
2295 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
2296 the frame pointer by default. Turn it back on now if we've not
2297 got a leaf function. */
2298 if (TARGET_OMIT_LEAF_FRAME_POINTER && ! leaf_function_p ())
2304 /* Record that the current function accesses previous call frames. */
2307 ix86_setup_frame_addresses ()
2309 cfun->machine->accesses_prev_frame = 1;
2312 static char pic_label_name[32];
2314 /* This function generates code for -fpic that loads %ebx with
2315 the return address of the caller and then returns. */
2318 ix86_asm_file_end (file)
2323 if (! TARGET_DEEP_BRANCH_PREDICTION || pic_label_name[0] == 0)
2326 /* ??? Binutils 2.10 and earlier has a linkonce elimination bug related
2327 to updating relocations to a section being discarded such that this
2328 doesn't work. Ought to detect this at configure time. */
2330 /* The trick here is to create a linkonce section containing the
2331 pic label thunk, but to refer to it with an internal label.
2332 Because the label is internal, we don't have inter-dso name
2333 binding issues on hosts that don't support ".hidden".
2335 In order to use these macros, however, we must create a fake
2337 if (targetm.have_named_sections)
2339 tree decl = build_decl (FUNCTION_DECL,
2340 get_identifier ("i686.get_pc_thunk"),
2342 DECL_ONE_ONLY (decl) = 1;
2343 UNIQUE_SECTION (decl, 0);
2344 named_section (decl, NULL);
2351 /* This used to call ASM_DECLARE_FUNCTION_NAME() but since it's an
2352 internal (non-global) label that's being emitted, it didn't make
2353 sense to have .type information for local labels. This caused
2354 the SCO OpenServer 5.0.4 ELF assembler grief (why are you giving
2355 me debug info for a label that you're declaring non-global?) this
2356 was changed to call ASM_OUTPUT_LABEL() instead. */
2358 ASM_OUTPUT_LABEL (file, pic_label_name);
2360 xops[0] = pic_offset_table_rtx;
2361 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
2362 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
2363 output_asm_insn ("ret", xops);
2367 load_pic_register ()
2374 gotsym = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
2376 if (TARGET_DEEP_BRANCH_PREDICTION)
2378 if (! pic_label_name[0])
2379 ASM_GENERATE_INTERNAL_LABEL (pic_label_name, "LPR", 0);
2380 pclab = gen_rtx_MEM (QImode, gen_rtx_SYMBOL_REF (Pmode, pic_label_name));
2384 pclab = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
2387 emit_insn (gen_prologue_get_pc (pic_offset_table_rtx, pclab));
2389 if (! TARGET_DEEP_BRANCH_PREDICTION)
2390 emit_insn (gen_popsi1 (pic_offset_table_rtx));
2392 emit_insn (gen_prologue_set_got (pic_offset_table_rtx, gotsym, pclab));
2395 /* Generate an "push" pattern for input ARG. */
2401 return gen_rtx_SET (VOIDmode,
2403 gen_rtx_PRE_DEC (Pmode,
2404 stack_pointer_rtx)),
2408 /* Return 1 if we need to save REGNO. */
2410 ix86_save_reg (regno, maybe_eh_return)
2412 int maybe_eh_return;
2416 && regno == PIC_OFFSET_TABLE_REGNUM
2417 && (current_function_uses_pic_offset_table
2418 || current_function_uses_const_pool
2419 || current_function_calls_eh_return))
2422 if (current_function_calls_eh_return && maybe_eh_return)
2427 unsigned test = EH_RETURN_DATA_REGNO(i);
2428 if (test == INVALID_REGNUM)
2430 if (test == (unsigned) regno)
2435 return (regs_ever_live[regno]
2436 && !call_used_regs[regno]
2437 && !fixed_regs[regno]
2438 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
2441 /* Return number of registers to be saved on the stack. */
2449 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
2450 if (ix86_save_reg (regno, true))
2455 /* Return the offset between two registers, one to be eliminated, and the other
2456 its replacement, at the start of a routine. */
2459 ix86_initial_elimination_offset (from, to)
2463 struct ix86_frame frame;
2464 ix86_compute_frame_layout (&frame);
2466 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
2467 return frame.hard_frame_pointer_offset;
2468 else if (from == FRAME_POINTER_REGNUM
2469 && to == HARD_FRAME_POINTER_REGNUM)
2470 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
2473 if (to != STACK_POINTER_REGNUM)
2475 else if (from == ARG_POINTER_REGNUM)
2476 return frame.stack_pointer_offset;
2477 else if (from != FRAME_POINTER_REGNUM)
2480 return frame.stack_pointer_offset - frame.frame_pointer_offset;
2484 /* Fill structure ix86_frame about frame of currently computed function. */
2487 ix86_compute_frame_layout (frame)
2488 struct ix86_frame *frame;
2490 HOST_WIDE_INT total_size;
2491 int stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
2493 int preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
2494 HOST_WIDE_INT size = get_frame_size ();
2496 frame->nregs = ix86_nsaved_regs ();
2499 /* Skip return value and save base pointer. */
2500 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
2502 frame->hard_frame_pointer_offset = offset;
2504 /* Do some sanity checking of stack_alignment_needed and
2505 preferred_alignment, since i386 port is the only using those features
2506 that may break easilly. */
2508 if (size && !stack_alignment_needed)
2510 if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
2512 if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
2514 if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
2517 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
2518 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
2520 /* Register save area */
2521 offset += frame->nregs * UNITS_PER_WORD;
2524 if (ix86_save_varrargs_registers)
2526 offset += X86_64_VARARGS_SIZE;
2527 frame->va_arg_size = X86_64_VARARGS_SIZE;
2530 frame->va_arg_size = 0;
2532 /* Align start of frame for local function. */
2533 frame->padding1 = ((offset + stack_alignment_needed - 1)
2534 & -stack_alignment_needed) - offset;
2536 offset += frame->padding1;
2538 /* Frame pointer points here. */
2539 frame->frame_pointer_offset = offset;
2543 /* Add outgoing arguments area. */
2544 if (ACCUMULATE_OUTGOING_ARGS)
2546 offset += current_function_outgoing_args_size;
2547 frame->outgoing_arguments_size = current_function_outgoing_args_size;
2550 frame->outgoing_arguments_size = 0;
2552 /* Align stack boundary. */
2553 frame->padding2 = ((offset + preferred_alignment - 1)
2554 & -preferred_alignment) - offset;
2556 offset += frame->padding2;
2558 /* We've reached end of stack frame. */
2559 frame->stack_pointer_offset = offset;
2561 /* Size prologue needs to allocate. */
2562 frame->to_allocate =
2563 (size + frame->padding1 + frame->padding2
2564 + frame->outgoing_arguments_size + frame->va_arg_size);
2566 if (TARGET_64BIT && TARGET_RED_ZONE && current_function_sp_is_unchanging
2567 && current_function_is_leaf)
2569 frame->red_zone_size = frame->to_allocate;
2570 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
2571 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
2574 frame->red_zone_size = 0;
2575 frame->to_allocate -= frame->red_zone_size;
2576 frame->stack_pointer_offset -= frame->red_zone_size;
2578 fprintf (stderr, "nregs: %i\n", frame->nregs);
2579 fprintf (stderr, "size: %i\n", size);
2580 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
2581 fprintf (stderr, "padding1: %i\n", frame->padding1);
2582 fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
2583 fprintf (stderr, "padding2: %i\n", frame->padding2);
2584 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
2585 fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
2586 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
2587 fprintf (stderr, "hard_frame_pointer_offset: %i\n",
2588 frame->hard_frame_pointer_offset);
2589 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
2593 /* Emit code to save registers in the prologue. */
2596 ix86_emit_save_regs ()
2601 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
2602 if (ix86_save_reg (regno, true))
2604 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
2605 RTX_FRAME_RELATED_P (insn) = 1;
2609 /* Emit code to save registers using MOV insns. First register
2610 is restored from POINTER + OFFSET. */
2612 ix86_emit_save_regs_using_mov (pointer, offset)
2614 HOST_WIDE_INT offset;
2619 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
2620 if (ix86_save_reg (regno, true))
2622 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
2624 gen_rtx_REG (Pmode, regno));
2625 RTX_FRAME_RELATED_P (insn) = 1;
2626 offset += UNITS_PER_WORD;
2630 /* Expand the prologue into a bunch of separate insns. */
2633 ix86_expand_prologue ()
2636 int pic_reg_used = (flag_pic && (current_function_uses_pic_offset_table
2637 || current_function_uses_const_pool)
2639 struct ix86_frame frame;
2640 int use_mov = (TARGET_PROLOGUE_USING_MOVE && !optimize_size);
2641 HOST_WIDE_INT allocate;
2643 ix86_compute_frame_layout (&frame);
2645 /* Note: AT&T enter does NOT have reversed args. Enter is probably
2646 slower on all targets. Also sdb doesn't like it. */
2648 if (frame_pointer_needed)
2650 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
2651 RTX_FRAME_RELATED_P (insn) = 1;
2653 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
2654 RTX_FRAME_RELATED_P (insn) = 1;
2657 allocate = frame.to_allocate;
2658 /* In case we are dealing only with single register and empty frame,
2659 push is equivalent of the mov+add sequence. */
2660 if (allocate == 0 && frame.nregs <= 1)
2664 ix86_emit_save_regs ();
2666 allocate += frame.nregs * UNITS_PER_WORD;
2670 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
2672 insn = emit_insn (gen_pro_epilogue_adjust_stack
2673 (stack_pointer_rtx, stack_pointer_rtx,
2674 GEN_INT (-allocate)));
2675 RTX_FRAME_RELATED_P (insn) = 1;
2679 /* ??? Is this only valid for Win32? */
2686 arg0 = gen_rtx_REG (SImode, 0);
2687 emit_move_insn (arg0, GEN_INT (allocate));
2689 sym = gen_rtx_MEM (FUNCTION_MODE,
2690 gen_rtx_SYMBOL_REF (Pmode, "_alloca"));
2691 insn = emit_call_insn (gen_call (sym, const0_rtx, constm1_rtx));
2693 CALL_INSN_FUNCTION_USAGE (insn)
2694 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_USE (VOIDmode, arg0),
2695 CALL_INSN_FUNCTION_USAGE (insn));
2699 if (!frame_pointer_needed || !frame.to_allocate)
2700 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
2702 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
2703 -frame.nregs * UNITS_PER_WORD);
2706 #ifdef SUBTARGET_PROLOGUE
2711 load_pic_register ();
2713 /* If we are profiling, make sure no instructions are scheduled before
2714 the call to mcount. However, if -fpic, the above call will have
2716 if ((profile_flag || profile_block_flag) && ! pic_reg_used)
2717 emit_insn (gen_blockage ());
2720 /* Emit code to restore saved registers using MOV insns. First register
2721 is restored from POINTER + OFFSET. */
2723 ix86_emit_restore_regs_using_mov (pointer, offset, maybe_eh_return)
2726 int maybe_eh_return;
2730 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
2731 if (ix86_save_reg (regno, maybe_eh_return))
2733 emit_move_insn (gen_rtx_REG (Pmode, regno),
2734 adjust_address (gen_rtx_MEM (Pmode, pointer),
2736 offset += UNITS_PER_WORD;
2740 /* Restore function stack, frame, and registers. */
2743 ix86_expand_epilogue (style)
2747 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
2748 struct ix86_frame frame;
2749 HOST_WIDE_INT offset;
2751 ix86_compute_frame_layout (&frame);
2753 /* Calculate start of saved registers relative to ebp. Special care
2754 must be taken for the normal return case of a function using
2755 eh_return: the eax and edx registers are marked as saved, but not
2756 restored along this path. */
2757 offset = frame.nregs;
2758 if (current_function_calls_eh_return && style != 2)
2760 offset *= -UNITS_PER_WORD;
2762 #ifdef FUNCTION_BLOCK_PROFILER_EXIT
2763 if (profile_block_flag == 2)
2765 FUNCTION_BLOCK_PROFILER_EXIT;
2769 /* If we're only restoring one register and sp is not valid then
2770 using a move instruction to restore the register since it's
2771 less work than reloading sp and popping the register.
2773 The default code result in stack adjustment using add/lea instruction,
2774 while this code results in LEAVE instruction (or discrete equivalent),
2775 so it is profitable in some other cases as well. Especially when there
2776 are no registers to restore. We also use this code when TARGET_USE_LEAVE
2777 and there is exactly one register to pop. This heruistic may need some
2778 tuning in future. */
2779 if ((!sp_valid && frame.nregs <= 1)
2780 || (TARGET_EPILOGUE_USING_MOVE && !optimize_size
2781 && (frame.nregs > 1 || frame.to_allocate))
2782 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
2783 || (frame_pointer_needed && TARGET_USE_LEAVE && !optimize_size
2784 && frame.nregs == 1)
2787 /* Restore registers. We can use ebp or esp to address the memory
2788 locations. If both are available, default to ebp, since offsets
2789 are known to be small. Only exception is esp pointing directly to the
2790 end of block of saved registers, where we may simplify addressing
2793 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
2794 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
2795 frame.to_allocate, style == 2);
2797 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
2798 offset, style == 2);
2800 /* eh_return epilogues need %ecx added to the stack pointer. */
2803 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
2805 if (frame_pointer_needed)
2807 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
2808 tmp = plus_constant (tmp, UNITS_PER_WORD);
2809 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
2811 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
2812 emit_move_insn (hard_frame_pointer_rtx, tmp);
2814 emit_insn (gen_pro_epilogue_adjust_stack
2815 (stack_pointer_rtx, sa, const0_rtx));
2819 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
2820 tmp = plus_constant (tmp, (frame.to_allocate
2821 + frame.nregs * UNITS_PER_WORD));
2822 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
2825 else if (!frame_pointer_needed)
2826 emit_insn (gen_pro_epilogue_adjust_stack
2827 (stack_pointer_rtx, stack_pointer_rtx,
2828 GEN_INT (frame.to_allocate
2829 + frame.nregs * UNITS_PER_WORD)));
2830 /* If not an i386, mov & pop is faster than "leave". */
2831 else if (TARGET_USE_LEAVE || optimize_size)
2832 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
2835 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
2836 hard_frame_pointer_rtx,
2839 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
2841 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
2846 /* First step is to deallocate the stack frame so that we can
2847 pop the registers. */
2850 if (!frame_pointer_needed)
2852 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
2853 hard_frame_pointer_rtx,
2856 else if (frame.to_allocate)
2857 emit_insn (gen_pro_epilogue_adjust_stack
2858 (stack_pointer_rtx, stack_pointer_rtx,
2859 GEN_INT (frame.to_allocate)));
2861 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
2862 if (ix86_save_reg (regno, false))
2865 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
2867 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
2869 if (frame_pointer_needed)
2872 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
2874 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
2878 /* Sibcall epilogues don't want a return instruction. */
2882 if (current_function_pops_args && current_function_args_size)
2884 rtx popc = GEN_INT (current_function_pops_args);
2886 /* i386 can only pop 64K bytes. If asked to pop more, pop
2887 return address, do explicit add, and jump indirectly to the
2890 if (current_function_pops_args >= 65536)
2892 rtx ecx = gen_rtx_REG (SImode, 2);
2894 /* There are is no "pascal" calling convention in 64bit ABI. */
2898 emit_insn (gen_popsi1 (ecx));
2899 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
2900 emit_jump_insn (gen_return_indirect_internal (ecx));
2903 emit_jump_insn (gen_return_pop_internal (popc));
2906 emit_jump_insn (gen_return_internal ());
2909 /* Extract the parts of an RTL expression that is a valid memory address
2910 for an instruction. Return false if the structure of the address is
2914 ix86_decompose_address (addr, out)
2916 struct ix86_address *out;
2918 rtx base = NULL_RTX;
2919 rtx index = NULL_RTX;
2920 rtx disp = NULL_RTX;
2921 HOST_WIDE_INT scale = 1;
2922 rtx scale_rtx = NULL_RTX;
2924 if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG)
2926 else if (GET_CODE (addr) == PLUS)
2928 rtx op0 = XEXP (addr, 0);
2929 rtx op1 = XEXP (addr, 1);
2930 enum rtx_code code0 = GET_CODE (op0);
2931 enum rtx_code code1 = GET_CODE (op1);
2933 if (code0 == REG || code0 == SUBREG)
2935 if (code1 == REG || code1 == SUBREG)
2936 index = op0, base = op1; /* index + base */
2938 base = op0, disp = op1; /* base + displacement */
2940 else if (code0 == MULT)
2942 index = XEXP (op0, 0);
2943 scale_rtx = XEXP (op0, 1);
2944 if (code1 == REG || code1 == SUBREG)
2945 base = op1; /* index*scale + base */
2947 disp = op1; /* index*scale + disp */
2949 else if (code0 == PLUS && GET_CODE (XEXP (op0, 0)) == MULT)
2951 index = XEXP (XEXP (op0, 0), 0); /* index*scale + base + disp */
2952 scale_rtx = XEXP (XEXP (op0, 0), 1);
2953 base = XEXP (op0, 1);
2956 else if (code0 == PLUS)
2958 index = XEXP (op0, 0); /* index + base + disp */
2959 base = XEXP (op0, 1);
2965 else if (GET_CODE (addr) == MULT)
2967 index = XEXP (addr, 0); /* index*scale */
2968 scale_rtx = XEXP (addr, 1);
2970 else if (GET_CODE (addr) == ASHIFT)
2974 /* We're called for lea too, which implements ashift on occasion. */
2975 index = XEXP (addr, 0);
2976 tmp = XEXP (addr, 1);
2977 if (GET_CODE (tmp) != CONST_INT)
2979 scale = INTVAL (tmp);
2980 if ((unsigned HOST_WIDE_INT) scale > 3)
2985 disp = addr; /* displacement */
2987 /* Extract the integral value of scale. */
2990 if (GET_CODE (scale_rtx) != CONST_INT)
2992 scale = INTVAL (scale_rtx);
2995 /* Allow arg pointer and stack pointer as index if there is not scaling */
2996 if (base && index && scale == 1
2997 && (index == arg_pointer_rtx || index == frame_pointer_rtx
2998 || index == stack_pointer_rtx))
3005 /* Special case: %ebp cannot be encoded as a base without a displacement. */
3006 if ((base == hard_frame_pointer_rtx
3007 || base == frame_pointer_rtx
3008 || base == arg_pointer_rtx) && !disp)
3011 /* Special case: on K6, [%esi] makes the instruction vector decoded.
3012 Avoid this by transforming to [%esi+0]. */
3013 if (ix86_cpu == PROCESSOR_K6 && !optimize_size
3014 && base && !index && !disp
3016 && REGNO_REG_CLASS (REGNO (base)) == SIREG)
3019 /* Special case: encode reg+reg instead of reg*2. */
3020 if (!base && index && scale && scale == 2)
3021 base = index, scale = 1;
3023 /* Special case: scaling cannot be encoded without base or displacement. */
3024 if (!base && !disp && index && scale != 1)
3035 /* Return cost of the memory address x.
3036 For i386, it is better to use a complex address than let gcc copy
3037 the address into a reg and make a new pseudo. But not if the address
3038 requires to two regs - that would mean more pseudos with longer
3041 ix86_address_cost (x)
3044 struct ix86_address parts;
3047 if (!ix86_decompose_address (x, &parts))
3050 /* More complex memory references are better. */
3051 if (parts.disp && parts.disp != const0_rtx)
3054 /* Attempt to minimize number of registers in the address. */
3056 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
3058 && (!REG_P (parts.index)
3059 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
3063 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
3065 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
3066 && parts.base != parts.index)
3069 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
3070 since it's predecode logic can't detect the length of instructions
3071 and it degenerates to vector decoded. Increase cost of such
3072 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
3073 to split such addresses or even refuse such addresses at all.
3075 Following addressing modes are affected:
3080 The first and last case may be avoidable by explicitly coding the zero in
3081 memory address, but I don't have AMD-K6 machine handy to check this
3085 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
3086 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
3087 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
3093 /* If X is a machine specific address (i.e. a symbol or label being
3094 referenced as a displacement from the GOT implemented using an
3095 UNSPEC), then return the base term. Otherwise return X. */
3098 ix86_find_base_term (x)
3103 if (GET_CODE (x) != PLUS
3104 || XEXP (x, 0) != pic_offset_table_rtx
3105 || GET_CODE (XEXP (x, 1)) != CONST)
3108 term = XEXP (XEXP (x, 1), 0);
3110 if (GET_CODE (term) == PLUS && GET_CODE (XEXP (term, 1)) == CONST_INT)
3111 term = XEXP (term, 0);
3113 if (GET_CODE (term) != UNSPEC
3114 || XVECLEN (term, 0) != 1
3115 || XINT (term, 1) != 7)
3118 term = XVECEXP (term, 0, 0);
3120 if (GET_CODE (term) != SYMBOL_REF
3121 && GET_CODE (term) != LABEL_REF)
3127 /* Determine if a given CONST RTX is a valid memory displacement
3131 legitimate_pic_address_disp_p (disp)
3134 if (GET_CODE (disp) != CONST)
3136 disp = XEXP (disp, 0);
3138 if (GET_CODE (disp) == PLUS)
3140 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
3142 disp = XEXP (disp, 0);
3145 if (GET_CODE (disp) != UNSPEC
3146 || XVECLEN (disp, 0) != 1)
3149 /* Must be @GOT or @GOTOFF. */
3150 if (XINT (disp, 1) != 6
3151 && XINT (disp, 1) != 7)
3154 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
3155 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
3161 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
3162 memory address for an instruction. The MODE argument is the machine mode
3163 for the MEM expression that wants to use this address.
3165 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
3166 convert common non-canonical forms to canonical form so that they will
3170 legitimate_address_p (mode, addr, strict)
3171 enum machine_mode mode;
3175 struct ix86_address parts;
3176 rtx base, index, disp;
3177 HOST_WIDE_INT scale;
3178 const char *reason = NULL;
3179 rtx reason_rtx = NULL_RTX;
3181 if (TARGET_DEBUG_ADDR)
3184 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
3185 GET_MODE_NAME (mode), strict);
3189 if (! ix86_decompose_address (addr, &parts))
3191 reason = "decomposition failed";
3196 index = parts.index;
3198 scale = parts.scale;
3200 /* Validate base register.
3202 Don't allow SUBREG's here, it can lead to spill failures when the base
3203 is one word out of a two word structure, which is represented internally
3210 if (GET_CODE (base) != REG)
3212 reason = "base is not a register";
3216 if (GET_MODE (base) != Pmode)
3218 reason = "base is not in Pmode";
3222 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (base))
3223 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (base)))
3225 reason = "base is not valid";
3230 /* Validate index register.
3232 Don't allow SUBREG's here, it can lead to spill failures when the index
3233 is one word out of a two word structure, which is represented internally
3240 if (GET_CODE (index) != REG)
3242 reason = "index is not a register";
3246 if (GET_MODE (index) != Pmode)
3248 reason = "index is not in Pmode";
3252 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (index))
3253 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (index)))
3255 reason = "index is not valid";
3260 /* Validate scale factor. */
3263 reason_rtx = GEN_INT (scale);
3266 reason = "scale without index";
3270 if (scale != 2 && scale != 4 && scale != 8)
3272 reason = "scale is not a valid multiplier";
3277 /* Validate displacement. */
3282 if (!CONSTANT_ADDRESS_P (disp))
3284 reason = "displacement is not constant";
3290 if (!x86_64_sign_extended_value (disp))
3292 reason = "displacement is out of range";
3298 if (GET_CODE (disp) == CONST_DOUBLE)
3300 reason = "displacement is a const_double";
3305 if (flag_pic && SYMBOLIC_CONST (disp))
3307 if (TARGET_64BIT && (index || base))
3309 reason = "non-constant pic memory reference";
3312 if (! legitimate_pic_address_disp_p (disp))
3314 reason = "displacement is an invalid pic construct";
3318 /* This code used to verify that a symbolic pic displacement
3319 includes the pic_offset_table_rtx register.
3321 While this is good idea, unfortunately these constructs may
3322 be created by "adds using lea" optimization for incorrect
3331 This code is nonsensical, but results in addressing
3332 GOT table with pic_offset_table_rtx base. We can't
3333 just refuse it easilly, since it gets matched by
3334 "addsi3" pattern, that later gets split to lea in the
3335 case output register differs from input. While this
3336 can be handled by separate addsi pattern for this case
3337 that never results in lea, this seems to be easier and
3338 correct fix for crash to disable this test. */
3340 else if (HALF_PIC_P ())
3342 if (! HALF_PIC_ADDRESS_P (disp)
3343 || (base != NULL_RTX || index != NULL_RTX))
3345 reason = "displacement is an invalid half-pic reference";
3351 /* Everything looks valid. */
3352 if (TARGET_DEBUG_ADDR)
3353 fprintf (stderr, "Success.\n");
3357 if (TARGET_DEBUG_ADDR)
3359 fprintf (stderr, "Error: %s\n", reason);
3360 debug_rtx (reason_rtx);
3365 /* Return an unique alias set for the GOT. */
3367 static HOST_WIDE_INT
3368 ix86_GOT_alias_set ()
3370 static HOST_WIDE_INT set = -1;
3372 set = new_alias_set ();
3376 /* Return a legitimate reference for ORIG (an address) using the
3377 register REG. If REG is 0, a new pseudo is generated.
3379 There are two types of references that must be handled:
3381 1. Global data references must load the address from the GOT, via
3382 the PIC reg. An insn is emitted to do this load, and the reg is
3385 2. Static data references, constant pool addresses, and code labels
3386 compute the address as an offset from the GOT, whose base is in
3387 the PIC reg. Static data objects have SYMBOL_REF_FLAG set to
3388 differentiate them from global data objects. The returned
3389 address is the PIC reg + an unspec constant.
3391 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
3392 reg also appears in the address. */
3395 legitimize_pic_address (orig, reg)
3403 if (GET_CODE (addr) == LABEL_REF
3404 || (GET_CODE (addr) == SYMBOL_REF
3405 && (CONSTANT_POOL_ADDRESS_P (addr)
3406 || SYMBOL_REF_FLAG (addr))))
3408 /* This symbol may be referenced via a displacement from the PIC
3409 base address (@GOTOFF). */
3411 current_function_uses_pic_offset_table = 1;
3412 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), 7);
3413 new = gen_rtx_CONST (Pmode, new);
3414 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
3418 emit_move_insn (reg, new);
3422 else if (GET_CODE (addr) == SYMBOL_REF)
3424 /* This symbol must be referenced via a load from the
3425 Global Offset Table (@GOT). */
3427 current_function_uses_pic_offset_table = 1;
3428 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), 6);
3429 new = gen_rtx_CONST (Pmode, new);
3430 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
3431 new = gen_rtx_MEM (Pmode, new);
3432 RTX_UNCHANGING_P (new) = 1;
3433 set_mem_alias_set (new, ix86_GOT_alias_set ());
3436 reg = gen_reg_rtx (Pmode);
3437 emit_move_insn (reg, new);
3442 if (GET_CODE (addr) == CONST)
3444 addr = XEXP (addr, 0);
3445 if (GET_CODE (addr) == UNSPEC)
3447 /* Check that the unspec is one of the ones we generate? */
3449 else if (GET_CODE (addr) != PLUS)
3452 if (GET_CODE (addr) == PLUS)
3454 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
3456 /* Check first to see if this is a constant offset from a @GOTOFF
3457 symbol reference. */
3458 if ((GET_CODE (op0) == LABEL_REF
3459 || (GET_CODE (op0) == SYMBOL_REF
3460 && (CONSTANT_POOL_ADDRESS_P (op0)
3461 || SYMBOL_REF_FLAG (op0))))
3462 && GET_CODE (op1) == CONST_INT)
3464 current_function_uses_pic_offset_table = 1;
3465 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0), 7);
3466 new = gen_rtx_PLUS (Pmode, new, op1);
3467 new = gen_rtx_CONST (Pmode, new);
3468 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
3472 emit_move_insn (reg, new);
3478 base = legitimize_pic_address (XEXP (addr, 0), reg);
3479 new = legitimize_pic_address (XEXP (addr, 1),
3480 base == reg ? NULL_RTX : reg);
3482 if (GET_CODE (new) == CONST_INT)
3483 new = plus_constant (base, INTVAL (new));
3486 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
3488 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
3489 new = XEXP (new, 1);
3491 new = gen_rtx_PLUS (Pmode, base, new);
3499 /* Try machine-dependent ways of modifying an illegitimate address
3500 to be legitimate. If we find one, return the new, valid address.
3501 This macro is used in only one place: `memory_address' in explow.c.
3503 OLDX is the address as it was before break_out_memory_refs was called.
3504 In some cases it is useful to look at this to decide what needs to be done.
3506 MODE and WIN are passed so that this macro can use
3507 GO_IF_LEGITIMATE_ADDRESS.
3509 It is always safe for this macro to do nothing. It exists to recognize
3510 opportunities to optimize the output.
3512 For the 80386, we handle X+REG by loading X into a register R and
3513 using R+REG. R will go in a general reg and indexing will be used.
3514 However, if REG is a broken-out memory address or multiplication,
3515 nothing needs to be done because REG can certainly go in a general reg.
3517 When -fpic is used, special handling is needed for symbolic references.
3518 See comments by legitimize_pic_address in i386.c for details. */
3521 legitimize_address (x, oldx, mode)
3523 register rtx oldx ATTRIBUTE_UNUSED;
3524 enum machine_mode mode;
3529 if (TARGET_DEBUG_ADDR)
3531 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
3532 GET_MODE_NAME (mode));
3536 if (flag_pic && SYMBOLIC_CONST (x))
3537 return legitimize_pic_address (x, 0);
3539 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
3540 if (GET_CODE (x) == ASHIFT
3541 && GET_CODE (XEXP (x, 1)) == CONST_INT
3542 && (log = (unsigned)exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
3545 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
3546 GEN_INT (1 << log));
3549 if (GET_CODE (x) == PLUS)
3551 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
3553 if (GET_CODE (XEXP (x, 0)) == ASHIFT
3554 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
3555 && (log = (unsigned)exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
3558 XEXP (x, 0) = gen_rtx_MULT (Pmode,
3559 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
3560 GEN_INT (1 << log));
3563 if (GET_CODE (XEXP (x, 1)) == ASHIFT
3564 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
3565 && (log = (unsigned)exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
3568 XEXP (x, 1) = gen_rtx_MULT (Pmode,
3569 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
3570 GEN_INT (1 << log));
3573 /* Put multiply first if it isn't already. */
3574 if (GET_CODE (XEXP (x, 1)) == MULT)
3576 rtx tmp = XEXP (x, 0);
3577 XEXP (x, 0) = XEXP (x, 1);
3582 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
3583 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
3584 created by virtual register instantiation, register elimination, and
3585 similar optimizations. */
3586 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
3589 x = gen_rtx_PLUS (Pmode,
3590 gen_rtx_PLUS (Pmode, XEXP (x, 0),
3591 XEXP (XEXP (x, 1), 0)),
3592 XEXP (XEXP (x, 1), 1));
3596 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
3597 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
3598 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
3599 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
3600 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
3601 && CONSTANT_P (XEXP (x, 1)))
3604 rtx other = NULL_RTX;
3606 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
3608 constant = XEXP (x, 1);
3609 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
3611 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
3613 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
3614 other = XEXP (x, 1);
3622 x = gen_rtx_PLUS (Pmode,
3623 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
3624 XEXP (XEXP (XEXP (x, 0), 1), 0)),
3625 plus_constant (other, INTVAL (constant)));
3629 if (changed && legitimate_address_p (mode, x, FALSE))
3632 if (GET_CODE (XEXP (x, 0)) == MULT)
3635 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
3638 if (GET_CODE (XEXP (x, 1)) == MULT)
3641 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
3645 && GET_CODE (XEXP (x, 1)) == REG
3646 && GET_CODE (XEXP (x, 0)) == REG)
3649 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
3652 x = legitimize_pic_address (x, 0);
3655 if (changed && legitimate_address_p (mode, x, FALSE))
3658 if (GET_CODE (XEXP (x, 0)) == REG)
3660 register rtx temp = gen_reg_rtx (Pmode);
3661 register rtx val = force_operand (XEXP (x, 1), temp);
3663 emit_move_insn (temp, val);
3669 else if (GET_CODE (XEXP (x, 1)) == REG)
3671 register rtx temp = gen_reg_rtx (Pmode);
3672 register rtx val = force_operand (XEXP (x, 0), temp);
3674 emit_move_insn (temp, val);
3684 /* Print an integer constant expression in assembler syntax. Addition
3685 and subtraction are the only arithmetic that may appear in these
3686 expressions. FILE is the stdio stream to write to, X is the rtx, and
3687 CODE is the operand print code from the output string. */
3690 output_pic_addr_const (file, x, code)
3697 switch (GET_CODE (x))
3707 assemble_name (file, XSTR (x, 0));
3708 if (code == 'P' && ! SYMBOL_REF_FLAG (x))
3709 fputs ("@PLT", file);
3716 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
3717 assemble_name (asm_out_file, buf);
3721 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
3725 /* This used to output parentheses around the expression,
3726 but that does not work on the 386 (either ATT or BSD assembler). */
3727 output_pic_addr_const (file, XEXP (x, 0), code);
3731 if (GET_MODE (x) == VOIDmode)
3733 /* We can use %d if the number is <32 bits and positive. */
3734 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
3735 fprintf (file, "0x%lx%08lx",
3736 (unsigned long) CONST_DOUBLE_HIGH (x),
3737 (unsigned long) CONST_DOUBLE_LOW (x));
3739 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
3742 /* We can't handle floating point constants;
3743 PRINT_OPERAND must handle them. */
3744 output_operand_lossage ("floating constant misused");
3748 /* Some assemblers need integer constants to appear first. */
3749 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
3751 output_pic_addr_const (file, XEXP (x, 0), code);
3753 output_pic_addr_const (file, XEXP (x, 1), code);
3755 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
3757 output_pic_addr_const (file, XEXP (x, 1), code);
3759 output_pic_addr_const (file, XEXP (x, 0), code);
3766 putc (ASSEMBLER_DIALECT ? '(' : '[', file);
3767 output_pic_addr_const (file, XEXP (x, 0), code);
3769 output_pic_addr_const (file, XEXP (x, 1), code);
3770 putc (ASSEMBLER_DIALECT ? ')' : ']', file);
3774 if (XVECLEN (x, 0) != 1)
3776 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
3777 switch (XINT (x, 1))
3780 fputs ("@GOT", file);
3783 fputs ("@GOTOFF", file);
3786 fputs ("@PLT", file);
3789 output_operand_lossage ("invalid UNSPEC as operand");
3795 output_operand_lossage ("invalid expression as operand");
3799 /* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
3800 We need to handle our special PIC relocations. */
3803 i386_dwarf_output_addr_const (file, x)
3807 fprintf (file, "%s", INT_ASM_OP);
3809 output_pic_addr_const (file, x, '\0');
3811 output_addr_const (file, x);
3815 /* In the name of slightly smaller debug output, and to cater to
3816 general assembler losage, recognize PIC+GOTOFF and turn it back
3817 into a direct symbol reference. */
3820 i386_simplify_dwarf_addr (orig_x)
3825 if (GET_CODE (x) != PLUS
3826 || GET_CODE (XEXP (x, 0)) != REG
3827 || GET_CODE (XEXP (x, 1)) != CONST)
3830 x = XEXP (XEXP (x, 1), 0);
3831 if (GET_CODE (x) == UNSPEC
3832 && (XINT (x, 1) == 6
3833 || XINT (x, 1) == 7))
3834 return XVECEXP (x, 0, 0);
3836 if (GET_CODE (x) == PLUS
3837 && GET_CODE (XEXP (x, 0)) == UNSPEC
3838 && GET_CODE (XEXP (x, 1)) == CONST_INT
3839 && (XINT (XEXP (x, 0), 1) == 6
3840 || XINT (XEXP (x, 0), 1) == 7))
3841 return gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
3847 put_condition_code (code, mode, reverse, fp, file)
3849 enum machine_mode mode;
3855 if (mode == CCFPmode || mode == CCFPUmode)
3857 enum rtx_code second_code, bypass_code;
3858 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3859 if (bypass_code != NIL || second_code != NIL)
3861 code = ix86_fp_compare_code_to_integer (code);
3865 code = reverse_condition (code);
3876 if (mode != CCmode && mode != CCNOmode && mode != CCGCmode)
3881 /* ??? Use "nbe" instead of "a" for fcmov losage on some assemblers.
3882 Those same assemblers have the same but opposite losage on cmov. */
3885 suffix = fp ? "nbe" : "a";
3888 if (mode == CCNOmode || mode == CCGOCmode)
3890 else if (mode == CCmode || mode == CCGCmode)
3901 if (mode == CCNOmode || mode == CCGOCmode)
3903 else if (mode == CCmode || mode == CCGCmode)
3912 suffix = fp ? "nb" : "ae";
3915 if (mode != CCmode && mode != CCGCmode && mode != CCNOmode)
3925 suffix = fp ? "u" : "p";
3928 suffix = fp ? "nu" : "np";
3933 fputs (suffix, file);
3937 print_reg (x, code, file)
3942 if (REGNO (x) == ARG_POINTER_REGNUM
3943 || REGNO (x) == FRAME_POINTER_REGNUM
3944 || REGNO (x) == FLAGS_REG
3945 || REGNO (x) == FPSR_REG)
3948 if (ASSEMBLER_DIALECT == 0 || USER_LABEL_PREFIX[0] == 0)
3951 if (code == 'w' || MMX_REG_P (x))
3953 else if (code == 'b')
3955 else if (code == 'k')
3957 else if (code == 'q')
3959 else if (code == 'y')
3961 else if (code == 'h')
3964 code = GET_MODE_SIZE (GET_MODE (x));
3966 /* Irritatingly, AMD extended registers use different naming convention
3967 from the normal registers. */
3968 if (REX_INT_REG_P (x))
3975 error ("Extended registers have no high halves\n");
3978 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
3981 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
3984 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
3987 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
3990 error ("Unsupported operand size for extended register.\n");
3998 if (STACK_TOP_P (x))
4000 fputs ("st(0)", file);
4007 if (! ANY_FP_REG_P (x))
4008 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
4012 fputs (hi_reg_name[REGNO (x)], file);
4015 fputs (qi_reg_name[REGNO (x)], file);
4018 fputs (qi_high_reg_name[REGNO (x)], file);
4026 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
4027 C -- print opcode suffix for set/cmov insn.
4028 c -- like C, but print reversed condition
4029 F,f -- likewise, but for floating-point.
4030 R -- print the prefix for register names.
4031 z -- print the opcode suffix for the size of the current operand.
4032 * -- print a star (in certain assembler syntax)
4033 A -- print an absolute memory reference.
4034 w -- print the operand as if it's a "word" (HImode) even if it isn't.
4035 s -- print a shift double count, followed by the assemblers argument
4037 b -- print the QImode name of the register for the indicated operand.
4038 %b0 would print %al if operands[0] is reg 0.
4039 w -- likewise, print the HImode name of the register.
4040 k -- likewise, print the SImode name of the register.
4041 q -- likewise, print the DImode name of the register.
4042 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
4043 y -- print "st(0)" instead of "st" as a register.
4044 D -- print condition for SSE cmp instruction.
4045 P -- if PIC, print an @PLT suffix.
4046 X -- don't print any sort of PIC '@' suffix for a symbol.
4050 print_operand (file, x, code)
4060 if (ASSEMBLER_DIALECT == 0)
4065 if (ASSEMBLER_DIALECT == 0)
4067 else if (ASSEMBLER_DIALECT == 1)
4069 /* Intel syntax. For absolute addresses, registers should not
4070 be surrounded by braces. */
4071 if (GET_CODE (x) != REG)
4074 PRINT_OPERAND (file, x, 0);
4080 PRINT_OPERAND (file, x, 0);
4085 if (ASSEMBLER_DIALECT == 0)
4090 if (ASSEMBLER_DIALECT == 0)
4095 if (ASSEMBLER_DIALECT == 0)
4100 if (ASSEMBLER_DIALECT == 0)
4105 if (ASSEMBLER_DIALECT == 0)
4110 if (ASSEMBLER_DIALECT == 0)
4115 /* 387 opcodes don't get size suffixes if the operands are
4118 if (STACK_REG_P (x))
4121 /* this is the size of op from size of operand */
4122 switch (GET_MODE_SIZE (GET_MODE (x)))
4125 #ifdef HAVE_GAS_FILDS_FISTS
4131 if (GET_MODE (x) == SFmode)
4146 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4148 #ifdef GAS_MNEMONICS
4174 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
4176 PRINT_OPERAND (file, x, 0);
4182 /* Little bit of braindamage here. The SSE compare instructions
4183 does use completely different names for the comparisons that the
4184 fp conditional moves. */
4185 switch (GET_CODE (x))
4200 fputs ("unord", file);
4204 fputs ("neq", file);
4208 fputs ("nlt", file);
4212 fputs ("nle", file);
4215 fputs ("ord", file);
4223 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
4226 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
4229 /* Like above, but reverse condition */
4231 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
4234 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
4240 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
4243 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
4246 int pred_val = INTVAL (XEXP (x, 0));
4248 if (pred_val < REG_BR_PROB_BASE * 45 / 100
4249 || pred_val > REG_BR_PROB_BASE * 55 / 100)
4251 int taken = pred_val > REG_BR_PROB_BASE / 2;
4252 int cputaken = final_forward_branch_p (current_output_insn) == 0;
4254 /* Emit hints only in the case default branch prediction
4255 heruistics would fail. */
4256 if (taken != cputaken)
4258 /* We use 3e (DS) prefix for taken branches and
4259 2e (CS) prefix for not taken branches. */
4261 fputs ("ds ; ", file);
4263 fputs ("cs ; ", file);
4272 sprintf (str, "invalid operand code `%c'", code);
4273 output_operand_lossage (str);
4278 if (GET_CODE (x) == REG)
4280 PRINT_REG (x, code, file);
4283 else if (GET_CODE (x) == MEM)
4285 /* No `byte ptr' prefix for call instructions. */
4286 if (ASSEMBLER_DIALECT != 0 && code != 'X' && code != 'P')
4289 switch (GET_MODE_SIZE (GET_MODE (x)))
4291 case 1: size = "BYTE"; break;
4292 case 2: size = "WORD"; break;
4293 case 4: size = "DWORD"; break;
4294 case 8: size = "QWORD"; break;
4295 case 12: size = "XWORD"; break;
4296 case 16: size = "XMMWORD"; break;
4301 /* Check for explicit size override (codes 'b', 'w' and 'k') */
4304 else if (code == 'w')
4306 else if (code == 'k')
4310 fputs (" PTR ", file);
4314 if (flag_pic && CONSTANT_ADDRESS_P (x))
4315 output_pic_addr_const (file, x, code);
4316 /* Avoid (%rip) for call operands. */
4317 else if (CONSTANT_ADDRESS_P (x) && code =='P'
4318 && GET_CODE (x) != CONST_INT)
4319 output_addr_const (file, x);
4324 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
4329 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
4330 REAL_VALUE_TO_TARGET_SINGLE (r, l);
4332 if (ASSEMBLER_DIALECT == 0)
4334 fprintf (file, "0x%lx", l);
4337 /* These float cases don't actually occur as immediate operands. */
4338 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
4343 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
4344 REAL_VALUE_TO_DECIMAL (r, "%.22e", dstr);
4345 fprintf (file, "%s", dstr);
4348 else if (GET_CODE (x) == CONST_DOUBLE
4349 && (GET_MODE (x) == XFmode || GET_MODE (x) == TFmode))
4354 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
4355 REAL_VALUE_TO_DECIMAL (r, "%.22e", dstr);
4356 fprintf (file, "%s", dstr);
4362 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
4364 if (ASSEMBLER_DIALECT == 0)
4367 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
4368 || GET_CODE (x) == LABEL_REF)
4370 if (ASSEMBLER_DIALECT == 0)
4373 fputs ("OFFSET FLAT:", file);
4376 if (GET_CODE (x) == CONST_INT)
4377 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
4379 output_pic_addr_const (file, x, code);
4381 output_addr_const (file, x);
4385 /* Print a memory operand whose address is ADDR. */
4388 print_operand_address (file, addr)
4392 struct ix86_address parts;
4393 rtx base, index, disp;
4396 if (! ix86_decompose_address (addr, &parts))
4400 index = parts.index;
4402 scale = parts.scale;
4404 if (!base && !index)
4406 /* Displacement only requires special attention. */
4408 if (GET_CODE (disp) == CONST_INT)
4410 if (ASSEMBLER_DIALECT != 0)
4412 if (USER_LABEL_PREFIX[0] == 0)
4414 fputs ("ds:", file);
4416 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (addr));
4419 output_pic_addr_const (file, addr, 0);
4421 output_addr_const (file, addr);
4423 /* Use one byte shorter RIP relative addressing for 64bit mode. */
4424 if (GET_CODE (disp) != CONST_INT && TARGET_64BIT)
4425 fputs ("(%rip)", file);
4429 if (ASSEMBLER_DIALECT == 0)
4434 output_pic_addr_const (file, disp, 0);
4435 else if (GET_CODE (disp) == LABEL_REF)
4436 output_asm_label (disp);
4438 output_addr_const (file, disp);
4443 PRINT_REG (base, 0, file);
4447 PRINT_REG (index, 0, file);
4449 fprintf (file, ",%d", scale);
4455 rtx offset = NULL_RTX;
4459 /* Pull out the offset of a symbol; print any symbol itself. */
4460 if (GET_CODE (disp) == CONST
4461 && GET_CODE (XEXP (disp, 0)) == PLUS
4462 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
4464 offset = XEXP (XEXP (disp, 0), 1);
4465 disp = gen_rtx_CONST (VOIDmode,
4466 XEXP (XEXP (disp, 0), 0));
4470 output_pic_addr_const (file, disp, 0);
4471 else if (GET_CODE (disp) == LABEL_REF)
4472 output_asm_label (disp);
4473 else if (GET_CODE (disp) == CONST_INT)
4476 output_addr_const (file, disp);
4482 PRINT_REG (base, 0, file);
4485 if (INTVAL (offset) >= 0)
4487 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
4491 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
4498 PRINT_REG (index, 0, file);
4500 fprintf (file, "*%d", scale);
4507 /* Split one or more DImode RTL references into pairs of SImode
4508 references. The RTL can be REG, offsettable MEM, integer constant, or
4509 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
4510 split and "num" is its length. lo_half and hi_half are output arrays
4511 that parallel "operands". */
4514 split_di (operands, num, lo_half, hi_half)
4517 rtx lo_half[], hi_half[];
4521 rtx op = operands[num];
4522 if (CONSTANT_P (op))
4523 split_double (op, &lo_half[num], &hi_half[num]);
4524 else if (! reload_completed)
4526 lo_half[num] = gen_lowpart (SImode, op);
4527 hi_half[num] = gen_highpart (SImode, op);
4529 else if (GET_CODE (op) == REG)
4533 lo_half[num] = gen_rtx_REG (SImode, REGNO (op));
4534 hi_half[num] = gen_rtx_REG (SImode, REGNO (op) + 1);
4536 else if (offsettable_memref_p (op))
4538 lo_half[num] = adjust_address (op, SImode, 0);
4539 hi_half[num] = adjust_address (op, SImode, 4);
4546 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
4547 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
4548 is the expression of the binary operation. The output may either be
4549 emitted here, or returned to the caller, like all output_* functions.
4551 There is no guarantee that the operands are the same mode, as they
4552 might be within FLOAT or FLOAT_EXTEND expressions. */
4554 #ifndef SYSV386_COMPAT
4555 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
4556 wants to fix the assemblers because that causes incompatibility
4557 with gcc. No-one wants to fix gcc because that causes
4558 incompatibility with assemblers... You can use the option of
4559 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
4560 #define SYSV386_COMPAT 1
4564 output_387_binary_op (insn, operands)
4568 static char buf[30];
4571 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]) | SSE_REG_P (operands[2]);
4573 #ifdef ENABLE_CHECKING
4574 /* Even if we do not want to check the inputs, this documents input
4575 constraints. Which helps in understanding the following code. */
4576 if (STACK_REG_P (operands[0])
4577 && ((REG_P (operands[1])
4578 && REGNO (operands[0]) == REGNO (operands[1])
4579 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
4580 || (REG_P (operands[2])
4581 && REGNO (operands[0]) == REGNO (operands[2])
4582 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
4583 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
4589 switch (GET_CODE (operands[3]))
4592 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
4593 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
4601 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
4602 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
4610 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
4611 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
4619 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
4620 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
4634 if (GET_MODE (operands[0]) == SFmode)
4635 strcat (buf, "ss\t{%2, %0|%0, %2}");
4637 strcat (buf, "sd\t{%2, %0|%0, %2}");
4642 switch (GET_CODE (operands[3]))
4646 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
4648 rtx temp = operands[2];
4649 operands[2] = operands[1];
4653 /* know operands[0] == operands[1]. */
4655 if (GET_CODE (operands[2]) == MEM)
4661 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
4663 if (STACK_TOP_P (operands[0]))
4664 /* How is it that we are storing to a dead operand[2]?
4665 Well, presumably operands[1] is dead too. We can't
4666 store the result to st(0) as st(0) gets popped on this
4667 instruction. Instead store to operands[2] (which I
4668 think has to be st(1)). st(1) will be popped later.
4669 gcc <= 2.8.1 didn't have this check and generated
4670 assembly code that the Unixware assembler rejected. */
4671 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
4673 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
4677 if (STACK_TOP_P (operands[0]))
4678 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
4680 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
4685 if (GET_CODE (operands[1]) == MEM)
4691 if (GET_CODE (operands[2]) == MEM)
4697 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
4700 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
4701 derived assemblers, confusingly reverse the direction of
4702 the operation for fsub{r} and fdiv{r} when the
4703 destination register is not st(0). The Intel assembler
4704 doesn't have this brain damage. Read !SYSV386_COMPAT to
4705 figure out what the hardware really does. */
4706 if (STACK_TOP_P (operands[0]))
4707 p = "{p\t%0, %2|rp\t%2, %0}";
4709 p = "{rp\t%2, %0|p\t%0, %2}";
4711 if (STACK_TOP_P (operands[0]))
4712 /* As above for fmul/fadd, we can't store to st(0). */
4713 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
4715 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
4720 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
4723 if (STACK_TOP_P (operands[0]))
4724 p = "{rp\t%0, %1|p\t%1, %0}";
4726 p = "{p\t%1, %0|rp\t%0, %1}";
4728 if (STACK_TOP_P (operands[0]))
4729 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
4731 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
4736 if (STACK_TOP_P (operands[0]))
4738 if (STACK_TOP_P (operands[1]))
4739 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
4741 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
4744 else if (STACK_TOP_P (operands[1]))
4747 p = "{\t%1, %0|r\t%0, %1}";
4749 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
4755 p = "{r\t%2, %0|\t%0, %2}";
4757 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
4770 /* Output code to initialize control word copies used by
4771 trunc?f?i patterns. NORMAL is set to current control word, while ROUND_DOWN
4772 is set to control word rounding downwards. */
4774 emit_i387_cw_initialization (normal, round_down)
4775 rtx normal, round_down;
4777 rtx reg = gen_reg_rtx (HImode);
4779 emit_insn (gen_x86_fnstcw_1 (normal));
4780 emit_move_insn (reg, normal);
4781 if (!TARGET_PARTIAL_REG_STALL && !optimize_size
4783 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
4785 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0xc00)));
4786 emit_move_insn (round_down, reg);
4789 /* Output code for INSN to convert a float to a signed int. OPERANDS
4790 are the insn operands. The output may be [HSD]Imode and the input
4791 operand may be [SDX]Fmode. */
4794 output_fix_trunc (insn, operands)
4798 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
4799 int dimode_p = GET_MODE (operands[0]) == DImode;
4801 /* Jump through a hoop or two for DImode, since the hardware has no
4802 non-popping instruction. We used to do this a different way, but
4803 that was somewhat fragile and broke with post-reload splitters. */
4804 if (dimode_p && !stack_top_dies)
4805 output_asm_insn ("fld\t%y1", operands);
4807 if (!STACK_TOP_P (operands[1]))
4810 if (GET_CODE (operands[0]) != MEM)
4813 output_asm_insn ("fldcw\t%3", operands);
4814 if (stack_top_dies || dimode_p)
4815 output_asm_insn ("fistp%z0\t%0", operands);
4817 output_asm_insn ("fist%z0\t%0", operands);
4818 output_asm_insn ("fldcw\t%2", operands);
4823 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
4824 should be used and 2 when fnstsw should be used. UNORDERED_P is true
4825 when fucom should be used. */
4828 output_fp_compare (insn, operands, eflags_p, unordered_p)
4831 int eflags_p, unordered_p;
4834 rtx cmp_op0 = operands[0];
4835 rtx cmp_op1 = operands[1];
4836 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]);
4841 cmp_op1 = operands[2];
4845 if (GET_MODE (operands[0]) == SFmode)
4847 return "ucomiss\t{%1, %0|%0, %1}";
4849 return "comiss\t{%1, %0|%0, %y}";
4852 return "ucomisd\t{%1, %0|%0, %1}";
4854 return "comisd\t{%1, %0|%0, %y}";
4857 if (! STACK_TOP_P (cmp_op0))
4860 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
4862 if (STACK_REG_P (cmp_op1)
4864 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
4865 && REGNO (cmp_op1) != FIRST_STACK_REG)
4867 /* If both the top of the 387 stack dies, and the other operand
4868 is also a stack register that dies, then this must be a
4869 `fcompp' float compare */
4873 /* There is no double popping fcomi variant. Fortunately,
4874 eflags is immune from the fstp's cc clobbering. */
4876 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
4878 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
4886 return "fucompp\n\tfnstsw\t%0";
4888 return "fcompp\n\tfnstsw\t%0";
4901 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
4903 static const char * const alt[24] =
4915 "fcomi\t{%y1, %0|%0, %y1}",
4916 "fcomip\t{%y1, %0|%0, %y1}",
4917 "fucomi\t{%y1, %0|%0, %y1}",
4918 "fucomip\t{%y1, %0|%0, %y1}",
4925 "fcom%z2\t%y2\n\tfnstsw\t%0",
4926 "fcomp%z2\t%y2\n\tfnstsw\t%0",
4927 "fucom%z2\t%y2\n\tfnstsw\t%0",
4928 "fucomp%z2\t%y2\n\tfnstsw\t%0",
4930 "ficom%z2\t%y2\n\tfnstsw\t%0",
4931 "ficomp%z2\t%y2\n\tfnstsw\t%0",
4939 mask = eflags_p << 3;
4940 mask |= (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT) << 2;
4941 mask |= unordered_p << 1;
4942 mask |= stack_top_dies;
4954 /* Output assembler code to FILE to initialize basic-block profiling.
4956 If profile_block_flag == 2
4958 Output code to call the subroutine `__bb_init_trace_func'
4959 and pass two parameters to it. The first parameter is
4960 the address of a block allocated in the object module.
4961 The second parameter is the number of the first basic block
4964 The name of the block is a local symbol made with this statement:
4966 ASM_GENERATE_INTERNAL_LABEL (BUFFER, "LPBX", 0);
4968 Of course, since you are writing the definition of
4969 `ASM_GENERATE_INTERNAL_LABEL' as well as that of this macro, you
4970 can take a short cut in the definition of this macro and use the
4971 name that you know will result.
4973 The number of the first basic block of the function is
4974 passed to the macro in BLOCK_OR_LABEL.
4976 If described in a virtual assembler language the code to be
4980 parameter2 <- BLOCK_OR_LABEL
4981 call __bb_init_trace_func
4983 else if profile_block_flag != 0
4985 Output code to call the subroutine `__bb_init_func'
4986 and pass one single parameter to it, which is the same
4987 as the first parameter to `__bb_init_trace_func'.
4989 The first word of this parameter is a flag which will be nonzero if
4990 the object module has already been initialized. So test this word
4991 first, and do not call `__bb_init_func' if the flag is nonzero.
4992 Note: When profile_block_flag == 2 the test need not be done
4993 but `__bb_init_trace_func' *must* be called.
4995 BLOCK_OR_LABEL may be used to generate a label number as a
4996 branch destination in case `__bb_init_func' will not be called.
4998 If described in a virtual assembler language the code to be
5009 ix86_output_function_block_profiler (file, block_or_label)
5013 static int num_func = 0;
5015 char block_table[80], false_label[80];
5017 ASM_GENERATE_INTERNAL_LABEL (block_table, "LPBX", 0);
5019 xops[1] = gen_rtx_SYMBOL_REF (VOIDmode, block_table);
5020 xops[5] = stack_pointer_rtx;
5021 xops[7] = gen_rtx_REG (Pmode, 0); /* eax */
5023 CONSTANT_POOL_ADDRESS_P (xops[1]) = TRUE;
5025 switch (profile_block_flag)
5028 xops[2] = GEN_INT (block_or_label);
5029 xops[3] = gen_rtx_MEM (Pmode,
5030 gen_rtx_SYMBOL_REF (VOIDmode, "__bb_init_trace_func"));
5031 xops[6] = GEN_INT (8);
5033 output_asm_insn ("push{l}\t%2", xops);
5035 output_asm_insn ("push{l}\t%1", xops);
5038 output_asm_insn ("lea{l}\t{%a1, %7|%7, %a1}", xops);
5039 output_asm_insn ("push{l}\t%7", xops);
5041 output_asm_insn ("call\t%P3", xops);
5042 output_asm_insn ("add{l}\t{%6, %5|%5, %6}", xops);
5046 ASM_GENERATE_INTERNAL_LABEL (false_label, "LPBZ", num_func);
5048 xops[0] = const0_rtx;
5049 xops[2] = gen_rtx_MEM (Pmode,
5050 gen_rtx_SYMBOL_REF (VOIDmode, false_label));
5051 xops[3] = gen_rtx_MEM (Pmode,
5052 gen_rtx_SYMBOL_REF (VOIDmode, "__bb_init_func"));
5053 xops[4] = gen_rtx_MEM (Pmode, xops[1]);
5054 xops[6] = GEN_INT (4);
5056 CONSTANT_POOL_ADDRESS_P (xops[2]) = TRUE;
5058 output_asm_insn ("cmp{l}\t{%0, %4|%4, %0}", xops);
5059 output_asm_insn ("jne\t%2", xops);
5062 output_asm_insn ("push{l}\t%1", xops);
5065 output_asm_insn ("lea{l}\t{%a1, %7|%7, %a2}", xops);
5066 output_asm_insn ("push{l}\t%7", xops);
5068 output_asm_insn ("call\t%P3", xops);
5069 output_asm_insn ("add{l}\t{%6, %5|%5, %6}", xops);
5070 ASM_OUTPUT_INTERNAL_LABEL (file, "LPBZ", num_func);
5076 /* Output assembler code to FILE to increment a counter associated
5077 with basic block number BLOCKNO.
5079 If profile_block_flag == 2
5081 Output code to initialize the global structure `__bb' and
5082 call the function `__bb_trace_func' which will increment the
5085 `__bb' consists of two words. In the first word the number
5086 of the basic block has to be stored. In the second word
5087 the address of a block allocated in the object module
5090 The basic block number is given by BLOCKNO.
5092 The address of the block is given by the label created with
5094 ASM_GENERATE_INTERNAL_LABEL (BUFFER, "LPBX", 0);
5096 by FUNCTION_BLOCK_PROFILER.
5098 Of course, since you are writing the definition of
5099 `ASM_GENERATE_INTERNAL_LABEL' as well as that of this macro, you
5100 can take a short cut in the definition of this macro and use the
5101 name that you know will result.
5103 If described in a virtual assembler language the code to be
5106 move BLOCKNO -> (__bb)
5107 move LPBX0 -> (__bb+4)
5108 call __bb_trace_func
5110 Note that function `__bb_trace_func' must not change the
5111 machine state, especially the flag register. To grant
5112 this, you must output code to save and restore registers
5113 either in this macro or in the macros MACHINE_STATE_SAVE
5114 and MACHINE_STATE_RESTORE. The last two macros will be
5115 used in the function `__bb_trace_func', so you must make
5116 sure that the function prologue does not change any
5117 register prior to saving it with MACHINE_STATE_SAVE.
5119 else if profile_block_flag != 0
5121 Output code to increment the counter directly.
5122 Basic blocks are numbered separately from zero within each
5123 compiled object module. The count associated with block number
5124 BLOCKNO is at index BLOCKNO in an array of words; the name of
5125 this array is a local symbol made with this statement:
5127 ASM_GENERATE_INTERNAL_LABEL (BUFFER, "LPBX", 2);
5129 Of course, since you are writing the definition of
5130 `ASM_GENERATE_INTERNAL_LABEL' as well as that of this macro, you
5131 can take a short cut in the definition of this macro and use the
5132 name that you know will result.
5134 If described in a virtual assembler language the code to be
5137 inc (LPBX2+4*BLOCKNO)
5141 ix86_output_block_profiler (file, blockno)
5142 FILE *file ATTRIBUTE_UNUSED;
5145 rtx xops[8], cnt_rtx;
5147 char *block_table = counts;
5149 switch (profile_block_flag)
5152 ASM_GENERATE_INTERNAL_LABEL (block_table, "LPBX", 0);
5154 xops[1] = gen_rtx_SYMBOL_REF (VOIDmode, block_table);
5155 xops[2] = GEN_INT (blockno);
5156 xops[3] = gen_rtx_MEM (Pmode,
5157 gen_rtx_SYMBOL_REF (VOIDmode, "__bb_trace_func"));
5158 xops[4] = gen_rtx_SYMBOL_REF (VOIDmode, "__bb");
5159 xops[5] = plus_constant (xops[4], 4);
5160 xops[0] = gen_rtx_MEM (SImode, xops[4]);
5161 xops[6] = gen_rtx_MEM (SImode, xops[5]);
5163 CONSTANT_POOL_ADDRESS_P (xops[1]) = TRUE;
5165 output_asm_insn ("pushf", xops);
5166 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
5169 xops[7] = gen_rtx_REG (Pmode, 0); /* eax */
5170 output_asm_insn ("push{l}\t%7", xops);
5171 output_asm_insn ("lea{l}\t{%a1, %7|%7, %a1}", xops);
5172 output_asm_insn ("mov{l}\t{%7, %6|%6, %7}", xops);
5173 output_asm_insn ("pop{l}\t%7", xops);
5176 output_asm_insn ("mov{l}\t{%1, %6|%6, %1}", xops);
5177 output_asm_insn ("call\t%P3", xops);
5178 output_asm_insn ("popf", xops);
5183 ASM_GENERATE_INTERNAL_LABEL (counts, "LPBX", 2);
5184 cnt_rtx = gen_rtx_SYMBOL_REF (VOIDmode, counts);
5185 SYMBOL_REF_FLAG (cnt_rtx) = TRUE;
5188 cnt_rtx = plus_constant (cnt_rtx, blockno*4);
5191 cnt_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, cnt_rtx);
5193 xops[0] = gen_rtx_MEM (SImode, cnt_rtx);
5194 output_asm_insn ("inc{l}\t%0", xops);
5201 ix86_expand_move (mode, operands)
5202 enum machine_mode mode;
5205 int strict = (reload_in_progress || reload_completed);
5208 if (flag_pic && mode == Pmode && symbolic_operand (operands[1], Pmode))
5210 /* Emit insns to move operands[1] into operands[0]. */
5212 if (GET_CODE (operands[0]) == MEM)
5213 operands[1] = force_reg (Pmode, operands[1]);
5216 rtx temp = operands[0];
5217 if (GET_CODE (temp) != REG)
5218 temp = gen_reg_rtx (Pmode);
5219 temp = legitimize_pic_address (operands[1], temp);
5220 if (temp == operands[0])
5227 if (GET_CODE (operands[0]) == MEM
5228 && (GET_MODE (operands[0]) == QImode
5229 || !push_operand (operands[0], mode))
5230 && GET_CODE (operands[1]) == MEM)
5231 operands[1] = force_reg (mode, operands[1]);
5233 if (push_operand (operands[0], mode)
5234 && ! general_no_elim_operand (operands[1], mode))
5235 operands[1] = copy_to_mode_reg (mode, operands[1]);
5237 if (FLOAT_MODE_P (mode))
5239 /* If we are loading a floating point constant to a register,
5240 force the value to memory now, since we'll get better code
5241 out the back end. */
5245 else if (GET_CODE (operands[1]) == CONST_DOUBLE
5246 && register_operand (operands[0], mode))
5247 operands[1] = validize_mem (force_const_mem (mode, operands[1]));
5251 insn = gen_rtx_SET (VOIDmode, operands[0], operands[1]);
5256 /* Attempt to expand a binary operator. Make the expansion closer to the
5257 actual machine, then just general_operand, which will allow 3 separate
5258 memory references (one output, two input) in a single insn. */
5261 ix86_expand_binary_operator (code, mode, operands)
5263 enum machine_mode mode;
5266 int matching_memory;
5267 rtx src1, src2, dst, op, clob;
5273 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
5274 if (GET_RTX_CLASS (code) == 'c'
5275 && (rtx_equal_p (dst, src2)
5276 || immediate_operand (src1, mode)))
5283 /* If the destination is memory, and we do not have matching source
5284 operands, do things in registers. */
5285 matching_memory = 0;
5286 if (GET_CODE (dst) == MEM)
5288 if (rtx_equal_p (dst, src1))
5289 matching_memory = 1;
5290 else if (GET_RTX_CLASS (code) == 'c'
5291 && rtx_equal_p (dst, src2))
5292 matching_memory = 2;
5294 dst = gen_reg_rtx (mode);
5297 /* Both source operands cannot be in memory. */
5298 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
5300 if (matching_memory != 2)
5301 src2 = force_reg (mode, src2);
5303 src1 = force_reg (mode, src1);
5306 /* If the operation is not commutable, source 1 cannot be a constant
5307 or non-matching memory. */
5308 if ((CONSTANT_P (src1)
5309 || (!matching_memory && GET_CODE (src1) == MEM))
5310 && GET_RTX_CLASS (code) != 'c')
5311 src1 = force_reg (mode, src1);
5313 /* If optimizing, copy to regs to improve CSE */
5314 if (optimize && ! no_new_pseudos)
5316 if (GET_CODE (dst) == MEM)
5317 dst = gen_reg_rtx (mode);
5318 if (GET_CODE (src1) == MEM)
5319 src1 = force_reg (mode, src1);
5320 if (GET_CODE (src2) == MEM)
5321 src2 = force_reg (mode, src2);
5324 /* Emit the instruction. */
5326 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
5327 if (reload_in_progress)
5329 /* Reload doesn't know about the flags register, and doesn't know that
5330 it doesn't want to clobber it. We can only do this with PLUS. */
5337 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
5338 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
5341 /* Fix up the destination if needed. */
5342 if (dst != operands[0])
5343 emit_move_insn (operands[0], dst);
5346 /* Return TRUE or FALSE depending on whether the binary operator meets the
5347 appropriate constraints. */
5350 ix86_binary_operator_ok (code, mode, operands)
5352 enum machine_mode mode ATTRIBUTE_UNUSED;
5355 /* Both source operands cannot be in memory. */
5356 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
5358 /* If the operation is not commutable, source 1 cannot be a constant. */
5359 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != 'c')
5361 /* If the destination is memory, we must have a matching source operand. */
5362 if (GET_CODE (operands[0]) == MEM
5363 && ! (rtx_equal_p (operands[0], operands[1])
5364 || (GET_RTX_CLASS (code) == 'c'
5365 && rtx_equal_p (operands[0], operands[2]))))
5367 /* If the operation is not commutable and the source 1 is memory, we must
5368 have a matching destionation. */
5369 if (GET_CODE (operands[1]) == MEM
5370 && GET_RTX_CLASS (code) != 'c'
5371 && ! rtx_equal_p (operands[0], operands[1]))
5376 /* Attempt to expand a unary operator. Make the expansion closer to the
5377 actual machine, then just general_operand, which will allow 2 separate
5378 memory references (one output, one input) in a single insn. */
5381 ix86_expand_unary_operator (code, mode, operands)
5383 enum machine_mode mode;
5386 int matching_memory;
5387 rtx src, dst, op, clob;
5392 /* If the destination is memory, and we do not have matching source
5393 operands, do things in registers. */
5394 matching_memory = 0;
5395 if (GET_CODE (dst) == MEM)
5397 if (rtx_equal_p (dst, src))
5398 matching_memory = 1;
5400 dst = gen_reg_rtx (mode);
5403 /* When source operand is memory, destination must match. */
5404 if (!matching_memory && GET_CODE (src) == MEM)
5405 src = force_reg (mode, src);
5407 /* If optimizing, copy to regs to improve CSE */
5408 if (optimize && ! no_new_pseudos)
5410 if (GET_CODE (dst) == MEM)
5411 dst = gen_reg_rtx (mode);
5412 if (GET_CODE (src) == MEM)
5413 src = force_reg (mode, src);
5416 /* Emit the instruction. */
5418 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
5419 if (reload_in_progress || code == NOT)
5421 /* Reload doesn't know about the flags register, and doesn't know that
5422 it doesn't want to clobber it. */
5429 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
5430 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
5433 /* Fix up the destination if needed. */
5434 if (dst != operands[0])
5435 emit_move_insn (operands[0], dst);
5438 /* Return TRUE or FALSE depending on whether the unary operator meets the
5439 appropriate constraints. */
5442 ix86_unary_operator_ok (code, mode, operands)
5443 enum rtx_code code ATTRIBUTE_UNUSED;
5444 enum machine_mode mode ATTRIBUTE_UNUSED;
5445 rtx operands[2] ATTRIBUTE_UNUSED;
5447 /* If one of operands is memory, source and destination must match. */
5448 if ((GET_CODE (operands[0]) == MEM
5449 || GET_CODE (operands[1]) == MEM)
5450 && ! rtx_equal_p (operands[0], operands[1]))
5455 /* Return TRUE or FALSE depending on whether the first SET in INSN
5456 has source and destination with matching CC modes, and that the
5457 CC mode is at least as constrained as REQ_MODE. */
5460 ix86_match_ccmode (insn, req_mode)
5462 enum machine_mode req_mode;
5465 enum machine_mode set_mode;
5467 set = PATTERN (insn);
5468 if (GET_CODE (set) == PARALLEL)
5469 set = XVECEXP (set, 0, 0);
5470 if (GET_CODE (set) != SET)
5472 if (GET_CODE (SET_SRC (set)) != COMPARE)
5475 set_mode = GET_MODE (SET_DEST (set));
5479 if (req_mode != CCNOmode
5480 && (req_mode != CCmode
5481 || XEXP (SET_SRC (set), 1) != const0_rtx))
5485 if (req_mode == CCGCmode)
5489 if (req_mode == CCGOCmode || req_mode == CCNOmode)
5493 if (req_mode == CCZmode)
5503 return (GET_MODE (SET_SRC (set)) == set_mode);
5506 /* Generate insn patterns to do an integer compare of OPERANDS. */
5509 ix86_expand_int_compare (code, op0, op1)
5513 enum machine_mode cmpmode;
5516 cmpmode = SELECT_CC_MODE (code, op0, op1);
5517 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
5519 /* This is very simple, but making the interface the same as in the
5520 FP case makes the rest of the code easier. */
5521 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
5522 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
5524 /* Return the test that should be put into the flags user, i.e.
5525 the bcc, scc, or cmov instruction. */
5526 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
5529 /* Figure out whether to use ordered or unordered fp comparisons.
5530 Return the appropriate mode to use. */
5533 ix86_fp_compare_mode (code)
5534 enum rtx_code code ATTRIBUTE_UNUSED;
5536 /* ??? In order to make all comparisons reversible, we do all comparisons
5537 non-trapping when compiling for IEEE. Once gcc is able to distinguish
5538 all forms trapping and nontrapping comparisons, we can make inequality
5539 comparisons trapping again, since it results in better code when using
5540 FCOM based compares. */
5541 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
5545 ix86_cc_mode (code, op0, op1)
5549 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
5550 return ix86_fp_compare_mode (code);
5553 /* Only zero flag is needed. */
5555 case NE: /* ZF!=0 */
5557 /* Codes needing carry flag. */
5558 case GEU: /* CF=0 */
5559 case GTU: /* CF=0 & ZF=0 */
5560 case LTU: /* CF=1 */
5561 case LEU: /* CF=1 | ZF=1 */
5563 /* Codes possibly doable only with sign flag when
5564 comparing against zero. */
5565 case GE: /* SF=OF or SF=0 */
5566 case LT: /* SF<>OF or SF=1 */
5567 if (op1 == const0_rtx)
5570 /* For other cases Carry flag is not required. */
5572 /* Codes doable only with sign flag when comparing
5573 against zero, but we miss jump instruction for it
5574 so we need to use relational tests agains overflow
5575 that thus needs to be zero. */
5576 case GT: /* ZF=0 & SF=OF */
5577 case LE: /* ZF=1 | SF<>OF */
5578 if (op1 == const0_rtx)
5587 /* Return true if we should use an FCOMI instruction for this fp comparison. */
5590 ix86_use_fcomi_compare (code)
5591 enum rtx_code code ATTRIBUTE_UNUSED;
5593 enum rtx_code swapped_code = swap_condition (code);
5594 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
5595 || (ix86_fp_comparison_cost (swapped_code)
5596 == ix86_fp_comparison_fcomi_cost (swapped_code)));
5599 /* Swap, force into registers, or otherwise massage the two operands
5600 to a fp comparison. The operands are updated in place; the new
5601 comparsion code is returned. */
5603 static enum rtx_code
5604 ix86_prepare_fp_compare_args (code, pop0, pop1)
5608 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
5609 rtx op0 = *pop0, op1 = *pop1;
5610 enum machine_mode op_mode = GET_MODE (op0);
5611 int is_sse = SSE_REG_P (op0) | SSE_REG_P (op1);
5613 /* All of the unordered compare instructions only work on registers.
5614 The same is true of the XFmode compare instructions. The same is
5615 true of the fcomi compare instructions. */
5618 && (fpcmp_mode == CCFPUmode
5619 || op_mode == XFmode
5620 || op_mode == TFmode
5621 || ix86_use_fcomi_compare (code)))
5623 op0 = force_reg (op_mode, op0);
5624 op1 = force_reg (op_mode, op1);
5628 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
5629 things around if they appear profitable, otherwise force op0
5632 if (standard_80387_constant_p (op0) == 0
5633 || (GET_CODE (op0) == MEM
5634 && ! (standard_80387_constant_p (op1) == 0
5635 || GET_CODE (op1) == MEM)))
5638 tmp = op0, op0 = op1, op1 = tmp;
5639 code = swap_condition (code);
5642 if (GET_CODE (op0) != REG)
5643 op0 = force_reg (op_mode, op0);
5645 if (CONSTANT_P (op1))
5647 if (standard_80387_constant_p (op1))
5648 op1 = force_reg (op_mode, op1);
5650 op1 = validize_mem (force_const_mem (op_mode, op1));
5654 /* Try to rearrange the comparison to make it cheaper. */
5655 if (ix86_fp_comparison_cost (code)
5656 > ix86_fp_comparison_cost (swap_condition (code))
5657 && (GET_CODE (op0) == REG || !reload_completed))
5660 tmp = op0, op0 = op1, op1 = tmp;
5661 code = swap_condition (code);
5662 if (GET_CODE (op0) != REG)
5663 op0 = force_reg (op_mode, op0);
5671 /* Convert comparison codes we use to represent FP comparison to integer
5672 code that will result in proper branch. Return UNKNOWN if no such code
5674 static enum rtx_code
5675 ix86_fp_compare_code_to_integer (code)
5705 /* Split comparison code CODE into comparisons we can do using branch
5706 instructions. BYPASS_CODE is comparison code for branch that will
5707 branch around FIRST_CODE and SECOND_CODE. If some of branches
5708 is not required, set value to NIL.
5709 We never require more than two branches. */
5711 ix86_fp_comparison_codes (code, bypass_code, first_code, second_code)
5712 enum rtx_code code, *bypass_code, *first_code, *second_code;
5718 /* The fcomi comparison sets flags as follows:
5728 case GT: /* GTU - CF=0 & ZF=0 */
5729 case GE: /* GEU - CF=0 */
5730 case ORDERED: /* PF=0 */
5731 case UNORDERED: /* PF=1 */
5732 case UNEQ: /* EQ - ZF=1 */
5733 case UNLT: /* LTU - CF=1 */
5734 case UNLE: /* LEU - CF=1 | ZF=1 */
5735 case LTGT: /* EQ - ZF=0 */
5737 case LT: /* LTU - CF=1 - fails on unordered */
5739 *bypass_code = UNORDERED;
5741 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
5743 *bypass_code = UNORDERED;
5745 case EQ: /* EQ - ZF=1 - fails on unordered */
5747 *bypass_code = UNORDERED;
5749 case NE: /* NE - ZF=0 - fails on unordered */
5751 *second_code = UNORDERED;
5753 case UNGE: /* GEU - CF=0 - fails on unordered */
5755 *second_code = UNORDERED;
5757 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
5759 *second_code = UNORDERED;
5764 if (!TARGET_IEEE_FP)
5771 /* Return cost of comparison done fcom + arithmetics operations on AX.
5772 All following functions do use number of instructions as an cost metrics.
5773 In future this should be tweaked to compute bytes for optimize_size and
5774 take into account performance of various instructions on various CPUs. */
5776 ix86_fp_comparison_arithmetics_cost (code)
5779 if (!TARGET_IEEE_FP)
5781 /* The cost of code output by ix86_expand_fp_compare. */
5809 /* Return cost of comparison done using fcomi operation.
5810 See ix86_fp_comparison_arithmetics_cost for the metrics. */
5812 ix86_fp_comparison_fcomi_cost (code)
5815 enum rtx_code bypass_code, first_code, second_code;
5816 /* Return arbitarily high cost when instruction is not supported - this
5817 prevents gcc from using it. */
5820 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
5821 return (bypass_code != NIL || second_code != NIL) + 2;
5824 /* Return cost of comparison done using sahf operation.
5825 See ix86_fp_comparison_arithmetics_cost for the metrics. */
5827 ix86_fp_comparison_sahf_cost (code)
5830 enum rtx_code bypass_code, first_code, second_code;
5831 /* Return arbitarily high cost when instruction is not preferred - this
5832 avoids gcc from using it. */
5833 if (!TARGET_USE_SAHF && !optimize_size)
5835 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
5836 return (bypass_code != NIL || second_code != NIL) + 3;
5839 /* Compute cost of the comparison done using any method.
5840 See ix86_fp_comparison_arithmetics_cost for the metrics. */
5842 ix86_fp_comparison_cost (code)
5845 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
5848 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
5849 sahf_cost = ix86_fp_comparison_sahf_cost (code);
5851 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
5852 if (min > sahf_cost)
5854 if (min > fcomi_cost)
5859 /* Generate insn patterns to do a floating point compare of OPERANDS. */
5862 ix86_expand_fp_compare (code, op0, op1, scratch, second_test, bypass_test)
5864 rtx op0, op1, scratch;
5868 enum machine_mode fpcmp_mode, intcmp_mode;
5870 int cost = ix86_fp_comparison_cost (code);
5871 enum rtx_code bypass_code, first_code, second_code;
5873 fpcmp_mode = ix86_fp_compare_mode (code);
5874 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
5877 *second_test = NULL_RTX;
5879 *bypass_test = NULL_RTX;
5881 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
5883 /* Do fcomi/sahf based test when profitable. */
5884 if ((bypass_code == NIL || bypass_test)
5885 && (second_code == NIL || second_test)
5886 && ix86_fp_comparison_arithmetics_cost (code) > cost)
5890 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
5891 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
5897 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
5898 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), 9);
5900 scratch = gen_reg_rtx (HImode);
5901 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
5902 emit_insn (gen_x86_sahf_1 (scratch));
5905 /* The FP codes work out to act like unsigned. */
5906 intcmp_mode = fpcmp_mode;
5908 if (bypass_code != NIL)
5909 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
5910 gen_rtx_REG (intcmp_mode, FLAGS_REG),
5912 if (second_code != NIL)
5913 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
5914 gen_rtx_REG (intcmp_mode, FLAGS_REG),
5919 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
5920 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
5921 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), 9);
5923 scratch = gen_reg_rtx (HImode);
5924 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
5926 /* In the unordered case, we have to check C2 for NaN's, which
5927 doesn't happen to work out to anything nice combination-wise.
5928 So do some bit twiddling on the value we've got in AH to come
5929 up with an appropriate set of condition codes. */
5931 intcmp_mode = CCNOmode;
5936 if (code == GT || !TARGET_IEEE_FP)
5938 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
5943 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
5944 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
5945 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
5946 intcmp_mode = CCmode;
5952 if (code == LT && TARGET_IEEE_FP)
5954 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
5955 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
5956 intcmp_mode = CCmode;
5961 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
5967 if (code == GE || !TARGET_IEEE_FP)
5969 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
5974 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
5975 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
5982 if (code == LE && TARGET_IEEE_FP)
5984 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
5985 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
5986 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
5987 intcmp_mode = CCmode;
5992 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
5998 if (code == EQ && TARGET_IEEE_FP)
6000 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
6001 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
6002 intcmp_mode = CCmode;
6007 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
6014 if (code == NE && TARGET_IEEE_FP)
6016 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
6017 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
6023 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
6029 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
6033 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
6042 /* Return the test that should be put into the flags user, i.e.
6043 the bcc, scc, or cmov instruction. */
6044 return gen_rtx_fmt_ee (code, VOIDmode,
6045 gen_rtx_REG (intcmp_mode, FLAGS_REG),
6050 ix86_expand_compare (code, second_test, bypass_test)
6052 rtx *second_test, *bypass_test;
6055 op0 = ix86_compare_op0;
6056 op1 = ix86_compare_op1;
6059 *second_test = NULL_RTX;
6061 *bypass_test = NULL_RTX;
6063 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
6064 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
6065 second_test, bypass_test);
6067 ret = ix86_expand_int_compare (code, op0, op1);
6072 /* Return true if the CODE will result in nontrivial jump sequence. */
6074 ix86_fp_jump_nontrivial_p (code)
6077 enum rtx_code bypass_code, first_code, second_code;
6080 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
6081 return bypass_code != NIL || second_code != NIL;
6085 ix86_expand_branch (code, label)
6091 switch (GET_MODE (ix86_compare_op0))
6097 tmp = ix86_expand_compare (code, NULL, NULL);
6098 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
6099 gen_rtx_LABEL_REF (VOIDmode, label),
6101 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
6111 enum rtx_code bypass_code, first_code, second_code;
6113 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
6116 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
6118 /* Check whether we will use the natural sequence with one jump. If
6119 so, we can expand jump early. Otherwise delay expansion by
6120 creating compound insn to not confuse optimizers. */
6121 if (bypass_code == NIL && second_code == NIL
6124 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
6125 gen_rtx_LABEL_REF (VOIDmode, label),
6130 tmp = gen_rtx_fmt_ee (code, VOIDmode,
6131 ix86_compare_op0, ix86_compare_op1);
6132 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
6133 gen_rtx_LABEL_REF (VOIDmode, label),
6135 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
6137 use_fcomi = ix86_use_fcomi_compare (code);
6138 vec = rtvec_alloc (3 + !use_fcomi);
6139 RTVEC_ELT (vec, 0) = tmp;
6141 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
6143 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
6146 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
6148 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
6156 /* Expand DImode branch into multiple compare+branch. */
6158 rtx lo[2], hi[2], label2;
6159 enum rtx_code code1, code2, code3;
6161 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
6163 tmp = ix86_compare_op0;
6164 ix86_compare_op0 = ix86_compare_op1;
6165 ix86_compare_op1 = tmp;
6166 code = swap_condition (code);
6168 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
6169 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
6171 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
6172 avoid two branches. This costs one extra insn, so disable when
6173 optimizing for size. */
6175 if ((code == EQ || code == NE)
6177 || hi[1] == const0_rtx || lo[1] == const0_rtx))
6182 if (hi[1] != const0_rtx)
6183 xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
6184 NULL_RTX, 0, OPTAB_WIDEN);
6187 if (lo[1] != const0_rtx)
6188 xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
6189 NULL_RTX, 0, OPTAB_WIDEN);
6191 tmp = expand_binop (SImode, ior_optab, xor1, xor0,
6192 NULL_RTX, 0, OPTAB_WIDEN);
6194 ix86_compare_op0 = tmp;
6195 ix86_compare_op1 = const0_rtx;
6196 ix86_expand_branch (code, label);
6200 /* Otherwise, if we are doing less-than or greater-or-equal-than,
6201 op1 is a constant and the low word is zero, then we can just
6202 examine the high word. */
6204 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
6207 case LT: case LTU: case GE: case GEU:
6208 ix86_compare_op0 = hi[0];
6209 ix86_compare_op1 = hi[1];
6210 ix86_expand_branch (code, label);
6216 /* Otherwise, we need two or three jumps. */
6218 label2 = gen_label_rtx ();
6221 code2 = swap_condition (code);
6222 code3 = unsigned_condition (code);
6226 case LT: case GT: case LTU: case GTU:
6229 case LE: code1 = LT; code2 = GT; break;
6230 case GE: code1 = GT; code2 = LT; break;
6231 case LEU: code1 = LTU; code2 = GTU; break;
6232 case GEU: code1 = GTU; code2 = LTU; break;
6234 case EQ: code1 = NIL; code2 = NE; break;
6235 case NE: code2 = NIL; break;
6243 * if (hi(a) < hi(b)) goto true;
6244 * if (hi(a) > hi(b)) goto false;
6245 * if (lo(a) < lo(b)) goto true;
6249 ix86_compare_op0 = hi[0];
6250 ix86_compare_op1 = hi[1];
6253 ix86_expand_branch (code1, label);
6255 ix86_expand_branch (code2, label2);
6257 ix86_compare_op0 = lo[0];
6258 ix86_compare_op1 = lo[1];
6259 ix86_expand_branch (code3, label);
6262 emit_label (label2);
6271 /* Split branch based on floating point condition. */
6273 ix86_split_fp_branch (code, op1, op2, target1, target2, tmp)
6275 rtx op1, op2, target1, target2, tmp;
6278 rtx label = NULL_RTX;
6280 int bypass_probability = -1, second_probability = -1, probability = -1;
6283 if (target2 != pc_rtx)
6286 code = reverse_condition_maybe_unordered (code);
6291 condition = ix86_expand_fp_compare (code, op1, op2,
6292 tmp, &second, &bypass);
6294 if (split_branch_probability >= 0)
6296 /* Distribute the probabilities across the jumps.
6297 Assume the BYPASS and SECOND to be always test
6299 probability = split_branch_probability;
6301 /* Value of 1 is low enought to make no need for probability
6302 to be updated. Later we may run some experiments and see
6303 if unordered values are more frequent in practice. */
6305 bypass_probability = 1;
6307 second_probability = 1;
6309 if (bypass != NULL_RTX)
6311 label = gen_label_rtx ();
6312 i = emit_jump_insn (gen_rtx_SET
6314 gen_rtx_IF_THEN_ELSE (VOIDmode,
6316 gen_rtx_LABEL_REF (VOIDmode,
6319 if (bypass_probability >= 0)
6321 = gen_rtx_EXPR_LIST (REG_BR_PROB,
6322 GEN_INT (bypass_probability),
6325 i = emit_jump_insn (gen_rtx_SET
6327 gen_rtx_IF_THEN_ELSE (VOIDmode,
6328 condition, target1, target2)));
6329 if (probability >= 0)
6331 = gen_rtx_EXPR_LIST (REG_BR_PROB,
6332 GEN_INT (probability),
6334 if (second != NULL_RTX)
6336 i = emit_jump_insn (gen_rtx_SET
6338 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
6340 if (second_probability >= 0)
6342 = gen_rtx_EXPR_LIST (REG_BR_PROB,
6343 GEN_INT (second_probability),
6346 if (label != NULL_RTX)
6351 ix86_expand_setcc (code, dest)
6355 rtx ret, tmp, tmpreg;
6356 rtx second_test, bypass_test;
6359 if (GET_MODE (ix86_compare_op0) == DImode
6361 return 0; /* FAIL */
6363 /* Three modes of generation:
6364 0 -- destination does not overlap compare sources:
6365 clear dest first, emit strict_low_part setcc.
6366 1 -- destination does overlap compare sources:
6367 emit subreg setcc, zero extend.
6368 2 -- destination is in QImode:
6371 We don't use mode 0 early in compilation because it confuses CSE.
6372 There are peepholes to turn mode 1 into mode 0 if things work out
6373 nicely after reload. */
6375 type = cse_not_expected ? 0 : 1;
6377 if (GET_MODE (dest) == QImode)
6379 else if (reg_overlap_mentioned_p (dest, ix86_compare_op0)
6380 || reg_overlap_mentioned_p (dest, ix86_compare_op1))
6384 emit_move_insn (dest, const0_rtx);
6386 ret = ix86_expand_compare (code, &second_test, &bypass_test);
6387 PUT_MODE (ret, QImode);
6393 tmp = gen_lowpart (QImode, dest);
6395 tmp = gen_rtx_STRICT_LOW_PART (VOIDmode, tmp);
6399 if (!cse_not_expected)
6400 tmp = gen_reg_rtx (QImode);
6402 tmp = gen_lowpart (QImode, dest);
6406 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
6407 if (bypass_test || second_test)
6409 rtx test = second_test;
6411 rtx tmp2 = gen_reg_rtx (QImode);
6418 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
6420 PUT_MODE (test, QImode);
6421 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
6424 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
6426 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
6433 tmp = gen_rtx_ZERO_EXTEND (GET_MODE (dest), tmp);
6434 tmp = gen_rtx_SET (VOIDmode, dest, tmp);
6435 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
6436 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
6440 return 1; /* DONE */
6444 ix86_expand_int_movcc (operands)
6447 enum rtx_code code = GET_CODE (operands[1]), compare_code;
6448 rtx compare_seq, compare_op;
6449 rtx second_test, bypass_test;
6451 /* When the compare code is not LTU or GEU, we can not use sbbl case.
6452 In case comparsion is done with immediate, we can convert it to LTU or
6453 GEU by altering the integer. */
6455 if ((code == LEU || code == GTU)
6456 && GET_CODE (ix86_compare_op1) == CONST_INT
6457 && GET_MODE (operands[0]) != HImode
6458 && (unsigned int)INTVAL (ix86_compare_op1) != 0xffffffff
6459 && GET_CODE (operands[2]) == CONST_INT
6460 && GET_CODE (operands[3]) == CONST_INT)
6466 ix86_compare_op1 = GEN_INT (INTVAL (ix86_compare_op1) + 1);
6470 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
6471 compare_seq = gen_sequence ();
6474 compare_code = GET_CODE (compare_op);
6476 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
6477 HImode insns, we'd be swallowed in word prefix ops. */
6479 if (GET_MODE (operands[0]) != HImode
6480 && GET_MODE (operands[0]) != DImode
6481 && GET_CODE (operands[2]) == CONST_INT
6482 && GET_CODE (operands[3]) == CONST_INT)
6484 rtx out = operands[0];
6485 HOST_WIDE_INT ct = INTVAL (operands[2]);
6486 HOST_WIDE_INT cf = INTVAL (operands[3]);
6489 if ((compare_code == LTU || compare_code == GEU)
6490 && !second_test && !bypass_test)
6493 /* Detect overlap between destination and compare sources. */
6496 /* To simplify rest of code, restrict to the GEU case. */
6497 if (compare_code == LTU)
6502 compare_code = reverse_condition (compare_code);
6503 code = reverse_condition (code);
6507 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
6508 || reg_overlap_mentioned_p (out, ix86_compare_op1))
6509 tmp = gen_reg_rtx (SImode);
6511 emit_insn (compare_seq);
6512 emit_insn (gen_x86_movsicc_0_m1 (tmp));
6524 emit_insn (gen_addsi3 (tmp, tmp, GEN_INT (ct)));
6535 emit_insn (gen_iorsi3 (tmp, tmp, GEN_INT (ct)));
6537 else if (diff == -1 && ct)
6547 emit_insn (gen_one_cmplsi2 (tmp, tmp));
6549 emit_insn (gen_addsi3 (tmp, tmp, GEN_INT (cf)));
6556 * andl cf - ct, dest
6561 emit_insn (gen_andsi3 (tmp, tmp, GEN_INT (trunc_int_for_mode
6562 (cf - ct, SImode))));
6564 emit_insn (gen_addsi3 (tmp, tmp, GEN_INT (ct)));
6568 emit_move_insn (out, tmp);
6570 return 1; /* DONE */
6577 tmp = ct, ct = cf, cf = tmp;
6579 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
6581 /* We may be reversing unordered compare to normal compare, that
6582 is not valid in general (we may convert non-trapping condition
6583 to trapping one), however on i386 we currently emit all
6584 comparisons unordered. */
6585 compare_code = reverse_condition_maybe_unordered (compare_code);
6586 code = reverse_condition_maybe_unordered (code);
6590 compare_code = reverse_condition (compare_code);
6591 code = reverse_condition (code);
6594 if (diff == 1 || diff == 2 || diff == 4 || diff == 8
6595 || diff == 3 || diff == 5 || diff == 9)
6601 * lea cf(dest*(ct-cf)),dest
6605 * This also catches the degenerate setcc-only case.
6611 out = emit_store_flag (out, code, ix86_compare_op0,
6612 ix86_compare_op1, VOIDmode, 0, 1);
6615 /* On x86_64 the lea instruction operates on Pmode, so we need to get arithmetics
6616 done in proper mode to match. */
6619 if (Pmode != SImode)
6620 tmp = gen_lowpart (Pmode, out);
6627 if (Pmode != SImode)
6628 out1 = gen_lowpart (Pmode, out);
6631 tmp = gen_rtx_MULT (Pmode, out1, GEN_INT (diff & ~1));
6635 tmp = gen_rtx_PLUS (Pmode, tmp, out1);
6641 tmp = gen_rtx_PLUS (Pmode, tmp, GEN_INT (cf));
6645 && (GET_CODE (tmp) != SUBREG || SUBREG_REG (tmp) != out))
6647 if (Pmode != SImode)
6648 tmp = gen_rtx_SUBREG (SImode, tmp, 0);
6650 /* ??? We should to take care for outputing non-lea arithmetics
6651 for Pmode != SImode case too, but it is quite tricky and not
6652 too important, since all TARGET_64BIT machines support real
6653 conditional moves. */
6654 if (nops == 1 && Pmode == SImode)
6658 clob = gen_rtx_REG (CCmode, FLAGS_REG);
6659 clob = gen_rtx_CLOBBER (VOIDmode, clob);
6661 tmp = gen_rtx_SET (VOIDmode, out, tmp);
6662 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
6666 emit_insn (gen_rtx_SET (VOIDmode, out, tmp));
6668 if (out != operands[0])
6669 emit_move_insn (operands[0], out);
6671 return 1; /* DONE */
6675 * General case: Jumpful:
6676 * xorl dest,dest cmpl op1, op2
6677 * cmpl op1, op2 movl ct, dest
6679 * decl dest movl cf, dest
6680 * andl (cf-ct),dest 1:
6685 * This is reasonably steep, but branch mispredict costs are
6686 * high on modern cpus, so consider failing only if optimizing
6689 * %%% Parameterize branch_cost on the tuning architecture, then
6690 * use that. The 80386 couldn't care less about mispredicts.
6693 if (!optimize_size && !TARGET_CMOVE)
6699 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
6701 /* We may be reversing unordered compare to normal compare,
6702 that is not valid in general (we may convert non-trapping
6703 condition to trapping one), however on i386 we currently
6704 emit all comparisons unordered. */
6705 compare_code = reverse_condition_maybe_unordered (compare_code);
6706 code = reverse_condition_maybe_unordered (code);
6710 compare_code = reverse_condition (compare_code);
6711 code = reverse_condition (code);
6715 out = emit_store_flag (out, code, ix86_compare_op0,
6716 ix86_compare_op1, VOIDmode, 0, 1);
6718 emit_insn (gen_addsi3 (out, out, constm1_rtx));
6719 emit_insn (gen_andsi3 (out, out, GEN_INT (trunc_int_for_mode
6720 (cf - ct, SImode))));
6722 emit_insn (gen_addsi3 (out, out, GEN_INT (ct)));
6723 if (out != operands[0])
6724 emit_move_insn (operands[0], out);
6726 return 1; /* DONE */
6732 /* Try a few things more with specific constants and a variable. */
6735 rtx var, orig_out, out, tmp;
6738 return 0; /* FAIL */
6740 /* If one of the two operands is an interesting constant, load a
6741 constant with the above and mask it in with a logical operation. */
6743 if (GET_CODE (operands[2]) == CONST_INT)
6746 if (INTVAL (operands[2]) == 0)
6747 operands[3] = constm1_rtx, op = and_optab;
6748 else if (INTVAL (operands[2]) == -1)
6749 operands[3] = const0_rtx, op = ior_optab;
6751 return 0; /* FAIL */
6753 else if (GET_CODE (operands[3]) == CONST_INT)
6756 if (INTVAL (operands[3]) == 0)
6757 operands[2] = constm1_rtx, op = and_optab;
6758 else if (INTVAL (operands[3]) == -1)
6759 operands[2] = const0_rtx, op = ior_optab;
6761 return 0; /* FAIL */
6764 return 0; /* FAIL */
6766 orig_out = operands[0];
6767 tmp = gen_reg_rtx (GET_MODE (orig_out));
6770 /* Recurse to get the constant loaded. */
6771 if (ix86_expand_int_movcc (operands) == 0)
6772 return 0; /* FAIL */
6774 /* Mask in the interesting variable. */
6775 out = expand_binop (GET_MODE (orig_out), op, var, tmp, orig_out, 0,
6777 if (out != orig_out)
6778 emit_move_insn (orig_out, out);
6780 return 1; /* DONE */
6784 * For comparison with above,
6794 if (! nonimmediate_operand (operands[2], GET_MODE (operands[0])))
6795 operands[2] = force_reg (GET_MODE (operands[0]), operands[2]);
6796 if (! nonimmediate_operand (operands[3], GET_MODE (operands[0])))
6797 operands[3] = force_reg (GET_MODE (operands[0]), operands[3]);
6799 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
6801 rtx tmp = gen_reg_rtx (GET_MODE (operands[0]));
6802 emit_move_insn (tmp, operands[3]);
6805 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
6807 rtx tmp = gen_reg_rtx (GET_MODE (operands[0]));
6808 emit_move_insn (tmp, operands[2]);
6811 if (! register_operand (operands[2], VOIDmode)
6812 && ! register_operand (operands[3], VOIDmode))
6813 operands[2] = force_reg (GET_MODE (operands[0]), operands[2]);
6815 emit_insn (compare_seq);
6816 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
6817 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
6818 compare_op, operands[2],
6821 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
6822 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
6827 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
6828 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
6833 return 1; /* DONE */
6837 ix86_expand_fp_movcc (operands)
6842 rtx compare_op, second_test, bypass_test;
6844 /* For SF/DFmode conditional moves based on comparisons
6845 in same mode, we may want to use SSE min/max instructions. */
6846 if (((TARGET_SSE && GET_MODE (operands[0]) == SFmode)
6847 || (TARGET_SSE2 && GET_MODE (operands[0]) == DFmode))
6848 && GET_MODE (ix86_compare_op0) == GET_MODE (operands[0])
6849 /* The SSE comparisons does not support the LTGT/UNEQ pair. */
6851 || (GET_CODE (operands[1]) != LTGT && GET_CODE (operands[1]) != UNEQ))
6852 /* We may be called from the post-reload splitter. */
6853 && (!REG_P (operands[0])
6854 || SSE_REG_P (operands[0])
6855 || REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER))
6857 rtx op0 = ix86_compare_op0, op1 = ix86_compare_op1;
6858 code = GET_CODE (operands[1]);
6860 /* See if we have (cross) match between comparison operands and
6861 conditional move operands. */
6862 if (rtx_equal_p (operands[2], op1))
6867 code = reverse_condition_maybe_unordered (code);
6869 if (rtx_equal_p (operands[2], op0) && rtx_equal_p (operands[3], op1))
6871 /* Check for min operation. */
6874 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
6875 if (memory_operand (op0, VOIDmode))
6876 op0 = force_reg (GET_MODE (operands[0]), op0);
6877 if (GET_MODE (operands[0]) == SFmode)
6878 emit_insn (gen_minsf3 (operands[0], op0, op1));
6880 emit_insn (gen_mindf3 (operands[0], op0, op1));
6883 /* Check for max operation. */
6886 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
6887 if (memory_operand (op0, VOIDmode))
6888 op0 = force_reg (GET_MODE (operands[0]), op0);
6889 if (GET_MODE (operands[0]) == SFmode)
6890 emit_insn (gen_maxsf3 (operands[0], op0, op1));
6892 emit_insn (gen_maxdf3 (operands[0], op0, op1));
6896 /* Manage condition to be sse_comparison_operator. In case we are
6897 in non-ieee mode, try to canonicalize the destination operand
6898 to be first in the comparison - this helps reload to avoid extra
6900 if (!sse_comparison_operator (operands[1], VOIDmode)
6901 || (rtx_equal_p (operands[0], ix86_compare_op1) && !TARGET_IEEE_FP))
6903 rtx tmp = ix86_compare_op0;
6904 ix86_compare_op0 = ix86_compare_op1;
6905 ix86_compare_op1 = tmp;
6906 operands[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands[1])),
6907 VOIDmode, ix86_compare_op0,
6910 /* Similary try to manage result to be first operand of conditional
6911 move. We also don't support the NE comparison on SSE, so try to
6913 if ((rtx_equal_p (operands[0], operands[3])
6914 && (!TARGET_IEEE_FP || GET_CODE (operands[1]) != EQ))
6915 || (GET_CODE (operands[1]) == NE && TARGET_IEEE_FP))
6917 rtx tmp = operands[2];
6918 operands[2] = operands[3];
6920 operands[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
6921 (GET_CODE (operands[1])),
6922 VOIDmode, ix86_compare_op0,
6925 if (GET_MODE (operands[0]) == SFmode)
6926 emit_insn (gen_sse_movsfcc (operands[0], operands[1],
6927 operands[2], operands[3],
6928 ix86_compare_op0, ix86_compare_op1));
6930 emit_insn (gen_sse_movdfcc (operands[0], operands[1],
6931 operands[2], operands[3],
6932 ix86_compare_op0, ix86_compare_op1));
6936 /* The floating point conditional move instructions don't directly
6937 support conditions resulting from a signed integer comparison. */
6939 code = GET_CODE (operands[1]);
6940 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
6942 /* The floating point conditional move instructions don't directly
6943 support signed integer comparisons. */
6945 if (!fcmov_comparison_operator (compare_op, VOIDmode))
6947 if (second_test != NULL || bypass_test != NULL)
6949 tmp = gen_reg_rtx (QImode);
6950 ix86_expand_setcc (code, tmp);
6952 ix86_compare_op0 = tmp;
6953 ix86_compare_op1 = const0_rtx;
6954 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
6956 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
6958 tmp = gen_reg_rtx (GET_MODE (operands[0]));
6959 emit_move_insn (tmp, operands[3]);
6962 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
6964 tmp = gen_reg_rtx (GET_MODE (operands[0]));
6965 emit_move_insn (tmp, operands[2]);
6969 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
6970 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
6975 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
6976 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
6981 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
6982 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
6990 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
6991 works for floating pointer parameters and nonoffsetable memories.
6992 For pushes, it returns just stack offsets; the values will be saved
6993 in the right order. Maximally three parts are generated. */
6996 ix86_split_to_parts (operand, parts, mode)
6999 enum machine_mode mode;
7004 size = mode == TFmode ? 3 : (GET_MODE_SIZE (mode) / 4);
7006 size = (GET_MODE_SIZE (mode) + 4) / 8;
7008 if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
7010 if (size < 2 || size > 3)
7013 /* Optimize constant pool reference to immediates. This is used by fp moves,
7014 that force all constants to memory to allow combining. */
7016 if (GET_CODE (operand) == MEM
7017 && GET_CODE (XEXP (operand, 0)) == SYMBOL_REF
7018 && CONSTANT_POOL_ADDRESS_P (XEXP (operand, 0)))
7019 operand = get_pool_constant (XEXP (operand, 0));
7021 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
7023 /* The only non-offsetable memories we handle are pushes. */
7024 if (! push_operand (operand, VOIDmode))
7027 operand = copy_rtx (operand);
7028 PUT_MODE (operand, Pmode);
7029 parts[0] = parts[1] = parts[2] = operand;
7031 else if (!TARGET_64BIT)
7034 split_di (&operand, 1, &parts[0], &parts[1]);
7037 if (REG_P (operand))
7039 if (!reload_completed)
7041 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
7042 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
7044 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
7046 else if (offsettable_memref_p (operand))
7048 operand = adjust_address (operand, SImode, 0);
7050 parts[1] = adjust_address (operand, SImode, 4);
7052 parts[2] = adjust_address (operand, SImode, 8);
7054 else if (GET_CODE (operand) == CONST_DOUBLE)
7059 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
7064 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
7065 parts[2] = GEN_INT (l[2]);
7068 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
7073 parts[1] = GEN_INT (l[1]);
7074 parts[0] = GEN_INT (l[0]);
7082 if (mode == XFmode || mode == TFmode)
7084 if (REG_P (operand))
7086 if (!reload_completed)
7088 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
7089 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
7091 else if (offsettable_memref_p (operand))
7093 operand = adjust_address (operand, DImode, 0);
7095 parts[1] = adjust_address (operand, SImode, 8);
7097 else if (GET_CODE (operand) == CONST_DOUBLE)
7102 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
7103 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
7104 /* Do not use shift by 32 to avoid warning on 32bit systems. */
7105 if (HOST_BITS_PER_WIDE_INT >= 64)
7106 parts[0] = GEN_INT (l[0] + ((l[1] << 31) << 1));
7108 parts[0] = immed_double_const (l[0], l[1], DImode);
7109 parts[1] = GEN_INT (l[2]);
7119 /* Emit insns to perform a move or push of DI, DF, and XF values.
7120 Return false when normal moves are needed; true when all required
7121 insns have been emitted. Operands 2-4 contain the input values
7122 int the correct order; operands 5-7 contain the output values. */
7125 ix86_split_long_move (operands)
7132 enum machine_mode mode = GET_MODE (operands[0]);
7134 /* The DFmode expanders may ask us to move double.
7135 For 64bit target this is single move. By hiding the fact
7136 here we simplify i386.md splitters. */
7137 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
7139 /* Optimize constant pool reference to immediates. This is used by fp moves,
7140 that force all constants to memory to allow combining. */
7142 if (GET_CODE (operands[1]) == MEM
7143 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
7144 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
7145 operands[1] = get_pool_constant (XEXP (operands[1], 0));
7146 if (push_operand (operands[0], VOIDmode))
7148 operands[0] = copy_rtx (operands[0]);
7149 PUT_MODE (operands[0], Pmode);
7152 operands[0] = gen_lowpart (DImode, operands[0]);
7153 operands[1] = gen_lowpart (DImode, operands[1]);
7154 emit_move_insn (operands[0], operands[1]);
7158 /* The only non-offsettable memory we handle is push. */
7159 if (push_operand (operands[0], VOIDmode))
7161 else if (GET_CODE (operands[0]) == MEM
7162 && ! offsettable_memref_p (operands[0]))
7165 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
7166 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
7168 /* When emitting push, take care for source operands on the stack. */
7169 if (push && GET_CODE (operands[1]) == MEM
7170 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
7173 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
7174 XEXP (part[1][2], 0));
7175 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
7176 XEXP (part[1][1], 0));
7179 /* We need to do copy in the right order in case an address register
7180 of the source overlaps the destination. */
7181 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
7183 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
7185 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
7188 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
7191 /* Collision in the middle part can be handled by reordering. */
7192 if (collisions == 1 && nparts == 3
7193 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
7196 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
7197 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
7200 /* If there are more collisions, we can't handle it by reordering.
7201 Do an lea to the last part and use only one colliding move. */
7202 else if (collisions > 1)
7205 emit_insn (gen_rtx_SET (VOIDmode, part[0][nparts - 1],
7206 XEXP (part[1][0], 0)));
7207 part[1][0] = change_address (part[1][0],
7208 TARGET_64BIT ? DImode : SImode,
7209 part[0][nparts - 1]);
7210 part[1][1] = adjust_address (part[1][0], VOIDmode, UNITS_PER_WORD);
7212 part[1][2] = adjust_address (part[1][0], VOIDmode, 8);
7222 /* We use only first 12 bytes of TFmode value, but for pushing we
7223 are required to adjust stack as if we were pushing real 16byte
7225 if (mode == TFmode && !TARGET_64BIT)
7226 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
7228 emit_move_insn (part[0][2], part[1][2]);
7233 /* In 64bit mode we don't have 32bit push available. In case this is
7234 register, it is OK - we will just use larger counterpart. We also
7235 retype memory - these comes from attempt to avoid REX prefix on
7236 moving of second half of TFmode value. */
7237 if (GET_MODE (part[1][1]) == SImode)
7239 if (GET_CODE (part[1][1]) == MEM)
7240 part[1][1] = adjust_address (part[1][1], DImode, 0);
7241 else if (REG_P (part[1][1]))
7242 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
7245 if (GET_MODE (part[1][0]) == SImode)
7246 part[1][0] = part[1][1];
7249 emit_move_insn (part[0][1], part[1][1]);
7250 emit_move_insn (part[0][0], part[1][0]);
7254 /* Choose correct order to not overwrite the source before it is copied. */
7255 if ((REG_P (part[0][0])
7256 && REG_P (part[1][1])
7257 && (REGNO (part[0][0]) == REGNO (part[1][1])
7259 && REGNO (part[0][0]) == REGNO (part[1][2]))))
7261 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
7265 operands[2] = part[0][2];
7266 operands[3] = part[0][1];
7267 operands[4] = part[0][0];
7268 operands[5] = part[1][2];
7269 operands[6] = part[1][1];
7270 operands[7] = part[1][0];
7274 operands[2] = part[0][1];
7275 operands[3] = part[0][0];
7276 operands[5] = part[1][1];
7277 operands[6] = part[1][0];
7284 operands[2] = part[0][0];
7285 operands[3] = part[0][1];
7286 operands[4] = part[0][2];
7287 operands[5] = part[1][0];
7288 operands[6] = part[1][1];
7289 operands[7] = part[1][2];
7293 operands[2] = part[0][0];
7294 operands[3] = part[0][1];
7295 operands[5] = part[1][0];
7296 operands[6] = part[1][1];
7299 emit_move_insn (operands[2], operands[5]);
7300 emit_move_insn (operands[3], operands[6]);
7302 emit_move_insn (operands[4], operands[7]);
7308 ix86_split_ashldi (operands, scratch)
7309 rtx *operands, scratch;
7311 rtx low[2], high[2];
7314 if (GET_CODE (operands[2]) == CONST_INT)
7316 split_di (operands, 2, low, high);
7317 count = INTVAL (operands[2]) & 63;
7321 emit_move_insn (high[0], low[1]);
7322 emit_move_insn (low[0], const0_rtx);
7325 emit_insn (gen_ashlsi3 (high[0], high[0], GEN_INT (count - 32)));
7329 if (!rtx_equal_p (operands[0], operands[1]))
7330 emit_move_insn (operands[0], operands[1]);
7331 emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
7332 emit_insn (gen_ashlsi3 (low[0], low[0], GEN_INT (count)));
7337 if (!rtx_equal_p (operands[0], operands[1]))
7338 emit_move_insn (operands[0], operands[1]);
7340 split_di (operands, 1, low, high);
7342 emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
7343 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
7345 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
7347 if (! no_new_pseudos)
7348 scratch = force_reg (SImode, const0_rtx);
7350 emit_move_insn (scratch, const0_rtx);
7352 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2],
7356 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
7361 ix86_split_ashrdi (operands, scratch)
7362 rtx *operands, scratch;
7364 rtx low[2], high[2];
7367 if (GET_CODE (operands[2]) == CONST_INT)
7369 split_di (operands, 2, low, high);
7370 count = INTVAL (operands[2]) & 63;
7374 emit_move_insn (low[0], high[1]);
7376 if (! reload_completed)
7377 emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31)));
7380 emit_move_insn (high[0], low[0]);
7381 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
7385 emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
7389 if (!rtx_equal_p (operands[0], operands[1]))
7390 emit_move_insn (operands[0], operands[1]);
7391 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
7392 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
7397 if (!rtx_equal_p (operands[0], operands[1]))
7398 emit_move_insn (operands[0], operands[1]);
7400 split_di (operands, 1, low, high);
7402 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
7403 emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));
7405 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
7407 if (! no_new_pseudos)
7408 scratch = gen_reg_rtx (SImode);
7409 emit_move_insn (scratch, high[0]);
7410 emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
7411 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
7415 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
7420 ix86_split_lshrdi (operands, scratch)
7421 rtx *operands, scratch;
7423 rtx low[2], high[2];
7426 if (GET_CODE (operands[2]) == CONST_INT)
7428 split_di (operands, 2, low, high);
7429 count = INTVAL (operands[2]) & 63;
7433 emit_move_insn (low[0], high[1]);
7434 emit_move_insn (high[0], const0_rtx);
7437 emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
7441 if (!rtx_equal_p (operands[0], operands[1]))
7442 emit_move_insn (operands[0], operands[1]);
7443 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
7444 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
7449 if (!rtx_equal_p (operands[0], operands[1]))
7450 emit_move_insn (operands[0], operands[1]);
7452 split_di (operands, 1, low, high);
7454 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
7455 emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));
7457 /* Heh. By reversing the arguments, we can reuse this pattern. */
7458 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
7460 if (! no_new_pseudos)
7461 scratch = force_reg (SImode, const0_rtx);
7463 emit_move_insn (scratch, const0_rtx);
7465 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
7469 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
7473 /* Helper function for the string operations below. Dest VARIABLE whether
7474 it is aligned to VALUE bytes. If true, jump to the label. */
7476 ix86_expand_aligntest (variable, value)
7480 rtx label = gen_label_rtx ();
7481 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
7482 if (GET_MODE (variable) == DImode)
7483 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
7485 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
7486 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
7491 /* Adjust COUNTER by the VALUE. */
7493 ix86_adjust_counter (countreg, value)
7495 HOST_WIDE_INT value;
7497 if (GET_MODE (countreg) == DImode)
7498 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
7500 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
7503 /* Zero extend possibly SImode EXP to Pmode register. */
7505 ix86_zero_extend_to_Pmode (exp)
7509 if (GET_MODE (exp) == VOIDmode)
7510 return force_reg (Pmode, exp);
7511 if (GET_MODE (exp) == Pmode)
7512 return copy_to_mode_reg (Pmode, exp);
7513 r = gen_reg_rtx (Pmode);
7514 emit_insn (gen_zero_extendsidi2 (r, exp));
7518 /* Expand string move (memcpy) operation. Use i386 string operations when
7519 profitable. expand_clrstr contains similar code. */
7521 ix86_expand_movstr (dst, src, count_exp, align_exp)
7522 rtx dst, src, count_exp, align_exp;
7524 rtx srcreg, destreg, countreg;
7525 enum machine_mode counter_mode;
7526 HOST_WIDE_INT align = 0;
7527 unsigned HOST_WIDE_INT count = 0;
7532 if (GET_CODE (align_exp) == CONST_INT)
7533 align = INTVAL (align_exp);
7535 /* This simple hack avoids all inlining code and simplifies code bellow. */
7536 if (!TARGET_ALIGN_STRINGOPS)
7539 if (GET_CODE (count_exp) == CONST_INT)
7540 count = INTVAL (count_exp);
7542 /* Figure out proper mode for counter. For 32bits it is always SImode,
7543 for 64bits use SImode when possible, otherwise DImode.
7544 Set count to number of bytes copied when known at compile time. */
7545 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
7546 || x86_64_zero_extended_value (count_exp))
7547 counter_mode = SImode;
7549 counter_mode = DImode;
7551 if (counter_mode != SImode && counter_mode != DImode)
7554 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
7555 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
7557 emit_insn (gen_cld ());
7559 /* When optimizing for size emit simple rep ; movsb instruction for
7560 counts not divisible by 4. */
7562 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
7564 countreg = ix86_zero_extend_to_Pmode (count_exp);
7566 emit_insn (gen_rep_movqi_rex64 (destreg, srcreg, countreg,
7567 destreg, srcreg, countreg));
7569 emit_insn (gen_rep_movqi (destreg, srcreg, countreg,
7570 destreg, srcreg, countreg));
7573 /* For constant aligned (or small unaligned) copies use rep movsl
7574 followed by code copying the rest. For PentiumPro ensure 8 byte
7575 alignment to allow rep movsl acceleration. */
7579 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
7580 || optimize_size || count < (unsigned int)64))
7582 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
7583 if (count & ~(size - 1))
7585 countreg = copy_to_mode_reg (counter_mode,
7586 GEN_INT ((count >> (size == 4 ? 2 : 3))
7587 & (TARGET_64BIT ? -1 : 0x3fffffff)));
7588 countreg = ix86_zero_extend_to_Pmode (countreg);
7592 emit_insn (gen_rep_movsi_rex64 (destreg, srcreg, countreg,
7593 destreg, srcreg, countreg));
7595 emit_insn (gen_rep_movsi (destreg, srcreg, countreg,
7596 destreg, srcreg, countreg));
7599 emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg,
7600 destreg, srcreg, countreg));
7602 if (size == 8 && (count & 0x04))
7603 emit_insn (gen_strmovsi (destreg, srcreg));
7605 emit_insn (gen_strmovhi (destreg, srcreg));
7607 emit_insn (gen_strmovqi (destreg, srcreg));
7609 /* The generic code based on the glibc implementation:
7610 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
7611 allowing accelerated copying there)
7612 - copy the data using rep movsl
7619 /* In case we don't know anything about the alignment, default to
7620 library version, since it is usually equally fast and result in
7622 if (!TARGET_INLINE_ALL_STRINGOPS && align < UNITS_PER_WORD)
7628 if (TARGET_SINGLE_STRINGOP)
7629 emit_insn (gen_cld ());
7631 countreg2 = gen_reg_rtx (Pmode);
7632 countreg = copy_to_mode_reg (counter_mode, count_exp);
7634 /* We don't use loops to align destination and to copy parts smaller
7635 than 4 bytes, because gcc is able to optimize such code better (in
7636 the case the destination or the count really is aligned, gcc is often
7637 able to predict the branches) and also it is friendlier to the
7638 hardware branch prediction.
7640 Using loops is benefical for generic case, because we can
7641 handle small counts using the loops. Many CPUs (such as Athlon)
7642 have large REP prefix setup costs.
7644 This is quite costy. Maybe we can revisit this decision later or
7645 add some customizability to this code. */
7648 && align < (TARGET_PENTIUMPRO && (count == 0
7649 || count >= (unsigned int)260)
7650 ? 8 : UNITS_PER_WORD))
7652 label = gen_label_rtx ();
7653 emit_cmp_and_jump_insns (countreg, GEN_INT (UNITS_PER_WORD - 1),
7654 LEU, 0, counter_mode, 1, 0, label);
7658 rtx label = ix86_expand_aligntest (destreg, 1);
7659 emit_insn (gen_strmovqi (destreg, srcreg));
7660 ix86_adjust_counter (countreg, 1);
7662 LABEL_NUSES (label) = 1;
7666 rtx label = ix86_expand_aligntest (destreg, 2);
7667 emit_insn (gen_strmovhi (destreg, srcreg));
7668 ix86_adjust_counter (countreg, 2);
7670 LABEL_NUSES (label) = 1;
7673 && ((TARGET_PENTIUMPRO && (count == 0
7674 || count >= (unsigned int)260))
7677 rtx label = ix86_expand_aligntest (destreg, 4);
7678 emit_insn (gen_strmovsi (destreg, srcreg));
7679 ix86_adjust_counter (countreg, 4);
7681 LABEL_NUSES (label) = 1;
7684 if (!TARGET_SINGLE_STRINGOP)
7685 emit_insn (gen_cld ());
7688 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
7690 emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg2,
7691 destreg, srcreg, countreg2));
7695 emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
7696 emit_insn (gen_rep_movsi (destreg, srcreg, countreg2,
7697 destreg, srcreg, countreg2));
7703 LABEL_NUSES (label) = 1;
7705 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
7706 emit_insn (gen_strmovsi (destreg, srcreg));
7707 if ((align <= 4 || count == 0) && TARGET_64BIT)
7709 rtx label = ix86_expand_aligntest (countreg, 4);
7710 emit_insn (gen_strmovsi (destreg, srcreg));
7712 LABEL_NUSES (label) = 1;
7714 if (align > 2 && count != 0 && (count & 2))
7715 emit_insn (gen_strmovhi (destreg, srcreg));
7716 if (align <= 2 || count == 0)
7718 rtx label = ix86_expand_aligntest (countreg, 2);
7719 emit_insn (gen_strmovhi (destreg, srcreg));
7721 LABEL_NUSES (label) = 1;
7723 if (align > 1 && count != 0 && (count & 1))
7724 emit_insn (gen_strmovqi (destreg, srcreg));
7725 if (align <= 1 || count == 0)
7727 rtx label = ix86_expand_aligntest (countreg, 1);
7728 emit_insn (gen_strmovqi (destreg, srcreg));
7730 LABEL_NUSES (label) = 1;
7734 insns = get_insns ();
7737 ix86_set_move_mem_attrs (insns, dst, src, destreg, srcreg);
7742 /* Expand string clear operation (bzero). Use i386 string operations when
7743 profitable. expand_movstr contains similar code. */
7745 ix86_expand_clrstr (src, count_exp, align_exp)
7746 rtx src, count_exp, align_exp;
7748 rtx destreg, zeroreg, countreg;
7749 enum machine_mode counter_mode;
7750 HOST_WIDE_INT align = 0;
7751 unsigned HOST_WIDE_INT count = 0;
7753 if (GET_CODE (align_exp) == CONST_INT)
7754 align = INTVAL (align_exp);
7756 /* This simple hack avoids all inlining code and simplifies code bellow. */
7757 if (!TARGET_ALIGN_STRINGOPS)
7760 if (GET_CODE (count_exp) == CONST_INT)
7761 count = INTVAL (count_exp);
7762 /* Figure out proper mode for counter. For 32bits it is always SImode,
7763 for 64bits use SImode when possible, otherwise DImode.
7764 Set count to number of bytes copied when known at compile time. */
7765 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
7766 || x86_64_zero_extended_value (count_exp))
7767 counter_mode = SImode;
7769 counter_mode = DImode;
7771 destreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
7773 emit_insn (gen_cld ());
7775 /* When optimizing for size emit simple rep ; movsb instruction for
7776 counts not divisible by 4. */
7778 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
7780 countreg = ix86_zero_extend_to_Pmode (count_exp);
7781 zeroreg = copy_to_mode_reg (QImode, const0_rtx);
7783 emit_insn (gen_rep_stosqi_rex64 (destreg, countreg, zeroreg,
7784 destreg, countreg));
7786 emit_insn (gen_rep_stosqi (destreg, countreg, zeroreg,
7787 destreg, countreg));
7791 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
7792 || optimize_size || count < (unsigned int)64))
7794 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
7795 zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
7796 if (count & ~(size - 1))
7798 countreg = copy_to_mode_reg (counter_mode,
7799 GEN_INT ((count >> (size == 4 ? 2 : 3))
7800 & (TARGET_64BIT ? -1 : 0x3fffffff)));
7801 countreg = ix86_zero_extend_to_Pmode (countreg);
7805 emit_insn (gen_rep_stossi_rex64 (destreg, countreg, zeroreg,
7806 destreg, countreg));
7808 emit_insn (gen_rep_stossi (destreg, countreg, zeroreg,
7809 destreg, countreg));
7812 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg, zeroreg,
7813 destreg, countreg));
7815 if (size == 8 && (count & 0x04))
7816 emit_insn (gen_strsetsi (destreg,
7817 gen_rtx_SUBREG (SImode, zeroreg, 0)));
7819 emit_insn (gen_strsethi (destreg,
7820 gen_rtx_SUBREG (HImode, zeroreg, 0)));
7822 emit_insn (gen_strsetqi (destreg,
7823 gen_rtx_SUBREG (QImode, zeroreg, 0)));
7830 /* In case we don't know anything about the alignment, default to
7831 library version, since it is usually equally fast and result in
7833 if (!TARGET_INLINE_ALL_STRINGOPS && align < UNITS_PER_WORD)
7836 if (TARGET_SINGLE_STRINGOP)
7837 emit_insn (gen_cld ());
7839 countreg2 = gen_reg_rtx (Pmode);
7840 countreg = copy_to_mode_reg (counter_mode, count_exp);
7841 zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
7844 && align < (TARGET_PENTIUMPRO && (count == 0
7845 || count >= (unsigned int)260)
7846 ? 8 : UNITS_PER_WORD))
7848 label = gen_label_rtx ();
7849 emit_cmp_and_jump_insns (countreg, GEN_INT (UNITS_PER_WORD - 1),
7850 LEU, 0, counter_mode, 1, 0, label);
7854 rtx label = ix86_expand_aligntest (destreg, 1);
7855 emit_insn (gen_strsetqi (destreg,
7856 gen_rtx_SUBREG (QImode, zeroreg, 0)));
7857 ix86_adjust_counter (countreg, 1);
7859 LABEL_NUSES (label) = 1;
7863 rtx label = ix86_expand_aligntest (destreg, 2);
7864 emit_insn (gen_strsethi (destreg,
7865 gen_rtx_SUBREG (HImode, zeroreg, 0)));
7866 ix86_adjust_counter (countreg, 2);
7868 LABEL_NUSES (label) = 1;
7870 if (align <= 4 && TARGET_PENTIUMPRO && (count == 0
7871 || count >= (unsigned int)260))
7873 rtx label = ix86_expand_aligntest (destreg, 4);
7874 emit_insn (gen_strsetsi (destreg, (TARGET_64BIT
7875 ? gen_rtx_SUBREG (SImode, zeroreg, 0)
7877 ix86_adjust_counter (countreg, 4);
7879 LABEL_NUSES (label) = 1;
7882 if (!TARGET_SINGLE_STRINGOP)
7883 emit_insn (gen_cld ());
7886 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
7888 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg2, zeroreg,
7889 destreg, countreg2));
7893 emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
7894 emit_insn (gen_rep_stossi (destreg, countreg2, zeroreg,
7895 destreg, countreg2));
7901 LABEL_NUSES (label) = 1;
7903 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
7904 emit_insn (gen_strsetsi (destreg,
7905 gen_rtx_SUBREG (SImode, zeroreg, 0)));
7906 if (TARGET_64BIT && (align <= 4 || count == 0))
7908 rtx label = ix86_expand_aligntest (destreg, 2);
7909 emit_insn (gen_strsetsi (destreg,
7910 gen_rtx_SUBREG (SImode, zeroreg, 0)));
7912 LABEL_NUSES (label) = 1;
7914 if (align > 2 && count != 0 && (count & 2))
7915 emit_insn (gen_strsethi (destreg,
7916 gen_rtx_SUBREG (HImode, zeroreg, 0)));
7917 if (align <= 2 || count == 0)
7919 rtx label = ix86_expand_aligntest (destreg, 2);
7920 emit_insn (gen_strsethi (destreg,
7921 gen_rtx_SUBREG (HImode, zeroreg, 0)));
7923 LABEL_NUSES (label) = 1;
7925 if (align > 1 && count != 0 && (count & 1))
7926 emit_insn (gen_strsetqi (destreg,
7927 gen_rtx_SUBREG (QImode, zeroreg, 0)));
7928 if (align <= 1 || count == 0)
7930 rtx label = ix86_expand_aligntest (destreg, 1);
7931 emit_insn (gen_strsetqi (destreg,
7932 gen_rtx_SUBREG (QImode, zeroreg, 0)));
7934 LABEL_NUSES (label) = 1;
7939 /* Expand strlen. */
7941 ix86_expand_strlen (out, src, eoschar, align)
7942 rtx out, src, eoschar, align;
7944 rtx addr, scratch1, scratch2, scratch3, scratch4;
7946 /* The generic case of strlen expander is long. Avoid it's
7947 expanding unless TARGET_INLINE_ALL_STRINGOPS. */
7949 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
7950 && !TARGET_INLINE_ALL_STRINGOPS
7952 && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
7955 addr = force_reg (Pmode, XEXP (src, 0));
7956 scratch1 = gen_reg_rtx (Pmode);
7958 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
7961 /* Well it seems that some optimizer does not combine a call like
7962 foo(strlen(bar), strlen(bar));
7963 when the move and the subtraction is done here. It does calculate
7964 the length just once when these instructions are done inside of
7965 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
7966 often used and I use one fewer register for the lifetime of
7967 output_strlen_unroll() this is better. */
7969 emit_move_insn (out, addr);
7971 ix86_expand_strlensi_unroll_1 (out, align);
7973 /* strlensi_unroll_1 returns the address of the zero at the end of
7974 the string, like memchr(), so compute the length by subtracting
7975 the start address. */
7977 emit_insn (gen_subdi3 (out, out, addr));
7979 emit_insn (gen_subsi3 (out, out, addr));
7983 scratch2 = gen_reg_rtx (Pmode);
7984 scratch3 = gen_reg_rtx (Pmode);
7985 scratch4 = force_reg (Pmode, constm1_rtx);
7987 emit_move_insn (scratch3, addr);
7988 eoschar = force_reg (QImode, eoschar);
7990 emit_insn (gen_cld ());
7993 emit_insn (gen_strlenqi_rex_1 (scratch1, scratch3, eoschar,
7994 align, scratch4, scratch3));
7995 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
7996 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
8000 emit_insn (gen_strlenqi_1 (scratch1, scratch3, eoschar,
8001 align, scratch4, scratch3));
8002 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
8003 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
8009 /* Expand the appropriate insns for doing strlen if not just doing
8012 out = result, initialized with the start address
8013 align_rtx = alignment of the address.
8014 scratch = scratch register, initialized with the startaddress when
8015 not aligned, otherwise undefined
8017 This is just the body. It needs the initialisations mentioned above and
8018 some address computing at the end. These things are done in i386.md. */
8021 ix86_expand_strlensi_unroll_1 (out, align_rtx)
8026 rtx align_2_label = NULL_RTX;
8027 rtx align_3_label = NULL_RTX;
8028 rtx align_4_label = gen_label_rtx ();
8029 rtx end_0_label = gen_label_rtx ();
8031 rtx tmpreg = gen_reg_rtx (SImode);
8032 rtx scratch = gen_reg_rtx (SImode);
8035 if (GET_CODE (align_rtx) == CONST_INT)
8036 align = INTVAL (align_rtx);
8038 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
8040 /* Is there a known alignment and is it less than 4? */
8043 rtx scratch1 = gen_reg_rtx (Pmode);
8044 emit_move_insn (scratch1, out);
8045 /* Is there a known alignment and is it not 2? */
8048 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
8049 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
8051 /* Leave just the 3 lower bits. */
8052 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
8053 NULL_RTX, 0, OPTAB_WIDEN);
8055 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
8056 Pmode, 1, 0, align_4_label);
8057 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), EQ, NULL,
8058 Pmode, 1, 0, align_2_label);
8059 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), GTU, NULL,
8060 Pmode, 1, 0, align_3_label);
8064 /* Since the alignment is 2, we have to check 2 or 0 bytes;
8065 check if is aligned to 4 - byte. */
8067 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (2),
8068 NULL_RTX, 0, OPTAB_WIDEN);
8070 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
8071 Pmode, 1, 0, align_4_label);
8074 mem = gen_rtx_MEM (QImode, out);
8076 /* Now compare the bytes. */
8078 /* Compare the first n unaligned byte on a byte per byte basis. */
8079 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
8080 QImode, 1, 0, end_0_label);
8082 /* Increment the address. */
8084 emit_insn (gen_adddi3 (out, out, const1_rtx));
8086 emit_insn (gen_addsi3 (out, out, const1_rtx));
8088 /* Not needed with an alignment of 2 */
8091 emit_label (align_2_label);
8093 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
8094 QImode, 1, 0, end_0_label);
8097 emit_insn (gen_adddi3 (out, out, const1_rtx));
8099 emit_insn (gen_addsi3 (out, out, const1_rtx));
8101 emit_label (align_3_label);
8104 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
8105 QImode, 1, 0, end_0_label);
8108 emit_insn (gen_adddi3 (out, out, const1_rtx));
8110 emit_insn (gen_addsi3 (out, out, const1_rtx));
8113 /* Generate loop to check 4 bytes at a time. It is not a good idea to
8114 align this loop. It gives only huge programs, but does not help to
8116 emit_label (align_4_label);
8118 mem = gen_rtx_MEM (SImode, out);
8119 emit_move_insn (scratch, mem);
8121 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
8123 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
8125 /* This formula yields a nonzero result iff one of the bytes is zero.
8126 This saves three branches inside loop and many cycles. */
8128 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
8129 emit_insn (gen_one_cmplsi2 (scratch, scratch));
8130 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
8131 emit_insn (gen_andsi3 (tmpreg, tmpreg,
8132 GEN_INT (trunc_int_for_mode
8133 (0x80808080, SImode))));
8134 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0,
8135 SImode, 1, 0, align_4_label);
8139 rtx reg = gen_reg_rtx (SImode);
8140 rtx reg2 = gen_reg_rtx (Pmode);
8141 emit_move_insn (reg, tmpreg);
8142 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
8144 /* If zero is not in the first two bytes, move two bytes forward. */
8145 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
8146 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
8147 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
8148 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
8149 gen_rtx_IF_THEN_ELSE (SImode, tmp,
8152 /* Emit lea manually to avoid clobbering of flags. */
8153 emit_insn (gen_rtx_SET (SImode, reg2,
8154 gen_rtx_PLUS (Pmode, out, GEN_INT (2))));
8156 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
8157 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
8158 emit_insn (gen_rtx_SET (VOIDmode, out,
8159 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
8166 rtx end_2_label = gen_label_rtx ();
8167 /* Is zero in the first two bytes? */
8169 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
8170 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
8171 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
8172 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
8173 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
8175 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
8176 JUMP_LABEL (tmp) = end_2_label;
8178 /* Not in the first two. Move two bytes forward. */
8179 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
8181 emit_insn (gen_adddi3 (out, out, GEN_INT (2)));
8183 emit_insn (gen_addsi3 (out, out, GEN_INT (2)));
8185 emit_label (end_2_label);
8189 /* Avoid branch in fixing the byte. */
8190 tmpreg = gen_lowpart (QImode, tmpreg);
8191 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
8193 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3)));
8195 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3)));
8197 emit_label (end_0_label);
8200 /* Clear stack slot assignments remembered from previous functions.
8201 This is called from INIT_EXPANDERS once before RTL is emitted for each
8205 ix86_init_machine_status (p)
8208 p->machine = (struct machine_function *)
8209 xcalloc (1, sizeof (struct machine_function));
8212 /* Mark machine specific bits of P for GC. */
8214 ix86_mark_machine_status (p)
8217 struct machine_function *machine = p->machine;
8218 enum machine_mode mode;
8224 for (mode = VOIDmode; (int) mode < (int) MAX_MACHINE_MODE;
8225 mode = (enum machine_mode) ((int) mode + 1))
8226 for (n = 0; n < MAX_386_STACK_LOCALS; n++)
8227 ggc_mark_rtx (machine->stack_locals[(int) mode][n]);
8231 ix86_free_machine_status (p)
8238 /* Return a MEM corresponding to a stack slot with mode MODE.
8239 Allocate a new slot if necessary.
8241 The RTL for a function can have several slots available: N is
8242 which slot to use. */
8245 assign_386_stack_local (mode, n)
8246 enum machine_mode mode;
8249 if (n < 0 || n >= MAX_386_STACK_LOCALS)
8252 if (ix86_stack_locals[(int) mode][n] == NULL_RTX)
8253 ix86_stack_locals[(int) mode][n]
8254 = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
8256 return ix86_stack_locals[(int) mode][n];
8259 /* Calculate the length of the memory address in the instruction
8260 encoding. Does not include the one-byte modrm, opcode, or prefix. */
8263 memory_address_length (addr)
8266 struct ix86_address parts;
8267 rtx base, index, disp;
8270 if (GET_CODE (addr) == PRE_DEC
8271 || GET_CODE (addr) == POST_INC
8272 || GET_CODE (addr) == PRE_MODIFY
8273 || GET_CODE (addr) == POST_MODIFY)
8276 if (! ix86_decompose_address (addr, &parts))
8280 index = parts.index;
8284 /* Register Indirect. */
8285 if (base && !index && !disp)
8287 /* Special cases: ebp and esp need the two-byte modrm form. */
8288 if (addr == stack_pointer_rtx
8289 || addr == arg_pointer_rtx
8290 || addr == frame_pointer_rtx
8291 || addr == hard_frame_pointer_rtx)
8295 /* Direct Addressing. */
8296 else if (disp && !base && !index)
8301 /* Find the length of the displacement constant. */
8304 if (GET_CODE (disp) == CONST_INT
8305 && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K'))
8311 /* An index requires the two-byte modrm form. */
8319 /* Compute default value for "length_immediate" attribute. When SHORTFORM is set
8320 expect that insn have 8bit immediate alternative. */
8322 ix86_attr_length_immediate_default (insn, shortform)
8328 extract_insn_cached (insn);
8329 for (i = recog_data.n_operands - 1; i >= 0; --i)
8330 if (CONSTANT_P (recog_data.operand[i]))
8335 && GET_CODE (recog_data.operand[i]) == CONST_INT
8336 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
8340 switch (get_attr_mode (insn))
8352 fatal_insn ("Unknown insn mode", insn);
8358 /* Compute default value for "length_address" attribute. */
8360 ix86_attr_length_address_default (insn)
8364 extract_insn_cached (insn);
8365 for (i = recog_data.n_operands - 1; i >= 0; --i)
8366 if (GET_CODE (recog_data.operand[i]) == MEM)
8368 return memory_address_length (XEXP (recog_data.operand[i], 0));
8374 /* Return the maximum number of instructions a cpu can issue. */
8381 case PROCESSOR_PENTIUM:
8385 case PROCESSOR_PENTIUMPRO:
8386 case PROCESSOR_PENTIUM4:
8387 case PROCESSOR_ATHLON:
8395 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
8396 by DEP_INSN and nothing set by DEP_INSN. */
8399 ix86_flags_dependant (insn, dep_insn, insn_type)
8401 enum attr_type insn_type;
8405 /* Simplify the test for uninteresting insns. */
8406 if (insn_type != TYPE_SETCC
8407 && insn_type != TYPE_ICMOV
8408 && insn_type != TYPE_FCMOV
8409 && insn_type != TYPE_IBR)
8412 if ((set = single_set (dep_insn)) != 0)
8414 set = SET_DEST (set);
8417 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
8418 && XVECLEN (PATTERN (dep_insn), 0) == 2
8419 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
8420 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
8422 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
8423 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
8428 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
8431 /* This test is true if the dependant insn reads the flags but
8432 not any other potentially set register. */
8433 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
8436 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
8442 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
8443 address with operands set by DEP_INSN. */
8446 ix86_agi_dependant (insn, dep_insn, insn_type)
8448 enum attr_type insn_type;
8452 if (insn_type == TYPE_LEA
8455 addr = PATTERN (insn);
8456 if (GET_CODE (addr) == SET)
8458 else if (GET_CODE (addr) == PARALLEL
8459 && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
8460 addr = XVECEXP (addr, 0, 0);
8463 addr = SET_SRC (addr);
8468 extract_insn_cached (insn);
8469 for (i = recog_data.n_operands - 1; i >= 0; --i)
8470 if (GET_CODE (recog_data.operand[i]) == MEM)
8472 addr = XEXP (recog_data.operand[i], 0);
8479 return modified_in_p (addr, dep_insn);
8483 ix86_adjust_cost (insn, link, dep_insn, cost)
8484 rtx insn, link, dep_insn;
8487 enum attr_type insn_type, dep_insn_type;
8488 enum attr_memory memory, dep_memory;
8490 int dep_insn_code_number;
8492 /* Anti and output depenancies have zero cost on all CPUs. */
8493 if (REG_NOTE_KIND (link) != 0)
8496 dep_insn_code_number = recog_memoized (dep_insn);
8498 /* If we can't recognize the insns, we can't really do anything. */
8499 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
8502 insn_type = get_attr_type (insn);
8503 dep_insn_type = get_attr_type (dep_insn);
8507 case PROCESSOR_PENTIUM:
8508 /* Address Generation Interlock adds a cycle of latency. */
8509 if (ix86_agi_dependant (insn, dep_insn, insn_type))
8512 /* ??? Compares pair with jump/setcc. */
8513 if (ix86_flags_dependant (insn, dep_insn, insn_type))
8516 /* Floating point stores require value to be ready one cycle ealier. */
8517 if (insn_type == TYPE_FMOV
8518 && get_attr_memory (insn) == MEMORY_STORE
8519 && !ix86_agi_dependant (insn, dep_insn, insn_type))
8523 case PROCESSOR_PENTIUMPRO:
8524 memory = get_attr_memory (insn);
8525 dep_memory = get_attr_memory (dep_insn);
8527 /* Since we can't represent delayed latencies of load+operation,
8528 increase the cost here for non-imov insns. */
8529 if (dep_insn_type != TYPE_IMOV
8530 && dep_insn_type != TYPE_FMOV
8531 && (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH))
8534 /* INT->FP conversion is expensive. */
8535 if (get_attr_fp_int_src (dep_insn))
8538 /* There is one cycle extra latency between an FP op and a store. */
8539 if (insn_type == TYPE_FMOV
8540 && (set = single_set (dep_insn)) != NULL_RTX
8541 && (set2 = single_set (insn)) != NULL_RTX
8542 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
8543 && GET_CODE (SET_DEST (set2)) == MEM)
8546 /* Show ability of reorder buffer to hide latency of load by executing
8547 in parallel with previous instruction in case
8548 previous instruction is not needed to compute the address. */
8549 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
8550 && !ix86_agi_dependant (insn, dep_insn, insn_type))
8552 /* Claim moves to take one cycle, as core can issue one load
8553 at time and the next load can start cycle later. */
8554 if (dep_insn_type == TYPE_IMOV
8555 || dep_insn_type == TYPE_FMOV)
8563 memory = get_attr_memory (insn);
8564 dep_memory = get_attr_memory (dep_insn);
8565 /* The esp dependency is resolved before the instruction is really
8567 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
8568 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
8571 /* Since we can't represent delayed latencies of load+operation,
8572 increase the cost here for non-imov insns. */
8573 if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
8574 cost += (dep_insn_type != TYPE_IMOV) ? 2 : 1;
8576 /* INT->FP conversion is expensive. */
8577 if (get_attr_fp_int_src (dep_insn))
8580 /* Show ability of reorder buffer to hide latency of load by executing
8581 in parallel with previous instruction in case
8582 previous instruction is not needed to compute the address. */
8583 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
8584 && !ix86_agi_dependant (insn, dep_insn, insn_type))
8586 /* Claim moves to take one cycle, as core can issue one load
8587 at time and the next load can start cycle later. */
8588 if (dep_insn_type == TYPE_IMOV
8589 || dep_insn_type == TYPE_FMOV)
8598 case PROCESSOR_ATHLON:
8599 memory = get_attr_memory (insn);
8600 dep_memory = get_attr_memory (dep_insn);
8602 if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
8604 if (dep_insn_type == TYPE_IMOV || dep_insn_type == TYPE_FMOV)
8609 /* Show ability of reorder buffer to hide latency of load by executing
8610 in parallel with previous instruction in case
8611 previous instruction is not needed to compute the address. */
8612 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
8613 && !ix86_agi_dependant (insn, dep_insn, insn_type))
8615 /* Claim moves to take one cycle, as core can issue one load
8616 at time and the next load can start cycle later. */
8617 if (dep_insn_type == TYPE_IMOV
8618 || dep_insn_type == TYPE_FMOV)
8635 struct ppro_sched_data
8638 int issued_this_cycle;
8643 ix86_safe_length (insn)
8646 if (recog_memoized (insn) >= 0)
8647 return get_attr_length(insn);
8653 ix86_safe_length_prefix (insn)
8656 if (recog_memoized (insn) >= 0)
8657 return get_attr_length(insn);
8662 static enum attr_memory
8663 ix86_safe_memory (insn)
8666 if (recog_memoized (insn) >= 0)
8667 return get_attr_memory(insn);
8669 return MEMORY_UNKNOWN;
8672 static enum attr_pent_pair
8673 ix86_safe_pent_pair (insn)
8676 if (recog_memoized (insn) >= 0)
8677 return get_attr_pent_pair(insn);
8679 return PENT_PAIR_NP;
8682 static enum attr_ppro_uops
8683 ix86_safe_ppro_uops (insn)
8686 if (recog_memoized (insn) >= 0)
8687 return get_attr_ppro_uops (insn);
8689 return PPRO_UOPS_MANY;
8693 ix86_dump_ppro_packet (dump)
8696 if (ix86_sched_data.ppro.decode[0])
8698 fprintf (dump, "PPRO packet: %d",
8699 INSN_UID (ix86_sched_data.ppro.decode[0]));
8700 if (ix86_sched_data.ppro.decode[1])
8701 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[1]));
8702 if (ix86_sched_data.ppro.decode[2])
8703 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[2]));
8708 /* We're beginning a new block. Initialize data structures as necessary. */
8711 ix86_sched_init (dump, sched_verbose)
8712 FILE *dump ATTRIBUTE_UNUSED;
8713 int sched_verbose ATTRIBUTE_UNUSED;
8715 memset (&ix86_sched_data, 0, sizeof (ix86_sched_data));
8718 /* Shift INSN to SLOT, and shift everything else down. */
8721 ix86_reorder_insn (insnp, slot)
8728 insnp[0] = insnp[1];
8729 while (++insnp != slot);
8734 /* Find an instruction with given pairability and minimal amount of cycles
8735 lost by the fact that the CPU waits for both pipelines to finish before
8736 reading next instructions. Also take care that both instructions together
8737 can not exceed 7 bytes. */
8740 ix86_pent_find_pair (e_ready, ready, type, first)
8743 enum attr_pent_pair type;
8746 int mincycles, cycles;
8747 enum attr_pent_pair tmp;
8748 enum attr_memory memory;
8749 rtx *insnp, *bestinsnp = NULL;
8751 if (ix86_safe_length (first) > 7 + ix86_safe_length_prefix (first))
8754 memory = ix86_safe_memory (first);
8755 cycles = result_ready_cost (first);
8756 mincycles = INT_MAX;
8758 for (insnp = e_ready; insnp >= ready && mincycles; --insnp)
8759 if ((tmp = ix86_safe_pent_pair (*insnp)) == type
8760 && ix86_safe_length (*insnp) <= 7 + ix86_safe_length_prefix (*insnp))
8762 enum attr_memory second_memory;
8763 int secondcycles, currentcycles;
8765 second_memory = ix86_safe_memory (*insnp);
8766 secondcycles = result_ready_cost (*insnp);
8767 currentcycles = abs (cycles - secondcycles);
8769 if (secondcycles >= 1 && cycles >= 1)
8771 /* Two read/modify/write instructions together takes two
8773 if (memory == MEMORY_BOTH && second_memory == MEMORY_BOTH)
8776 /* Read modify/write instruction followed by read/modify
8777 takes one cycle longer. */
8778 if (memory == MEMORY_BOTH && second_memory == MEMORY_LOAD
8779 && tmp != PENT_PAIR_UV
8780 && ix86_safe_pent_pair (first) != PENT_PAIR_UV)
8783 if (currentcycles < mincycles)
8784 bestinsnp = insnp, mincycles = currentcycles;
8790 /* Subroutines of ix86_sched_reorder. */
8793 ix86_sched_reorder_pentium (ready, e_ready)
8797 enum attr_pent_pair pair1, pair2;
8800 /* This wouldn't be necessary if Haifa knew that static insn ordering
8801 is important to which pipe an insn is issued to. So we have to make
8802 some minor rearrangements. */
8804 pair1 = ix86_safe_pent_pair (*e_ready);
8806 /* If the first insn is non-pairable, let it be. */
8807 if (pair1 == PENT_PAIR_NP)
8810 pair2 = PENT_PAIR_NP;
8813 /* If the first insn is UV or PV pairable, search for a PU
8815 if (pair1 == PENT_PAIR_UV || pair1 == PENT_PAIR_PV)
8817 insnp = ix86_pent_find_pair (e_ready-1, ready,
8818 PENT_PAIR_PU, *e_ready);
8820 pair2 = PENT_PAIR_PU;
8823 /* If the first insn is PU or UV pairable, search for a PV
8825 if (pair2 == PENT_PAIR_NP
8826 && (pair1 == PENT_PAIR_PU || pair1 == PENT_PAIR_UV))
8828 insnp = ix86_pent_find_pair (e_ready-1, ready,
8829 PENT_PAIR_PV, *e_ready);
8831 pair2 = PENT_PAIR_PV;
8834 /* If the first insn is pairable, search for a UV
8836 if (pair2 == PENT_PAIR_NP)
8838 insnp = ix86_pent_find_pair (e_ready-1, ready,
8839 PENT_PAIR_UV, *e_ready);
8841 pair2 = PENT_PAIR_UV;
8844 if (pair2 == PENT_PAIR_NP)
8847 /* Found something! Decide if we need to swap the order. */
8848 if (pair1 == PENT_PAIR_PV || pair2 == PENT_PAIR_PU
8849 || (pair1 == PENT_PAIR_UV && pair2 == PENT_PAIR_UV
8850 && ix86_safe_memory (*e_ready) == MEMORY_BOTH
8851 && ix86_safe_memory (*insnp) == MEMORY_LOAD))
8852 ix86_reorder_insn (insnp, e_ready);
8854 ix86_reorder_insn (insnp, e_ready - 1);
8858 ix86_sched_reorder_ppro (ready, e_ready)
8863 enum attr_ppro_uops cur_uops;
8864 int issued_this_cycle;
8868 /* At this point .ppro.decode contains the state of the three
8869 decoders from last "cycle". That is, those insns that were
8870 actually independent. But here we're scheduling for the
8871 decoder, and we may find things that are decodable in the
8874 memcpy (decode, ix86_sched_data.ppro.decode, sizeof (decode));
8875 issued_this_cycle = 0;
8878 cur_uops = ix86_safe_ppro_uops (*insnp);
8880 /* If the decoders are empty, and we've a complex insn at the
8881 head of the priority queue, let it issue without complaint. */
8882 if (decode[0] == NULL)
8884 if (cur_uops == PPRO_UOPS_MANY)
8890 /* Otherwise, search for a 2-4 uop unsn to issue. */
8891 while (cur_uops != PPRO_UOPS_FEW)
8895 cur_uops = ix86_safe_ppro_uops (*--insnp);
8898 /* If so, move it to the head of the line. */
8899 if (cur_uops == PPRO_UOPS_FEW)
8900 ix86_reorder_insn (insnp, e_ready);
8902 /* Issue the head of the queue. */
8903 issued_this_cycle = 1;
8904 decode[0] = *e_ready--;
8907 /* Look for simple insns to fill in the other two slots. */
8908 for (i = 1; i < 3; ++i)
8909 if (decode[i] == NULL)
8911 if (ready >= e_ready)
8915 cur_uops = ix86_safe_ppro_uops (*insnp);
8916 while (cur_uops != PPRO_UOPS_ONE)
8920 cur_uops = ix86_safe_ppro_uops (*--insnp);
8923 /* Found one. Move it to the head of the queue and issue it. */
8924 if (cur_uops == PPRO_UOPS_ONE)
8926 ix86_reorder_insn (insnp, e_ready);
8927 decode[i] = *e_ready--;
8928 issued_this_cycle++;
8932 /* ??? Didn't find one. Ideally, here we would do a lazy split
8933 of 2-uop insns, issue one and queue the other. */
8937 if (issued_this_cycle == 0)
8938 issued_this_cycle = 1;
8939 ix86_sched_data.ppro.issued_this_cycle = issued_this_cycle;
8942 /* We are about to being issuing insns for this clock cycle.
8943 Override the default sort algorithm to better slot instructions. */
8945 ix86_sched_reorder (dump, sched_verbose, ready, n_ready, clock_var)
8946 FILE *dump ATTRIBUTE_UNUSED;
8947 int sched_verbose ATTRIBUTE_UNUSED;
8950 int clock_var ATTRIBUTE_UNUSED;
8952 rtx *e_ready = ready + n_ready - 1;
8962 case PROCESSOR_PENTIUM:
8963 ix86_sched_reorder_pentium (ready, e_ready);
8966 case PROCESSOR_PENTIUMPRO:
8967 ix86_sched_reorder_ppro (ready, e_ready);
8972 return ix86_issue_rate ();
8975 /* We are about to issue INSN. Return the number of insns left on the
8976 ready queue that can be issued this cycle. */
8979 ix86_variable_issue (dump, sched_verbose, insn, can_issue_more)
8989 return can_issue_more - 1;
8991 case PROCESSOR_PENTIUMPRO:
8993 enum attr_ppro_uops uops = ix86_safe_ppro_uops (insn);
8995 if (uops == PPRO_UOPS_MANY)
8998 ix86_dump_ppro_packet (dump);
8999 ix86_sched_data.ppro.decode[0] = insn;
9000 ix86_sched_data.ppro.decode[1] = NULL;
9001 ix86_sched_data.ppro.decode[2] = NULL;
9003 ix86_dump_ppro_packet (dump);
9004 ix86_sched_data.ppro.decode[0] = NULL;
9006 else if (uops == PPRO_UOPS_FEW)
9009 ix86_dump_ppro_packet (dump);
9010 ix86_sched_data.ppro.decode[0] = insn;
9011 ix86_sched_data.ppro.decode[1] = NULL;
9012 ix86_sched_data.ppro.decode[2] = NULL;
9016 for (i = 0; i < 3; ++i)
9017 if (ix86_sched_data.ppro.decode[i] == NULL)
9019 ix86_sched_data.ppro.decode[i] = insn;
9027 ix86_dump_ppro_packet (dump);
9028 ix86_sched_data.ppro.decode[0] = NULL;
9029 ix86_sched_data.ppro.decode[1] = NULL;
9030 ix86_sched_data.ppro.decode[2] = NULL;
9034 return --ix86_sched_data.ppro.issued_this_cycle;
9038 /* Walk through INSNS and look for MEM references whose address is DSTREG or
9039 SRCREG and set the memory attribute to those of DSTREF and SRCREF, as
9043 ix86_set_move_mem_attrs (insns, dstref, srcref, dstreg, srcreg)
9045 rtx dstref, srcref, dstreg, srcreg;
9049 for (insn = insns; insn != 0 ; insn = NEXT_INSN (insn))
9051 ix86_set_move_mem_attrs_1 (PATTERN (insn), dstref, srcref,
9055 /* Subroutine of above to actually do the updating by recursively walking
9059 ix86_set_move_mem_attrs_1 (x, dstref, srcref, dstreg, srcreg)
9061 rtx dstref, srcref, dstreg, srcreg;
9063 enum rtx_code code = GET_CODE (x);
9064 const char *format_ptr = GET_RTX_FORMAT (code);
9067 if (code == MEM && XEXP (x, 0) == dstreg)
9068 MEM_COPY_ATTRIBUTES (x, dstref);
9069 else if (code == MEM && XEXP (x, 0) == srcreg)
9070 MEM_COPY_ATTRIBUTES (x, srcref);
9072 for (i = 0; i < GET_RTX_LENGTH (code); i++, format_ptr++)
9074 if (*format_ptr == 'e')
9075 ix86_set_move_mem_attrs_1 (XEXP (x, i), dstref, srcref,
9077 else if (*format_ptr == 'E')
9078 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
9079 ix86_set_move_mem_attrs_1 (XVECEXP (x, i, j), dstref, srcref,
9084 /* Compute the alignment given to a constant that is being placed in memory.
9085 EXP is the constant and ALIGN is the alignment that the object would
9087 The value of this function is used instead of that alignment to align
9091 ix86_constant_alignment (exp, align)
9095 if (TREE_CODE (exp) == REAL_CST)
9097 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
9099 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
9102 else if (TREE_CODE (exp) == STRING_CST && TREE_STRING_LENGTH (exp) >= 31
9109 /* Compute the alignment for a static variable.
9110 TYPE is the data type, and ALIGN is the alignment that
9111 the object would ordinarily have. The value of this function is used
9112 instead of that alignment to align the object. */
9115 ix86_data_alignment (type, align)
9119 if (AGGREGATE_TYPE_P (type)
9121 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
9122 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
9123 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
9126 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
9127 to 16byte boundary. */
9130 if (AGGREGATE_TYPE_P (type)
9132 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
9133 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
9134 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
9138 if (TREE_CODE (type) == ARRAY_TYPE)
9140 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
9142 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
9145 else if (TREE_CODE (type) == COMPLEX_TYPE)
9148 if (TYPE_MODE (type) == DCmode && align < 64)
9150 if (TYPE_MODE (type) == XCmode && align < 128)
9153 else if ((TREE_CODE (type) == RECORD_TYPE
9154 || TREE_CODE (type) == UNION_TYPE
9155 || TREE_CODE (type) == QUAL_UNION_TYPE)
9156 && TYPE_FIELDS (type))
9158 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
9160 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
9163 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
9164 || TREE_CODE (type) == INTEGER_TYPE)
9166 if (TYPE_MODE (type) == DFmode && align < 64)
9168 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
9175 /* Compute the alignment for a local variable.
9176 TYPE is the data type, and ALIGN is the alignment that
9177 the object would ordinarily have. The value of this macro is used
9178 instead of that alignment to align the object. */
9181 ix86_local_alignment (type, align)
9185 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
9186 to 16byte boundary. */
9189 if (AGGREGATE_TYPE_P (type)
9191 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
9192 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
9193 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
9196 if (TREE_CODE (type) == ARRAY_TYPE)
9198 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
9200 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
9203 else if (TREE_CODE (type) == COMPLEX_TYPE)
9205 if (TYPE_MODE (type) == DCmode && align < 64)
9207 if (TYPE_MODE (type) == XCmode && align < 128)
9210 else if ((TREE_CODE (type) == RECORD_TYPE
9211 || TREE_CODE (type) == UNION_TYPE
9212 || TREE_CODE (type) == QUAL_UNION_TYPE)
9213 && TYPE_FIELDS (type))
9215 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
9217 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
9220 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
9221 || TREE_CODE (type) == INTEGER_TYPE)
9224 if (TYPE_MODE (type) == DFmode && align < 64)
9226 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
9232 /* Emit RTL insns to initialize the variable parts of a trampoline.
9233 FNADDR is an RTX for the address of the function's pure code.
9234 CXT is an RTX for the static chain value for the function. */
9236 x86_initialize_trampoline (tramp, fnaddr, cxt)
9237 rtx tramp, fnaddr, cxt;
9241 /* Compute offset from the end of the jmp to the target function. */
9242 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
9243 plus_constant (tramp, 10),
9244 NULL_RTX, 1, OPTAB_DIRECT);
9245 emit_move_insn (gen_rtx_MEM (QImode, tramp),
9246 GEN_INT (trunc_int_for_mode (0xb9, QImode)));
9247 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
9248 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
9249 GEN_INT (trunc_int_for_mode (0xe9, QImode)));
9250 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
9255 /* Try to load address using shorter movl instead of movabs.
9256 We may want to support movq for kernel mode, but kernel does not use
9257 trampolines at the moment. */
9258 if (x86_64_zero_extended_value (fnaddr))
9260 fnaddr = copy_to_mode_reg (DImode, fnaddr);
9261 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
9262 GEN_INT (trunc_int_for_mode (0xbb41, HImode)));
9263 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
9264 gen_lowpart (SImode, fnaddr));
9269 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
9270 GEN_INT (trunc_int_for_mode (0xbb49, HImode)));
9271 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
9275 /* Load static chain using movabs to r10. */
9276 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
9277 GEN_INT (trunc_int_for_mode (0xba49, HImode)));
9278 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
9281 /* Jump to the r11 */
9282 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
9283 GEN_INT (trunc_int_for_mode (0xff49, HImode)));
9284 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
9285 GEN_INT (trunc_int_for_mode (0xe3, HImode)));
9287 if (offset > TRAMPOLINE_SIZE)
9292 #define def_builtin(NAME, TYPE, CODE) \
9293 builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, NULL)
9294 struct builtin_description
9296 enum insn_code icode;
9298 enum ix86_builtins code;
9299 enum rtx_code comparison;
9303 static struct builtin_description bdesc_comi[] =
9305 { CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, EQ, 0 },
9306 { CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, LT, 0 },
9307 { CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, LE, 0 },
9308 { CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, LT, 1 },
9309 { CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, LE, 1 },
9310 { CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, NE, 0 },
9311 { CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, EQ, 0 },
9312 { CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, LT, 0 },
9313 { CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, LE, 0 },
9314 { CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, LT, 1 },
9315 { CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, LE, 1 },
9316 { CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, NE, 0 }
9319 static struct builtin_description bdesc_2arg[] =
9322 { CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
9323 { CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
9324 { CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
9325 { CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
9326 { CODE_FOR_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
9327 { CODE_FOR_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
9328 { CODE_FOR_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
9329 { CODE_FOR_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
9331 { CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
9332 { CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
9333 { CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
9334 { CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 },
9335 { CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 },
9336 { CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
9337 { CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 },
9338 { CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 },
9339 { CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 },
9340 { CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 },
9341 { CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 },
9342 { CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 },
9343 { CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
9344 { CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
9345 { CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
9346 { CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpgtss", IX86_BUILTIN_CMPGTSS, LT, 1 },
9347 { CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpgess", IX86_BUILTIN_CMPGESS, LE, 1 },
9348 { CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
9349 { CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 },
9350 { CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 },
9351 { CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 },
9352 { CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, LT, 1 },
9353 { CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, LE, 1 },
9354 { CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
9356 { CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
9357 { CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
9358 { CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
9359 { CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
9361 { CODE_FOR_sse_andti3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
9362 { CODE_FOR_sse_nandti3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
9363 { CODE_FOR_sse_iorti3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
9364 { CODE_FOR_sse_xorti3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
9366 { CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
9367 { CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
9368 { CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
9369 { CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
9370 { CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
9373 { CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
9374 { CODE_FOR_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
9375 { CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
9376 { CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
9377 { CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
9378 { CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
9380 { CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
9381 { CODE_FOR_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
9382 { CODE_FOR_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
9383 { CODE_FOR_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
9384 { CODE_FOR_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
9385 { CODE_FOR_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
9386 { CODE_FOR_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
9387 { CODE_FOR_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
9389 { CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
9390 { CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
9391 { CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
9393 { CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
9394 { CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
9395 { CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
9396 { CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
9398 { CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
9399 { CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
9401 { CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
9402 { CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
9403 { CODE_FOR_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
9404 { CODE_FOR_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
9405 { CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
9406 { CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
9408 { CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
9409 { CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
9410 { CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
9411 { CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
9413 { CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
9414 { CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
9415 { CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
9416 { CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
9417 { CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
9418 { CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
9421 { CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
9422 { CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
9423 { CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
9425 { CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
9426 { CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
9428 { CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
9429 { CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
9430 { CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
9431 { CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
9432 { CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
9433 { CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
9435 { CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
9436 { CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
9437 { CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
9438 { CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
9439 { CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
9440 { CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
9442 { CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
9443 { CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
9444 { CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
9445 { CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
9447 { CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
9448 { CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 }
9452 static struct builtin_description bdesc_1arg[] =
9454 { CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
9455 { CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
9457 { CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
9458 { CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
9459 { CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
9461 { CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
9462 { CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
9463 { CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
9464 { CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 }
9468 /* Set up all the target-specific builtins. */
9470 ix86_init_builtins ()
9473 ix86_init_mmx_sse_builtins ();
9476 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
9477 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
9480 ix86_init_mmx_sse_builtins ()
9482 struct builtin_description * d;
9484 tree endlink = void_list_node;
9486 tree pchar_type_node = build_pointer_type (char_type_node);
9487 tree pfloat_type_node = build_pointer_type (float_type_node);
9488 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
9489 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
9492 tree int_ftype_v4sf_v4sf
9493 = build_function_type (integer_type_node,
9494 tree_cons (NULL_TREE, V4SF_type_node,
9495 tree_cons (NULL_TREE,
9498 tree v4si_ftype_v4sf_v4sf
9499 = build_function_type (V4SI_type_node,
9500 tree_cons (NULL_TREE, V4SF_type_node,
9501 tree_cons (NULL_TREE,
9504 /* MMX/SSE/integer conversions. */
9506 = build_function_type (integer_type_node,
9507 tree_cons (NULL_TREE, V4SF_type_node,
9510 = build_function_type (integer_type_node,
9511 tree_cons (NULL_TREE, V8QI_type_node,
9514 = build_function_type (integer_type_node,
9515 tree_cons (NULL_TREE, V2SI_type_node,
9518 = build_function_type (V2SI_type_node,
9519 tree_cons (NULL_TREE, integer_type_node,
9521 tree v4sf_ftype_v4sf_int
9522 = build_function_type (integer_type_node,
9523 tree_cons (NULL_TREE, V4SF_type_node,
9524 tree_cons (NULL_TREE, integer_type_node,
9526 tree v4sf_ftype_v4sf_v2si
9527 = build_function_type (V4SF_type_node,
9528 tree_cons (NULL_TREE, V4SF_type_node,
9529 tree_cons (NULL_TREE, V2SI_type_node,
9531 tree int_ftype_v4hi_int
9532 = build_function_type (integer_type_node,
9533 tree_cons (NULL_TREE, V4HI_type_node,
9534 tree_cons (NULL_TREE, integer_type_node,
9536 tree v4hi_ftype_v4hi_int_int
9537 = build_function_type (V4HI_type_node,
9538 tree_cons (NULL_TREE, V4HI_type_node,
9539 tree_cons (NULL_TREE, integer_type_node,
9540 tree_cons (NULL_TREE,
9543 /* Miscellaneous. */
9544 tree v8qi_ftype_v4hi_v4hi
9545 = build_function_type (V8QI_type_node,
9546 tree_cons (NULL_TREE, V4HI_type_node,
9547 tree_cons (NULL_TREE, V4HI_type_node,
9549 tree v4hi_ftype_v2si_v2si
9550 = build_function_type (V4HI_type_node,
9551 tree_cons (NULL_TREE, V2SI_type_node,
9552 tree_cons (NULL_TREE, V2SI_type_node,
9554 tree v4sf_ftype_v4sf_v4sf_int
9555 = build_function_type (V4SF_type_node,
9556 tree_cons (NULL_TREE, V4SF_type_node,
9557 tree_cons (NULL_TREE, V4SF_type_node,
9558 tree_cons (NULL_TREE,
9561 tree v4hi_ftype_v8qi_v8qi
9562 = build_function_type (V4HI_type_node,
9563 tree_cons (NULL_TREE, V8QI_type_node,
9564 tree_cons (NULL_TREE, V8QI_type_node,
9566 tree v2si_ftype_v4hi_v4hi
9567 = build_function_type (V2SI_type_node,
9568 tree_cons (NULL_TREE, V4HI_type_node,
9569 tree_cons (NULL_TREE, V4HI_type_node,
9571 tree v4hi_ftype_v4hi_int
9572 = build_function_type (V4HI_type_node,
9573 tree_cons (NULL_TREE, V4HI_type_node,
9574 tree_cons (NULL_TREE, integer_type_node,
9576 tree v4hi_ftype_v4hi_di
9577 = build_function_type (V4HI_type_node,
9578 tree_cons (NULL_TREE, V4HI_type_node,
9579 tree_cons (NULL_TREE,
9580 long_long_integer_type_node,
9582 tree v2si_ftype_v2si_di
9583 = build_function_type (V2SI_type_node,
9584 tree_cons (NULL_TREE, V2SI_type_node,
9585 tree_cons (NULL_TREE,
9586 long_long_integer_type_node,
9588 tree void_ftype_void
9589 = build_function_type (void_type_node, endlink);
9590 tree void_ftype_pchar_int
9591 = build_function_type (void_type_node,
9592 tree_cons (NULL_TREE, pchar_type_node,
9593 tree_cons (NULL_TREE, integer_type_node,
9595 tree void_ftype_unsigned
9596 = build_function_type (void_type_node,
9597 tree_cons (NULL_TREE, unsigned_type_node,
9599 tree unsigned_ftype_void
9600 = build_function_type (unsigned_type_node, endlink);
9602 = build_function_type (long_long_unsigned_type_node, endlink);
9604 = build_function_type (intTI_type_node, endlink);
9605 tree v2si_ftype_v4sf
9606 = build_function_type (V2SI_type_node,
9607 tree_cons (NULL_TREE, V4SF_type_node,
9610 tree maskmovq_args = tree_cons (NULL_TREE, V8QI_type_node,
9611 tree_cons (NULL_TREE, V8QI_type_node,
9612 tree_cons (NULL_TREE,
9615 tree void_ftype_v8qi_v8qi_pchar
9616 = build_function_type (void_type_node, maskmovq_args);
9617 tree v4sf_ftype_pfloat
9618 = build_function_type (V4SF_type_node,
9619 tree_cons (NULL_TREE, pfloat_type_node,
9621 tree v4sf_ftype_float
9622 = build_function_type (V4SF_type_node,
9623 tree_cons (NULL_TREE, float_type_node,
9625 tree v4sf_ftype_float_float_float_float
9626 = build_function_type (V4SF_type_node,
9627 tree_cons (NULL_TREE, float_type_node,
9628 tree_cons (NULL_TREE, float_type_node,
9629 tree_cons (NULL_TREE,
9631 tree_cons (NULL_TREE,
9634 /* @@@ the type is bogus */
9635 tree v4sf_ftype_v4sf_pv2si
9636 = build_function_type (V4SF_type_node,
9637 tree_cons (NULL_TREE, V4SF_type_node,
9638 tree_cons (NULL_TREE, pv2si_type_node,
9640 tree v4sf_ftype_pv2si_v4sf
9641 = build_function_type (V4SF_type_node,
9642 tree_cons (NULL_TREE, V4SF_type_node,
9643 tree_cons (NULL_TREE, pv2si_type_node,
9645 tree void_ftype_pfloat_v4sf
9646 = build_function_type (void_type_node,
9647 tree_cons (NULL_TREE, pfloat_type_node,
9648 tree_cons (NULL_TREE, V4SF_type_node,
9650 tree void_ftype_pdi_di
9651 = build_function_type (void_type_node,
9652 tree_cons (NULL_TREE, pdi_type_node,
9653 tree_cons (NULL_TREE,
9654 long_long_unsigned_type_node,
9656 /* Normal vector unops. */
9657 tree v4sf_ftype_v4sf
9658 = build_function_type (V4SF_type_node,
9659 tree_cons (NULL_TREE, V4SF_type_node,
9662 /* Normal vector binops. */
9663 tree v4sf_ftype_v4sf_v4sf
9664 = build_function_type (V4SF_type_node,
9665 tree_cons (NULL_TREE, V4SF_type_node,
9666 tree_cons (NULL_TREE, V4SF_type_node,
9668 tree v8qi_ftype_v8qi_v8qi
9669 = build_function_type (V8QI_type_node,
9670 tree_cons (NULL_TREE, V8QI_type_node,
9671 tree_cons (NULL_TREE, V8QI_type_node,
9673 tree v4hi_ftype_v4hi_v4hi
9674 = build_function_type (V4HI_type_node,
9675 tree_cons (NULL_TREE, V4HI_type_node,
9676 tree_cons (NULL_TREE, V4HI_type_node,
9678 tree v2si_ftype_v2si_v2si
9679 = build_function_type (V2SI_type_node,
9680 tree_cons (NULL_TREE, V2SI_type_node,
9681 tree_cons (NULL_TREE, V2SI_type_node,
9684 = build_function_type (intTI_type_node,
9685 tree_cons (NULL_TREE, intTI_type_node,
9686 tree_cons (NULL_TREE, intTI_type_node,
9689 = build_function_type (long_long_unsigned_type_node,
9690 tree_cons (NULL_TREE, long_long_unsigned_type_node,
9691 tree_cons (NULL_TREE,
9692 long_long_unsigned_type_node,
9695 /* Add all builtins that are more or less simple operations on two
9697 for (i = 0, d = bdesc_2arg; i < sizeof (bdesc_2arg) / sizeof *d; i++, d++)
9699 /* Use one of the operands; the target can have a different mode for
9700 mask-generating compares. */
9701 enum machine_mode mode;
9706 mode = insn_data[d->icode].operand[1].mode;
9708 if (! TARGET_SSE && ! VALID_MMX_REG_MODE (mode))
9714 type = v4sf_ftype_v4sf_v4sf;
9717 type = v8qi_ftype_v8qi_v8qi;
9720 type = v4hi_ftype_v4hi_v4hi;
9723 type = v2si_ftype_v2si_v2si;
9726 type = ti_ftype_ti_ti;
9729 type = di_ftype_di_di;
9736 /* Override for comparisons. */
9737 if (d->icode == CODE_FOR_maskcmpv4sf3
9738 || d->icode == CODE_FOR_maskncmpv4sf3
9739 || d->icode == CODE_FOR_vmmaskcmpv4sf3
9740 || d->icode == CODE_FOR_vmmaskncmpv4sf3)
9741 type = v4si_ftype_v4sf_v4sf;
9743 def_builtin (d->name, type, d->code);
9746 /* Add the remaining MMX insns with somewhat more complicated types. */
9747 def_builtin ("__builtin_ia32_m_from_int", v2si_ftype_int, IX86_BUILTIN_M_FROM_INT);
9748 def_builtin ("__builtin_ia32_m_to_int", int_ftype_v2si, IX86_BUILTIN_M_TO_INT);
9749 def_builtin ("__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO);
9750 def_builtin ("__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
9751 def_builtin ("__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
9752 def_builtin ("__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
9753 def_builtin ("__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
9754 def_builtin ("__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
9755 def_builtin ("__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
9757 def_builtin ("__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
9758 def_builtin ("__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
9759 def_builtin ("__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
9761 def_builtin ("__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
9762 def_builtin ("__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
9764 def_builtin ("__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
9765 def_builtin ("__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
9767 /* Everything beyond this point is SSE only. */
9771 /* comi/ucomi insns. */
9772 for (i = 0, d = bdesc_comi; i < sizeof (bdesc_comi) / sizeof *d; i++, d++)
9773 def_builtin (d->name, int_ftype_v4sf_v4sf, d->code);
9775 def_builtin ("__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
9776 def_builtin ("__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
9777 def_builtin ("__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
9779 def_builtin ("__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
9780 def_builtin ("__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
9781 def_builtin ("__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
9782 def_builtin ("__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
9783 def_builtin ("__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
9784 def_builtin ("__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
9786 def_builtin ("__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
9787 def_builtin ("__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);
9789 def_builtin ("__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
9791 def_builtin ("__builtin_ia32_loadaps", v4sf_ftype_pfloat, IX86_BUILTIN_LOADAPS);
9792 def_builtin ("__builtin_ia32_loadups", v4sf_ftype_pfloat, IX86_BUILTIN_LOADUPS);
9793 def_builtin ("__builtin_ia32_loadss", v4sf_ftype_pfloat, IX86_BUILTIN_LOADSS);
9794 def_builtin ("__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS);
9795 def_builtin ("__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
9796 def_builtin ("__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS);
9798 def_builtin ("__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
9799 def_builtin ("__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
9800 def_builtin ("__builtin_ia32_storehps", v4sf_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
9801 def_builtin ("__builtin_ia32_storelps", v4sf_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
9803 def_builtin ("__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
9804 def_builtin ("__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
9805 def_builtin ("__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
9806 def_builtin ("__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
9808 def_builtin ("__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
9809 def_builtin ("__builtin_ia32_prefetch", void_ftype_pchar_int, IX86_BUILTIN_PREFETCH);
9811 def_builtin ("__builtin_ia32_psadbw", v4hi_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
9813 def_builtin ("__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
9814 def_builtin ("__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
9815 def_builtin ("__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
9816 def_builtin ("__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
9817 def_builtin ("__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
9818 def_builtin ("__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
9820 def_builtin ("__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
9822 /* Composite intrinsics. */
9823 def_builtin ("__builtin_ia32_setps1", v4sf_ftype_float, IX86_BUILTIN_SETPS1);
9824 def_builtin ("__builtin_ia32_setps", v4sf_ftype_float_float_float_float, IX86_BUILTIN_SETPS);
9825 def_builtin ("__builtin_ia32_setzerops", ti_ftype_void, IX86_BUILTIN_CLRPS);
9826 def_builtin ("__builtin_ia32_loadps1", v4sf_ftype_pfloat, IX86_BUILTIN_LOADPS1);
9827 def_builtin ("__builtin_ia32_loadrps", v4sf_ftype_pfloat, IX86_BUILTIN_LOADRPS);
9828 def_builtin ("__builtin_ia32_storeps1", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREPS1);
9829 def_builtin ("__builtin_ia32_storerps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORERPS);
9832 /* Errors in the source file can cause expand_expr to return const0_rtx
9833 where we expect a vector. To avoid crashing, use one of the vector
9834 clear instructions. */
9836 safe_vector_operand (x, mode)
9838 enum machine_mode mode;
9840 if (x != const0_rtx)
9842 x = gen_reg_rtx (mode);
9844 if (VALID_MMX_REG_MODE (mode))
9845 emit_insn (gen_mmx_clrdi (mode == DImode ? x
9846 : gen_rtx_SUBREG (DImode, x, 0)));
9848 emit_insn (gen_sse_clrti (mode == TImode ? x
9849 : gen_rtx_SUBREG (TImode, x, 0)));
9853 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
9856 ix86_expand_binop_builtin (icode, arglist, target)
9857 enum insn_code icode;
9862 tree arg0 = TREE_VALUE (arglist);
9863 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
9864 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
9865 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
9866 enum machine_mode tmode = insn_data[icode].operand[0].mode;
9867 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
9868 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
9870 if (VECTOR_MODE_P (mode0))
9871 op0 = safe_vector_operand (op0, mode0);
9872 if (VECTOR_MODE_P (mode1))
9873 op1 = safe_vector_operand (op1, mode1);
9876 || GET_MODE (target) != tmode
9877 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
9878 target = gen_reg_rtx (tmode);
9880 /* In case the insn wants input operands in modes different from
9881 the result, abort. */
9882 if (GET_MODE (op0) != mode0 || GET_MODE (op1) != mode1)
9885 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
9886 op0 = copy_to_mode_reg (mode0, op0);
9887 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
9888 op1 = copy_to_mode_reg (mode1, op1);
9890 pat = GEN_FCN (icode) (target, op0, op1);
9897 /* Subroutine of ix86_expand_builtin to take care of stores. */
9900 ix86_expand_store_builtin (icode, arglist, shuffle)
9901 enum insn_code icode;
9906 tree arg0 = TREE_VALUE (arglist);
9907 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
9908 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
9909 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
9910 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
9911 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
9913 if (VECTOR_MODE_P (mode1))
9914 op1 = safe_vector_operand (op1, mode1);
9916 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
9917 if (shuffle >= 0 || ! (*insn_data[icode].operand[1].predicate) (op1, mode1))
9918 op1 = copy_to_mode_reg (mode1, op1);
9920 emit_insn (gen_sse_shufps (op1, op1, op1, GEN_INT (shuffle)));
9921 pat = GEN_FCN (icode) (op0, op1);
9927 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
9930 ix86_expand_unop_builtin (icode, arglist, target, do_load)
9931 enum insn_code icode;
9937 tree arg0 = TREE_VALUE (arglist);
9938 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
9939 enum machine_mode tmode = insn_data[icode].operand[0].mode;
9940 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
9943 || GET_MODE (target) != tmode
9944 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
9945 target = gen_reg_rtx (tmode);
9947 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
9950 if (VECTOR_MODE_P (mode0))
9951 op0 = safe_vector_operand (op0, mode0);
9953 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
9954 op0 = copy_to_mode_reg (mode0, op0);
9957 pat = GEN_FCN (icode) (target, op0);
9964 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
9965 sqrtss, rsqrtss, rcpss. */
9968 ix86_expand_unop1_builtin (icode, arglist, target)
9969 enum insn_code icode;
9974 tree arg0 = TREE_VALUE (arglist);
9975 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
9976 enum machine_mode tmode = insn_data[icode].operand[0].mode;
9977 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
9980 || GET_MODE (target) != tmode
9981 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
9982 target = gen_reg_rtx (tmode);
9984 if (VECTOR_MODE_P (mode0))
9985 op0 = safe_vector_operand (op0, mode0);
9987 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
9988 op0 = copy_to_mode_reg (mode0, op0);
9990 pat = GEN_FCN (icode) (target, op0, op0);
9997 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
10000 ix86_expand_sse_compare (d, arglist, target)
10001 struct builtin_description *d;
10006 tree arg0 = TREE_VALUE (arglist);
10007 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
10008 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
10009 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
10011 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
10012 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
10013 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
10014 enum rtx_code comparison = d->comparison;
10016 if (VECTOR_MODE_P (mode0))
10017 op0 = safe_vector_operand (op0, mode0);
10018 if (VECTOR_MODE_P (mode1))
10019 op1 = safe_vector_operand (op1, mode1);
10021 /* Swap operands if we have a comparison that isn't available in
10025 target = gen_reg_rtx (tmode);
10026 emit_move_insn (target, op1);
10029 comparison = swap_condition (comparison);
10032 || GET_MODE (target) != tmode
10033 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
10034 target = gen_reg_rtx (tmode);
10036 if (! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
10037 op0 = copy_to_mode_reg (mode0, op0);
10038 if (! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
10039 op1 = copy_to_mode_reg (mode1, op1);
10041 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
10042 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
10049 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
10052 ix86_expand_sse_comi (d, arglist, target)
10053 struct builtin_description *d;
10058 tree arg0 = TREE_VALUE (arglist);
10059 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
10060 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
10061 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
10063 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
10064 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
10065 enum rtx_code comparison = d->comparison;
10067 if (VECTOR_MODE_P (mode0))
10068 op0 = safe_vector_operand (op0, mode0);
10069 if (VECTOR_MODE_P (mode1))
10070 op1 = safe_vector_operand (op1, mode1);
10072 /* Swap operands if we have a comparison that isn't available in
10079 comparison = swap_condition (comparison);
10082 target = gen_reg_rtx (SImode);
10083 emit_move_insn (target, const0_rtx);
10084 target = gen_rtx_SUBREG (QImode, target, 0);
10086 if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
10087 op0 = copy_to_mode_reg (mode0, op0);
10088 if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
10089 op1 = copy_to_mode_reg (mode1, op1);
10091 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
10092 pat = GEN_FCN (d->icode) (op0, op1, op2);
10096 emit_insn (gen_setcc_2 (target, op2));
10101 /* Expand an expression EXP that calls a built-in function,
10102 with result going to TARGET if that's convenient
10103 (and in mode MODE if that's convenient).
10104 SUBTARGET may be used as the target for computing one of EXP's operands.
10105 IGNORE is nonzero if the value is to be ignored. */
10108 ix86_expand_builtin (exp, target, subtarget, mode, ignore)
10111 rtx subtarget ATTRIBUTE_UNUSED;
10112 enum machine_mode mode ATTRIBUTE_UNUSED;
10113 int ignore ATTRIBUTE_UNUSED;
10115 struct builtin_description *d;
10117 enum insn_code icode;
10118 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
10119 tree arglist = TREE_OPERAND (exp, 1);
10120 tree arg0, arg1, arg2, arg3;
10121 rtx op0, op1, op2, pat;
10122 enum machine_mode tmode, mode0, mode1, mode2;
10123 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
10127 case IX86_BUILTIN_EMMS:
10128 emit_insn (gen_emms ());
10131 case IX86_BUILTIN_SFENCE:
10132 emit_insn (gen_sfence ());
10135 case IX86_BUILTIN_M_FROM_INT:
10136 target = gen_reg_rtx (DImode);
10137 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
10138 emit_move_insn (gen_rtx_SUBREG (SImode, target, 0), op0);
10141 case IX86_BUILTIN_M_TO_INT:
10142 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
10143 op0 = copy_to_mode_reg (DImode, op0);
10144 target = gen_reg_rtx (SImode);
10145 emit_move_insn (target, gen_rtx_SUBREG (SImode, op0, 0));
10148 case IX86_BUILTIN_PEXTRW:
10149 icode = CODE_FOR_mmx_pextrw;
10150 arg0 = TREE_VALUE (arglist);
10151 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
10152 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
10153 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
10154 tmode = insn_data[icode].operand[0].mode;
10155 mode0 = insn_data[icode].operand[1].mode;
10156 mode1 = insn_data[icode].operand[2].mode;
10158 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
10159 op0 = copy_to_mode_reg (mode0, op0);
10160 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
10162 /* @@@ better error message */
10163 error ("selector must be an immediate");
10167 || GET_MODE (target) != tmode
10168 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
10169 target = gen_reg_rtx (tmode);
10170 pat = GEN_FCN (icode) (target, op0, op1);
10176 case IX86_BUILTIN_PINSRW:
10177 icode = CODE_FOR_mmx_pinsrw;
10178 arg0 = TREE_VALUE (arglist);
10179 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
10180 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
10181 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
10182 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
10183 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
10184 tmode = insn_data[icode].operand[0].mode;
10185 mode0 = insn_data[icode].operand[1].mode;
10186 mode1 = insn_data[icode].operand[2].mode;
10187 mode2 = insn_data[icode].operand[3].mode;
10189 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
10190 op0 = copy_to_mode_reg (mode0, op0);
10191 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
10192 op1 = copy_to_mode_reg (mode1, op1);
10193 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
10195 /* @@@ better error message */
10196 error ("selector must be an immediate");
10200 || GET_MODE (target) != tmode
10201 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
10202 target = gen_reg_rtx (tmode);
10203 pat = GEN_FCN (icode) (target, op0, op1, op2);
10209 case IX86_BUILTIN_MASKMOVQ:
10210 icode = CODE_FOR_mmx_maskmovq;
10211 /* Note the arg order is different from the operand order. */
10212 arg1 = TREE_VALUE (arglist);
10213 arg2 = TREE_VALUE (TREE_CHAIN (arglist));
10214 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
10215 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
10216 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
10217 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
10218 mode0 = insn_data[icode].operand[0].mode;
10219 mode1 = insn_data[icode].operand[1].mode;
10220 mode2 = insn_data[icode].operand[2].mode;
10222 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
10223 op0 = copy_to_mode_reg (mode0, op0);
10224 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
10225 op1 = copy_to_mode_reg (mode1, op1);
10226 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
10227 op2 = copy_to_mode_reg (mode2, op2);
10228 pat = GEN_FCN (icode) (op0, op1, op2);
10234 case IX86_BUILTIN_SQRTSS:
10235 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2, arglist, target);
10236 case IX86_BUILTIN_RSQRTSS:
10237 return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2, arglist, target);
10238 case IX86_BUILTIN_RCPSS:
10239 return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target);
10241 case IX86_BUILTIN_LOADAPS:
10242 return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);
10244 case IX86_BUILTIN_LOADUPS:
10245 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
10247 case IX86_BUILTIN_STOREAPS:
10248 return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist, -1);
10249 case IX86_BUILTIN_STOREUPS:
10250 return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist, -1);
10252 case IX86_BUILTIN_LOADSS:
10253 return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1);
10255 case IX86_BUILTIN_STORESS:
10256 return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist, -1);
10258 case IX86_BUILTIN_LOADHPS:
10259 case IX86_BUILTIN_LOADLPS:
10260 icode = (fcode == IX86_BUILTIN_LOADHPS
10261 ? CODE_FOR_sse_movhps : CODE_FOR_sse_movlps);
10262 arg0 = TREE_VALUE (arglist);
10263 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
10264 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
10265 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
10266 tmode = insn_data[icode].operand[0].mode;
10267 mode0 = insn_data[icode].operand[1].mode;
10268 mode1 = insn_data[icode].operand[2].mode;
10270 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
10271 op0 = copy_to_mode_reg (mode0, op0);
10272 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
10274 || GET_MODE (target) != tmode
10275 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
10276 target = gen_reg_rtx (tmode);
10277 pat = GEN_FCN (icode) (target, op0, op1);
10283 case IX86_BUILTIN_STOREHPS:
10284 case IX86_BUILTIN_STORELPS:
10285 icode = (fcode == IX86_BUILTIN_STOREHPS
10286 ? CODE_FOR_sse_movhps : CODE_FOR_sse_movlps);
10287 arg0 = TREE_VALUE (arglist);
10288 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
10289 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
10290 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
10291 mode0 = insn_data[icode].operand[1].mode;
10292 mode1 = insn_data[icode].operand[2].mode;
10294 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
10295 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
10296 op1 = copy_to_mode_reg (mode1, op1);
10298 pat = GEN_FCN (icode) (op0, op0, op1);
10304 case IX86_BUILTIN_MOVNTPS:
10305 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist, -1);
10306 case IX86_BUILTIN_MOVNTQ:
10307 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist, -1);
10309 case IX86_BUILTIN_LDMXCSR:
10310 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
10311 target = assign_386_stack_local (SImode, 0);
10312 emit_move_insn (target, op0);
10313 emit_insn (gen_ldmxcsr (target));
10316 case IX86_BUILTIN_STMXCSR:
10317 target = assign_386_stack_local (SImode, 0);
10318 emit_insn (gen_stmxcsr (target));
10319 return copy_to_mode_reg (SImode, target);
10321 case IX86_BUILTIN_PREFETCH:
10322 icode = CODE_FOR_prefetch;
10323 arg0 = TREE_VALUE (arglist);
10324 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
10325 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
10326 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
10327 mode0 = insn_data[icode].operand[0].mode;
10328 mode1 = insn_data[icode].operand[1].mode;
10330 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
10332 /* @@@ better error message */
10333 error ("selector must be an immediate");
10337 op0 = copy_to_mode_reg (Pmode, op0);
10338 pat = GEN_FCN (icode) (op0, op1);
10344 case IX86_BUILTIN_SHUFPS:
10345 icode = CODE_FOR_sse_shufps;
10346 arg0 = TREE_VALUE (arglist);
10347 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
10348 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
10349 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
10350 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
10351 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
10352 tmode = insn_data[icode].operand[0].mode;
10353 mode0 = insn_data[icode].operand[1].mode;
10354 mode1 = insn_data[icode].operand[2].mode;
10355 mode2 = insn_data[icode].operand[3].mode;
10357 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
10358 op0 = copy_to_mode_reg (mode0, op0);
10359 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
10360 op1 = copy_to_mode_reg (mode1, op1);
10361 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
10363 /* @@@ better error message */
10364 error ("mask must be an immediate");
10368 || GET_MODE (target) != tmode
10369 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
10370 target = gen_reg_rtx (tmode);
10371 pat = GEN_FCN (icode) (target, op0, op1, op2);
10377 case IX86_BUILTIN_PSHUFW:
10378 icode = CODE_FOR_mmx_pshufw;
10379 arg0 = TREE_VALUE (arglist);
10380 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
10381 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
10382 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
10383 tmode = insn_data[icode].operand[0].mode;
10384 mode0 = insn_data[icode].operand[2].mode;
10385 mode1 = insn_data[icode].operand[3].mode;
10387 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
10388 op0 = copy_to_mode_reg (mode0, op0);
10389 if (! (*insn_data[icode].operand[3].predicate) (op1, mode1))
10391 /* @@@ better error message */
10392 error ("mask must be an immediate");
10396 || GET_MODE (target) != tmode
10397 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
10398 target = gen_reg_rtx (tmode);
10399 pat = GEN_FCN (icode) (target, target, op0, op1);
10405 /* Composite intrinsics. */
10406 case IX86_BUILTIN_SETPS1:
10407 target = assign_386_stack_local (SFmode, 0);
10408 arg0 = TREE_VALUE (arglist);
10409 emit_move_insn (adjust_address (target, SFmode, 0),
10410 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
10411 op0 = gen_reg_rtx (V4SFmode);
10412 emit_insn (gen_sse_loadss (op0, adjust_address (target, V4SFmode, 0)));
10413 emit_insn (gen_sse_shufps (op0, op0, op0, GEN_INT (0)));
10416 case IX86_BUILTIN_SETPS:
10417 target = assign_386_stack_local (V4SFmode, 0);
10418 arg0 = TREE_VALUE (arglist);
10419 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
10420 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
10421 arg3 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist))));
10422 emit_move_insn (adjust_address (target, SFmode, 0),
10423 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
10424 emit_move_insn (adjust_address (target, SFmode, 4),
10425 expand_expr (arg1, NULL_RTX, VOIDmode, 0));
10426 emit_move_insn (adjust_address (target, SFmode, 8),
10427 expand_expr (arg2, NULL_RTX, VOIDmode, 0));
10428 emit_move_insn (adjust_address (target, SFmode, 12),
10429 expand_expr (arg3, NULL_RTX, VOIDmode, 0));
10430 op0 = gen_reg_rtx (V4SFmode);
10431 emit_insn (gen_sse_movaps (op0, target));
10434 case IX86_BUILTIN_CLRPS:
10435 target = gen_reg_rtx (TImode);
10436 emit_insn (gen_sse_clrti (target));
10439 case IX86_BUILTIN_LOADRPS:
10440 target = ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist,
10441 gen_reg_rtx (V4SFmode), 1);
10442 emit_insn (gen_sse_shufps (target, target, target, GEN_INT (0x1b)));
10445 case IX86_BUILTIN_LOADPS1:
10446 target = ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist,
10447 gen_reg_rtx (V4SFmode), 1);
10448 emit_insn (gen_sse_shufps (target, target, target, const0_rtx));
10451 case IX86_BUILTIN_STOREPS1:
10452 return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist, 0);
10453 case IX86_BUILTIN_STORERPS:
10454 return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist, 0x1B);
10456 case IX86_BUILTIN_MMX_ZERO:
10457 target = gen_reg_rtx (DImode);
10458 emit_insn (gen_mmx_clrdi (target));
10465 for (i = 0, d = bdesc_2arg; i < sizeof (bdesc_2arg) / sizeof *d; i++, d++)
10466 if (d->code == fcode)
10468 /* Compares are treated specially. */
10469 if (d->icode == CODE_FOR_maskcmpv4sf3
10470 || d->icode == CODE_FOR_vmmaskcmpv4sf3
10471 || d->icode == CODE_FOR_maskncmpv4sf3
10472 || d->icode == CODE_FOR_vmmaskncmpv4sf3)
10473 return ix86_expand_sse_compare (d, arglist, target);
10475 return ix86_expand_binop_builtin (d->icode, arglist, target);
10478 for (i = 0, d = bdesc_1arg; i < sizeof (bdesc_1arg) / sizeof *d; i++, d++)
10479 if (d->code == fcode)
10480 return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
10482 for (i = 0, d = bdesc_comi; i < sizeof (bdesc_comi) / sizeof *d; i++, d++)
10483 if (d->code == fcode)
10484 return ix86_expand_sse_comi (d, arglist, target);
10486 /* @@@ Should really do something sensible here. */
10490 /* Store OPERAND to the memory after reload is completed. This means
10491 that we can't easilly use assign_stack_local. */
10493 ix86_force_to_memory (mode, operand)
10494 enum machine_mode mode;
10498 if (!reload_completed)
10500 if (TARGET_64BIT && TARGET_RED_ZONE)
10502 result = gen_rtx_MEM (mode,
10503 gen_rtx_PLUS (Pmode,
10505 GEN_INT (-RED_ZONE_SIZE)));
10506 emit_move_insn (result, operand);
10508 else if (TARGET_64BIT && !TARGET_RED_ZONE)
10514 operand = gen_lowpart (DImode, operand);
10518 gen_rtx_SET (VOIDmode,
10519 gen_rtx_MEM (DImode,
10520 gen_rtx_PRE_DEC (DImode,
10521 stack_pointer_rtx)),
10527 result = gen_rtx_MEM (mode, stack_pointer_rtx);
10536 split_di (&operand, 1, operands, operands + 1);
10538 gen_rtx_SET (VOIDmode,
10539 gen_rtx_MEM (SImode,
10540 gen_rtx_PRE_DEC (Pmode,
10541 stack_pointer_rtx)),
10544 gen_rtx_SET (VOIDmode,
10545 gen_rtx_MEM (SImode,
10546 gen_rtx_PRE_DEC (Pmode,
10547 stack_pointer_rtx)),
10552 /* It is better to store HImodes as SImodes. */
10553 if (!TARGET_PARTIAL_REG_STALL)
10554 operand = gen_lowpart (SImode, operand);
10558 gen_rtx_SET (VOIDmode,
10559 gen_rtx_MEM (GET_MODE (operand),
10560 gen_rtx_PRE_DEC (SImode,
10561 stack_pointer_rtx)),
10567 result = gen_rtx_MEM (mode, stack_pointer_rtx);
10572 /* Free operand from the memory. */
10574 ix86_free_from_memory (mode)
10575 enum machine_mode mode;
10577 if (!TARGET_64BIT || !TARGET_RED_ZONE)
10581 if (mode == DImode || TARGET_64BIT)
10583 else if (mode == HImode && TARGET_PARTIAL_REG_STALL)
10587 /* Use LEA to deallocate stack space. In peephole2 it will be converted
10588 to pop or add instruction if registers are available. */
10589 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10590 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
10595 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
10596 QImode must go into class Q_REGS.
10597 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
10598 movdf to do mem-to-mem moves through integer regs. */
10600 ix86_preferred_reload_class (x, class)
10602 enum reg_class class;
10604 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
10606 /* SSE can't load any constant directly yet. */
10607 if (SSE_CLASS_P (class))
10609 /* Floats can load 0 and 1. */
10610 if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x))
10612 /* Limit class to non-SSE. Use GENERAL_REGS if possible. */
10613 if (MAYBE_SSE_CLASS_P (class))
10614 return (reg_class_subset_p (class, GENERAL_REGS)
10615 ? GENERAL_REGS : FLOAT_REGS);
10619 /* General regs can load everything. */
10620 if (reg_class_subset_p (class, GENERAL_REGS))
10621 return GENERAL_REGS;
10622 /* In case we haven't resolved FLOAT or SSE yet, give up. */
10623 if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
10626 if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
10628 if (GET_MODE (x) == QImode && ! reg_class_subset_p (class, Q_REGS))
10633 /* If we are copying between general and FP registers, we need a memory
10634 location. The same is true for SSE and MMX registers.
10636 The macro can't work reliably when one of the CLASSES is class containing
10637 registers from multiple units (SSE, MMX, integer). We avoid this by never
10638 combining those units in single alternative in the machine description.
10639 Ensure that this constraint holds to avoid unexpected surprises.
10641 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
10642 enforce these sanity checks. */
10644 ix86_secondary_memory_needed (class1, class2, mode, strict)
10645 enum reg_class class1, class2;
10646 enum machine_mode mode;
10649 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
10650 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
10651 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
10652 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
10653 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
10654 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
10661 return (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)
10662 || (SSE_CLASS_P (class1) != SSE_CLASS_P (class2)
10663 && (mode) != SImode)
10664 || (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
10665 && (mode) != SImode));
10667 /* Return the cost of moving data from a register in class CLASS1 to
10668 one in class CLASS2.
10670 It is not required that the cost always equal 2 when FROM is the same as TO;
10671 on some machines it is expensive to move between registers if they are not
10672 general registers. */
10674 ix86_register_move_cost (mode, class1, class2)
10675 enum machine_mode mode;
10676 enum reg_class class1, class2;
10678 /* In case we require secondary memory, compute cost of the store followed
10679 by load. In case of copying from general_purpose_register we may emit
10680 multiple stores followed by single load causing memory size mismatch
10681 stall. Count this as arbitarily high cost of 20. */
10682 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
10685 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
10687 return (MEMORY_MOVE_COST (mode, class1, 0)
10688 + MEMORY_MOVE_COST (mode, class2, 1) + add_cost);
10690 /* Moves between SSE/MMX and integer unit are expensive. */
10691 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
10692 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
10693 return ix86_cost->mmxsse_to_integer;
10694 if (MAYBE_FLOAT_CLASS_P (class1))
10695 return ix86_cost->fp_move;
10696 if (MAYBE_SSE_CLASS_P (class1))
10697 return ix86_cost->sse_move;
10698 if (MAYBE_MMX_CLASS_P (class1))
10699 return ix86_cost->mmx_move;
10703 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
10705 ix86_hard_regno_mode_ok (regno, mode)
10707 enum machine_mode mode;
10709 /* Flags and only flags can only hold CCmode values. */
10710 if (CC_REGNO_P (regno))
10711 return GET_MODE_CLASS (mode) == MODE_CC;
10712 if (GET_MODE_CLASS (mode) == MODE_CC
10713 || GET_MODE_CLASS (mode) == MODE_RANDOM
10714 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
10716 if (FP_REGNO_P (regno))
10717 return VALID_FP_MODE_P (mode);
10718 if (SSE_REGNO_P (regno))
10719 return VALID_SSE_REG_MODE (mode);
10720 if (MMX_REGNO_P (regno))
10721 return VALID_MMX_REG_MODE (mode);
10722 /* We handle both integer and floats in the general purpose registers.
10723 In future we should be able to handle vector modes as well. */
10724 if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode))
10726 /* Take care for QImode values - they can be in non-QI regs, but then
10727 they do cause partial register stalls. */
10728 if (regno < 4 || mode != QImode || TARGET_64BIT)
10730 return reload_in_progress || reload_completed || !TARGET_PARTIAL_REG_STALL;
10733 /* Return the cost of moving data of mode M between a
10734 register and memory. A value of 2 is the default; this cost is
10735 relative to those in `REGISTER_MOVE_COST'.
10737 If moving between registers and memory is more expensive than
10738 between two registers, you should define this macro to express the
10741 Model also increased moving costs of QImode registers in non
10745 ix86_memory_move_cost (mode, class, in)
10746 enum machine_mode mode;
10747 enum reg_class class;
10750 if (FLOAT_CLASS_P (class))
10768 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
10770 if (SSE_CLASS_P (class))
10773 switch (GET_MODE_SIZE (mode))
10787 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
10789 if (MMX_CLASS_P (class))
10792 switch (GET_MODE_SIZE (mode))
10803 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
10805 switch (GET_MODE_SIZE (mode))
10809 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
10810 : ix86_cost->movzbl_load);
10812 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
10813 : ix86_cost->int_store[0] + 4);
10816 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
10818 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
10819 if (mode == TFmode)
10821 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
10822 * (int) GET_MODE_SIZE (mode) / 4);
10826 #ifdef DO_GLOBAL_CTORS_BODY
10828 ix86_svr3_asm_out_constructor (symbol, priority)
10830 int priority ATTRIBUTE_UNUSED;
10833 fputs ("\tpushl $", asm_out_file);
10834 assemble_name (asm_out_file, XSTR (symbol, 0));
10835 fputc ('\n', asm_out_file);
10839 #if defined(TARGET_ELF) && defined(TARGET_COFF)
10841 sco_asm_named_section (name, flags)
10843 unsigned int flags;
10846 default_elf_asm_named_section (name, flags);
10848 default_coff_asm_named_section (name, flags);
10852 sco_asm_out_constructor (symbol, priority)
10857 default_named_section_asm_out_constrctor (symbol, priority);
10859 ix86_svr3_asm_out_constructor (symbol, priority);