1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001
3 Free Software Foundation, Inc.
5 This file is part of GNU CC.
7 GNU CC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
12 GNU CC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GNU CC; see the file COPYING. If not, write to
19 the Free Software Foundation, 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
29 #include "hard-reg-set.h"
31 #include "insn-config.h"
32 #include "conditions.h"
34 #include "insn-attr.h"
41 #include "basic-block.h"
44 #ifndef CHECK_STACK_LIMIT
45 #define CHECK_STACK_LIMIT -1
48 /* Processor costs (relative to an add) */
49 struct processor_costs i386_cost = { /* 386 specific costs */
50 1, /* cost of an add instruction */
51 1, /* cost of a lea instruction */
52 3, /* variable shift costs */
53 2, /* constant shift costs */
54 6, /* cost of starting a multiply */
55 1, /* cost of multiply per each bit set */
56 23, /* cost of a divide/mod */
57 15, /* "large" insn */
59 4, /* cost for loading QImode using movzbl */
60 {2, 4, 2}, /* cost of loading integer registers
61 in QImode, HImode and SImode.
62 Relative to reg-reg move (2). */
63 {2, 4, 2}, /* cost of storing integer registers */
64 2, /* cost of reg,reg fld/fst */
65 {8, 8, 8}, /* cost of loading fp registers
66 in SFmode, DFmode and XFmode */
67 {8, 8, 8}, /* cost of loading integer registers */
68 2, /* cost of moving MMX register */
69 {4, 8}, /* cost of loading MMX registers
70 in SImode and DImode */
71 {4, 8}, /* cost of storing MMX registers
72 in SImode and DImode */
73 2, /* cost of moving SSE register */
74 {4, 8, 16}, /* cost of loading SSE registers
75 in SImode, DImode and TImode */
76 {4, 8, 16}, /* cost of storing SSE registers
77 in SImode, DImode and TImode */
78 3, /* MMX or SSE register to integer */
81 struct processor_costs i486_cost = { /* 486 specific costs */
82 1, /* cost of an add instruction */
83 1, /* cost of a lea instruction */
84 3, /* variable shift costs */
85 2, /* constant shift costs */
86 12, /* cost of starting a multiply */
87 1, /* cost of multiply per each bit set */
88 40, /* cost of a divide/mod */
89 15, /* "large" insn */
91 4, /* cost for loading QImode using movzbl */
92 {2, 4, 2}, /* cost of loading integer registers
93 in QImode, HImode and SImode.
94 Relative to reg-reg move (2). */
95 {2, 4, 2}, /* cost of storing integer registers */
96 2, /* cost of reg,reg fld/fst */
97 {8, 8, 8}, /* cost of loading fp registers
98 in SFmode, DFmode and XFmode */
99 {8, 8, 8}, /* cost of loading integer registers */
100 2, /* cost of moving MMX register */
101 {4, 8}, /* cost of loading MMX registers
102 in SImode and DImode */
103 {4, 8}, /* cost of storing MMX registers
104 in SImode and DImode */
105 2, /* cost of moving SSE register */
106 {4, 8, 16}, /* cost of loading SSE registers
107 in SImode, DImode and TImode */
108 {4, 8, 16}, /* cost of storing SSE registers
109 in SImode, DImode and TImode */
110 3 /* MMX or SSE register to integer */
113 struct processor_costs pentium_cost = {
114 1, /* cost of an add instruction */
115 1, /* cost of a lea instruction */
116 4, /* variable shift costs */
117 1, /* constant shift costs */
118 11, /* cost of starting a multiply */
119 0, /* cost of multiply per each bit set */
120 25, /* cost of a divide/mod */
121 8, /* "large" insn */
123 6, /* cost for loading QImode using movzbl */
124 {2, 4, 2}, /* cost of loading integer registers
125 in QImode, HImode and SImode.
126 Relative to reg-reg move (2). */
127 {2, 4, 2}, /* cost of storing integer registers */
128 2, /* cost of reg,reg fld/fst */
129 {2, 2, 6}, /* cost of loading fp registers
130 in SFmode, DFmode and XFmode */
131 {4, 4, 6}, /* cost of loading integer registers */
132 8, /* cost of moving MMX register */
133 {8, 8}, /* cost of loading MMX registers
134 in SImode and DImode */
135 {8, 8}, /* cost of storing MMX registers
136 in SImode and DImode */
137 2, /* cost of moving SSE register */
138 {4, 8, 16}, /* cost of loading SSE registers
139 in SImode, DImode and TImode */
140 {4, 8, 16}, /* cost of storing SSE registers
141 in SImode, DImode and TImode */
142 3 /* MMX or SSE register to integer */
145 struct processor_costs pentiumpro_cost = {
146 1, /* cost of an add instruction */
147 1, /* cost of a lea instruction */
148 1, /* variable shift costs */
149 1, /* constant shift costs */
150 4, /* cost of starting a multiply */
151 0, /* cost of multiply per each bit set */
152 17, /* cost of a divide/mod */
153 8, /* "large" insn */
155 2, /* cost for loading QImode using movzbl */
156 {4, 4, 4}, /* cost of loading integer registers
157 in QImode, HImode and SImode.
158 Relative to reg-reg move (2). */
159 {2, 2, 2}, /* cost of storing integer registers */
160 2, /* cost of reg,reg fld/fst */
161 {2, 2, 6}, /* cost of loading fp registers
162 in SFmode, DFmode and XFmode */
163 {4, 4, 6}, /* cost of loading integer registers */
164 2, /* cost of moving MMX register */
165 {2, 2}, /* cost of loading MMX registers
166 in SImode and DImode */
167 {2, 2}, /* cost of storing MMX registers
168 in SImode and DImode */
169 2, /* cost of moving SSE register */
170 {2, 2, 8}, /* cost of loading SSE registers
171 in SImode, DImode and TImode */
172 {2, 2, 8}, /* cost of storing SSE registers
173 in SImode, DImode and TImode */
174 3 /* MMX or SSE register to integer */
177 struct processor_costs k6_cost = {
178 1, /* cost of an add instruction */
179 2, /* cost of a lea instruction */
180 1, /* variable shift costs */
181 1, /* constant shift costs */
182 3, /* cost of starting a multiply */
183 0, /* cost of multiply per each bit set */
184 18, /* cost of a divide/mod */
185 8, /* "large" insn */
187 3, /* cost for loading QImode using movzbl */
188 {4, 5, 4}, /* cost of loading integer registers
189 in QImode, HImode and SImode.
190 Relative to reg-reg move (2). */
191 {2, 3, 2}, /* cost of storing integer registers */
192 4, /* cost of reg,reg fld/fst */
193 {6, 6, 6}, /* cost of loading fp registers
194 in SFmode, DFmode and XFmode */
195 {4, 4, 4}, /* cost of loading integer registers */
196 2, /* cost of moving MMX register */
197 {2, 2}, /* cost of loading MMX registers
198 in SImode and DImode */
199 {2, 2}, /* cost of storing MMX registers
200 in SImode and DImode */
201 2, /* cost of moving SSE register */
202 {2, 2, 8}, /* cost of loading SSE registers
203 in SImode, DImode and TImode */
204 {2, 2, 8}, /* cost of storing SSE registers
205 in SImode, DImode and TImode */
206 6 /* MMX or SSE register to integer */
209 struct processor_costs athlon_cost = {
210 1, /* cost of an add instruction */
211 2, /* cost of a lea instruction */
212 1, /* variable shift costs */
213 1, /* constant shift costs */
214 5, /* cost of starting a multiply */
215 0, /* cost of multiply per each bit set */
216 42, /* cost of a divide/mod */
217 8, /* "large" insn */
219 4, /* cost for loading QImode using movzbl */
220 {4, 5, 4}, /* cost of loading integer registers
221 in QImode, HImode and SImode.
222 Relative to reg-reg move (2). */
223 {2, 3, 2}, /* cost of storing integer registers */
224 4, /* cost of reg,reg fld/fst */
225 {6, 6, 20}, /* cost of loading fp registers
226 in SFmode, DFmode and XFmode */
227 {4, 4, 16}, /* cost of loading integer registers */
228 2, /* cost of moving MMX register */
229 {2, 2}, /* cost of loading MMX registers
230 in SImode and DImode */
231 {2, 2}, /* cost of storing MMX registers
232 in SImode and DImode */
233 2, /* cost of moving SSE register */
234 {2, 2, 8}, /* cost of loading SSE registers
235 in SImode, DImode and TImode */
236 {2, 2, 8}, /* cost of storing SSE registers
237 in SImode, DImode and TImode */
238 6 /* MMX or SSE register to integer */
241 struct processor_costs pentium4_cost = {
242 1, /* cost of an add instruction */
243 1, /* cost of a lea instruction */
244 8, /* variable shift costs */
245 8, /* constant shift costs */
246 30, /* cost of starting a multiply */
247 0, /* cost of multiply per each bit set */
248 112, /* cost of a divide/mod */
249 16, /* "large" insn */
251 2, /* cost for loading QImode using movzbl */
252 {4, 5, 4}, /* cost of loading integer registers
253 in QImode, HImode and SImode.
254 Relative to reg-reg move (2). */
255 {2, 3, 2}, /* cost of storing integer registers */
256 2, /* cost of reg,reg fld/fst */
257 {2, 2, 6}, /* cost of loading fp registers
258 in SFmode, DFmode and XFmode */
259 {4, 4, 6}, /* cost of loading integer registers */
260 2, /* cost of moving MMX register */
261 {2, 2}, /* cost of loading MMX registers
262 in SImode and DImode */
263 {2, 2}, /* cost of storing MMX registers
264 in SImode and DImode */
265 12, /* cost of moving SSE register */
266 {12, 12, 12}, /* cost of loading SSE registers
267 in SImode, DImode and TImode */
268 {2, 2, 8}, /* cost of storing SSE registers
269 in SImode, DImode and TImode */
270 10, /* MMX or SSE register to integer */
273 struct processor_costs *ix86_cost = &pentium_cost;
275 /* Processor feature/optimization bitmasks. */
276 #define m_386 (1<<PROCESSOR_I386)
277 #define m_486 (1<<PROCESSOR_I486)
278 #define m_PENT (1<<PROCESSOR_PENTIUM)
279 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
280 #define m_K6 (1<<PROCESSOR_K6)
281 #define m_ATHLON (1<<PROCESSOR_ATHLON)
282 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
284 const int x86_use_leave = m_386 | m_K6 | m_ATHLON;
285 const int x86_push_memory = m_386 | m_K6 | m_ATHLON | m_PENT4;
286 const int x86_zero_extend_with_and = m_486 | m_PENT;
287 const int x86_movx = m_ATHLON | m_PPRO | m_PENT4 /* m_386 | m_K6 */;
288 const int x86_double_with_add = ~m_386;
289 const int x86_use_bit_test = m_386;
290 const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON | m_K6;
291 const int x86_cmove = m_PPRO | m_ATHLON | m_PENT4;
292 const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON | m_PENT4;
293 const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4;
294 const int x86_partial_reg_stall = m_PPRO;
295 const int x86_use_loop = m_K6;
296 const int x86_use_fiop = ~(m_PPRO | m_ATHLON | m_PENT);
297 const int x86_use_mov0 = m_K6;
298 const int x86_use_cltd = ~(m_PENT | m_K6);
299 const int x86_read_modify_write = ~m_PENT;
300 const int x86_read_modify = ~(m_PENT | m_PPRO);
301 const int x86_split_long_moves = m_PPRO;
302 const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486;
303 const int x86_single_stringop = m_386 | m_PENT4;
304 const int x86_qimode_math = ~(0);
305 const int x86_promote_qi_regs = 0;
306 const int x86_himode_math = ~(m_PPRO);
307 const int x86_promote_hi_regs = m_PPRO;
308 const int x86_sub_esp_4 = m_ATHLON | m_PPRO | m_PENT4;
309 const int x86_sub_esp_8 = m_ATHLON | m_PPRO | m_386 | m_486 | m_PENT4;
310 const int x86_add_esp_4 = m_ATHLON | m_K6 | m_PENT4;
311 const int x86_add_esp_8 = m_ATHLON | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4;
312 const int x86_integer_DFmode_moves = ~(m_ATHLON | m_PENT4);
313 const int x86_partial_reg_dependency = m_ATHLON | m_PENT4;
314 const int x86_memory_mismatch_stall = m_ATHLON | m_PENT4;
316 #define AT_BP(mode) (gen_rtx_MEM ((mode), hard_frame_pointer_rtx))
318 const char * const hi_reg_name[] = HI_REGISTER_NAMES;
319 const char * const qi_reg_name[] = QI_REGISTER_NAMES;
320 const char * const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
322 /* Array of the smallest class containing reg number REGNO, indexed by
323 REGNO. Used by REGNO_REG_CLASS in i386.h. */
325 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
328 AREG, DREG, CREG, BREG,
330 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
332 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
333 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
336 /* flags, fpsr, dirflag, frame */
337 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
338 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
340 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
342 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
343 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
344 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
348 /* The "default" register map used in 32bit mode. */
350 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
352 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
353 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
354 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
355 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
356 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
357 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
358 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
361 /* The "default" register map used in 64bit mode. */
362 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
364 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
365 33, 34, 35, 36, 37, 38, 39, 40 /* fp regs */
366 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
367 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
368 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
369 8,9,10,11,12,13,14,15, /* extended integer registers */
370 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
373 /* Define the register numbers to be used in Dwarf debugging information.
374 The SVR4 reference port C compiler uses the following register numbers
375 in its Dwarf output code:
376 0 for %eax (gcc regno = 0)
377 1 for %ecx (gcc regno = 2)
378 2 for %edx (gcc regno = 1)
379 3 for %ebx (gcc regno = 3)
380 4 for %esp (gcc regno = 7)
381 5 for %ebp (gcc regno = 6)
382 6 for %esi (gcc regno = 4)
383 7 for %edi (gcc regno = 5)
384 The following three DWARF register numbers are never generated by
385 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
386 believes these numbers have these meanings.
387 8 for %eip (no gcc equivalent)
388 9 for %eflags (gcc regno = 17)
389 10 for %trapno (no gcc equivalent)
390 It is not at all clear how we should number the FP stack registers
391 for the x86 architecture. If the version of SDB on x86/svr4 were
392 a bit less brain dead with respect to floating-point then we would
393 have a precedent to follow with respect to DWARF register numbers
394 for x86 FP registers, but the SDB on x86/svr4 is so completely
395 broken with respect to FP registers that it is hardly worth thinking
396 of it as something to strive for compatibility with.
397 The version of x86/svr4 SDB I have at the moment does (partially)
398 seem to believe that DWARF register number 11 is associated with
399 the x86 register %st(0), but that's about all. Higher DWARF
400 register numbers don't seem to be associated with anything in
401 particular, and even for DWARF regno 11, SDB only seems to under-
402 stand that it should say that a variable lives in %st(0) (when
403 asked via an `=' command) if we said it was in DWARF regno 11,
404 but SDB still prints garbage when asked for the value of the
405 variable in question (via a `/' command).
406 (Also note that the labels SDB prints for various FP stack regs
407 when doing an `x' command are all wrong.)
408 Note that these problems generally don't affect the native SVR4
409 C compiler because it doesn't allow the use of -O with -g and
410 because when it is *not* optimizing, it allocates a memory
411 location for each floating-point variable, and the memory
412 location is what gets described in the DWARF AT_location
413 attribute for the variable in question.
414 Regardless of the severe mental illness of the x86/svr4 SDB, we
415 do something sensible here and we use the following DWARF
416 register numbers. Note that these are all stack-top-relative
418 11 for %st(0) (gcc regno = 8)
419 12 for %st(1) (gcc regno = 9)
420 13 for %st(2) (gcc regno = 10)
421 14 for %st(3) (gcc regno = 11)
422 15 for %st(4) (gcc regno = 12)
423 16 for %st(5) (gcc regno = 13)
424 17 for %st(6) (gcc regno = 14)
425 18 for %st(7) (gcc regno = 15)
427 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
429 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
430 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
431 -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
432 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
433 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
434 -1, -1, -1, -1, -1, -1, -1, -1, /* extemded integer registers */
435 -1, -1, -1, -1, -1, -1, -1, -1, /* extemded SSE registers */
438 /* Test and compare insns in i386.md store the information needed to
439 generate branch and scc insns here. */
441 struct rtx_def *ix86_compare_op0 = NULL_RTX;
442 struct rtx_def *ix86_compare_op1 = NULL_RTX;
444 #define MAX_386_STACK_LOCALS 2
445 /* Size of the register save area. */
446 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
448 /* Define the structure for the machine field in struct function. */
449 struct machine_function
451 rtx stack_locals[(int) MAX_MACHINE_MODE][MAX_386_STACK_LOCALS];
452 int save_varrargs_registers;
453 int accesses_prev_frame;
456 #define ix86_stack_locals (cfun->machine->stack_locals)
457 #define ix86_save_varrargs_registers (cfun->machine->save_varrargs_registers)
459 /* Structure describing stack frame layout.
460 Stack grows downward:
466 saved frame pointer if frame_pointer_needed
467 <- HARD_FRAME_POINTER
473 > to_allocate <- FRAME_POINTER
485 int outgoing_arguments_size;
488 HOST_WIDE_INT to_allocate;
489 /* The offsets relative to ARG_POINTER. */
490 HOST_WIDE_INT frame_pointer_offset;
491 HOST_WIDE_INT hard_frame_pointer_offset;
492 HOST_WIDE_INT stack_pointer_offset;
495 /* Code model option as passed by user. */
496 const char *ix86_cmodel_string;
498 enum cmodel ix86_cmodel;
500 /* which cpu are we scheduling for */
501 enum processor_type ix86_cpu;
503 /* which instruction set architecture to use. */
506 /* Strings to hold which cpu and instruction set architecture to use. */
507 const char *ix86_cpu_string; /* for -mcpu=<xxx> */
508 const char *ix86_arch_string; /* for -march=<xxx> */
510 /* # of registers to use to pass arguments. */
511 const char *ix86_regparm_string;
513 /* ix86_regparm_string as a number */
516 /* Alignment to use for loops and jumps: */
518 /* Power of two alignment for loops. */
519 const char *ix86_align_loops_string;
521 /* Power of two alignment for non-loop jumps. */
522 const char *ix86_align_jumps_string;
524 /* Power of two alignment for stack boundary in bytes. */
525 const char *ix86_preferred_stack_boundary_string;
527 /* Preferred alignment for stack boundary in bits. */
528 int ix86_preferred_stack_boundary;
530 /* Values 1-5: see jump.c */
531 int ix86_branch_cost;
532 const char *ix86_branch_cost_string;
534 /* Power of two alignment for functions. */
535 int ix86_align_funcs;
536 const char *ix86_align_funcs_string;
538 /* Power of two alignment for loops. */
539 int ix86_align_loops;
541 /* Power of two alignment for non-loop jumps. */
542 int ix86_align_jumps;
544 static void output_pic_addr_const PARAMS ((FILE *, rtx, int));
545 static void put_condition_code PARAMS ((enum rtx_code, enum machine_mode,
547 static rtx ix86_expand_int_compare PARAMS ((enum rtx_code, rtx, rtx));
548 static enum rtx_code ix86_prepare_fp_compare_args PARAMS ((enum rtx_code,
550 static rtx gen_push PARAMS ((rtx));
551 static int memory_address_length PARAMS ((rtx addr));
552 static int ix86_flags_dependant PARAMS ((rtx, rtx, enum attr_type));
553 static int ix86_agi_dependant PARAMS ((rtx, rtx, enum attr_type));
554 static int ix86_safe_length PARAMS ((rtx));
555 static enum attr_memory ix86_safe_memory PARAMS ((rtx));
556 static enum attr_pent_pair ix86_safe_pent_pair PARAMS ((rtx));
557 static enum attr_ppro_uops ix86_safe_ppro_uops PARAMS ((rtx));
558 static void ix86_dump_ppro_packet PARAMS ((FILE *));
559 static void ix86_reorder_insn PARAMS ((rtx *, rtx *));
560 static rtx * ix86_pent_find_pair PARAMS ((rtx *, rtx *, enum attr_pent_pair,
562 static void ix86_init_machine_status PARAMS ((struct function *));
563 static void ix86_mark_machine_status PARAMS ((struct function *));
564 static void ix86_free_machine_status PARAMS ((struct function *));
565 static int ix86_split_to_parts PARAMS ((rtx, rtx *, enum machine_mode));
566 static int ix86_safe_length_prefix PARAMS ((rtx));
567 static int ix86_nsaved_regs PARAMS((void));
568 static void ix86_emit_save_regs PARAMS((void));
569 static void ix86_emit_restore_regs_using_mov PARAMS ((rtx, int, bool));
570 static void ix86_emit_epilogue_esp_adjustment PARAMS((int));
571 static void ix86_set_move_mem_attrs_1 PARAMS ((rtx, rtx, rtx, rtx, rtx));
572 static void ix86_sched_reorder_pentium PARAMS((rtx *, rtx *));
573 static void ix86_sched_reorder_ppro PARAMS((rtx *, rtx *));
574 static HOST_WIDE_INT ix86_GOT_alias_set PARAMS ((void));
575 static void ix86_adjust_counter PARAMS ((rtx, HOST_WIDE_INT));
576 static rtx ix86_zero_extend_to_Pmode PARAMS ((rtx));
577 static rtx ix86_expand_aligntest PARAMS ((rtx, int));
578 static void ix86_expand_strlensi_unroll_1 PARAMS ((rtx, rtx));
582 rtx base, index, disp;
586 static int ix86_decompose_address PARAMS ((rtx, struct ix86_address *));
588 struct builtin_description;
589 static rtx ix86_expand_sse_comi PARAMS ((struct builtin_description *, tree,
591 static rtx ix86_expand_sse_compare PARAMS ((struct builtin_description *, tree,
593 static rtx ix86_expand_unop1_builtin PARAMS ((enum insn_code, tree, rtx));
594 static rtx ix86_expand_unop_builtin PARAMS ((enum insn_code, tree, rtx, int));
595 static rtx ix86_expand_binop_builtin PARAMS ((enum insn_code, tree, rtx));
596 static rtx ix86_expand_store_builtin PARAMS ((enum insn_code, tree, int));
597 static rtx safe_vector_operand PARAMS ((rtx, enum machine_mode));
598 static enum rtx_code ix86_fp_compare_code_to_integer PARAMS ((enum rtx_code));
599 static void ix86_fp_comparison_codes PARAMS ((enum rtx_code code,
603 static rtx ix86_expand_fp_compare PARAMS ((enum rtx_code, rtx, rtx, rtx,
605 static int ix86_fp_comparison_arithmetics_cost PARAMS ((enum rtx_code code));
606 static int ix86_fp_comparison_fcomi_cost PARAMS ((enum rtx_code code));
607 static int ix86_fp_comparison_sahf_cost PARAMS ((enum rtx_code code));
608 static int ix86_fp_comparison_cost PARAMS ((enum rtx_code code));
609 static int ix86_save_reg PARAMS ((int, bool));
610 static void ix86_compute_frame_layout PARAMS ((struct ix86_frame *));
612 /* Sometimes certain combinations of command options do not make
613 sense on a particular target machine. You can define a macro
614 `OVERRIDE_OPTIONS' to take account of this. This macro, if
615 defined, is executed once just after all the command options have
618 Don't use this macro to turn on various extra optimizations for
619 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
625 /* Comes from final.c -- no real reason to change it. */
626 #define MAX_CODE_ALIGN 16
630 struct processor_costs *cost; /* Processor costs */
631 int target_enable; /* Target flags to enable. */
632 int target_disable; /* Target flags to disable. */
633 int align_loop; /* Default alignments. */
638 const processor_target_table[PROCESSOR_max] =
640 {&i386_cost, 0, 0, 2, 2, 2, 1},
641 {&i486_cost, 0, 0, 4, 4, 4, 1},
642 {&pentium_cost, 0, 0, -4, -4, -4, 1},
643 {&pentiumpro_cost, 0, 0, 4, -4, 4, 1},
644 {&k6_cost, 0, 0, -5, -5, 4, 1},
645 {&athlon_cost, 0, 0, 4, -4, 4, 1},
646 {&pentium4_cost, 0, 0, 2, 2, 2, 1}
651 const char *name; /* processor name or nickname. */
652 enum processor_type processor;
654 const processor_alias_table[] =
656 {"i386", PROCESSOR_I386},
657 {"i486", PROCESSOR_I486},
658 {"i586", PROCESSOR_PENTIUM},
659 {"pentium", PROCESSOR_PENTIUM},
660 {"i686", PROCESSOR_PENTIUMPRO},
661 {"pentiumpro", PROCESSOR_PENTIUMPRO},
662 {"k6", PROCESSOR_K6},
663 {"athlon", PROCESSOR_ATHLON},
664 {"pentium4", PROCESSOR_PENTIUM4},
667 int const pta_size = sizeof (processor_alias_table) / sizeof (struct pta);
669 #ifdef SUBTARGET_OVERRIDE_OPTIONS
670 SUBTARGET_OVERRIDE_OPTIONS;
673 ix86_arch = PROCESSOR_I386;
674 ix86_cpu = (enum processor_type) TARGET_CPU_DEFAULT;
676 if (ix86_cmodel_string != 0)
678 if (!strcmp (ix86_cmodel_string, "small"))
679 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
681 sorry ("Code model %s not supported in PIC mode", ix86_cmodel_string);
682 else if (!strcmp (ix86_cmodel_string, "32"))
684 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
685 ix86_cmodel = CM_KERNEL;
686 else if (!strcmp (ix86_cmodel_string, "medium") && !flag_pic)
687 ix86_cmodel = CM_MEDIUM;
688 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
689 ix86_cmodel = CM_LARGE;
691 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
697 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
699 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
700 error ("Code model `%s' not supported in the %s bit mode.",
701 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
702 if (ix86_cmodel == CM_LARGE)
703 sorry ("Code model `large' not supported yet.");
705 if (ix86_arch_string != 0)
707 for (i = 0; i < pta_size; i++)
708 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
710 ix86_arch = processor_alias_table[i].processor;
711 /* Default cpu tuning to the architecture. */
712 ix86_cpu = ix86_arch;
717 error ("bad value (%s) for -march= switch", ix86_arch_string);
720 if (ix86_cpu_string != 0)
722 for (i = 0; i < pta_size; i++)
723 if (! strcmp (ix86_cpu_string, processor_alias_table[i].name))
725 ix86_cpu = processor_alias_table[i].processor;
729 error ("bad value (%s) for -mcpu= switch", ix86_cpu_string);
732 ix86_cost = processor_target_table[ix86_cpu].cost;
733 target_flags |= processor_target_table[ix86_cpu].target_enable;
734 target_flags &= ~processor_target_table[ix86_cpu].target_disable;
736 /* Arrange to set up i386_stack_locals for all functions. */
737 init_machine_status = ix86_init_machine_status;
738 mark_machine_status = ix86_mark_machine_status;
739 free_machine_status = ix86_free_machine_status;
741 /* Validate -mregparm= value. */
742 if (ix86_regparm_string)
744 i = atoi (ix86_regparm_string);
745 if (i < 0 || i > REGPARM_MAX)
746 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
752 ix86_regparm = REGPARM_MAX;
754 /* Validate -malign-loops= value, or provide default. */
755 ix86_align_loops = processor_target_table[ix86_cpu].align_loop;
756 if (ix86_align_loops_string)
758 i = atoi (ix86_align_loops_string);
759 if (i < 0 || i > MAX_CODE_ALIGN)
760 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
762 ix86_align_loops = i;
765 /* Validate -malign-jumps= value, or provide default. */
766 ix86_align_jumps = processor_target_table[ix86_cpu].align_jump;
767 if (ix86_align_jumps_string)
769 i = atoi (ix86_align_jumps_string);
770 if (i < 0 || i > MAX_CODE_ALIGN)
771 error ("-malign-jumps=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
773 ix86_align_jumps = i;
776 /* Validate -malign-functions= value, or provide default. */
777 ix86_align_funcs = processor_target_table[ix86_cpu].align_func;
778 if (ix86_align_funcs_string)
780 i = atoi (ix86_align_funcs_string);
781 if (i < 0 || i > MAX_CODE_ALIGN)
782 error ("-malign-functions=%d is not between 0 and %d",
785 ix86_align_funcs = i;
788 /* Validate -mpreferred-stack-boundary= value, or provide default.
789 The default of 128 bits is for Pentium III's SSE __m128. */
790 ix86_preferred_stack_boundary = 128;
791 if (ix86_preferred_stack_boundary_string)
793 i = atoi (ix86_preferred_stack_boundary_string);
794 if (i < (TARGET_64BIT ? 3 : 2) || i > 31)
795 error ("-mpreferred-stack-boundary=%d is not between %d and 31", i,
796 TARGET_64BIT ? 3 : 2);
798 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
801 /* Validate -mbranch-cost= value, or provide default. */
802 ix86_branch_cost = processor_target_table[ix86_cpu].branch_cost;
803 if (ix86_branch_cost_string)
805 i = atoi (ix86_branch_cost_string);
807 error ("-mbranch-cost=%d is not between 0 and 5", i);
809 ix86_branch_cost = i;
812 /* Keep nonleaf frame pointers. */
813 if (TARGET_OMIT_LEAF_FRAME_POINTER)
814 flag_omit_frame_pointer = 1;
816 /* If we're doing fast math, we don't care about comparison order
817 wrt NaNs. This lets us use a shorter comparison sequence. */
818 if (flag_unsafe_math_optimizations)
819 target_flags &= ~MASK_IEEE_FP;
821 /* It makes no sense to ask for just SSE builtins, so MMX is also turned
824 target_flags |= MASK_MMX;
828 optimization_options (level, size)
830 int size ATTRIBUTE_UNUSED;
832 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
833 make the problem with not enough registers even worse. */
834 #ifdef INSN_SCHEDULING
836 flag_schedule_insns = 0;
840 /* Return nonzero if IDENTIFIER with arguments ARGS is a valid machine specific
841 attribute for DECL. The attributes in ATTRIBUTES have previously been
845 ix86_valid_decl_attribute_p (decl, attributes, identifier, args)
846 tree decl ATTRIBUTE_UNUSED;
847 tree attributes ATTRIBUTE_UNUSED;
848 tree identifier ATTRIBUTE_UNUSED;
849 tree args ATTRIBUTE_UNUSED;
854 /* Return nonzero if IDENTIFIER with arguments ARGS is a valid machine specific
855 attribute for TYPE. The attributes in ATTRIBUTES have previously been
859 ix86_valid_type_attribute_p (type, attributes, identifier, args)
861 tree attributes ATTRIBUTE_UNUSED;
865 if (TREE_CODE (type) != FUNCTION_TYPE
866 && TREE_CODE (type) != METHOD_TYPE
867 && TREE_CODE (type) != FIELD_DECL
868 && TREE_CODE (type) != TYPE_DECL)
871 /* Stdcall attribute says callee is responsible for popping arguments
872 if they are not variable. */
873 if (is_attribute_p ("stdcall", identifier)
875 return (args == NULL_TREE);
877 /* Cdecl attribute says the callee is a normal C declaration. */
878 if (is_attribute_p ("cdecl", identifier)
880 return (args == NULL_TREE);
882 /* Regparm attribute specifies how many integer arguments are to be
883 passed in registers. */
884 if (is_attribute_p ("regparm", identifier))
888 if (! args || TREE_CODE (args) != TREE_LIST
889 || TREE_CHAIN (args) != NULL_TREE
890 || TREE_VALUE (args) == NULL_TREE)
893 cst = TREE_VALUE (args);
894 if (TREE_CODE (cst) != INTEGER_CST)
897 if (compare_tree_int (cst, REGPARM_MAX) > 0)
906 /* Return 0 if the attributes for two types are incompatible, 1 if they
907 are compatible, and 2 if they are nearly compatible (which causes a
908 warning to be generated). */
911 ix86_comp_type_attributes (type1, type2)
915 /* Check for mismatch of non-default calling convention. */
916 const char *rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
918 if (TREE_CODE (type1) != FUNCTION_TYPE)
921 /* Check for mismatched return types (cdecl vs stdcall). */
922 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
923 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
928 /* Value is the number of bytes of arguments automatically
929 popped when returning from a subroutine call.
930 FUNDECL is the declaration node of the function (as a tree),
931 FUNTYPE is the data type of the function (as a tree),
932 or for a library call it is an identifier node for the subroutine name.
933 SIZE is the number of bytes of arguments passed on the stack.
935 On the 80386, the RTD insn may be used to pop them if the number
936 of args is fixed, but if the number is variable then the caller
937 must pop them all. RTD can't be used for library calls now
938 because the library is compiled with the Unix compiler.
939 Use of RTD is a selectable option, since it is incompatible with
940 standard Unix calling sequences. If the option is not selected,
941 the caller must always pop the args.
943 The attribute stdcall is equivalent to RTD on a per module basis. */
946 ix86_return_pops_args (fundecl, funtype, size)
951 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
953 /* Cdecl functions override -mrtd, and never pop the stack. */
954 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
956 /* Stdcall functions will pop the stack if not variable args. */
957 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype)))
961 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
962 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
967 /* Lose any fake structure return argument. */
968 if (aggregate_value_p (TREE_TYPE (funtype))
970 return GET_MODE_SIZE (Pmode);
975 /* Argument support functions. */
977 /* Initialize a variable CUM of type CUMULATIVE_ARGS
978 for a call to a function whose data type is FNTYPE.
979 For a library call, FNTYPE is 0. */
982 init_cumulative_args (cum, fntype, libname)
983 CUMULATIVE_ARGS *cum; /* Argument info to initialize */
984 tree fntype; /* tree ptr for function decl */
985 rtx libname; /* SYMBOL_REF of library name or 0 */
987 static CUMULATIVE_ARGS zero_cum;
988 tree param, next_param;
990 if (TARGET_DEBUG_ARG)
992 fprintf (stderr, "\ninit_cumulative_args (");
994 fprintf (stderr, "fntype code = %s, ret code = %s",
995 tree_code_name[(int) TREE_CODE (fntype)],
996 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
998 fprintf (stderr, "no fntype");
1001 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
1006 /* Set up the number of registers to use for passing arguments. */
1007 cum->nregs = ix86_regparm;
1010 tree attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (fntype));
1013 cum->nregs = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1016 /* Determine if this function has variable arguments. This is
1017 indicated by the last argument being 'void_type_mode' if there
1018 are no variable arguments. If there are variable arguments, then
1019 we won't pass anything in registers */
1023 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
1024 param != 0; param = next_param)
1026 next_param = TREE_CHAIN (param);
1027 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
1032 if (TARGET_DEBUG_ARG)
1033 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
1038 /* Update the data in CUM to advance over an argument
1039 of mode MODE and data type TYPE.
1040 (TYPE is null for libcalls where that information may not be available.) */
1043 function_arg_advance (cum, mode, type, named)
1044 CUMULATIVE_ARGS *cum; /* current arg information */
1045 enum machine_mode mode; /* current arg mode */
1046 tree type; /* type of the argument or 0 if lib support */
1047 int named; /* whether or not the argument was named */
1050 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1051 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1053 if (TARGET_DEBUG_ARG)
1055 "function_adv (sz=%d, wds=%2d, nregs=%d, mode=%s, named=%d)\n\n",
1056 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
1057 if (TARGET_SSE && mode == TImode)
1059 cum->sse_words += words;
1060 cum->sse_nregs -= 1;
1061 cum->sse_regno += 1;
1062 if (cum->sse_nregs <= 0)
1070 cum->words += words;
1071 cum->nregs -= words;
1072 cum->regno += words;
1074 if (cum->nregs <= 0)
1083 /* Define where to put the arguments to a function.
1084 Value is zero to push the argument on the stack,
1085 or a hard register in which to store the argument.
1087 MODE is the argument's machine mode.
1088 TYPE is the data type of the argument (as a tree).
1089 This is null for libcalls where that information may
1091 CUM is a variable of type CUMULATIVE_ARGS which gives info about
1092 the preceding args and about the function being called.
1093 NAMED is nonzero if this argument is a named parameter
1094 (otherwise it is an extra parameter matching an ellipsis). */
1097 function_arg (cum, mode, type, named)
1098 CUMULATIVE_ARGS *cum; /* current arg information */
1099 enum machine_mode mode; /* current arg mode */
1100 tree type; /* type of the argument or 0 if lib support */
1101 int named; /* != 0 for normal args, == 0 for ... args */
1105 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1106 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1108 if (mode == VOIDmode)
1113 /* For now, pass fp/complex values on the stack. */
1122 if (words <= cum->nregs)
1123 ret = gen_rtx_REG (mode, cum->regno);
1127 ret = gen_rtx_REG (mode, cum->sse_regno);
1131 if (TARGET_DEBUG_ARG)
1134 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d",
1135 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
1138 fprintf (stderr, ", reg=%%e%s", reg_names[ REGNO(ret) ]);
1140 fprintf (stderr, ", stack");
1142 fprintf (stderr, " )\n");
1149 /* Return nonzero if OP is general operand representable on x86_64. */
1152 x86_64_general_operand (op, mode)
1154 enum machine_mode mode;
1157 return general_operand (op, mode);
1158 if (nonimmediate_operand (op, mode))
1160 return x86_64_sign_extended_value (op);
1163 /* Return nonzero if OP is general operand representable on x86_64
1164 as eighter sign extended or zero extended constant. */
1167 x86_64_szext_general_operand (op, mode)
1169 enum machine_mode mode;
1172 return general_operand (op, mode);
1173 if (nonimmediate_operand (op, mode))
1175 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
1178 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
1181 x86_64_nonmemory_operand (op, mode)
1183 enum machine_mode mode;
1186 return nonmemory_operand (op, mode);
1187 if (register_operand (op, mode))
1189 return x86_64_sign_extended_value (op);
1192 /* Return nonzero if OP is nonmemory operand acceptable by movabs patterns. */
1195 x86_64_movabs_operand (op, mode)
1197 enum machine_mode mode;
1199 if (!TARGET_64BIT || !flag_pic)
1200 return nonmemory_operand (op, mode);
1201 if (register_operand (op, mode) || x86_64_sign_extended_value (op))
1203 if (CONSTANT_P (op) && !symbolic_reference_mentioned_p (op))
1208 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
1211 x86_64_szext_nonmemory_operand (op, mode)
1213 enum machine_mode mode;
1216 return nonmemory_operand (op, mode);
1217 if (register_operand (op, mode))
1219 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
1222 /* Return nonzero if OP is immediate operand representable on x86_64. */
1225 x86_64_immediate_operand (op, mode)
1227 enum machine_mode mode;
1230 return immediate_operand (op, mode);
1231 return x86_64_sign_extended_value (op);
1234 /* Return nonzero if OP is immediate operand representable on x86_64. */
1237 x86_64_zext_immediate_operand (op, mode)
1239 enum machine_mode mode ATTRIBUTE_UNUSED;
1241 return x86_64_zero_extended_value (op);
1244 /* Return nonzero if OP is (const_int 1), else return zero. */
1247 const_int_1_operand (op, mode)
1249 enum machine_mode mode ATTRIBUTE_UNUSED;
1251 return (GET_CODE (op) == CONST_INT && INTVAL (op) == 1);
1254 /* Returns 1 if OP is either a symbol reference or a sum of a symbol
1255 reference and a constant. */
1258 symbolic_operand (op, mode)
1260 enum machine_mode mode ATTRIBUTE_UNUSED;
1262 switch (GET_CODE (op))
1270 if (GET_CODE (op) == SYMBOL_REF
1271 || GET_CODE (op) == LABEL_REF
1272 || (GET_CODE (op) == UNSPEC
1273 && XINT (op, 1) >= 6
1274 && XINT (op, 1) <= 7))
1276 if (GET_CODE (op) != PLUS
1277 || GET_CODE (XEXP (op, 1)) != CONST_INT)
1281 if (GET_CODE (op) == SYMBOL_REF
1282 || GET_CODE (op) == LABEL_REF)
1284 /* Only @GOTOFF gets offsets. */
1285 if (GET_CODE (op) != UNSPEC
1286 || XINT (op, 1) != 7)
1289 op = XVECEXP (op, 0, 0);
1290 if (GET_CODE (op) == SYMBOL_REF
1291 || GET_CODE (op) == LABEL_REF)
1300 /* Return true if the operand contains a @GOT or @GOTOFF reference. */
1303 pic_symbolic_operand (op, mode)
1305 enum machine_mode mode ATTRIBUTE_UNUSED;
1307 if (GET_CODE (op) == CONST)
1310 if (GET_CODE (op) == UNSPEC)
1312 if (GET_CODE (op) != PLUS
1313 || GET_CODE (XEXP (op, 1)) != CONST_INT)
1316 if (GET_CODE (op) == UNSPEC)
1322 /* Test for a valid operand for a call instruction. Don't allow the
1323 arg pointer register or virtual regs since they may decay into
1324 reg + const, which the patterns can't handle. */
1327 call_insn_operand (op, mode)
1329 enum machine_mode mode ATTRIBUTE_UNUSED;
1331 /* Disallow indirect through a virtual register. This leads to
1332 compiler aborts when trying to eliminate them. */
1333 if (GET_CODE (op) == REG
1334 && (op == arg_pointer_rtx
1335 || op == frame_pointer_rtx
1336 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
1337 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
1340 /* Disallow `call 1234'. Due to varying assembler lameness this
1341 gets either rejected or translated to `call .+1234'. */
1342 if (GET_CODE (op) == CONST_INT)
1345 /* Explicitly allow SYMBOL_REF even if pic. */
1346 if (GET_CODE (op) == SYMBOL_REF)
1349 /* Half-pic doesn't allow anything but registers and constants.
1350 We've just taken care of the later. */
1352 return register_operand (op, Pmode);
1354 /* Otherwise we can allow any general_operand in the address. */
1355 return general_operand (op, Pmode);
1359 constant_call_address_operand (op, mode)
1361 enum machine_mode mode ATTRIBUTE_UNUSED;
1363 if (GET_CODE (op) == CONST
1364 && GET_CODE (XEXP (op, 0)) == PLUS
1365 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
1366 op = XEXP (XEXP (op, 0), 0);
1367 return GET_CODE (op) == SYMBOL_REF;
1370 /* Match exactly zero and one. */
1373 const0_operand (op, mode)
1375 enum machine_mode mode;
1377 return op == CONST0_RTX (mode);
1381 const1_operand (op, mode)
1383 enum machine_mode mode ATTRIBUTE_UNUSED;
1385 return op == const1_rtx;
1388 /* Match 2, 4, or 8. Used for leal multiplicands. */
1391 const248_operand (op, mode)
1393 enum machine_mode mode ATTRIBUTE_UNUSED;
1395 return (GET_CODE (op) == CONST_INT
1396 && (INTVAL (op) == 2 || INTVAL (op) == 4 || INTVAL (op) == 8));
1399 /* True if this is a constant appropriate for an increment or decremenmt. */
1402 incdec_operand (op, mode)
1404 enum machine_mode mode;
1406 /* On Pentium4, the inc and dec operations causes extra dependancy on flag
1407 registers, since carry flag is not set. */
1408 if (TARGET_PENTIUM4 && !optimize_size)
1410 if (op == const1_rtx || op == constm1_rtx)
1412 if (GET_CODE (op) != CONST_INT)
1414 if (mode == SImode && INTVAL (op) == (HOST_WIDE_INT) 0xffffffff)
1416 if (mode == HImode && INTVAL (op) == (HOST_WIDE_INT) 0xffff)
1418 if (mode == QImode && INTVAL (op) == (HOST_WIDE_INT) 0xff)
1423 /* Return nonzero if OP is acceptable as operand of DImode shift
1427 shiftdi_operand (op, mode)
1429 enum machine_mode mode ATTRIBUTE_UNUSED;
1432 return nonimmediate_operand (op, mode);
1434 return register_operand (op, mode);
1437 /* Return false if this is the stack pointer, or any other fake
1438 register eliminable to the stack pointer. Otherwise, this is
1441 This is used to prevent esp from being used as an index reg.
1442 Which would only happen in pathological cases. */
1445 reg_no_sp_operand (op, mode)
1447 enum machine_mode mode;
1450 if (GET_CODE (t) == SUBREG)
1452 if (t == stack_pointer_rtx || t == arg_pointer_rtx || t == frame_pointer_rtx)
1455 return register_operand (op, mode);
1459 mmx_reg_operand (op, mode)
1461 enum machine_mode mode ATTRIBUTE_UNUSED;
1463 return MMX_REG_P (op);
1466 /* Return false if this is any eliminable register. Otherwise
1470 general_no_elim_operand (op, mode)
1472 enum machine_mode mode;
1475 if (GET_CODE (t) == SUBREG)
1477 if (t == arg_pointer_rtx || t == frame_pointer_rtx
1478 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
1479 || t == virtual_stack_dynamic_rtx)
1482 && REGNO (t) >= FIRST_VIRTUAL_REGISTER
1483 && REGNO (t) <= LAST_VIRTUAL_REGISTER)
1486 return general_operand (op, mode);
1489 /* Return false if this is any eliminable register. Otherwise
1490 register_operand or const_int. */
1493 nonmemory_no_elim_operand (op, mode)
1495 enum machine_mode mode;
1498 if (GET_CODE (t) == SUBREG)
1500 if (t == arg_pointer_rtx || t == frame_pointer_rtx
1501 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
1502 || t == virtual_stack_dynamic_rtx)
1505 return GET_CODE (op) == CONST_INT || register_operand (op, mode);
1508 /* Return true if op is a Q_REGS class register. */
1511 q_regs_operand (op, mode)
1513 enum machine_mode mode;
1515 if (mode != VOIDmode && GET_MODE (op) != mode)
1517 if (GET_CODE (op) == SUBREG)
1518 op = SUBREG_REG (op);
1519 return QI_REG_P (op);
1522 /* Return true if op is a NON_Q_REGS class register. */
1525 non_q_regs_operand (op, mode)
1527 enum machine_mode mode;
1529 if (mode != VOIDmode && GET_MODE (op) != mode)
1531 if (GET_CODE (op) == SUBREG)
1532 op = SUBREG_REG (op);
1533 return NON_QI_REG_P (op);
1536 /* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS
1539 sse_comparison_operator (op, mode)
1541 enum machine_mode mode ATTRIBUTE_UNUSED;
1543 enum rtx_code code = GET_CODE (op);
1546 /* Operations supported directly. */
1556 /* These are equivalent to ones above in non-IEEE comparisons. */
1563 return !TARGET_IEEE_FP;
1568 /* Return 1 if OP is a valid comparison operator in valid mode. */
1570 ix86_comparison_operator (op, mode)
1572 enum machine_mode mode;
1574 enum machine_mode inmode;
1575 enum rtx_code code = GET_CODE (op);
1576 if (mode != VOIDmode && GET_MODE (op) != mode)
1578 if (GET_RTX_CLASS (code) != '<')
1580 inmode = GET_MODE (XEXP (op, 0));
1582 if (inmode == CCFPmode || inmode == CCFPUmode)
1584 enum rtx_code second_code, bypass_code;
1585 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
1586 return (bypass_code == NIL && second_code == NIL);
1593 if (inmode == CCmode || inmode == CCGCmode
1594 || inmode == CCGOCmode || inmode == CCNOmode)
1597 case LTU: case GTU: case LEU: case ORDERED: case UNORDERED: case GEU:
1598 if (inmode == CCmode)
1602 if (inmode == CCmode || inmode == CCGCmode || inmode == CCNOmode)
1610 /* Return 1 if OP is a comparison operator that can be issued by fcmov. */
1613 fcmov_comparison_operator (op, mode)
1615 enum machine_mode mode;
1617 enum machine_mode inmode;
1618 enum rtx_code code = GET_CODE (op);
1619 if (mode != VOIDmode && GET_MODE (op) != mode)
1621 if (GET_RTX_CLASS (code) != '<')
1623 inmode = GET_MODE (XEXP (op, 0));
1624 if (inmode == CCFPmode || inmode == CCFPUmode)
1626 enum rtx_code second_code, bypass_code;
1627 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
1628 if (bypass_code != NIL || second_code != NIL)
1630 code = ix86_fp_compare_code_to_integer (code);
1632 /* i387 supports just limited amount of conditional codes. */
1635 case LTU: case GTU: case LEU: case GEU:
1636 if (inmode == CCmode || inmode == CCFPmode || inmode == CCFPUmode)
1639 case ORDERED: case UNORDERED:
1647 /* Return 1 if OP is a binary operator that can be promoted to wider mode. */
1650 promotable_binary_operator (op, mode)
1652 enum machine_mode mode ATTRIBUTE_UNUSED;
1654 switch (GET_CODE (op))
1657 /* Modern CPUs have same latency for HImode and SImode multiply,
1658 but 386 and 486 do HImode multiply faster. */
1659 return ix86_cpu > PROCESSOR_I486;
1671 /* Nearly general operand, but accept any const_double, since we wish
1672 to be able to drop them into memory rather than have them get pulled
1676 cmp_fp_expander_operand (op, mode)
1678 enum machine_mode mode;
1680 if (mode != VOIDmode && mode != GET_MODE (op))
1682 if (GET_CODE (op) == CONST_DOUBLE)
1684 return general_operand (op, mode);
1687 /* Match an SI or HImode register for a zero_extract. */
1690 ext_register_operand (op, mode)
1692 enum machine_mode mode ATTRIBUTE_UNUSED;
1694 if ((!TARGET_64BIT || GET_MODE (op) != DImode)
1695 && GET_MODE (op) != SImode && GET_MODE (op) != HImode)
1697 return register_operand (op, VOIDmode);
1700 /* Return 1 if this is a valid binary floating-point operation.
1701 OP is the expression matched, and MODE is its mode. */
1704 binary_fp_operator (op, mode)
1706 enum machine_mode mode;
1708 if (mode != VOIDmode && mode != GET_MODE (op))
1711 switch (GET_CODE (op))
1717 return GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT;
1725 mult_operator(op, mode)
1727 enum machine_mode mode ATTRIBUTE_UNUSED;
1729 return GET_CODE (op) == MULT;
1733 div_operator(op, mode)
1735 enum machine_mode mode ATTRIBUTE_UNUSED;
1737 return GET_CODE (op) == DIV;
1741 arith_or_logical_operator (op, mode)
1743 enum machine_mode mode;
1745 return ((mode == VOIDmode || GET_MODE (op) == mode)
1746 && (GET_RTX_CLASS (GET_CODE (op)) == 'c'
1747 || GET_RTX_CLASS (GET_CODE (op)) == '2'));
1750 /* Returns 1 if OP is memory operand with a displacement. */
1753 memory_displacement_operand (op, mode)
1755 enum machine_mode mode;
1757 struct ix86_address parts;
1759 if (! memory_operand (op, mode))
1762 if (! ix86_decompose_address (XEXP (op, 0), &parts))
1765 return parts.disp != NULL_RTX;
1768 /* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
1769 re-recognize the operand to avoid a copy_to_mode_reg that will fail.
1771 ??? It seems likely that this will only work because cmpsi is an
1772 expander, and no actual insns use this. */
1775 cmpsi_operand (op, mode)
1777 enum machine_mode mode;
1779 if (general_operand (op, mode))
1782 if (GET_CODE (op) == AND
1783 && GET_MODE (op) == SImode
1784 && GET_CODE (XEXP (op, 0)) == ZERO_EXTRACT
1785 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
1786 && GET_CODE (XEXP (XEXP (op, 0), 2)) == CONST_INT
1787 && INTVAL (XEXP (XEXP (op, 0), 1)) == 8
1788 && INTVAL (XEXP (XEXP (op, 0), 2)) == 8
1789 && GET_CODE (XEXP (op, 1)) == CONST_INT)
1795 /* Returns 1 if OP is memory operand that can not be represented by the
1799 long_memory_operand (op, mode)
1801 enum machine_mode mode;
1803 if (! memory_operand (op, mode))
1806 return memory_address_length (op) != 0;
1809 /* Return nonzero if the rtx is known aligned. */
1812 aligned_operand (op, mode)
1814 enum machine_mode mode;
1816 struct ix86_address parts;
1818 if (!general_operand (op, mode))
1821 /* Registers and immediate operands are always "aligned". */
1822 if (GET_CODE (op) != MEM)
1825 /* Don't even try to do any aligned optimizations with volatiles. */
1826 if (MEM_VOLATILE_P (op))
1831 /* Pushes and pops are only valid on the stack pointer. */
1832 if (GET_CODE (op) == PRE_DEC
1833 || GET_CODE (op) == POST_INC)
1836 /* Decode the address. */
1837 if (! ix86_decompose_address (op, &parts))
1840 /* Look for some component that isn't known to be aligned. */
1844 && REGNO_POINTER_ALIGN (REGNO (parts.index)) < 32)
1849 if (REGNO_POINTER_ALIGN (REGNO (parts.base)) < 32)
1854 if (GET_CODE (parts.disp) != CONST_INT
1855 || (INTVAL (parts.disp) & 3) != 0)
1859 /* Didn't find one -- this must be an aligned address. */
1863 /* Return true if the constant is something that can be loaded with
1864 a special instruction. Only handle 0.0 and 1.0; others are less
1868 standard_80387_constant_p (x)
1871 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
1873 /* Note that on the 80387, other constants, such as pi, that we should support
1874 too. On some machines, these are much slower to load as standard constant,
1875 than to load from doubles in memory. */
1876 if (x == CONST0_RTX (GET_MODE (x)))
1878 if (x == CONST1_RTX (GET_MODE (x)))
1883 /* Return 1 if X is FP constant we can load to SSE register w/o using memory.
1886 standard_sse_constant_p (x)
1889 if (GET_CODE (x) != CONST_DOUBLE)
1891 return (x == CONST0_RTX (GET_MODE (x)));
1894 /* Returns 1 if OP contains a symbol reference */
1897 symbolic_reference_mentioned_p (op)
1900 register const char *fmt;
1903 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
1906 fmt = GET_RTX_FORMAT (GET_CODE (op));
1907 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
1913 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
1914 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
1918 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
1925 /* Return 1 if it is appropriate to emit `ret' instructions in the
1926 body of a function. Do this only if the epilogue is simple, needing a
1927 couple of insns. Prior to reloading, we can't tell how many registers
1928 must be saved, so return 0 then. Return 0 if there is no frame
1929 marker to de-allocate.
1931 If NON_SAVING_SETJMP is defined and true, then it is not possible
1932 for the epilogue to be simple, so return 0. This is a special case
1933 since NON_SAVING_SETJMP will not cause regs_ever_live to change
1934 until final, but jump_optimize may need to know sooner if a
1938 ix86_can_use_return_insn_p ()
1940 struct ix86_frame frame;
1942 #ifdef NON_SAVING_SETJMP
1943 if (NON_SAVING_SETJMP && current_function_calls_setjmp)
1946 #ifdef FUNCTION_BLOCK_PROFILER_EXIT
1947 if (profile_block_flag == 2)
1951 if (! reload_completed || frame_pointer_needed)
1954 /* Don't allow more than 32 pop, since that's all we can do
1955 with one instruction. */
1956 if (current_function_pops_args
1957 && current_function_args_size >= 32768)
1960 ix86_compute_frame_layout (&frame);
1961 return frame.to_allocate == 0 && frame.nregs == 0;
1964 /* Return 1 if VALUE can be stored in the sign extended immediate field. */
1966 x86_64_sign_extended_value (value)
1969 switch (GET_CODE (value))
1971 /* CONST_DOUBLES never match, since HOST_BITS_PER_WIDE_INT is known
1972 to be at least 32 and this all acceptable constants are
1973 represented as CONST_INT. */
1975 if (HOST_BITS_PER_WIDE_INT == 32)
1979 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (value), DImode);
1980 return trunc_int_for_mode (val, SImode) == val;
1984 /* For certain code models, the symbolic references are known to fit. */
1986 return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_KERNEL;
1988 /* For certain code models, the code is near as well. */
1990 return ix86_cmodel != CM_LARGE && ix86_cmodel != CM_SMALL_PIC;
1992 /* We also may accept the offsetted memory references in certain special
1995 if (GET_CODE (XEXP (value, 0)) == UNSPEC
1996 && XVECLEN (XEXP (value, 0), 0) == 1
1997 && XINT (XEXP (value, 0), 1) == 15)
1999 else if (GET_CODE (XEXP (value, 0)) == PLUS)
2001 rtx op1 = XEXP (XEXP (value, 0), 0);
2002 rtx op2 = XEXP (XEXP (value, 0), 1);
2003 HOST_WIDE_INT offset;
2005 if (ix86_cmodel == CM_LARGE)
2007 if (GET_CODE (op2) != CONST_INT)
2009 offset = trunc_int_for_mode (INTVAL (op2), DImode);
2010 switch (GET_CODE (op1))
2013 /* For CM_SMALL assume that latest object is 1MB before
2014 end of 31bits boundary. We may also accept pretty
2015 large negative constants knowing that all objects are
2016 in the positive half of address space. */
2017 if (ix86_cmodel == CM_SMALL
2018 && offset < 1024*1024*1024
2019 && trunc_int_for_mode (offset, SImode) == offset)
2021 /* For CM_KERNEL we know that all object resist in the
2022 negative half of 32bits address space. We may not
2023 accept negative offsets, since they may be just off
2024 and we may accept pretty large possitive ones. */
2025 if (ix86_cmodel == CM_KERNEL
2027 && trunc_int_for_mode (offset, SImode) == offset)
2031 /* These conditions are similar to SYMBOL_REF ones, just the
2032 constraints for code models differ. */
2033 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
2034 && offset < 1024*1024*1024
2035 && trunc_int_for_mode (offset, SImode) == offset)
2037 if (ix86_cmodel == CM_KERNEL
2039 && trunc_int_for_mode (offset, SImode) == offset)
2052 /* Return 1 if VALUE can be stored in the zero extended immediate field. */
2054 x86_64_zero_extended_value (value)
2057 switch (GET_CODE (value))
2060 if (HOST_BITS_PER_WIDE_INT == 32)
2061 return (GET_MODE (value) == VOIDmode
2062 && !CONST_DOUBLE_HIGH (value));
2066 if (HOST_BITS_PER_WIDE_INT == 32)
2067 return INTVAL (value) >= 0;
2069 return !(INTVAL (value) & ~(HOST_WIDE_INT)0xffffffff);
2072 /* For certain code models, the symbolic references are known to fit. */
2074 return ix86_cmodel == CM_SMALL;
2076 /* For certain code models, the code is near as well. */
2078 return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM;
2080 /* We also may accept the offsetted memory references in certain special
2083 if (GET_CODE (XEXP (value, 0)) == PLUS)
2085 rtx op1 = XEXP (XEXP (value, 0), 0);
2086 rtx op2 = XEXP (XEXP (value, 0), 1);
2088 if (ix86_cmodel == CM_LARGE)
2090 switch (GET_CODE (op1))
2094 /* For small code model we may accept pretty large possitive
2095 offsets, since one bit is available for free. Negative
2096 offsets are limited by the size of NULL pointer area
2097 specified by the ABI. */
2098 if (ix86_cmodel == CM_SMALL
2099 && GET_CODE (op2) == CONST_INT
2100 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
2101 && (trunc_int_for_mode (INTVAL (op2), SImode)
2104 /* ??? For the kernel, we may accept adjustment of
2105 -0x10000000, since we know that it will just convert
2106 negative address space to possitive, but perhaps this
2107 is not worthwhile. */
2110 /* These conditions are similar to SYMBOL_REF ones, just the
2111 constraints for code models differ. */
2112 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
2113 && GET_CODE (op2) == CONST_INT
2114 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
2115 && (trunc_int_for_mode (INTVAL (op2), SImode)
2129 /* Value should be nonzero if functions must have frame pointers.
2130 Zero means the frame pointer need not be set up (and parms may
2131 be accessed via the stack pointer) in functions that seem suitable. */
2134 ix86_frame_pointer_required ()
2136 /* If we accessed previous frames, then the generated code expects
2137 to be able to access the saved ebp value in our frame. */
2138 if (cfun->machine->accesses_prev_frame)
2141 /* Several x86 os'es need a frame pointer for other reasons,
2142 usually pertaining to setjmp. */
2143 if (SUBTARGET_FRAME_POINTER_REQUIRED)
2146 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
2147 the frame pointer by default. Turn it back on now if we've not
2148 got a leaf function. */
2149 if (TARGET_OMIT_LEAF_FRAME_POINTER && ! leaf_function_p ())
2155 /* Record that the current function accesses previous call frames. */
2158 ix86_setup_frame_addresses ()
2160 cfun->machine->accesses_prev_frame = 1;
2163 static char pic_label_name[32];
2165 /* This function generates code for -fpic that loads %ebx with
2166 the return address of the caller and then returns. */
2169 ix86_asm_file_end (file)
2174 if (! TARGET_DEEP_BRANCH_PREDICTION || pic_label_name[0] == 0)
2177 /* ??? Binutils 2.10 and earlier has a linkonce elimination bug related
2178 to updating relocations to a section being discarded such that this
2179 doesn't work. Ought to detect this at configure time. */
2180 #if 0 && defined (ASM_OUTPUT_SECTION_NAME)
2181 /* The trick here is to create a linkonce section containing the
2182 pic label thunk, but to refer to it with an internal label.
2183 Because the label is internal, we don't have inter-dso name
2184 binding issues on hosts that don't support ".hidden".
2186 In order to use these macros, however, we must create a fake
2189 tree decl = build_decl (FUNCTION_DECL,
2190 get_identifier ("i686.get_pc_thunk"),
2192 DECL_ONE_ONLY (decl) = 1;
2193 UNIQUE_SECTION (decl, 0);
2194 named_section (decl, NULL, 0);
2200 /* This used to call ASM_DECLARE_FUNCTION_NAME() but since it's an
2201 internal (non-global) label that's being emitted, it didn't make
2202 sense to have .type information for local labels. This caused
2203 the SCO OpenServer 5.0.4 ELF assembler grief (why are you giving
2204 me debug info for a label that you're declaring non-global?) this
2205 was changed to call ASM_OUTPUT_LABEL() instead. */
2207 ASM_OUTPUT_LABEL (file, pic_label_name);
2209 xops[0] = pic_offset_table_rtx;
2210 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
2211 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
2212 output_asm_insn ("ret", xops);
2216 load_pic_register ()
2223 gotsym = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
2225 if (TARGET_DEEP_BRANCH_PREDICTION)
2227 if (! pic_label_name[0])
2228 ASM_GENERATE_INTERNAL_LABEL (pic_label_name, "LPR", 0);
2229 pclab = gen_rtx_MEM (QImode, gen_rtx_SYMBOL_REF (Pmode, pic_label_name));
2233 pclab = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
2236 emit_insn (gen_prologue_get_pc (pic_offset_table_rtx, pclab));
2238 if (! TARGET_DEEP_BRANCH_PREDICTION)
2239 emit_insn (gen_popsi1 (pic_offset_table_rtx));
2241 emit_insn (gen_prologue_set_got (pic_offset_table_rtx, gotsym, pclab));
2244 /* Generate an "push" pattern for input ARG. */
2250 return gen_rtx_SET (VOIDmode,
2252 gen_rtx_PRE_DEC (Pmode,
2253 stack_pointer_rtx)),
2257 /* Return 1 if we need to save REGNO. */
2259 ix86_save_reg (regno, maybe_eh_return)
2261 bool maybe_eh_return;
2265 && regno == PIC_OFFSET_TABLE_REGNUM
2266 && (current_function_uses_pic_offset_table
2267 || current_function_uses_const_pool
2268 || current_function_calls_eh_return))
2271 if (current_function_calls_eh_return && maybe_eh_return)
2276 unsigned test = EH_RETURN_DATA_REGNO(i);
2277 if (test == INVALID_REGNUM)
2279 if (test == (unsigned) regno)
2284 return (regs_ever_live[regno]
2285 && !call_used_regs[regno]
2286 && !fixed_regs[regno]
2287 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
2290 /* Return number of registers to be saved on the stack. */
2298 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
2299 if (ix86_save_reg (regno, true))
2304 /* Return the offset between two registers, one to be eliminated, and the other
2305 its replacement, at the start of a routine. */
2308 ix86_initial_elimination_offset (from, to)
2312 struct ix86_frame frame;
2313 ix86_compute_frame_layout (&frame);
2315 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
2316 return frame.hard_frame_pointer_offset;
2317 else if (from == FRAME_POINTER_REGNUM
2318 && to == HARD_FRAME_POINTER_REGNUM)
2319 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
2322 if (to != STACK_POINTER_REGNUM)
2324 else if (from == ARG_POINTER_REGNUM)
2325 return frame.stack_pointer_offset;
2326 else if (from != FRAME_POINTER_REGNUM)
2329 return frame.stack_pointer_offset - frame.frame_pointer_offset;
2333 /* Fill structure ix86_frame about frame of currently computed function. */
2336 ix86_compute_frame_layout (frame)
2337 struct ix86_frame *frame;
2339 HOST_WIDE_INT total_size;
2340 int stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
2342 int preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
2343 HOST_WIDE_INT size = get_frame_size ();
2345 frame->nregs = ix86_nsaved_regs ();
2348 /* Skip return value and save base pointer. */
2349 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
2351 frame->hard_frame_pointer_offset = offset;
2353 /* Do some sanity checking of stack_alignment_needed and
2354 preferred_alignment, since i386 port is the only using those features
2355 that may break easilly. */
2357 if (size && !stack_alignment_needed)
2359 if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
2361 if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
2363 if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
2366 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
2367 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
2369 /* Register save area */
2370 offset += frame->nregs * UNITS_PER_WORD;
2373 if (ix86_save_varrargs_registers)
2375 offset += X86_64_VARARGS_SIZE;
2376 frame->va_arg_size = X86_64_VARARGS_SIZE;
2379 frame->va_arg_size = 0;
2381 /* Align start of frame for local function. */
2382 frame->padding1 = ((offset + stack_alignment_needed - 1)
2383 & -stack_alignment_needed) - offset;
2385 offset += frame->padding1;
2387 /* Frame pointer points here. */
2388 frame->frame_pointer_offset = offset;
2392 /* Add outgoing arguments area. */
2393 if (ACCUMULATE_OUTGOING_ARGS)
2395 offset += current_function_outgoing_args_size;
2396 frame->outgoing_arguments_size = current_function_outgoing_args_size;
2399 frame->outgoing_arguments_size = 0;
2401 /* Align stack boundary. */
2402 frame->padding2 = ((offset + preferred_alignment - 1)
2403 & -preferred_alignment) - offset;
2405 offset += frame->padding2;
2407 /* We've reached end of stack frame. */
2408 frame->stack_pointer_offset = offset;
2410 /* Size prologue needs to allocate. */
2411 frame->to_allocate =
2412 (size + frame->padding1 + frame->padding2
2413 + frame->outgoing_arguments_size + frame->va_arg_size);
2415 if (TARGET_64BIT && TARGET_RED_ZONE && current_function_sp_is_unchanging
2416 && current_function_is_leaf)
2418 frame->red_zone_size = frame->to_allocate;
2419 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
2420 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
2423 frame->red_zone_size = 0;
2424 frame->to_allocate -= frame->red_zone_size;
2425 frame->stack_pointer_offset -= frame->red_zone_size;
2427 fprintf (stderr, "nregs: %i\n", frame->nregs);
2428 fprintf (stderr, "size: %i\n", size);
2429 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
2430 fprintf (stderr, "padding1: %i\n", frame->padding1);
2431 fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
2432 fprintf (stderr, "padding2: %i\n", frame->padding2);
2433 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
2434 fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
2435 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
2436 fprintf (stderr, "hard_frame_pointer_offset: %i\n",
2437 frame->hard_frame_pointer_offset);
2438 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
2442 /* Emit code to save registers in the prologue. */
2445 ix86_emit_save_regs ()
2450 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
2451 if (ix86_save_reg (regno, true))
2453 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
2454 RTX_FRAME_RELATED_P (insn) = 1;
2458 /* Expand the prologue into a bunch of separate insns. */
2461 ix86_expand_prologue ()
2464 int pic_reg_used = (flag_pic && (current_function_uses_pic_offset_table
2465 || current_function_uses_const_pool)
2467 struct ix86_frame frame;
2469 ix86_compute_frame_layout (&frame);
2471 /* Note: AT&T enter does NOT have reversed args. Enter is probably
2472 slower on all targets. Also sdb doesn't like it. */
2474 if (frame_pointer_needed)
2476 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
2477 RTX_FRAME_RELATED_P (insn) = 1;
2479 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
2480 RTX_FRAME_RELATED_P (insn) = 1;
2483 ix86_emit_save_regs ();
2485 if (frame.to_allocate == 0)
2487 else if (! TARGET_STACK_PROBE || frame.to_allocate < CHECK_STACK_LIMIT)
2489 if (frame_pointer_needed)
2490 insn = emit_insn (gen_pro_epilogue_adjust_stack
2491 (stack_pointer_rtx, stack_pointer_rtx,
2492 GEN_INT (-frame.to_allocate), hard_frame_pointer_rtx));
2495 insn = emit_insn (gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx,
2496 GEN_INT (-frame.to_allocate)));
2498 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
2499 GEN_INT (-frame.to_allocate)));
2500 RTX_FRAME_RELATED_P (insn) = 1;
2504 /* ??? Is this only valid for Win32? */
2511 arg0 = gen_rtx_REG (SImode, 0);
2512 emit_move_insn (arg0, GEN_INT (frame.to_allocate));
2514 sym = gen_rtx_MEM (FUNCTION_MODE,
2515 gen_rtx_SYMBOL_REF (Pmode, "_alloca"));
2516 insn = emit_call_insn (gen_call (sym, const0_rtx, constm1_rtx));
2518 CALL_INSN_FUNCTION_USAGE (insn)
2519 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_USE (VOIDmode, arg0),
2520 CALL_INSN_FUNCTION_USAGE (insn));
2523 #ifdef SUBTARGET_PROLOGUE
2528 load_pic_register ();
2530 /* If we are profiling, make sure no instructions are scheduled before
2531 the call to mcount. However, if -fpic, the above call will have
2533 if ((profile_flag || profile_block_flag) && ! pic_reg_used)
2534 emit_insn (gen_blockage ());
2537 /* Emit code to add TSIZE to esp value. Use POP instruction when
2541 ix86_emit_epilogue_esp_adjustment (tsize)
2544 /* If a frame pointer is present, we must be sure to tie the sp
2545 to the fp so that we don't mis-schedule. */
2546 if (frame_pointer_needed)
2547 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
2550 hard_frame_pointer_rtx));
2553 emit_insn (gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx,
2556 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
2560 /* Emit code to restore saved registers using MOV insns. First register
2561 is restored from POINTER + OFFSET. */
2563 ix86_emit_restore_regs_using_mov (pointer, offset, maybe_eh_return)
2566 bool maybe_eh_return;
2570 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
2571 if (ix86_save_reg (regno, maybe_eh_return))
2573 emit_move_insn (gen_rtx_REG (Pmode, regno),
2574 adj_offsettable_operand (gen_rtx_MEM (Pmode,
2577 offset += UNITS_PER_WORD;
2581 /* Restore function stack, frame, and registers. */
2584 ix86_expand_epilogue (style)
2588 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
2589 struct ix86_frame frame;
2590 HOST_WIDE_INT offset;
2592 ix86_compute_frame_layout (&frame);
2594 /* Calculate start of saved registers relative to ebp. Special care
2595 must be taken for the normal return case of a function using
2596 eh_return: the eax and edx registers are marked as saved, but not
2597 restored along this path. */
2598 offset = frame.nregs;
2599 if (current_function_calls_eh_return && style != 2)
2601 offset *= -UNITS_PER_WORD;
2603 #ifdef FUNCTION_BLOCK_PROFILER_EXIT
2604 if (profile_block_flag == 2)
2606 FUNCTION_BLOCK_PROFILER_EXIT;
2610 /* If we're only restoring one register and sp is not valid then
2611 using a move instruction to restore the register since it's
2612 less work than reloading sp and popping the register.
2614 The default code result in stack adjustment using add/lea instruction,
2615 while this code results in LEAVE instruction (or discrete equivalent),
2616 so it is profitable in some other cases as well. Especially when there
2617 are no registers to restore. We also use this code when TARGET_USE_LEAVE
2618 and there is exactly one register to pop. This heruistic may need some
2619 tuning in future. */
2620 if ((!sp_valid && frame.nregs <= 1)
2621 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
2622 || (frame_pointer_needed && TARGET_USE_LEAVE && !optimize_size
2623 && frame.nregs == 1)
2626 /* Restore registers. We can use ebp or esp to address the memory
2627 locations. If both are available, default to ebp, since offsets
2628 are known to be small. Only exception is esp pointing directly to the
2629 end of block of saved registers, where we may simplify addressing
2632 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
2633 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
2634 frame.to_allocate, style == 2);
2636 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
2637 offset, style == 2);
2639 /* eh_return epilogues need %ecx added to the stack pointer. */
2642 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
2644 if (frame_pointer_needed)
2646 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
2647 tmp = plus_constant (tmp, UNITS_PER_WORD);
2648 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
2650 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
2651 emit_move_insn (hard_frame_pointer_rtx, tmp);
2653 emit_insn (gen_pro_epilogue_adjust_stack
2654 (stack_pointer_rtx, sa, const0_rtx,
2655 hard_frame_pointer_rtx));
2659 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
2660 tmp = plus_constant (tmp, (frame.to_allocate
2661 + frame.nregs * UNITS_PER_WORD));
2662 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
2665 else if (!frame_pointer_needed)
2666 ix86_emit_epilogue_esp_adjustment (frame.to_allocate
2667 + frame.nregs * UNITS_PER_WORD);
2668 /* If not an i386, mov & pop is faster than "leave". */
2669 else if (TARGET_USE_LEAVE || optimize_size)
2670 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
2673 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
2674 hard_frame_pointer_rtx,
2676 hard_frame_pointer_rtx));
2678 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
2680 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
2685 /* First step is to deallocate the stack frame so that we can
2686 pop the registers. */
2689 if (!frame_pointer_needed)
2691 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
2692 hard_frame_pointer_rtx,
2694 hard_frame_pointer_rtx));
2696 else if (frame.to_allocate)
2697 ix86_emit_epilogue_esp_adjustment (frame.to_allocate);
2699 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
2700 if (ix86_save_reg (regno, false))
2703 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
2705 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
2707 if (frame_pointer_needed)
2710 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
2712 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
2716 /* Sibcall epilogues don't want a return instruction. */
2720 if (current_function_pops_args && current_function_args_size)
2722 rtx popc = GEN_INT (current_function_pops_args);
2724 /* i386 can only pop 64K bytes. If asked to pop more, pop
2725 return address, do explicit add, and jump indirectly to the
2728 if (current_function_pops_args >= 65536)
2730 rtx ecx = gen_rtx_REG (SImode, 2);
2732 /* There are is no "pascal" calling convention in 64bit ABI. */
2736 emit_insn (gen_popsi1 (ecx));
2737 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
2738 emit_jump_insn (gen_return_indirect_internal (ecx));
2741 emit_jump_insn (gen_return_pop_internal (popc));
2744 emit_jump_insn (gen_return_internal ());
2747 /* Extract the parts of an RTL expression that is a valid memory address
2748 for an instruction. Return false if the structure of the address is
2752 ix86_decompose_address (addr, out)
2754 struct ix86_address *out;
2756 rtx base = NULL_RTX;
2757 rtx index = NULL_RTX;
2758 rtx disp = NULL_RTX;
2759 HOST_WIDE_INT scale = 1;
2760 rtx scale_rtx = NULL_RTX;
2762 if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG)
2764 else if (GET_CODE (addr) == PLUS)
2766 rtx op0 = XEXP (addr, 0);
2767 rtx op1 = XEXP (addr, 1);
2768 enum rtx_code code0 = GET_CODE (op0);
2769 enum rtx_code code1 = GET_CODE (op1);
2771 if (code0 == REG || code0 == SUBREG)
2773 if (code1 == REG || code1 == SUBREG)
2774 index = op0, base = op1; /* index + base */
2776 base = op0, disp = op1; /* base + displacement */
2778 else if (code0 == MULT)
2780 index = XEXP (op0, 0);
2781 scale_rtx = XEXP (op0, 1);
2782 if (code1 == REG || code1 == SUBREG)
2783 base = op1; /* index*scale + base */
2785 disp = op1; /* index*scale + disp */
2787 else if (code0 == PLUS && GET_CODE (XEXP (op0, 0)) == MULT)
2789 index = XEXP (XEXP (op0, 0), 0); /* index*scale + base + disp */
2790 scale_rtx = XEXP (XEXP (op0, 0), 1);
2791 base = XEXP (op0, 1);
2794 else if (code0 == PLUS)
2796 index = XEXP (op0, 0); /* index + base + disp */
2797 base = XEXP (op0, 1);
2803 else if (GET_CODE (addr) == MULT)
2805 index = XEXP (addr, 0); /* index*scale */
2806 scale_rtx = XEXP (addr, 1);
2808 else if (GET_CODE (addr) == ASHIFT)
2812 /* We're called for lea too, which implements ashift on occasion. */
2813 index = XEXP (addr, 0);
2814 tmp = XEXP (addr, 1);
2815 if (GET_CODE (tmp) != CONST_INT)
2817 scale = INTVAL (tmp);
2818 if ((unsigned HOST_WIDE_INT) scale > 3)
2823 disp = addr; /* displacement */
2825 /* Extract the integral value of scale. */
2828 if (GET_CODE (scale_rtx) != CONST_INT)
2830 scale = INTVAL (scale_rtx);
2833 /* Allow arg pointer and stack pointer as index if there is not scaling */
2834 if (base && index && scale == 1
2835 && (index == arg_pointer_rtx || index == frame_pointer_rtx
2836 || index == stack_pointer_rtx))
2843 /* Special case: %ebp cannot be encoded as a base without a displacement. */
2844 if ((base == hard_frame_pointer_rtx
2845 || base == frame_pointer_rtx
2846 || base == arg_pointer_rtx) && !disp)
2849 /* Special case: on K6, [%esi] makes the instruction vector decoded.
2850 Avoid this by transforming to [%esi+0]. */
2851 if (ix86_cpu == PROCESSOR_K6 && !optimize_size
2852 && base && !index && !disp
2854 && REGNO_REG_CLASS (REGNO (base)) == SIREG)
2857 /* Special case: encode reg+reg instead of reg*2. */
2858 if (!base && index && scale && scale == 2)
2859 base = index, scale = 1;
2861 /* Special case: scaling cannot be encoded without base or displacement. */
2862 if (!base && !disp && index && scale != 1)
2873 /* Return cost of the memory address x.
2874 For i386, it is better to use a complex address than let gcc copy
2875 the address into a reg and make a new pseudo. But not if the address
2876 requires to two regs - that would mean more pseudos with longer
2879 ix86_address_cost (x)
2882 struct ix86_address parts;
2885 if (!ix86_decompose_address (x, &parts))
2888 /* More complex memory references are better. */
2889 if (parts.disp && parts.disp != const0_rtx)
2892 /* Attempt to minimize number of registers in the address. */
2894 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
2896 && (!REG_P (parts.index)
2897 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
2901 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
2903 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
2904 && parts.base != parts.index)
2907 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
2908 since it's predecode logic can't detect the length of instructions
2909 and it degenerates to vector decoded. Increase cost of such
2910 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
2911 to split such addresses or even refuse such addresses at all.
2913 Following addressing modes are affected:
2918 The first and last case may be avoidable by explicitly coding the zero in
2919 memory address, but I don't have AMD-K6 machine handy to check this
2923 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
2924 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
2925 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
2931 /* If X is a machine specific address (i.e. a symbol or label being
2932 referenced as a displacement from the GOT implemented using an
2933 UNSPEC), then return the base term. Otherwise return X. */
2936 ix86_find_base_term (x)
2941 if (GET_CODE (x) != PLUS
2942 || XEXP (x, 0) != pic_offset_table_rtx
2943 || GET_CODE (XEXP (x, 1)) != CONST)
2946 term = XEXP (XEXP (x, 1), 0);
2948 if (GET_CODE (term) == PLUS && GET_CODE (XEXP (term, 1)) == CONST_INT)
2949 term = XEXP (term, 0);
2951 if (GET_CODE (term) != UNSPEC
2952 || XVECLEN (term, 0) != 1
2953 || XINT (term, 1) != 7)
2956 term = XVECEXP (term, 0, 0);
2958 if (GET_CODE (term) != SYMBOL_REF
2959 && GET_CODE (term) != LABEL_REF)
2965 /* Determine if a given CONST RTX is a valid memory displacement
2969 legitimate_pic_address_disp_p (disp)
2972 if (GET_CODE (disp) != CONST)
2974 disp = XEXP (disp, 0);
2976 if (GET_CODE (disp) == PLUS)
2978 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
2980 disp = XEXP (disp, 0);
2983 if (GET_CODE (disp) != UNSPEC
2984 || XVECLEN (disp, 0) != 1)
2987 /* Must be @GOT or @GOTOFF. */
2988 if (XINT (disp, 1) != 6
2989 && XINT (disp, 1) != 7)
2992 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
2993 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
2999 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
3000 memory address for an instruction. The MODE argument is the machine mode
3001 for the MEM expression that wants to use this address.
3003 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
3004 convert common non-canonical forms to canonical form so that they will
3008 legitimate_address_p (mode, addr, strict)
3009 enum machine_mode mode;
3013 struct ix86_address parts;
3014 rtx base, index, disp;
3015 HOST_WIDE_INT scale;
3016 const char *reason = NULL;
3017 rtx reason_rtx = NULL_RTX;
3019 if (TARGET_DEBUG_ADDR)
3022 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
3023 GET_MODE_NAME (mode), strict);
3027 if (! ix86_decompose_address (addr, &parts))
3029 reason = "decomposition failed";
3034 index = parts.index;
3036 scale = parts.scale;
3038 /* Validate base register.
3040 Don't allow SUBREG's here, it can lead to spill failures when the base
3041 is one word out of a two word structure, which is represented internally
3048 if (GET_CODE (base) != REG)
3050 reason = "base is not a register";
3054 if (GET_MODE (base) != Pmode)
3056 reason = "base is not in Pmode";
3060 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (base))
3061 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (base)))
3063 reason = "base is not valid";
3068 /* Validate index register.
3070 Don't allow SUBREG's here, it can lead to spill failures when the index
3071 is one word out of a two word structure, which is represented internally
3078 if (GET_CODE (index) != REG)
3080 reason = "index is not a register";
3084 if (GET_MODE (index) != Pmode)
3086 reason = "index is not in Pmode";
3090 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (index))
3091 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (index)))
3093 reason = "index is not valid";
3098 /* Validate scale factor. */
3101 reason_rtx = GEN_INT (scale);
3104 reason = "scale without index";
3108 if (scale != 2 && scale != 4 && scale != 8)
3110 reason = "scale is not a valid multiplier";
3115 /* Validate displacement. */
3120 if (!CONSTANT_ADDRESS_P (disp))
3122 reason = "displacement is not constant";
3128 if (!x86_64_sign_extended_value (disp))
3130 reason = "displacement is out of range";
3136 if (GET_CODE (disp) == CONST_DOUBLE)
3138 reason = "displacement is a const_double";
3143 if (flag_pic && SYMBOLIC_CONST (disp))
3145 if (TARGET_64BIT && (index || base))
3147 reason = "non-constant pic memory reference";
3150 if (! legitimate_pic_address_disp_p (disp))
3152 reason = "displacement is an invalid pic construct";
3156 /* This code used to verify that a symbolic pic displacement
3157 includes the pic_offset_table_rtx register.
3159 While this is good idea, unfortunately these constructs may
3160 be created by "adds using lea" optimization for incorrect
3169 This code is nonsensical, but results in addressing
3170 GOT table with pic_offset_table_rtx base. We can't
3171 just refuse it easilly, since it gets matched by
3172 "addsi3" pattern, that later gets split to lea in the
3173 case output register differs from input. While this
3174 can be handled by separate addsi pattern for this case
3175 that never results in lea, this seems to be easier and
3176 correct fix for crash to disable this test. */
3178 else if (HALF_PIC_P ())
3180 if (! HALF_PIC_ADDRESS_P (disp)
3181 || (base != NULL_RTX || index != NULL_RTX))
3183 reason = "displacement is an invalid half-pic reference";
3189 /* Everything looks valid. */
3190 if (TARGET_DEBUG_ADDR)
3191 fprintf (stderr, "Success.\n");
3195 if (TARGET_DEBUG_ADDR)
3197 fprintf (stderr, "Error: %s\n", reason);
3198 debug_rtx (reason_rtx);
3203 /* Return an unique alias set for the GOT. */
3205 static HOST_WIDE_INT
3206 ix86_GOT_alias_set ()
3208 static HOST_WIDE_INT set = -1;
3210 set = new_alias_set ();
3214 /* Return a legitimate reference for ORIG (an address) using the
3215 register REG. If REG is 0, a new pseudo is generated.
3217 There are two types of references that must be handled:
3219 1. Global data references must load the address from the GOT, via
3220 the PIC reg. An insn is emitted to do this load, and the reg is
3223 2. Static data references, constant pool addresses, and code labels
3224 compute the address as an offset from the GOT, whose base is in
3225 the PIC reg. Static data objects have SYMBOL_REF_FLAG set to
3226 differentiate them from global data objects. The returned
3227 address is the PIC reg + an unspec constant.
3229 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
3230 reg also appears in the address. */
3233 legitimize_pic_address (orig, reg)
3241 if (GET_CODE (addr) == LABEL_REF
3242 || (GET_CODE (addr) == SYMBOL_REF
3243 && (CONSTANT_POOL_ADDRESS_P (addr)
3244 || SYMBOL_REF_FLAG (addr))))
3246 /* This symbol may be referenced via a displacement from the PIC
3247 base address (@GOTOFF). */
3249 current_function_uses_pic_offset_table = 1;
3250 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), 7);
3251 new = gen_rtx_CONST (Pmode, new);
3252 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
3256 emit_move_insn (reg, new);
3260 else if (GET_CODE (addr) == SYMBOL_REF)
3262 /* This symbol must be referenced via a load from the
3263 Global Offset Table (@GOT). */
3265 current_function_uses_pic_offset_table = 1;
3266 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), 6);
3267 new = gen_rtx_CONST (Pmode, new);
3268 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
3269 new = gen_rtx_MEM (Pmode, new);
3270 RTX_UNCHANGING_P (new) = 1;
3271 MEM_ALIAS_SET (new) = ix86_GOT_alias_set ();
3274 reg = gen_reg_rtx (Pmode);
3275 emit_move_insn (reg, new);
3280 if (GET_CODE (addr) == CONST)
3282 addr = XEXP (addr, 0);
3283 if (GET_CODE (addr) == UNSPEC)
3285 /* Check that the unspec is one of the ones we generate? */
3287 else if (GET_CODE (addr) != PLUS)
3290 if (GET_CODE (addr) == PLUS)
3292 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
3294 /* Check first to see if this is a constant offset from a @GOTOFF
3295 symbol reference. */
3296 if ((GET_CODE (op0) == LABEL_REF
3297 || (GET_CODE (op0) == SYMBOL_REF
3298 && (CONSTANT_POOL_ADDRESS_P (op0)
3299 || SYMBOL_REF_FLAG (op0))))
3300 && GET_CODE (op1) == CONST_INT)
3302 current_function_uses_pic_offset_table = 1;
3303 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0), 7);
3304 new = gen_rtx_PLUS (Pmode, new, op1);
3305 new = gen_rtx_CONST (Pmode, new);
3306 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
3310 emit_move_insn (reg, new);
3316 base = legitimize_pic_address (XEXP (addr, 0), reg);
3317 new = legitimize_pic_address (XEXP (addr, 1),
3318 base == reg ? NULL_RTX : reg);
3320 if (GET_CODE (new) == CONST_INT)
3321 new = plus_constant (base, INTVAL (new));
3324 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
3326 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
3327 new = XEXP (new, 1);
3329 new = gen_rtx_PLUS (Pmode, base, new);
3337 /* Try machine-dependent ways of modifying an illegitimate address
3338 to be legitimate. If we find one, return the new, valid address.
3339 This macro is used in only one place: `memory_address' in explow.c.
3341 OLDX is the address as it was before break_out_memory_refs was called.
3342 In some cases it is useful to look at this to decide what needs to be done.
3344 MODE and WIN are passed so that this macro can use
3345 GO_IF_LEGITIMATE_ADDRESS.
3347 It is always safe for this macro to do nothing. It exists to recognize
3348 opportunities to optimize the output.
3350 For the 80386, we handle X+REG by loading X into a register R and
3351 using R+REG. R will go in a general reg and indexing will be used.
3352 However, if REG is a broken-out memory address or multiplication,
3353 nothing needs to be done because REG can certainly go in a general reg.
3355 When -fpic is used, special handling is needed for symbolic references.
3356 See comments by legitimize_pic_address in i386.c for details. */
3359 legitimize_address (x, oldx, mode)
3361 register rtx oldx ATTRIBUTE_UNUSED;
3362 enum machine_mode mode;
3367 if (TARGET_DEBUG_ADDR)
3369 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
3370 GET_MODE_NAME (mode));
3374 if (flag_pic && SYMBOLIC_CONST (x))
3375 return legitimize_pic_address (x, 0);
3377 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
3378 if (GET_CODE (x) == ASHIFT
3379 && GET_CODE (XEXP (x, 1)) == CONST_INT
3380 && (log = (unsigned)exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
3383 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
3384 GEN_INT (1 << log));
3387 if (GET_CODE (x) == PLUS)
3389 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
3391 if (GET_CODE (XEXP (x, 0)) == ASHIFT
3392 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
3393 && (log = (unsigned)exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
3396 XEXP (x, 0) = gen_rtx_MULT (Pmode,
3397 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
3398 GEN_INT (1 << log));
3401 if (GET_CODE (XEXP (x, 1)) == ASHIFT
3402 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
3403 && (log = (unsigned)exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
3406 XEXP (x, 1) = gen_rtx_MULT (Pmode,
3407 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
3408 GEN_INT (1 << log));
3411 /* Put multiply first if it isn't already. */
3412 if (GET_CODE (XEXP (x, 1)) == MULT)
3414 rtx tmp = XEXP (x, 0);
3415 XEXP (x, 0) = XEXP (x, 1);
3420 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
3421 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
3422 created by virtual register instantiation, register elimination, and
3423 similar optimizations. */
3424 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
3427 x = gen_rtx_PLUS (Pmode,
3428 gen_rtx_PLUS (Pmode, XEXP (x, 0),
3429 XEXP (XEXP (x, 1), 0)),
3430 XEXP (XEXP (x, 1), 1));
3434 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
3435 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
3436 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
3437 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
3438 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
3439 && CONSTANT_P (XEXP (x, 1)))
3442 rtx other = NULL_RTX;
3444 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
3446 constant = XEXP (x, 1);
3447 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
3449 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
3451 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
3452 other = XEXP (x, 1);
3460 x = gen_rtx_PLUS (Pmode,
3461 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
3462 XEXP (XEXP (XEXP (x, 0), 1), 0)),
3463 plus_constant (other, INTVAL (constant)));
3467 if (changed && legitimate_address_p (mode, x, FALSE))
3470 if (GET_CODE (XEXP (x, 0)) == MULT)
3473 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
3476 if (GET_CODE (XEXP (x, 1)) == MULT)
3479 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
3483 && GET_CODE (XEXP (x, 1)) == REG
3484 && GET_CODE (XEXP (x, 0)) == REG)
3487 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
3490 x = legitimize_pic_address (x, 0);
3493 if (changed && legitimate_address_p (mode, x, FALSE))
3496 if (GET_CODE (XEXP (x, 0)) == REG)
3498 register rtx temp = gen_reg_rtx (Pmode);
3499 register rtx val = force_operand (XEXP (x, 1), temp);
3501 emit_move_insn (temp, val);
3507 else if (GET_CODE (XEXP (x, 1)) == REG)
3509 register rtx temp = gen_reg_rtx (Pmode);
3510 register rtx val = force_operand (XEXP (x, 0), temp);
3512 emit_move_insn (temp, val);
3522 /* Print an integer constant expression in assembler syntax. Addition
3523 and subtraction are the only arithmetic that may appear in these
3524 expressions. FILE is the stdio stream to write to, X is the rtx, and
3525 CODE is the operand print code from the output string. */
3528 output_pic_addr_const (file, x, code)
3535 switch (GET_CODE (x))
3545 assemble_name (file, XSTR (x, 0));
3546 if (code == 'P' && ! SYMBOL_REF_FLAG (x))
3547 fputs ("@PLT", file);
3554 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
3555 assemble_name (asm_out_file, buf);
3559 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
3563 /* This used to output parentheses around the expression,
3564 but that does not work on the 386 (either ATT or BSD assembler). */
3565 output_pic_addr_const (file, XEXP (x, 0), code);
3569 if (GET_MODE (x) == VOIDmode)
3571 /* We can use %d if the number is <32 bits and positive. */
3572 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
3573 fprintf (file, "0x%lx%08lx",
3574 (unsigned long) CONST_DOUBLE_HIGH (x),
3575 (unsigned long) CONST_DOUBLE_LOW (x));
3577 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
3580 /* We can't handle floating point constants;
3581 PRINT_OPERAND must handle them. */
3582 output_operand_lossage ("floating constant misused");
3586 /* Some assemblers need integer constants to appear first. */
3587 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
3589 output_pic_addr_const (file, XEXP (x, 0), code);
3591 output_pic_addr_const (file, XEXP (x, 1), code);
3593 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
3595 output_pic_addr_const (file, XEXP (x, 1), code);
3597 output_pic_addr_const (file, XEXP (x, 0), code);
3604 putc (ASSEMBLER_DIALECT ? '(' : '[', file);
3605 output_pic_addr_const (file, XEXP (x, 0), code);
3607 output_pic_addr_const (file, XEXP (x, 1), code);
3608 putc (ASSEMBLER_DIALECT ? ')' : ']', file);
3612 if (XVECLEN (x, 0) != 1)
3614 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
3615 switch (XINT (x, 1))
3618 fputs ("@GOT", file);
3621 fputs ("@GOTOFF", file);
3624 fputs ("@PLT", file);
3627 output_operand_lossage ("invalid UNSPEC as operand");
3633 output_operand_lossage ("invalid expression as operand");
3637 /* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
3638 We need to handle our special PIC relocations. */
3641 i386_dwarf_output_addr_const (file, x)
3645 fprintf (file, "%s", INT_ASM_OP);
3647 output_pic_addr_const (file, x, '\0');
3649 output_addr_const (file, x);
3653 /* In the name of slightly smaller debug output, and to cater to
3654 general assembler losage, recognize PIC+GOTOFF and turn it back
3655 into a direct symbol reference. */
3658 i386_simplify_dwarf_addr (orig_x)
3663 if (GET_CODE (x) != PLUS
3664 || GET_CODE (XEXP (x, 0)) != REG
3665 || GET_CODE (XEXP (x, 1)) != CONST)
3668 x = XEXP (XEXP (x, 1), 0);
3669 if (GET_CODE (x) == UNSPEC
3670 && (XINT (x, 1) == 6
3671 || XINT (x, 1) == 7))
3672 return XVECEXP (x, 0, 0);
3674 if (GET_CODE (x) == PLUS
3675 && GET_CODE (XEXP (x, 0)) == UNSPEC
3676 && GET_CODE (XEXP (x, 1)) == CONST_INT
3677 && (XINT (XEXP (x, 0), 1) == 6
3678 || XINT (XEXP (x, 0), 1) == 7))
3679 return gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
3685 put_condition_code (code, mode, reverse, fp, file)
3687 enum machine_mode mode;
3693 if (mode == CCFPmode || mode == CCFPUmode)
3695 enum rtx_code second_code, bypass_code;
3696 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3697 if (bypass_code != NIL || second_code != NIL)
3699 code = ix86_fp_compare_code_to_integer (code);
3703 code = reverse_condition (code);
3714 if (mode != CCmode && mode != CCNOmode && mode != CCGCmode)
3719 /* ??? Use "nbe" instead of "a" for fcmov losage on some assemblers.
3720 Those same assemblers have the same but opposite losage on cmov. */
3723 suffix = fp ? "nbe" : "a";
3726 if (mode == CCNOmode || mode == CCGOCmode)
3728 else if (mode == CCmode || mode == CCGCmode)
3739 if (mode == CCNOmode || mode == CCGOCmode)
3741 else if (mode == CCmode || mode == CCGCmode)
3750 suffix = fp ? "nb" : "ae";
3753 if (mode != CCmode && mode != CCGCmode && mode != CCNOmode)
3763 suffix = fp ? "u" : "p";
3766 suffix = fp ? "nu" : "np";
3771 fputs (suffix, file);
3775 print_reg (x, code, file)
3780 if (REGNO (x) == ARG_POINTER_REGNUM
3781 || REGNO (x) == FRAME_POINTER_REGNUM
3782 || REGNO (x) == FLAGS_REG
3783 || REGNO (x) == FPSR_REG)
3786 if (ASSEMBLER_DIALECT == 0 || USER_LABEL_PREFIX[0] == 0)
3791 else if (code == 'b')
3793 else if (code == 'k')
3795 else if (code == 'q')
3797 else if (code == 'y')
3799 else if (code == 'h')
3801 else if (code == 'm' || MMX_REG_P (x))
3804 code = GET_MODE_SIZE (GET_MODE (x));
3806 /* Irritatingly, AMD extended registers use different naming convention
3807 from the normal registers. */
3808 if (REX_INT_REG_P (x))
3815 error ("Extended registers have no high halves\n");
3818 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
3821 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
3824 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
3827 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
3830 error ("Unsupported operand size for extended register.\n");
3838 fputs (hi_reg_name[REGNO (x)], file);
3841 if (STACK_TOP_P (x))
3843 fputs ("st(0)", file);
3850 if (! ANY_FP_REG_P (x))
3851 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
3855 fputs (hi_reg_name[REGNO (x)], file);
3858 fputs (qi_reg_name[REGNO (x)], file);
3861 fputs (qi_high_reg_name[REGNO (x)], file);
3869 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
3870 C -- print opcode suffix for set/cmov insn.
3871 c -- like C, but print reversed condition
3872 R -- print the prefix for register names.
3873 z -- print the opcode suffix for the size of the current operand.
3874 * -- print a star (in certain assembler syntax)
3875 A -- print an absolute memory reference.
3876 w -- print the operand as if it's a "word" (HImode) even if it isn't.
3877 s -- print a shift double count, followed by the assemblers argument
3879 b -- print the QImode name of the register for the indicated operand.
3880 %b0 would print %al if operands[0] is reg 0.
3881 w -- likewise, print the HImode name of the register.
3882 k -- likewise, print the SImode name of the register.
3883 q -- likewise, print the DImode name of the register.
3884 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
3885 y -- print "st(0)" instead of "st" as a register.
3886 m -- print "st(n)" as an mmx register.
3887 D -- print condition for SSE cmp instruction.
3891 print_operand (file, x, code)
3901 if (ASSEMBLER_DIALECT == 0)
3906 if (ASSEMBLER_DIALECT == 0)
3908 else if (ASSEMBLER_DIALECT == 1)
3910 /* Intel syntax. For absolute addresses, registers should not
3911 be surrounded by braces. */
3912 if (GET_CODE (x) != REG)
3915 PRINT_OPERAND (file, x, 0);
3921 PRINT_OPERAND (file, x, 0);
3926 if (ASSEMBLER_DIALECT == 0)
3931 if (ASSEMBLER_DIALECT == 0)
3936 if (ASSEMBLER_DIALECT == 0)
3941 if (ASSEMBLER_DIALECT == 0)
3946 if (ASSEMBLER_DIALECT == 0)
3951 if (ASSEMBLER_DIALECT == 0)
3956 /* 387 opcodes don't get size suffixes if the operands are
3959 if (STACK_REG_P (x))
3962 /* this is the size of op from size of operand */
3963 switch (GET_MODE_SIZE (GET_MODE (x)))
3966 #ifdef HAVE_GAS_FILDS_FISTS
3972 if (GET_MODE (x) == SFmode)
3987 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
3989 #ifdef GAS_MNEMONICS
4016 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
4018 PRINT_OPERAND (file, x, 0);
4024 /* Little bit of braindamage here. The SSE compare instructions
4025 does use completely different names for the comparisons that the
4026 fp conditional moves. */
4027 switch (GET_CODE (x))
4042 fputs ("unord", file);
4046 fputs ("neq", file);
4050 fputs ("nlt", file);
4054 fputs ("nle", file);
4057 fputs ("ord", file);
4065 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
4068 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
4071 /* Like above, but reverse condition */
4073 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
4076 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
4082 sprintf (str, "invalid operand code `%c'", code);
4083 output_operand_lossage (str);
4088 if (GET_CODE (x) == REG)
4090 PRINT_REG (x, code, file);
4093 else if (GET_CODE (x) == MEM)
4095 /* No `byte ptr' prefix for call instructions. */
4096 if (ASSEMBLER_DIALECT != 0 && code != 'X' && code != 'P')
4099 switch (GET_MODE_SIZE (GET_MODE (x)))
4101 case 1: size = "BYTE"; break;
4102 case 2: size = "WORD"; break;
4103 case 4: size = "DWORD"; break;
4104 case 8: size = "QWORD"; break;
4105 case 12: size = "XWORD"; break;
4106 case 16: size = "XMMWORD"; break;
4111 /* Check for explicit size override (codes 'b', 'w' and 'k') */
4114 else if (code == 'w')
4116 else if (code == 'k')
4120 fputs (" PTR ", file);
4124 if (flag_pic && CONSTANT_ADDRESS_P (x))
4125 output_pic_addr_const (file, x, code);
4126 /* Avoid (%rip) for call operands. */
4127 else if (CONSTANT_ADDRESS_P (x) && code =='P'
4128 && GET_CODE (x) != CONST_INT)
4129 output_addr_const (file, x);
4134 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
4139 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
4140 REAL_VALUE_TO_TARGET_SINGLE (r, l);
4142 if (ASSEMBLER_DIALECT == 0)
4144 fprintf (file, "0x%lx", l);
4147 /* These float cases don't actually occur as immediate operands. */
4148 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
4153 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
4154 REAL_VALUE_TO_DECIMAL (r, "%.22e", dstr);
4155 fprintf (file, "%s", dstr);
4158 else if (GET_CODE (x) == CONST_DOUBLE
4159 && (GET_MODE (x) == XFmode || GET_MODE (x) == TFmode))
4164 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
4165 REAL_VALUE_TO_DECIMAL (r, "%.22e", dstr);
4166 fprintf (file, "%s", dstr);
4172 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
4174 if (ASSEMBLER_DIALECT == 0)
4177 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
4178 || GET_CODE (x) == LABEL_REF)
4180 if (ASSEMBLER_DIALECT == 0)
4183 fputs ("OFFSET FLAT:", file);
4186 if (GET_CODE (x) == CONST_INT)
4187 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
4189 output_pic_addr_const (file, x, code);
4191 output_addr_const (file, x);
4195 /* Print a memory operand whose address is ADDR. */
4198 print_operand_address (file, addr)
4202 struct ix86_address parts;
4203 rtx base, index, disp;
4206 if (! ix86_decompose_address (addr, &parts))
4210 index = parts.index;
4212 scale = parts.scale;
4214 if (!base && !index)
4216 /* Displacement only requires special attention. */
4218 if (GET_CODE (disp) == CONST_INT)
4220 if (ASSEMBLER_DIALECT != 0)
4222 if (USER_LABEL_PREFIX[0] == 0)
4224 fputs ("ds:", file);
4226 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (addr));
4229 output_pic_addr_const (file, addr, 0);
4231 output_addr_const (file, addr);
4233 /* Use one byte shorter RIP relative addressing for 64bit mode. */
4234 if (GET_CODE (disp) != CONST_INT && TARGET_64BIT)
4235 fputs ("(%rip)", file);
4239 if (ASSEMBLER_DIALECT == 0)
4244 output_pic_addr_const (file, disp, 0);
4245 else if (GET_CODE (disp) == LABEL_REF)
4246 output_asm_label (disp);
4248 output_addr_const (file, disp);
4253 PRINT_REG (base, 0, file);
4257 PRINT_REG (index, 0, file);
4259 fprintf (file, ",%d", scale);
4265 rtx offset = NULL_RTX;
4269 /* Pull out the offset of a symbol; print any symbol itself. */
4270 if (GET_CODE (disp) == CONST
4271 && GET_CODE (XEXP (disp, 0)) == PLUS
4272 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
4274 offset = XEXP (XEXP (disp, 0), 1);
4275 disp = gen_rtx_CONST (VOIDmode,
4276 XEXP (XEXP (disp, 0), 0));
4280 output_pic_addr_const (file, disp, 0);
4281 else if (GET_CODE (disp) == LABEL_REF)
4282 output_asm_label (disp);
4283 else if (GET_CODE (disp) == CONST_INT)
4286 output_addr_const (file, disp);
4292 PRINT_REG (base, 0, file);
4295 if (INTVAL (offset) >= 0)
4297 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
4301 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
4308 PRINT_REG (index, 0, file);
4310 fprintf (file, "*%d", scale);
4317 /* Split one or more DImode RTL references into pairs of SImode
4318 references. The RTL can be REG, offsettable MEM, integer constant, or
4319 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
4320 split and "num" is its length. lo_half and hi_half are output arrays
4321 that parallel "operands". */
4324 split_di (operands, num, lo_half, hi_half)
4327 rtx lo_half[], hi_half[];
4331 rtx op = operands[num];
4332 if (CONSTANT_P (op))
4333 split_double (op, &lo_half[num], &hi_half[num]);
4334 else if (! reload_completed)
4336 lo_half[num] = gen_lowpart (SImode, op);
4337 hi_half[num] = gen_highpart (SImode, op);
4339 else if (GET_CODE (op) == REG)
4343 lo_half[num] = gen_rtx_REG (SImode, REGNO (op));
4344 hi_half[num] = gen_rtx_REG (SImode, REGNO (op) + 1);
4346 else if (offsettable_memref_p (op))
4348 rtx lo_addr = XEXP (op, 0);
4349 rtx hi_addr = XEXP (adj_offsettable_operand (op, 4), 0);
4350 lo_half[num] = change_address (op, SImode, lo_addr);
4351 hi_half[num] = change_address (op, SImode, hi_addr);
4358 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
4359 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
4360 is the expression of the binary operation. The output may either be
4361 emitted here, or returned to the caller, like all output_* functions.
4363 There is no guarantee that the operands are the same mode, as they
4364 might be within FLOAT or FLOAT_EXTEND expressions. */
4366 #ifndef SYSV386_COMPAT
4367 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
4368 wants to fix the assemblers because that causes incompatibility
4369 with gcc. No-one wants to fix gcc because that causes
4370 incompatibility with assemblers... You can use the option of
4371 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
4372 #define SYSV386_COMPAT 1
4376 output_387_binary_op (insn, operands)
4380 static char buf[30];
4383 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]) | SSE_REG_P (operands[2]);
4385 #ifdef ENABLE_CHECKING
4386 /* Even if we do not want to check the inputs, this documents input
4387 constraints. Which helps in understanding the following code. */
4388 if (STACK_REG_P (operands[0])
4389 && ((REG_P (operands[1])
4390 && REGNO (operands[0]) == REGNO (operands[1])
4391 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
4392 || (REG_P (operands[2])
4393 && REGNO (operands[0]) == REGNO (operands[2])
4394 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
4395 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
4401 switch (GET_CODE (operands[3]))
4404 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
4405 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
4413 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
4414 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
4422 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
4423 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
4431 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
4432 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
4446 if (GET_MODE (operands[0]) == SFmode)
4447 strcat (buf, "ss\t{%2, %0|%0, %2}");
4449 strcat (buf, "sd\t{%2, %0|%0, %2}");
4454 switch (GET_CODE (operands[3]))
4458 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
4460 rtx temp = operands[2];
4461 operands[2] = operands[1];
4465 /* know operands[0] == operands[1]. */
4467 if (GET_CODE (operands[2]) == MEM)
4473 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
4475 if (STACK_TOP_P (operands[0]))
4476 /* How is it that we are storing to a dead operand[2]?
4477 Well, presumably operands[1] is dead too. We can't
4478 store the result to st(0) as st(0) gets popped on this
4479 instruction. Instead store to operands[2] (which I
4480 think has to be st(1)). st(1) will be popped later.
4481 gcc <= 2.8.1 didn't have this check and generated
4482 assembly code that the Unixware assembler rejected. */
4483 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
4485 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
4489 if (STACK_TOP_P (operands[0]))
4490 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
4492 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
4497 if (GET_CODE (operands[1]) == MEM)
4503 if (GET_CODE (operands[2]) == MEM)
4509 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
4512 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
4513 derived assemblers, confusingly reverse the direction of
4514 the operation for fsub{r} and fdiv{r} when the
4515 destination register is not st(0). The Intel assembler
4516 doesn't have this brain damage. Read !SYSV386_COMPAT to
4517 figure out what the hardware really does. */
4518 if (STACK_TOP_P (operands[0]))
4519 p = "{p\t%0, %2|rp\t%2, %0}";
4521 p = "{rp\t%2, %0|p\t%0, %2}";
4523 if (STACK_TOP_P (operands[0]))
4524 /* As above for fmul/fadd, we can't store to st(0). */
4525 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
4527 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
4532 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
4535 if (STACK_TOP_P (operands[0]))
4536 p = "{rp\t%0, %1|p\t%1, %0}";
4538 p = "{p\t%1, %0|rp\t%0, %1}";
4540 if (STACK_TOP_P (operands[0]))
4541 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
4543 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
4548 if (STACK_TOP_P (operands[0]))
4550 if (STACK_TOP_P (operands[1]))
4551 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
4553 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
4556 else if (STACK_TOP_P (operands[1]))
4559 p = "{\t%1, %0|r\t%0, %1}";
4561 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
4567 p = "{r\t%2, %0|\t%0, %2}";
4569 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
4582 /* Output code for INSN to convert a float to a signed int. OPERANDS
4583 are the insn operands. The output may be [HSD]Imode and the input
4584 operand may be [SDX]Fmode. */
4587 output_fix_trunc (insn, operands)
4591 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
4592 int dimode_p = GET_MODE (operands[0]) == DImode;
4595 /* Jump through a hoop or two for DImode, since the hardware has no
4596 non-popping instruction. We used to do this a different way, but
4597 that was somewhat fragile and broke with post-reload splitters. */
4598 if (dimode_p && !stack_top_dies)
4599 output_asm_insn ("fld\t%y1", operands);
4601 if (! STACK_TOP_P (operands[1]))
4604 xops[0] = GEN_INT (12);
4605 xops[1] = adj_offsettable_operand (operands[2], 1);
4606 xops[1] = change_address (xops[1], QImode, NULL_RTX);
4608 xops[2] = operands[0];
4609 if (GET_CODE (operands[0]) != MEM)
4610 xops[2] = operands[3];
4612 output_asm_insn ("fnstcw\t%2", operands);
4613 output_asm_insn ("mov{l}\t{%2, %4|%4, %2}", operands);
4614 output_asm_insn ("mov{b}\t{%0, %1|%1, %0}", xops);
4615 output_asm_insn ("fldcw\t%2", operands);
4616 output_asm_insn ("mov{l}\t{%4, %2|%2, %4}", operands);
4618 if (stack_top_dies || dimode_p)
4619 output_asm_insn ("fistp%z2\t%2", xops);
4621 output_asm_insn ("fist%z2\t%2", xops);
4623 output_asm_insn ("fldcw\t%2", operands);
4625 if (GET_CODE (operands[0]) != MEM)
4629 split_di (operands+0, 1, xops+0, xops+1);
4630 split_di (operands+3, 1, xops+2, xops+3);
4631 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
4632 output_asm_insn ("mov{l}\t{%3, %1|%1, %3}", xops);
4634 else if (GET_MODE (operands[0]) == SImode)
4635 output_asm_insn ("mov{l}\t{%3, %0|%0, %3}", operands);
4637 output_asm_insn ("mov{w}\t{%3, %0|%0, %3}", operands);
4643 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
4644 should be used and 2 when fnstsw should be used. UNORDERED_P is true
4645 when fucom should be used. */
4648 output_fp_compare (insn, operands, eflags_p, unordered_p)
4651 int eflags_p, unordered_p;
4654 rtx cmp_op0 = operands[0];
4655 rtx cmp_op1 = operands[1];
4656 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]);
4661 cmp_op1 = operands[2];
4665 if (GET_MODE (operands[0]) == SFmode)
4667 return "ucomiss\t{%1, %0|%0, %1}";
4669 return "comiss\t{%1, %0|%0, %y}";
4672 return "ucomisd\t{%1, %0|%0, %1}";
4674 return "comisd\t{%1, %0|%0, %y}";
4677 if (! STACK_TOP_P (cmp_op0))
4680 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
4682 if (STACK_REG_P (cmp_op1)
4684 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
4685 && REGNO (cmp_op1) != FIRST_STACK_REG)
4687 /* If both the top of the 387 stack dies, and the other operand
4688 is also a stack register that dies, then this must be a
4689 `fcompp' float compare */
4693 /* There is no double popping fcomi variant. Fortunately,
4694 eflags is immune from the fstp's cc clobbering. */
4696 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
4698 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
4706 return "fucompp\n\tfnstsw\t%0";
4708 return "fcompp\n\tfnstsw\t%0";
4721 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
4723 static const char * const alt[24] =
4735 "fcomi\t{%y1, %0|%0, %y1}",
4736 "fcomip\t{%y1, %0|%0, %y1}",
4737 "fucomi\t{%y1, %0|%0, %y1}",
4738 "fucomip\t{%y1, %0|%0, %y1}",
4745 "fcom%z2\t%y2\n\tfnstsw\t%0",
4746 "fcomp%z2\t%y2\n\tfnstsw\t%0",
4747 "fucom%z2\t%y2\n\tfnstsw\t%0",
4748 "fucomp%z2\t%y2\n\tfnstsw\t%0",
4750 "ficom%z2\t%y2\n\tfnstsw\t%0",
4751 "ficomp%z2\t%y2\n\tfnstsw\t%0",
4759 mask = eflags_p << 3;
4760 mask |= (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT) << 2;
4761 mask |= unordered_p << 1;
4762 mask |= stack_top_dies;
4774 /* Output assembler code to FILE to initialize basic-block profiling.
4776 If profile_block_flag == 2
4778 Output code to call the subroutine `__bb_init_trace_func'
4779 and pass two parameters to it. The first parameter is
4780 the address of a block allocated in the object module.
4781 The second parameter is the number of the first basic block
4784 The name of the block is a local symbol made with this statement:
4786 ASM_GENERATE_INTERNAL_LABEL (BUFFER, "LPBX", 0);
4788 Of course, since you are writing the definition of
4789 `ASM_GENERATE_INTERNAL_LABEL' as well as that of this macro, you
4790 can take a short cut in the definition of this macro and use the
4791 name that you know will result.
4793 The number of the first basic block of the function is
4794 passed to the macro in BLOCK_OR_LABEL.
4796 If described in a virtual assembler language the code to be
4800 parameter2 <- BLOCK_OR_LABEL
4801 call __bb_init_trace_func
4803 else if profile_block_flag != 0
4805 Output code to call the subroutine `__bb_init_func'
4806 and pass one single parameter to it, which is the same
4807 as the first parameter to `__bb_init_trace_func'.
4809 The first word of this parameter is a flag which will be nonzero if
4810 the object module has already been initialized. So test this word
4811 first, and do not call `__bb_init_func' if the flag is nonzero.
4812 Note: When profile_block_flag == 2 the test need not be done
4813 but `__bb_init_trace_func' *must* be called.
4815 BLOCK_OR_LABEL may be used to generate a label number as a
4816 branch destination in case `__bb_init_func' will not be called.
4818 If described in a virtual assembler language the code to be
4829 ix86_output_function_block_profiler (file, block_or_label)
4833 static int num_func = 0;
4835 char block_table[80], false_label[80];
4837 ASM_GENERATE_INTERNAL_LABEL (block_table, "LPBX", 0);
4839 xops[1] = gen_rtx_SYMBOL_REF (VOIDmode, block_table);
4840 xops[5] = stack_pointer_rtx;
4841 xops[7] = gen_rtx_REG (Pmode, 0); /* eax */
4843 CONSTANT_POOL_ADDRESS_P (xops[1]) = TRUE;
4845 switch (profile_block_flag)
4848 xops[2] = GEN_INT (block_or_label);
4849 xops[3] = gen_rtx_MEM (Pmode,
4850 gen_rtx_SYMBOL_REF (VOIDmode, "__bb_init_trace_func"));
4851 xops[6] = GEN_INT (8);
4853 output_asm_insn ("push{l}\t%2", xops);
4855 output_asm_insn ("push{l}\t%1", xops);
4858 output_asm_insn ("lea{l}\t{%a1, %7|%7, %a1}", xops);
4859 output_asm_insn ("push{l}\t%7", xops);
4861 output_asm_insn ("call\t%P3", xops);
4862 output_asm_insn ("add{l}\t{%6, %5|%5, %6}", xops);
4866 ASM_GENERATE_INTERNAL_LABEL (false_label, "LPBZ", num_func);
4868 xops[0] = const0_rtx;
4869 xops[2] = gen_rtx_MEM (Pmode,
4870 gen_rtx_SYMBOL_REF (VOIDmode, false_label));
4871 xops[3] = gen_rtx_MEM (Pmode,
4872 gen_rtx_SYMBOL_REF (VOIDmode, "__bb_init_func"));
4873 xops[4] = gen_rtx_MEM (Pmode, xops[1]);
4874 xops[6] = GEN_INT (4);
4876 CONSTANT_POOL_ADDRESS_P (xops[2]) = TRUE;
4878 output_asm_insn ("cmp{l}\t{%0, %4|%4, %0}", xops);
4879 output_asm_insn ("jne\t%2", xops);
4882 output_asm_insn ("push{l}\t%1", xops);
4885 output_asm_insn ("lea{l}\t{%a1, %7|%7, %a2}", xops);
4886 output_asm_insn ("push{l}\t%7", xops);
4888 output_asm_insn ("call\t%P3", xops);
4889 output_asm_insn ("add{l}\t{%6, %5|%5, %6}", xops);
4890 ASM_OUTPUT_INTERNAL_LABEL (file, "LPBZ", num_func);
4896 /* Output assembler code to FILE to increment a counter associated
4897 with basic block number BLOCKNO.
4899 If profile_block_flag == 2
4901 Output code to initialize the global structure `__bb' and
4902 call the function `__bb_trace_func' which will increment the
4905 `__bb' consists of two words. In the first word the number
4906 of the basic block has to be stored. In the second word
4907 the address of a block allocated in the object module
4910 The basic block number is given by BLOCKNO.
4912 The address of the block is given by the label created with
4914 ASM_GENERATE_INTERNAL_LABEL (BUFFER, "LPBX", 0);
4916 by FUNCTION_BLOCK_PROFILER.
4918 Of course, since you are writing the definition of
4919 `ASM_GENERATE_INTERNAL_LABEL' as well as that of this macro, you
4920 can take a short cut in the definition of this macro and use the
4921 name that you know will result.
4923 If described in a virtual assembler language the code to be
4926 move BLOCKNO -> (__bb)
4927 move LPBX0 -> (__bb+4)
4928 call __bb_trace_func
4930 Note that function `__bb_trace_func' must not change the
4931 machine state, especially the flag register. To grant
4932 this, you must output code to save and restore registers
4933 either in this macro or in the macros MACHINE_STATE_SAVE
4934 and MACHINE_STATE_RESTORE. The last two macros will be
4935 used in the function `__bb_trace_func', so you must make
4936 sure that the function prologue does not change any
4937 register prior to saving it with MACHINE_STATE_SAVE.
4939 else if profile_block_flag != 0
4941 Output code to increment the counter directly.
4942 Basic blocks are numbered separately from zero within each
4943 compiled object module. The count associated with block number
4944 BLOCKNO is at index BLOCKNO in an array of words; the name of
4945 this array is a local symbol made with this statement:
4947 ASM_GENERATE_INTERNAL_LABEL (BUFFER, "LPBX", 2);
4949 Of course, since you are writing the definition of
4950 `ASM_GENERATE_INTERNAL_LABEL' as well as that of this macro, you
4951 can take a short cut in the definition of this macro and use the
4952 name that you know will result.
4954 If described in a virtual assembler language the code to be
4957 inc (LPBX2+4*BLOCKNO)
4961 ix86_output_block_profiler (file, blockno)
4962 FILE *file ATTRIBUTE_UNUSED;
4965 rtx xops[8], cnt_rtx;
4967 char *block_table = counts;
4969 switch (profile_block_flag)
4972 ASM_GENERATE_INTERNAL_LABEL (block_table, "LPBX", 0);
4974 xops[1] = gen_rtx_SYMBOL_REF (VOIDmode, block_table);
4975 xops[2] = GEN_INT (blockno);
4976 xops[3] = gen_rtx_MEM (Pmode,
4977 gen_rtx_SYMBOL_REF (VOIDmode, "__bb_trace_func"));
4978 xops[4] = gen_rtx_SYMBOL_REF (VOIDmode, "__bb");
4979 xops[5] = plus_constant (xops[4], 4);
4980 xops[0] = gen_rtx_MEM (SImode, xops[4]);
4981 xops[6] = gen_rtx_MEM (SImode, xops[5]);
4983 CONSTANT_POOL_ADDRESS_P (xops[1]) = TRUE;
4985 output_asm_insn ("pushf", xops);
4986 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
4989 xops[7] = gen_rtx_REG (Pmode, 0); /* eax */
4990 output_asm_insn ("push{l}\t%7", xops);
4991 output_asm_insn ("lea{l}\t{%a1, %7|%7, %a1}", xops);
4992 output_asm_insn ("mov{l}\t{%7, %6|%6, %7}", xops);
4993 output_asm_insn ("pop{l}\t%7", xops);
4996 output_asm_insn ("mov{l}\t{%1, %6|%6, %1}", xops);
4997 output_asm_insn ("call\t%P3", xops);
4998 output_asm_insn ("popf", xops);
5003 ASM_GENERATE_INTERNAL_LABEL (counts, "LPBX", 2);
5004 cnt_rtx = gen_rtx_SYMBOL_REF (VOIDmode, counts);
5005 SYMBOL_REF_FLAG (cnt_rtx) = TRUE;
5008 cnt_rtx = plus_constant (cnt_rtx, blockno*4);
5011 cnt_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, cnt_rtx);
5013 xops[0] = gen_rtx_MEM (SImode, cnt_rtx);
5014 output_asm_insn ("inc{l}\t%0", xops);
5021 ix86_expand_move (mode, operands)
5022 enum machine_mode mode;
5025 int strict = (reload_in_progress || reload_completed);
5028 if (flag_pic && mode == Pmode && symbolic_operand (operands[1], Pmode))
5030 /* Emit insns to move operands[1] into operands[0]. */
5032 if (GET_CODE (operands[0]) == MEM)
5033 operands[1] = force_reg (Pmode, operands[1]);
5036 rtx temp = operands[0];
5037 if (GET_CODE (temp) != REG)
5038 temp = gen_reg_rtx (Pmode);
5039 temp = legitimize_pic_address (operands[1], temp);
5040 if (temp == operands[0])
5047 if (GET_CODE (operands[0]) == MEM
5048 && (GET_MODE (operands[0]) == QImode
5049 || !push_operand (operands[0], mode))
5050 && GET_CODE (operands[1]) == MEM)
5051 operands[1] = force_reg (mode, operands[1]);
5053 if (push_operand (operands[0], mode)
5054 && ! general_no_elim_operand (operands[1], mode))
5055 operands[1] = copy_to_mode_reg (mode, operands[1]);
5057 if (FLOAT_MODE_P (mode))
5059 /* If we are loading a floating point constant to a register,
5060 force the value to memory now, since we'll get better code
5061 out the back end. */
5065 else if (GET_CODE (operands[1]) == CONST_DOUBLE
5066 && register_operand (operands[0], mode))
5067 operands[1] = validize_mem (force_const_mem (mode, operands[1]));
5071 insn = gen_rtx_SET (VOIDmode, operands[0], operands[1]);
5076 /* Attempt to expand a binary operator. Make the expansion closer to the
5077 actual machine, then just general_operand, which will allow 3 separate
5078 memory references (one output, two input) in a single insn. */
5081 ix86_expand_binary_operator (code, mode, operands)
5083 enum machine_mode mode;
5086 int matching_memory;
5087 rtx src1, src2, dst, op, clob;
5093 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
5094 if (GET_RTX_CLASS (code) == 'c'
5095 && (rtx_equal_p (dst, src2)
5096 || immediate_operand (src1, mode)))
5103 /* If the destination is memory, and we do not have matching source
5104 operands, do things in registers. */
5105 matching_memory = 0;
5106 if (GET_CODE (dst) == MEM)
5108 if (rtx_equal_p (dst, src1))
5109 matching_memory = 1;
5110 else if (GET_RTX_CLASS (code) == 'c'
5111 && rtx_equal_p (dst, src2))
5112 matching_memory = 2;
5114 dst = gen_reg_rtx (mode);
5117 /* Both source operands cannot be in memory. */
5118 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
5120 if (matching_memory != 2)
5121 src2 = force_reg (mode, src2);
5123 src1 = force_reg (mode, src1);
5126 /* If the operation is not commutable, source 1 cannot be a constant
5127 or non-matching memory. */
5128 if ((CONSTANT_P (src1)
5129 || (!matching_memory && GET_CODE (src1) == MEM))
5130 && GET_RTX_CLASS (code) != 'c')
5131 src1 = force_reg (mode, src1);
5133 /* If optimizing, copy to regs to improve CSE */
5134 if (optimize && ! no_new_pseudos)
5136 if (GET_CODE (dst) == MEM)
5137 dst = gen_reg_rtx (mode);
5138 if (GET_CODE (src1) == MEM)
5139 src1 = force_reg (mode, src1);
5140 if (GET_CODE (src2) == MEM)
5141 src2 = force_reg (mode, src2);
5144 /* Emit the instruction. */
5146 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
5147 if (reload_in_progress)
5149 /* Reload doesn't know about the flags register, and doesn't know that
5150 it doesn't want to clobber it. We can only do this with PLUS. */
5157 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
5158 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
5161 /* Fix up the destination if needed. */
5162 if (dst != operands[0])
5163 emit_move_insn (operands[0], dst);
5166 /* Return TRUE or FALSE depending on whether the binary operator meets the
5167 appropriate constraints. */
5170 ix86_binary_operator_ok (code, mode, operands)
5172 enum machine_mode mode ATTRIBUTE_UNUSED;
5175 /* Both source operands cannot be in memory. */
5176 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
5178 /* If the operation is not commutable, source 1 cannot be a constant. */
5179 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != 'c')
5181 /* If the destination is memory, we must have a matching source operand. */
5182 if (GET_CODE (operands[0]) == MEM
5183 && ! (rtx_equal_p (operands[0], operands[1])
5184 || (GET_RTX_CLASS (code) == 'c'
5185 && rtx_equal_p (operands[0], operands[2]))))
5187 /* If the operation is not commutable and the source 1 is memory, we must
5188 have a matching destionation. */
5189 if (GET_CODE (operands[1]) == MEM
5190 && GET_RTX_CLASS (code) != 'c'
5191 && ! rtx_equal_p (operands[0], operands[1]))
5196 /* Attempt to expand a unary operator. Make the expansion closer to the
5197 actual machine, then just general_operand, which will allow 2 separate
5198 memory references (one output, one input) in a single insn. */
5201 ix86_expand_unary_operator (code, mode, operands)
5203 enum machine_mode mode;
5206 int matching_memory;
5207 rtx src, dst, op, clob;
5212 /* If the destination is memory, and we do not have matching source
5213 operands, do things in registers. */
5214 matching_memory = 0;
5215 if (GET_CODE (dst) == MEM)
5217 if (rtx_equal_p (dst, src))
5218 matching_memory = 1;
5220 dst = gen_reg_rtx (mode);
5223 /* When source operand is memory, destination must match. */
5224 if (!matching_memory && GET_CODE (src) == MEM)
5225 src = force_reg (mode, src);
5227 /* If optimizing, copy to regs to improve CSE */
5228 if (optimize && ! no_new_pseudos)
5230 if (GET_CODE (dst) == MEM)
5231 dst = gen_reg_rtx (mode);
5232 if (GET_CODE (src) == MEM)
5233 src = force_reg (mode, src);
5236 /* Emit the instruction. */
5238 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
5239 if (reload_in_progress || code == NOT)
5241 /* Reload doesn't know about the flags register, and doesn't know that
5242 it doesn't want to clobber it. */
5249 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
5250 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
5253 /* Fix up the destination if needed. */
5254 if (dst != operands[0])
5255 emit_move_insn (operands[0], dst);
5258 /* Return TRUE or FALSE depending on whether the unary operator meets the
5259 appropriate constraints. */
5262 ix86_unary_operator_ok (code, mode, operands)
5263 enum rtx_code code ATTRIBUTE_UNUSED;
5264 enum machine_mode mode ATTRIBUTE_UNUSED;
5265 rtx operands[2] ATTRIBUTE_UNUSED;
5267 /* If one of operands is memory, source and destination must match. */
5268 if ((GET_CODE (operands[0]) == MEM
5269 || GET_CODE (operands[1]) == MEM)
5270 && ! rtx_equal_p (operands[0], operands[1]))
5275 /* Return TRUE or FALSE depending on whether the first SET in INSN
5276 has source and destination with matching CC modes, and that the
5277 CC mode is at least as constrained as REQ_MODE. */
5280 ix86_match_ccmode (insn, req_mode)
5282 enum machine_mode req_mode;
5285 enum machine_mode set_mode;
5287 set = PATTERN (insn);
5288 if (GET_CODE (set) == PARALLEL)
5289 set = XVECEXP (set, 0, 0);
5290 if (GET_CODE (set) != SET)
5292 if (GET_CODE (SET_SRC (set)) != COMPARE)
5295 set_mode = GET_MODE (SET_DEST (set));
5299 if (req_mode != CCNOmode
5300 && (req_mode != CCmode
5301 || XEXP (SET_SRC (set), 1) != const0_rtx))
5305 if (req_mode == CCGCmode)
5309 if (req_mode == CCGOCmode || req_mode == CCNOmode)
5313 if (req_mode == CCZmode)
5323 return (GET_MODE (SET_SRC (set)) == set_mode);
5326 /* Generate insn patterns to do an integer compare of OPERANDS. */
5329 ix86_expand_int_compare (code, op0, op1)
5333 enum machine_mode cmpmode;
5336 cmpmode = SELECT_CC_MODE (code, op0, op1);
5337 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
5339 /* This is very simple, but making the interface the same as in the
5340 FP case makes the rest of the code easier. */
5341 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
5342 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
5344 /* Return the test that should be put into the flags user, i.e.
5345 the bcc, scc, or cmov instruction. */
5346 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
5349 /* Figure out whether to use ordered or unordered fp comparisons.
5350 Return the appropriate mode to use. */
5353 ix86_fp_compare_mode (code)
5354 enum rtx_code code ATTRIBUTE_UNUSED;
5356 /* ??? In order to make all comparisons reversible, we do all comparisons
5357 non-trapping when compiling for IEEE. Once gcc is able to distinguish
5358 all forms trapping and nontrapping comparisons, we can make inequality
5359 comparisons trapping again, since it results in better code when using
5360 FCOM based compares. */
5361 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
5365 ix86_cc_mode (code, op0, op1)
5369 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
5370 return ix86_fp_compare_mode (code);
5373 /* Only zero flag is needed. */
5375 case NE: /* ZF!=0 */
5377 /* Codes needing carry flag. */
5378 case GEU: /* CF=0 */
5379 case GTU: /* CF=0 & ZF=0 */
5380 case LTU: /* CF=1 */
5381 case LEU: /* CF=1 | ZF=1 */
5383 /* Codes possibly doable only with sign flag when
5384 comparing against zero. */
5385 case GE: /* SF=OF or SF=0 */
5386 case LT: /* SF<>OF or SF=1 */
5387 if (op1 == const0_rtx)
5390 /* For other cases Carry flag is not required. */
5392 /* Codes doable only with sign flag when comparing
5393 against zero, but we miss jump instruction for it
5394 so we need to use relational tests agains overflow
5395 that thus needs to be zero. */
5396 case GT: /* ZF=0 & SF=OF */
5397 case LE: /* ZF=1 | SF<>OF */
5398 if (op1 == const0_rtx)
5407 /* Return true if we should use an FCOMI instruction for this fp comparison. */
5410 ix86_use_fcomi_compare (code)
5411 enum rtx_code code ATTRIBUTE_UNUSED;
5413 enum rtx_code swapped_code = swap_condition (code);
5414 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
5415 || (ix86_fp_comparison_cost (swapped_code)
5416 == ix86_fp_comparison_fcomi_cost (swapped_code)));
5419 /* Swap, force into registers, or otherwise massage the two operands
5420 to a fp comparison. The operands are updated in place; the new
5421 comparsion code is returned. */
5423 static enum rtx_code
5424 ix86_prepare_fp_compare_args (code, pop0, pop1)
5428 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
5429 rtx op0 = *pop0, op1 = *pop1;
5430 enum machine_mode op_mode = GET_MODE (op0);
5431 int is_sse = SSE_REG_P (op0) | SSE_REG_P (op1);
5433 /* All of the unordered compare instructions only work on registers.
5434 The same is true of the XFmode compare instructions. The same is
5435 true of the fcomi compare instructions. */
5438 && (fpcmp_mode == CCFPUmode
5439 || op_mode == XFmode
5440 || op_mode == TFmode
5441 || ix86_use_fcomi_compare (code)))
5443 op0 = force_reg (op_mode, op0);
5444 op1 = force_reg (op_mode, op1);
5448 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
5449 things around if they appear profitable, otherwise force op0
5452 if (standard_80387_constant_p (op0) == 0
5453 || (GET_CODE (op0) == MEM
5454 && ! (standard_80387_constant_p (op1) == 0
5455 || GET_CODE (op1) == MEM)))
5458 tmp = op0, op0 = op1, op1 = tmp;
5459 code = swap_condition (code);
5462 if (GET_CODE (op0) != REG)
5463 op0 = force_reg (op_mode, op0);
5465 if (CONSTANT_P (op1))
5467 if (standard_80387_constant_p (op1))
5468 op1 = force_reg (op_mode, op1);
5470 op1 = validize_mem (force_const_mem (op_mode, op1));
5474 /* Try to rearrange the comparison to make it cheaper. */
5475 if (ix86_fp_comparison_cost (code)
5476 > ix86_fp_comparison_cost (swap_condition (code))
5477 && (GET_CODE (op0) == REG || !reload_completed))
5480 tmp = op0, op0 = op1, op1 = tmp;
5481 code = swap_condition (code);
5482 if (GET_CODE (op0) != REG)
5483 op0 = force_reg (op_mode, op0);
5491 /* Convert comparison codes we use to represent FP comparison to integer
5492 code that will result in proper branch. Return UNKNOWN if no such code
5494 static enum rtx_code
5495 ix86_fp_compare_code_to_integer (code)
5525 /* Split comparison code CODE into comparisons we can do using branch
5526 instructions. BYPASS_CODE is comparison code for branch that will
5527 branch around FIRST_CODE and SECOND_CODE. If some of branches
5528 is not required, set value to NIL.
5529 We never require more than two branches. */
5531 ix86_fp_comparison_codes (code, bypass_code, first_code, second_code)
5532 enum rtx_code code, *bypass_code, *first_code, *second_code;
5538 /* The fcomi comparison sets flags as follows:
5548 case GT: /* GTU - CF=0 & ZF=0 */
5549 case GE: /* GEU - CF=0 */
5550 case ORDERED: /* PF=0 */
5551 case UNORDERED: /* PF=1 */
5552 case UNEQ: /* EQ - ZF=1 */
5553 case UNLT: /* LTU - CF=1 */
5554 case UNLE: /* LEU - CF=1 | ZF=1 */
5555 case LTGT: /* EQ - ZF=0 */
5557 case LT: /* LTU - CF=1 - fails on unordered */
5559 *bypass_code = UNORDERED;
5561 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
5563 *bypass_code = UNORDERED;
5565 case EQ: /* EQ - ZF=1 - fails on unordered */
5567 *bypass_code = UNORDERED;
5569 case NE: /* NE - ZF=0 - fails on unordered */
5571 *second_code = UNORDERED;
5573 case UNGE: /* GEU - CF=0 - fails on unordered */
5575 *second_code = UNORDERED;
5577 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
5579 *second_code = UNORDERED;
5584 if (!TARGET_IEEE_FP)
5591 /* Return cost of comparison done fcom + arithmetics operations on AX.
5592 All following functions do use number of instructions as an cost metrics.
5593 In future this should be tweaked to compute bytes for optimize_size and
5594 take into account performance of various instructions on various CPUs. */
5596 ix86_fp_comparison_arithmetics_cost (code)
5599 if (!TARGET_IEEE_FP)
5601 /* The cost of code output by ix86_expand_fp_compare. */
5629 /* Return cost of comparison done using fcomi operation.
5630 See ix86_fp_comparison_arithmetics_cost for the metrics. */
5632 ix86_fp_comparison_fcomi_cost (code)
5635 enum rtx_code bypass_code, first_code, second_code;
5636 /* Return arbitarily high cost when instruction is not supported - this
5637 prevents gcc from using it. */
5640 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
5641 return (bypass_code != NIL || second_code != NIL) + 2;
5644 /* Return cost of comparison done using sahf operation.
5645 See ix86_fp_comparison_arithmetics_cost for the metrics. */
5647 ix86_fp_comparison_sahf_cost (code)
5650 enum rtx_code bypass_code, first_code, second_code;
5651 /* Return arbitarily high cost when instruction is not preferred - this
5652 avoids gcc from using it. */
5653 if (!TARGET_USE_SAHF && !optimize_size)
5655 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
5656 return (bypass_code != NIL || second_code != NIL) + 3;
5659 /* Compute cost of the comparison done using any method.
5660 See ix86_fp_comparison_arithmetics_cost for the metrics. */
5662 ix86_fp_comparison_cost (code)
5665 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
5668 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
5669 sahf_cost = ix86_fp_comparison_sahf_cost (code);
5671 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
5672 if (min > sahf_cost)
5674 if (min > fcomi_cost)
5679 /* Generate insn patterns to do a floating point compare of OPERANDS. */
5682 ix86_expand_fp_compare (code, op0, op1, scratch, second_test, bypass_test)
5684 rtx op0, op1, scratch;
5688 enum machine_mode fpcmp_mode, intcmp_mode;
5690 int cost = ix86_fp_comparison_cost (code);
5691 enum rtx_code bypass_code, first_code, second_code;
5693 fpcmp_mode = ix86_fp_compare_mode (code);
5694 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
5697 *second_test = NULL_RTX;
5699 *bypass_test = NULL_RTX;
5701 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
5703 /* Do fcomi/sahf based test when profitable. */
5704 if ((bypass_code == NIL || bypass_test)
5705 && (second_code == NIL || second_test)
5706 && ix86_fp_comparison_arithmetics_cost (code) > cost)
5710 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
5711 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
5717 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
5718 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), 9);
5720 scratch = gen_reg_rtx (HImode);
5721 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
5722 emit_insn (gen_x86_sahf_1 (scratch));
5725 /* The FP codes work out to act like unsigned. */
5726 intcmp_mode = fpcmp_mode;
5728 if (bypass_code != NIL)
5729 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
5730 gen_rtx_REG (intcmp_mode, FLAGS_REG),
5732 if (second_code != NIL)
5733 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
5734 gen_rtx_REG (intcmp_mode, FLAGS_REG),
5739 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
5740 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
5741 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), 9);
5743 scratch = gen_reg_rtx (HImode);
5744 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
5746 /* In the unordered case, we have to check C2 for NaN's, which
5747 doesn't happen to work out to anything nice combination-wise.
5748 So do some bit twiddling on the value we've got in AH to come
5749 up with an appropriate set of condition codes. */
5751 intcmp_mode = CCNOmode;
5756 if (code == GT || !TARGET_IEEE_FP)
5758 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
5763 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
5764 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
5765 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
5766 intcmp_mode = CCmode;
5772 if (code == LT && TARGET_IEEE_FP)
5774 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
5775 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
5776 intcmp_mode = CCmode;
5781 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
5787 if (code == GE || !TARGET_IEEE_FP)
5789 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
5794 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
5795 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
5802 if (code == LE && TARGET_IEEE_FP)
5804 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
5805 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
5806 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
5807 intcmp_mode = CCmode;
5812 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
5818 if (code == EQ && TARGET_IEEE_FP)
5820 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
5821 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
5822 intcmp_mode = CCmode;
5827 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
5834 if (code == NE && TARGET_IEEE_FP)
5836 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
5837 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
5843 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
5849 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
5853 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
5862 /* Return the test that should be put into the flags user, i.e.
5863 the bcc, scc, or cmov instruction. */
5864 return gen_rtx_fmt_ee (code, VOIDmode,
5865 gen_rtx_REG (intcmp_mode, FLAGS_REG),
5870 ix86_expand_compare (code, second_test, bypass_test)
5872 rtx *second_test, *bypass_test;
5875 op0 = ix86_compare_op0;
5876 op1 = ix86_compare_op1;
5879 *second_test = NULL_RTX;
5881 *bypass_test = NULL_RTX;
5883 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
5884 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
5885 second_test, bypass_test);
5887 ret = ix86_expand_int_compare (code, op0, op1);
5893 ix86_expand_branch (code, label)
5899 switch (GET_MODE (ix86_compare_op0))
5905 tmp = ix86_expand_compare (code, NULL, NULL);
5906 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
5907 gen_rtx_LABEL_REF (VOIDmode, label),
5909 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
5916 /* Don't expand the comparison early, so that we get better code
5917 when jump or whoever decides to reverse the comparison. */
5922 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
5925 tmp = gen_rtx_fmt_ee (code, VOIDmode,
5926 ix86_compare_op0, ix86_compare_op1);
5927 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
5928 gen_rtx_LABEL_REF (VOIDmode, label),
5930 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
5932 use_fcomi = ix86_use_fcomi_compare (code);
5933 vec = rtvec_alloc (3 + !use_fcomi);
5934 RTVEC_ELT (vec, 0) = tmp;
5936 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
5938 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
5941 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
5943 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
5950 /* Expand DImode branch into multiple compare+branch. */
5952 rtx lo[2], hi[2], label2;
5953 enum rtx_code code1, code2, code3;
5955 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
5957 tmp = ix86_compare_op0;
5958 ix86_compare_op0 = ix86_compare_op1;
5959 ix86_compare_op1 = tmp;
5960 code = swap_condition (code);
5962 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
5963 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
5965 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
5966 avoid two branches. This costs one extra insn, so disable when
5967 optimizing for size. */
5969 if ((code == EQ || code == NE)
5971 || hi[1] == const0_rtx || lo[1] == const0_rtx))
5976 if (hi[1] != const0_rtx)
5977 xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
5978 NULL_RTX, 0, OPTAB_WIDEN);
5981 if (lo[1] != const0_rtx)
5982 xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
5983 NULL_RTX, 0, OPTAB_WIDEN);
5985 tmp = expand_binop (SImode, ior_optab, xor1, xor0,
5986 NULL_RTX, 0, OPTAB_WIDEN);
5988 ix86_compare_op0 = tmp;
5989 ix86_compare_op1 = const0_rtx;
5990 ix86_expand_branch (code, label);
5994 /* Otherwise, if we are doing less-than or greater-or-equal-than,
5995 op1 is a constant and the low word is zero, then we can just
5996 examine the high word. */
5998 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
6001 case LT: case LTU: case GE: case GEU:
6002 ix86_compare_op0 = hi[0];
6003 ix86_compare_op1 = hi[1];
6004 ix86_expand_branch (code, label);
6010 /* Otherwise, we need two or three jumps. */
6012 label2 = gen_label_rtx ();
6015 code2 = swap_condition (code);
6016 code3 = unsigned_condition (code);
6020 case LT: case GT: case LTU: case GTU:
6023 case LE: code1 = LT; code2 = GT; break;
6024 case GE: code1 = GT; code2 = LT; break;
6025 case LEU: code1 = LTU; code2 = GTU; break;
6026 case GEU: code1 = GTU; code2 = LTU; break;
6028 case EQ: code1 = NIL; code2 = NE; break;
6029 case NE: code2 = NIL; break;
6037 * if (hi(a) < hi(b)) goto true;
6038 * if (hi(a) > hi(b)) goto false;
6039 * if (lo(a) < lo(b)) goto true;
6043 ix86_compare_op0 = hi[0];
6044 ix86_compare_op1 = hi[1];
6047 ix86_expand_branch (code1, label);
6049 ix86_expand_branch (code2, label2);
6051 ix86_compare_op0 = lo[0];
6052 ix86_compare_op1 = lo[1];
6053 ix86_expand_branch (code3, label);
6056 emit_label (label2);
6065 /* Split branch based on floating point condition. */
6067 ix86_split_fp_branch (condition, op1, op2, target1, target2, tmp)
6068 rtx condition, op1, op2, target1, target2, tmp;
6071 rtx label = NULL_RTX;
6072 enum rtx_code code = GET_CODE (condition);
6074 if (target2 != pc_rtx)
6077 code = reverse_condition_maybe_unordered (code);
6082 condition = ix86_expand_fp_compare (code, op1, op2,
6083 tmp, &second, &bypass);
6084 if (bypass != NULL_RTX)
6086 label = gen_label_rtx ();
6087 emit_jump_insn (gen_rtx_SET
6089 gen_rtx_IF_THEN_ELSE (VOIDmode,
6091 gen_rtx_LABEL_REF (VOIDmode,
6095 /* AMD Athlon and probably other CPUs too have fast bypass path between the
6096 comparison and first branch. The second branch takes longer to execute
6097 so place first branch the worse predicable one if possible. */
6098 if (second != NULL_RTX
6099 && (GET_CODE (second) == UNORDERED || GET_CODE (second) == ORDERED))
6101 rtx tmp = condition;
6105 emit_jump_insn (gen_rtx_SET
6107 gen_rtx_IF_THEN_ELSE (VOIDmode,
6108 condition, target1, target2)));
6109 if (second != NULL_RTX)
6110 emit_jump_insn (gen_rtx_SET
6112 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1, target2)));
6113 if (label != NULL_RTX)
6118 ix86_expand_setcc (code, dest)
6122 rtx ret, tmp, tmpreg;
6123 rtx second_test, bypass_test;
6126 if (GET_MODE (ix86_compare_op0) == DImode
6128 return 0; /* FAIL */
6130 /* Three modes of generation:
6131 0 -- destination does not overlap compare sources:
6132 clear dest first, emit strict_low_part setcc.
6133 1 -- destination does overlap compare sources:
6134 emit subreg setcc, zero extend.
6135 2 -- destination is in QImode:
6141 if (GET_MODE (dest) == QImode)
6143 else if (reg_overlap_mentioned_p (dest, ix86_compare_op0)
6144 || reg_overlap_mentioned_p (dest, ix86_compare_op1))
6148 emit_move_insn (dest, const0_rtx);
6150 ret = ix86_expand_compare (code, &second_test, &bypass_test);
6151 PUT_MODE (ret, QImode);
6157 tmp = gen_lowpart (QImode, dest);
6159 tmp = gen_rtx_STRICT_LOW_PART (VOIDmode, tmp);
6163 if (!cse_not_expected)
6164 tmp = gen_reg_rtx (QImode);
6166 tmp = gen_lowpart (QImode, dest);
6170 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
6171 if (bypass_test || second_test)
6173 rtx test = second_test;
6175 rtx tmp2 = gen_reg_rtx (QImode);
6182 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
6184 PUT_MODE (test, QImode);
6185 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
6188 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
6190 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
6197 tmp = gen_rtx_ZERO_EXTEND (GET_MODE (dest), tmp);
6198 tmp = gen_rtx_SET (VOIDmode, dest, tmp);
6199 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
6200 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
6204 return 1; /* DONE */
6208 ix86_expand_int_movcc (operands)
6211 enum rtx_code code = GET_CODE (operands[1]), compare_code;
6212 rtx compare_seq, compare_op;
6213 rtx second_test, bypass_test;
6215 /* When the compare code is not LTU or GEU, we can not use sbbl case.
6216 In case comparsion is done with immediate, we can convert it to LTU or
6217 GEU by altering the integer. */
6219 if ((code == LEU || code == GTU)
6220 && GET_CODE (ix86_compare_op1) == CONST_INT
6221 && GET_MODE (operands[0]) != HImode
6222 && (unsigned int)INTVAL (ix86_compare_op1) != 0xffffffff
6223 && GET_CODE (operands[2]) == CONST_INT
6224 && GET_CODE (operands[3]) == CONST_INT)
6230 ix86_compare_op1 = GEN_INT (INTVAL (ix86_compare_op1) + 1);
6234 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
6235 compare_seq = gen_sequence ();
6238 compare_code = GET_CODE (compare_op);
6240 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
6241 HImode insns, we'd be swallowed in word prefix ops. */
6243 if (GET_MODE (operands[0]) != HImode
6244 && GET_MODE (operands[0]) != DImode
6245 && GET_CODE (operands[2]) == CONST_INT
6246 && GET_CODE (operands[3]) == CONST_INT)
6248 rtx out = operands[0];
6249 HOST_WIDE_INT ct = INTVAL (operands[2]);
6250 HOST_WIDE_INT cf = INTVAL (operands[3]);
6253 if ((compare_code == LTU || compare_code == GEU)
6254 && !second_test && !bypass_test)
6257 /* Detect overlap between destination and compare sources. */
6260 /* To simplify rest of code, restrict to the GEU case. */
6261 if (compare_code == LTU)
6266 compare_code = reverse_condition (compare_code);
6267 code = reverse_condition (code);
6271 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
6272 || reg_overlap_mentioned_p (out, ix86_compare_op1))
6273 tmp = gen_reg_rtx (SImode);
6275 emit_insn (compare_seq);
6276 emit_insn (gen_x86_movsicc_0_m1 (tmp));
6288 emit_insn (gen_addsi3 (tmp, tmp, GEN_INT (ct)));
6299 emit_insn (gen_iorsi3 (tmp, tmp, GEN_INT (ct)));
6301 else if (diff == -1 && ct)
6311 emit_insn (gen_one_cmplsi2 (tmp, tmp));
6313 emit_insn (gen_addsi3 (tmp, tmp, GEN_INT (cf)));
6320 * andl cf - ct, dest
6325 emit_insn (gen_andsi3 (tmp, tmp, GEN_INT (trunc_int_for_mode
6326 (cf - ct, SImode))));
6328 emit_insn (gen_addsi3 (tmp, tmp, GEN_INT (ct)));
6332 emit_move_insn (out, tmp);
6334 return 1; /* DONE */
6341 tmp = ct, ct = cf, cf = tmp;
6343 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
6345 /* We may be reversing unordered compare to normal compare, that
6346 is not valid in general (we may convert non-trapping condition
6347 to trapping one), however on i386 we currently emit all
6348 comparisons unordered. */
6349 compare_code = reverse_condition_maybe_unordered (compare_code);
6350 code = reverse_condition_maybe_unordered (code);
6354 compare_code = reverse_condition (compare_code);
6355 code = reverse_condition (code);
6358 if (diff == 1 || diff == 2 || diff == 4 || diff == 8
6359 || diff == 3 || diff == 5 || diff == 9)
6365 * lea cf(dest*(ct-cf)),dest
6369 * This also catches the degenerate setcc-only case.
6375 out = emit_store_flag (out, code, ix86_compare_op0,
6376 ix86_compare_op1, VOIDmode, 0, 1);
6379 /* On x86_64 the lea instruction operates on Pmode, so we need to get arithmetics
6380 done in proper mode to match. */
6383 if (Pmode != SImode)
6384 tmp = gen_lowpart (Pmode, out);
6391 if (Pmode != SImode)
6392 out1 = gen_lowpart (Pmode, out);
6395 tmp = gen_rtx_MULT (Pmode, out1, GEN_INT (diff & ~1));
6399 tmp = gen_rtx_PLUS (Pmode, tmp, out1);
6405 tmp = gen_rtx_PLUS (Pmode, tmp, GEN_INT (cf));
6409 && (GET_CODE (tmp) != SUBREG || SUBREG_REG (tmp) != out))
6411 if (Pmode != SImode)
6412 tmp = gen_rtx_SUBREG (SImode, tmp, 0);
6414 /* ??? We should to take care for outputing non-lea arithmetics
6415 for Pmode != SImode case too, but it is quite tricky and not
6416 too important, since all TARGET_64BIT machines support real
6417 conditional moves. */
6418 if (nops == 1 && Pmode == SImode)
6422 clob = gen_rtx_REG (CCmode, FLAGS_REG);
6423 clob = gen_rtx_CLOBBER (VOIDmode, clob);
6425 tmp = gen_rtx_SET (VOIDmode, out, tmp);
6426 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
6430 emit_insn (gen_rtx_SET (VOIDmode, out, tmp));
6432 if (out != operands[0])
6433 emit_move_insn (operands[0], out);
6435 return 1; /* DONE */
6439 * General case: Jumpful:
6440 * xorl dest,dest cmpl op1, op2
6441 * cmpl op1, op2 movl ct, dest
6443 * decl dest movl cf, dest
6444 * andl (cf-ct),dest 1:
6449 * This is reasonably steep, but branch mispredict costs are
6450 * high on modern cpus, so consider failing only if optimizing
6453 * %%% Parameterize branch_cost on the tuning architecture, then
6454 * use that. The 80386 couldn't care less about mispredicts.
6457 if (!optimize_size && !TARGET_CMOVE)
6463 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
6465 /* We may be reversing unordered compare to normal compare,
6466 that is not valid in general (we may convert non-trapping
6467 condition to trapping one), however on i386 we currently
6468 emit all comparisons unordered. */
6469 compare_code = reverse_condition_maybe_unordered (compare_code);
6470 code = reverse_condition_maybe_unordered (code);
6474 compare_code = reverse_condition (compare_code);
6475 code = reverse_condition (code);
6479 out = emit_store_flag (out, code, ix86_compare_op0,
6480 ix86_compare_op1, VOIDmode, 0, 1);
6482 emit_insn (gen_addsi3 (out, out, constm1_rtx));
6483 emit_insn (gen_andsi3 (out, out, GEN_INT (trunc_int_for_mode
6484 (cf - ct, SImode))));
6486 emit_insn (gen_addsi3 (out, out, GEN_INT (ct)));
6487 if (out != operands[0])
6488 emit_move_insn (operands[0], out);
6490 return 1; /* DONE */
6496 /* Try a few things more with specific constants and a variable. */
6499 rtx var, orig_out, out, tmp;
6502 return 0; /* FAIL */
6504 /* If one of the two operands is an interesting constant, load a
6505 constant with the above and mask it in with a logical operation. */
6507 if (GET_CODE (operands[2]) == CONST_INT)
6510 if (INTVAL (operands[2]) == 0)
6511 operands[3] = constm1_rtx, op = and_optab;
6512 else if (INTVAL (operands[2]) == -1)
6513 operands[3] = const0_rtx, op = ior_optab;
6515 return 0; /* FAIL */
6517 else if (GET_CODE (operands[3]) == CONST_INT)
6520 if (INTVAL (operands[3]) == 0)
6521 operands[2] = constm1_rtx, op = and_optab;
6522 else if (INTVAL (operands[3]) == -1)
6523 operands[2] = const0_rtx, op = ior_optab;
6525 return 0; /* FAIL */
6528 return 0; /* FAIL */
6530 orig_out = operands[0];
6531 tmp = gen_reg_rtx (GET_MODE (orig_out));
6534 /* Recurse to get the constant loaded. */
6535 if (ix86_expand_int_movcc (operands) == 0)
6536 return 0; /* FAIL */
6538 /* Mask in the interesting variable. */
6539 out = expand_binop (GET_MODE (orig_out), op, var, tmp, orig_out, 0,
6541 if (out != orig_out)
6542 emit_move_insn (orig_out, out);
6544 return 1; /* DONE */
6548 * For comparison with above,
6558 if (! nonimmediate_operand (operands[2], GET_MODE (operands[0])))
6559 operands[2] = force_reg (GET_MODE (operands[0]), operands[2]);
6560 if (! nonimmediate_operand (operands[3], GET_MODE (operands[0])))
6561 operands[3] = force_reg (GET_MODE (operands[0]), operands[3]);
6563 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
6565 rtx tmp = gen_reg_rtx (GET_MODE (operands[0]));
6566 emit_move_insn (tmp, operands[3]);
6569 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
6571 rtx tmp = gen_reg_rtx (GET_MODE (operands[0]));
6572 emit_move_insn (tmp, operands[2]);
6576 emit_insn (compare_seq);
6577 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
6578 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
6579 compare_op, operands[2],
6582 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
6583 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
6588 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
6589 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
6594 return 1; /* DONE */
6598 ix86_expand_fp_movcc (operands)
6603 rtx compare_op, second_test, bypass_test;
6605 /* For SF/DFmode conditional moves based on comparisons
6606 in same mode, we may want to use SSE min/max instructions. */
6607 if (((TARGET_SSE && GET_MODE (operands[0]) == SFmode)
6608 || (TARGET_SSE2 && GET_MODE (operands[0]) == DFmode))
6609 && GET_MODE (ix86_compare_op0) == GET_MODE (operands[0])
6610 /* The SSE comparisons does not support the LTGT/UNEQ pair. */
6612 || (GET_CODE (operands[1]) != LTGT && GET_CODE (operands[1]) != UNEQ))
6613 /* We may be called from the post-reload splitter. */
6614 && (!REG_P (operands[0])
6615 || SSE_REG_P (operands[0])
6616 || REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER)
6619 rtx op0 = ix86_compare_op0, op1 = ix86_compare_op1;
6620 code = GET_CODE (operands[1]);
6622 /* See if we have (cross) match between comparison operands and
6623 conditional move operands. */
6624 if (rtx_equal_p (operands[2], op1))
6629 code = reverse_condition_maybe_unordered (code);
6631 if (rtx_equal_p (operands[2], op0) && rtx_equal_p (operands[3], op1))
6633 /* Check for min operation. */
6636 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
6637 if (memory_operand (op0, VOIDmode))
6638 op0 = force_reg (GET_MODE (operands[0]), op0);
6639 if (GET_MODE (operands[0]) == SFmode)
6640 emit_insn (gen_minsf3 (operands[0], op0, op1));
6642 emit_insn (gen_mindf3 (operands[0], op0, op1));
6645 /* Check for max operation. */
6648 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
6649 if (memory_operand (op0, VOIDmode))
6650 op0 = force_reg (GET_MODE (operands[0]), op0);
6651 if (GET_MODE (operands[0]) == SFmode)
6652 emit_insn (gen_maxsf3 (operands[0], op0, op1));
6654 emit_insn (gen_maxdf3 (operands[0], op0, op1));
6658 /* Manage condition to be sse_comparison_operator. In case we are
6659 in non-ieee mode, try to canonicalize the destination operand
6660 to be first in the comparison - this helps reload to avoid extra
6662 if (!sse_comparison_operator (operands[1], VOIDmode)
6663 || (rtx_equal_p (operands[0], ix86_compare_op1) && !TARGET_IEEE_FP))
6665 rtx tmp = ix86_compare_op0;
6666 ix86_compare_op0 = ix86_compare_op1;
6667 ix86_compare_op1 = tmp;
6668 operands[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands[1])),
6669 VOIDmode, ix86_compare_op0,
6672 /* Similary try to manage result to be first operand of conditional
6673 move. We also don't support the NE comparison on SSE, so try to
6675 if ((rtx_equal_p (operands[0], operands[3])
6676 && (!TARGET_IEEE_FP || GET_CODE (operands[1]) != EQ))
6677 || (GET_CODE (operands[1]) == NE && TARGET_IEEE_FP))
6679 rtx tmp = operands[2];
6680 operands[2] = operands[3];
6682 operands[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
6683 (GET_CODE (operands[1])),
6684 VOIDmode, ix86_compare_op0,
6687 if (GET_MODE (operands[0]) == SFmode)
6688 emit_insn (gen_sse_movsfcc (operands[0], operands[1],
6689 operands[2], operands[3],
6690 ix86_compare_op0, ix86_compare_op1));
6692 emit_insn (gen_sse_movdfcc (operands[0], operands[1],
6693 operands[2], operands[3],
6694 ix86_compare_op0, ix86_compare_op1));
6698 /* The floating point conditional move instructions don't directly
6699 support conditions resulting from a signed integer comparison. */
6701 code = GET_CODE (operands[1]);
6702 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
6704 /* The floating point conditional move instructions don't directly
6705 support signed integer comparisons. */
6707 if (!fcmov_comparison_operator (compare_op, VOIDmode))
6709 if (second_test != NULL || bypass_test != NULL)
6711 tmp = gen_reg_rtx (QImode);
6712 ix86_expand_setcc (code, tmp);
6714 ix86_compare_op0 = tmp;
6715 ix86_compare_op1 = const0_rtx;
6716 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
6718 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
6720 tmp = gen_reg_rtx (GET_MODE (operands[0]));
6721 emit_move_insn (tmp, operands[3]);
6724 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
6726 tmp = gen_reg_rtx (GET_MODE (operands[0]));
6727 emit_move_insn (tmp, operands[2]);
6731 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
6732 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
6737 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
6738 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
6743 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
6744 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
6752 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
6753 works for floating pointer parameters and nonoffsetable memories.
6754 For pushes, it returns just stack offsets; the values will be saved
6755 in the right order. Maximally three parts are generated. */
6758 ix86_split_to_parts (operand, parts, mode)
6761 enum machine_mode mode;
6766 size = mode == TFmode ? 3 : (GET_MODE_SIZE (mode) / 4);
6768 size = (GET_MODE_SIZE (mode) + 4) / 8;
6770 if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
6772 if (size < 2 || size > 3)
6775 /* Optimize constant pool reference to immediates. This is used by fp moves,
6776 that force all constants to memory to allow combining. */
6778 if (GET_CODE (operand) == MEM
6779 && GET_CODE (XEXP (operand, 0)) == SYMBOL_REF
6780 && CONSTANT_POOL_ADDRESS_P (XEXP (operand, 0)))
6781 operand = get_pool_constant (XEXP (operand, 0));
6783 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
6785 /* The only non-offsetable memories we handle are pushes. */
6786 if (! push_operand (operand, VOIDmode))
6789 operand = copy_rtx (operand);
6790 PUT_MODE (operand, Pmode);
6791 parts[0] = parts[1] = parts[2] = operand;
6793 else if (!TARGET_64BIT)
6796 split_di (&operand, 1, &parts[0], &parts[1]);
6799 if (REG_P (operand))
6801 if (!reload_completed)
6803 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
6804 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
6806 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
6808 else if (offsettable_memref_p (operand))
6810 operand = change_address (operand, SImode, XEXP (operand, 0));
6812 parts[1] = adj_offsettable_operand (operand, 4);
6814 parts[2] = adj_offsettable_operand (operand, 8);
6816 else if (GET_CODE (operand) == CONST_DOUBLE)
6821 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
6826 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
6827 parts[2] = GEN_INT (l[2]);
6830 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
6835 parts[1] = GEN_INT (l[1]);
6836 parts[0] = GEN_INT (l[0]);
6844 if (mode == XFmode || mode == TFmode)
6846 if (REG_P (operand))
6848 if (!reload_completed)
6850 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
6851 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
6853 else if (offsettable_memref_p (operand))
6855 operand = change_address (operand, DImode, XEXP (operand, 0));
6857 parts[1] = adj_offsettable_operand (operand, 8);
6858 parts[1] = change_address (parts[1], SImode, XEXP (parts[1], 0));
6860 else if (GET_CODE (operand) == CONST_DOUBLE)
6865 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
6866 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
6867 /* Do not use shift by 32 to avoid warning on 32bit systems. */
6868 if (HOST_BITS_PER_WIDE_INT >= 64)
6869 parts[0] = GEN_INT (l[0] + ((l[1] << 31) << 1));
6871 parts[0] = immed_double_const (l[0], l[1], DImode);
6872 parts[1] = GEN_INT (l[2]);
6882 /* Emit insns to perform a move or push of DI, DF, and XF values.
6883 Return false when normal moves are needed; true when all required
6884 insns have been emitted. Operands 2-4 contain the input values
6885 int the correct order; operands 5-7 contain the output values. */
6888 ix86_split_long_move (operands)
6895 enum machine_mode mode = GET_MODE (operands[0]);
6897 /* The DFmode expanders may ask us to move double.
6898 For 64bit target this is single move. By hiding the fact
6899 here we simplify i386.md splitters. */
6900 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
6902 /* Optimize constant pool reference to immediates. This is used by fp moves,
6903 that force all constants to memory to allow combining. */
6905 if (GET_CODE (operands[1]) == MEM
6906 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
6907 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
6908 operands[1] = get_pool_constant (XEXP (operands[1], 0));
6909 if (push_operand (operands[0], VOIDmode))
6911 operands[0] = copy_rtx (operands[0]);
6912 PUT_MODE (operands[0], Pmode);
6915 operands[0] = gen_lowpart (DImode, operands[0]);
6916 operands[1] = gen_lowpart (DImode, operands[1]);
6917 emit_move_insn (operands[0], operands[1]);
6921 /* The only non-offsettable memory we handle is push. */
6922 if (push_operand (operands[0], VOIDmode))
6924 else if (GET_CODE (operands[0]) == MEM
6925 && ! offsettable_memref_p (operands[0]))
6928 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
6929 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
6931 /* When emitting push, take care for source operands on the stack. */
6932 if (push && GET_CODE (operands[1]) == MEM
6933 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
6936 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
6937 XEXP (part[1][2], 0));
6938 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
6939 XEXP (part[1][1], 0));
6942 /* We need to do copy in the right order in case an address register
6943 of the source overlaps the destination. */
6944 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
6946 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
6948 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
6951 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
6954 /* Collision in the middle part can be handled by reordering. */
6955 if (collisions == 1 && nparts == 3
6956 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
6959 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
6960 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
6963 /* If there are more collisions, we can't handle it by reordering.
6964 Do an lea to the last part and use only one colliding move. */
6965 else if (collisions > 1)
6968 emit_insn (gen_rtx_SET (VOIDmode, part[0][nparts - 1],
6969 XEXP (part[1][0], 0)));
6970 part[1][0] = change_address (part[1][0],
6971 TARGET_64BIT ? DImode : SImode,
6972 part[0][nparts - 1]);
6973 part[1][1] = adj_offsettable_operand (part[1][0],
6975 part[1][1] = change_address (part[1][1], GET_MODE (part[0][1]),
6976 XEXP (part[1][1], 0));
6978 part[1][2] = adj_offsettable_operand (part[1][0], 8);
6988 /* We use only first 12 bytes of TFmode value, but for pushing we
6989 are required to adjust stack as if we were pushing real 16byte
6991 if (mode == TFmode && !TARGET_64BIT)
6992 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
6994 emit_move_insn (part[0][2], part[1][2]);
6999 /* In 64bit mode we don't have 32bit push available. In case this is
7000 register, it is OK - we will just use larger counterpart. We also
7001 retype memory - these comes from attempt to avoid REX prefix on
7002 moving of second half of TFmode value. */
7003 if (GET_MODE (part[1][1]) == SImode)
7005 if (GET_CODE (part[1][1]) == MEM)
7006 part[1][1] = change_address (part[1][1], DImode, XEXP (part[1][1], 0));
7007 else if (REG_P (part[1][1]))
7008 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
7011 if (GET_MODE (part[1][0]) == SImode)
7012 part[1][0] = part[1][1];
7015 emit_move_insn (part[0][1], part[1][1]);
7016 emit_move_insn (part[0][0], part[1][0]);
7020 /* Choose correct order to not overwrite the source before it is copied. */
7021 if ((REG_P (part[0][0])
7022 && REG_P (part[1][1])
7023 && (REGNO (part[0][0]) == REGNO (part[1][1])
7025 && REGNO (part[0][0]) == REGNO (part[1][2]))))
7027 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
7031 operands[2] = part[0][2];
7032 operands[3] = part[0][1];
7033 operands[4] = part[0][0];
7034 operands[5] = part[1][2];
7035 operands[6] = part[1][1];
7036 operands[7] = part[1][0];
7040 operands[2] = part[0][1];
7041 operands[3] = part[0][0];
7042 operands[5] = part[1][1];
7043 operands[6] = part[1][0];
7050 operands[2] = part[0][0];
7051 operands[3] = part[0][1];
7052 operands[4] = part[0][2];
7053 operands[5] = part[1][0];
7054 operands[6] = part[1][1];
7055 operands[7] = part[1][2];
7059 operands[2] = part[0][0];
7060 operands[3] = part[0][1];
7061 operands[5] = part[1][0];
7062 operands[6] = part[1][1];
7065 emit_move_insn (operands[2], operands[5]);
7066 emit_move_insn (operands[3], operands[6]);
7068 emit_move_insn (operands[4], operands[7]);
7074 ix86_split_ashldi (operands, scratch)
7075 rtx *operands, scratch;
7077 rtx low[2], high[2];
7080 if (GET_CODE (operands[2]) == CONST_INT)
7082 split_di (operands, 2, low, high);
7083 count = INTVAL (operands[2]) & 63;
7087 emit_move_insn (high[0], low[1]);
7088 emit_move_insn (low[0], const0_rtx);
7091 emit_insn (gen_ashlsi3 (high[0], high[0], GEN_INT (count - 32)));
7095 if (!rtx_equal_p (operands[0], operands[1]))
7096 emit_move_insn (operands[0], operands[1]);
7097 emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
7098 emit_insn (gen_ashlsi3 (low[0], low[0], GEN_INT (count)));
7103 if (!rtx_equal_p (operands[0], operands[1]))
7104 emit_move_insn (operands[0], operands[1]);
7106 split_di (operands, 1, low, high);
7108 emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
7109 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
7111 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
7113 if (! no_new_pseudos)
7114 scratch = force_reg (SImode, const0_rtx);
7116 emit_move_insn (scratch, const0_rtx);
7118 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2],
7122 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
7127 ix86_split_ashrdi (operands, scratch)
7128 rtx *operands, scratch;
7130 rtx low[2], high[2];
7133 if (GET_CODE (operands[2]) == CONST_INT)
7135 split_di (operands, 2, low, high);
7136 count = INTVAL (operands[2]) & 63;
7140 emit_move_insn (low[0], high[1]);
7142 if (! reload_completed)
7143 emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31)));
7146 emit_move_insn (high[0], low[0]);
7147 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
7151 emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
7155 if (!rtx_equal_p (operands[0], operands[1]))
7156 emit_move_insn (operands[0], operands[1]);
7157 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
7158 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
7163 if (!rtx_equal_p (operands[0], operands[1]))
7164 emit_move_insn (operands[0], operands[1]);
7166 split_di (operands, 1, low, high);
7168 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
7169 emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));
7171 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
7173 if (! no_new_pseudos)
7174 scratch = gen_reg_rtx (SImode);
7175 emit_move_insn (scratch, high[0]);
7176 emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
7177 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
7181 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
7186 ix86_split_lshrdi (operands, scratch)
7187 rtx *operands, scratch;
7189 rtx low[2], high[2];
7192 if (GET_CODE (operands[2]) == CONST_INT)
7194 split_di (operands, 2, low, high);
7195 count = INTVAL (operands[2]) & 63;
7199 emit_move_insn (low[0], high[1]);
7200 emit_move_insn (high[0], const0_rtx);
7203 emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
7207 if (!rtx_equal_p (operands[0], operands[1]))
7208 emit_move_insn (operands[0], operands[1]);
7209 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
7210 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
7215 if (!rtx_equal_p (operands[0], operands[1]))
7216 emit_move_insn (operands[0], operands[1]);
7218 split_di (operands, 1, low, high);
7220 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
7221 emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));
7223 /* Heh. By reversing the arguments, we can reuse this pattern. */
7224 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
7226 if (! no_new_pseudos)
7227 scratch = force_reg (SImode, const0_rtx);
7229 emit_move_insn (scratch, const0_rtx);
7231 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
7235 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
7239 /* Helper function for the string operations bellow. Dest VARIABLE whether
7240 it is aligned to VALUE bytes. If true, jump to the label. */
7242 ix86_expand_aligntest (variable, value)
7246 rtx label = gen_label_rtx ();
7247 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
7248 if (GET_MODE (variable) == DImode)
7249 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
7251 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
7252 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
7257 /* Adjust COUNTER by the VALUE. */
7259 ix86_adjust_counter (countreg, value)
7261 HOST_WIDE_INT value;
7263 if (GET_MODE (countreg) == DImode)
7264 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
7266 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
7269 /* Zero extend possibly SImode EXP to Pmode register. */
7271 ix86_zero_extend_to_Pmode (exp)
7275 if (GET_MODE (exp) == VOIDmode)
7276 return force_reg (Pmode, exp);
7277 if (GET_MODE (exp) == Pmode)
7278 return copy_to_mode_reg (Pmode, exp);
7279 r = gen_reg_rtx (Pmode);
7280 emit_insn (gen_zero_extendsidi2 (r, exp));
7284 /* Expand string move (memcpy) operation. Use i386 string operations when
7285 profitable. expand_clrstr contains similar code. */
7287 ix86_expand_movstr (dst, src, count_exp, align_exp)
7288 rtx dst, src, count_exp, align_exp;
7290 rtx srcreg, destreg, countreg;
7291 enum machine_mode counter_mode;
7292 HOST_WIDE_INT align = 0;
7293 unsigned HOST_WIDE_INT count = 0;
7298 if (GET_CODE (align_exp) == CONST_INT)
7299 align = INTVAL (align_exp);
7301 /* This simple hack avoids all inlining code and simplifies code bellow. */
7302 if (!TARGET_ALIGN_STRINGOPS)
7305 if (GET_CODE (count_exp) == CONST_INT)
7306 count = INTVAL (count_exp);
7308 /* Figure out proper mode for counter. For 32bits it is always SImode,
7309 for 64bits use SImode when possible, otherwise DImode.
7310 Set count to number of bytes copied when known at compile time. */
7311 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
7312 || x86_64_zero_extended_value (count_exp))
7313 counter_mode = SImode;
7315 counter_mode = DImode;
7317 if (counter_mode != SImode && counter_mode != DImode)
7320 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
7321 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
7323 emit_insn (gen_cld ());
7325 /* When optimizing for size emit simple rep ; movsb instruction for
7326 counts not divisible by 4. */
7328 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
7330 countreg = ix86_zero_extend_to_Pmode (count_exp);
7332 emit_insn (gen_rep_movqi_rex64 (destreg, srcreg, countreg,
7333 destreg, srcreg, countreg));
7335 emit_insn (gen_rep_movqi (destreg, srcreg, countreg,
7336 destreg, srcreg, countreg));
7339 /* For constant aligned (or small unaligned) copies use rep movsl
7340 followed by code copying the rest. For PentiumPro ensure 8 byte
7341 alignment to allow rep movsl acceleration. */
7345 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
7346 || optimize_size || count < (unsigned int)64))
7348 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
7349 if (count & ~(size - 1))
7351 countreg = copy_to_mode_reg (counter_mode,
7352 GEN_INT ((count >> (size == 4 ? 2 : 3))
7353 & (TARGET_64BIT ? -1 : 0x3fffffff)));
7354 countreg = ix86_zero_extend_to_Pmode (countreg);
7358 emit_insn (gen_rep_movsi_rex64 (destreg, srcreg, countreg,
7359 destreg, srcreg, countreg));
7361 emit_insn (gen_rep_movsi (destreg, srcreg, countreg,
7362 destreg, srcreg, countreg));
7365 emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg,
7366 destreg, srcreg, countreg));
7368 if (size == 8 && (count & 0x04))
7369 emit_insn (gen_strmovsi (destreg, srcreg));
7371 emit_insn (gen_strmovhi (destreg, srcreg));
7373 emit_insn (gen_strmovqi (destreg, srcreg));
7375 /* The generic code based on the glibc implementation:
7376 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
7377 allowing accelerated copying there)
7378 - copy the data using rep movsl
7385 /* In case we don't know anything about the alignment, default to
7386 library version, since it is usually equally fast and result in
7388 if (!TARGET_INLINE_ALL_STRINGOPS && align < UNITS_PER_WORD)
7394 if (TARGET_SINGLE_STRINGOP)
7395 emit_insn (gen_cld ());
7397 countreg2 = gen_reg_rtx (Pmode);
7398 countreg = copy_to_mode_reg (counter_mode, count_exp);
7400 /* We don't use loops to align destination and to copy parts smaller
7401 than 4 bytes, because gcc is able to optimize such code better (in
7402 the case the destination or the count really is aligned, gcc is often
7403 able to predict the branches) and also it is friendlier to the
7404 hardware branch prediction.
7406 Using loops is benefical for generic case, because we can
7407 handle small counts using the loops. Many CPUs (such as Athlon)
7408 have large REP prefix setup costs.
7410 This is quite costy. Maybe we can revisit this decision later or
7411 add some customizability to this code. */
7414 && align < (TARGET_PENTIUMPRO && (count == 0
7415 || count >= (unsigned int)260)
7416 ? 8 : UNITS_PER_WORD))
7418 label = gen_label_rtx ();
7419 emit_cmp_and_jump_insns (countreg, GEN_INT (UNITS_PER_WORD - 1),
7420 LEU, 0, counter_mode, 1, 0, label);
7424 rtx label = ix86_expand_aligntest (destreg, 1);
7425 emit_insn (gen_strmovqi (destreg, srcreg));
7426 ix86_adjust_counter (countreg, 1);
7428 LABEL_NUSES (label) = 1;
7432 rtx label = ix86_expand_aligntest (destreg, 2);
7433 emit_insn (gen_strmovhi (destreg, srcreg));
7434 ix86_adjust_counter (countreg, 2);
7436 LABEL_NUSES (label) = 1;
7439 && ((TARGET_PENTIUMPRO && (count == 0
7440 || count >= (unsigned int)260))
7443 rtx label = ix86_expand_aligntest (destreg, 4);
7444 emit_insn (gen_strmovsi (destreg, srcreg));
7445 ix86_adjust_counter (countreg, 4);
7447 LABEL_NUSES (label) = 1;
7450 if (!TARGET_SINGLE_STRINGOP)
7451 emit_insn (gen_cld ());
7454 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
7456 emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg2,
7457 destreg, srcreg, countreg2));
7461 emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
7462 emit_insn (gen_rep_movsi (destreg, srcreg, countreg2,
7463 destreg, srcreg, countreg2));
7469 LABEL_NUSES (label) = 1;
7471 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
7472 emit_insn (gen_strmovsi (destreg, srcreg));
7473 if ((align <= 4 || count == 0) && TARGET_64BIT)
7475 rtx label = ix86_expand_aligntest (countreg, 4);
7476 emit_insn (gen_strmovsi (destreg, srcreg));
7478 LABEL_NUSES (label) = 1;
7480 if (align > 2 && count != 0 && (count & 2))
7481 emit_insn (gen_strmovhi (destreg, srcreg));
7482 if (align <= 2 || count == 0)
7484 rtx label = ix86_expand_aligntest (countreg, 2);
7485 emit_insn (gen_strmovhi (destreg, srcreg));
7487 LABEL_NUSES (label) = 1;
7489 if (align > 1 && count != 0 && (count & 1))
7490 emit_insn (gen_strmovqi (destreg, srcreg));
7491 if (align <= 1 || count == 0)
7493 rtx label = ix86_expand_aligntest (countreg, 1);
7494 emit_insn (gen_strmovqi (destreg, srcreg));
7496 LABEL_NUSES (label) = 1;
7500 insns = get_insns ();
7503 ix86_set_move_mem_attrs (insns, dst, src, destreg, srcreg);
7508 /* Expand string clear operation (bzero). Use i386 string operations when
7509 profitable. expand_movstr contains similar code. */
7511 ix86_expand_clrstr (src, count_exp, align_exp)
7512 rtx src, count_exp, align_exp;
7514 rtx destreg, zeroreg, countreg;
7515 enum machine_mode counter_mode;
7516 HOST_WIDE_INT align = 0;
7517 unsigned HOST_WIDE_INT count = 0;
7519 if (GET_CODE (align_exp) == CONST_INT)
7520 align = INTVAL (align_exp);
7522 /* This simple hack avoids all inlining code and simplifies code bellow. */
7523 if (!TARGET_ALIGN_STRINGOPS)
7526 if (GET_CODE (count_exp) == CONST_INT)
7527 count = INTVAL (count_exp);
7528 /* Figure out proper mode for counter. For 32bits it is always SImode,
7529 for 64bits use SImode when possible, otherwise DImode.
7530 Set count to number of bytes copied when known at compile time. */
7531 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
7532 || x86_64_zero_extended_value (count_exp))
7533 counter_mode = SImode;
7535 counter_mode = DImode;
7537 destreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
7539 emit_insn (gen_cld ());
7541 /* When optimizing for size emit simple rep ; movsb instruction for
7542 counts not divisible by 4. */
7544 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
7546 countreg = ix86_zero_extend_to_Pmode (count_exp);
7547 zeroreg = copy_to_mode_reg (QImode, const0_rtx);
7549 emit_insn (gen_rep_stosqi_rex64 (destreg, countreg, zeroreg,
7550 destreg, countreg));
7552 emit_insn (gen_rep_stosqi (destreg, countreg, zeroreg,
7553 destreg, countreg));
7557 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
7558 || optimize_size || count < (unsigned int)64))
7560 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
7561 zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
7562 if (count & ~(size - 1))
7564 countreg = copy_to_mode_reg (counter_mode,
7565 GEN_INT ((count >> (size == 4 ? 2 : 3))
7566 & (TARGET_64BIT ? -1 : 0x3fffffff)));
7567 countreg = ix86_zero_extend_to_Pmode (countreg);
7571 emit_insn (gen_rep_stossi_rex64 (destreg, countreg, zeroreg,
7572 destreg, countreg));
7574 emit_insn (gen_rep_stossi (destreg, countreg, zeroreg,
7575 destreg, countreg));
7578 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg, zeroreg,
7579 destreg, countreg));
7581 if (size == 8 && (count & 0x04))
7582 emit_insn (gen_strsetsi (destreg,
7583 gen_rtx_SUBREG (SImode, zeroreg, 0)));
7585 emit_insn (gen_strsethi (destreg,
7586 gen_rtx_SUBREG (HImode, zeroreg, 0)));
7588 emit_insn (gen_strsetqi (destreg,
7589 gen_rtx_SUBREG (QImode, zeroreg, 0)));
7596 /* In case we don't know anything about the alignment, default to
7597 library version, since it is usually equally fast and result in
7599 if (!TARGET_INLINE_ALL_STRINGOPS && align < UNITS_PER_WORD)
7602 if (TARGET_SINGLE_STRINGOP)
7603 emit_insn (gen_cld ());
7605 countreg2 = gen_reg_rtx (Pmode);
7606 countreg = copy_to_mode_reg (counter_mode, count_exp);
7607 zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
7610 && align < (TARGET_PENTIUMPRO && (count == 0
7611 || count >= (unsigned int)260)
7612 ? 8 : UNITS_PER_WORD))
7614 label = gen_label_rtx ();
7615 emit_cmp_and_jump_insns (countreg, GEN_INT (UNITS_PER_WORD - 1),
7616 LEU, 0, counter_mode, 1, 0, label);
7620 rtx label = ix86_expand_aligntest (destreg, 1);
7621 emit_insn (gen_strsetqi (destreg,
7622 gen_rtx_SUBREG (QImode, zeroreg, 0)));
7623 ix86_adjust_counter (countreg, 1);
7625 LABEL_NUSES (label) = 1;
7629 rtx label = ix86_expand_aligntest (destreg, 2);
7630 emit_insn (gen_strsethi (destreg,
7631 gen_rtx_SUBREG (HImode, zeroreg, 0)));
7632 ix86_adjust_counter (countreg, 2);
7634 LABEL_NUSES (label) = 1;
7636 if (align <= 4 && TARGET_PENTIUMPRO && (count == 0
7637 || count >= (unsigned int)260))
7639 rtx label = ix86_expand_aligntest (destreg, 4);
7640 emit_insn (gen_strsetsi (destreg, (TARGET_64BIT
7641 ? gen_rtx_SUBREG (SImode, zeroreg, 0)
7643 ix86_adjust_counter (countreg, 4);
7645 LABEL_NUSES (label) = 1;
7648 if (!TARGET_SINGLE_STRINGOP)
7649 emit_insn (gen_cld ());
7652 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
7654 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg2, zeroreg,
7655 destreg, countreg2));
7659 emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
7660 emit_insn (gen_rep_stossi (destreg, countreg2, zeroreg,
7661 destreg, countreg2));
7667 LABEL_NUSES (label) = 1;
7669 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
7670 emit_insn (gen_strsetsi (destreg,
7671 gen_rtx_SUBREG (SImode, zeroreg, 0)));
7672 if (TARGET_64BIT && (align <= 4 || count == 0))
7674 rtx label = ix86_expand_aligntest (destreg, 2);
7675 emit_insn (gen_strsetsi (destreg,
7676 gen_rtx_SUBREG (SImode, zeroreg, 0)));
7678 LABEL_NUSES (label) = 1;
7680 if (align > 2 && count != 0 && (count & 2))
7681 emit_insn (gen_strsethi (destreg,
7682 gen_rtx_SUBREG (HImode, zeroreg, 0)));
7683 if (align <= 2 || count == 0)
7685 rtx label = ix86_expand_aligntest (destreg, 2);
7686 emit_insn (gen_strsethi (destreg,
7687 gen_rtx_SUBREG (HImode, zeroreg, 0)));
7689 LABEL_NUSES (label) = 1;
7691 if (align > 1 && count != 0 && (count & 1))
7692 emit_insn (gen_strsetqi (destreg,
7693 gen_rtx_SUBREG (QImode, zeroreg, 0)));
7694 if (align <= 1 || count == 0)
7696 rtx label = ix86_expand_aligntest (destreg, 1);
7697 emit_insn (gen_strsetqi (destreg,
7698 gen_rtx_SUBREG (QImode, zeroreg, 0)));
7700 LABEL_NUSES (label) = 1;
7705 /* Expand strlen. */
7707 ix86_expand_strlen (out, src, eoschar, align)
7708 rtx out, src, eoschar, align;
7710 rtx addr, scratch1, scratch2, scratch3, scratch4;
7712 /* The generic case of strlen expander is long. Avoid it's
7713 expanding unless TARGET_INLINE_ALL_STRINGOPS. */
7715 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
7716 && !TARGET_INLINE_ALL_STRINGOPS
7718 && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
7721 addr = force_reg (Pmode, XEXP (src, 0));
7722 scratch1 = gen_reg_rtx (Pmode);
7724 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
7727 /* Well it seems that some optimizer does not combine a call like
7728 foo(strlen(bar), strlen(bar));
7729 when the move and the subtraction is done here. It does calculate
7730 the length just once when these instructions are done inside of
7731 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
7732 often used and I use one fewer register for the lifetime of
7733 output_strlen_unroll() this is better. */
7735 emit_move_insn (out, addr);
7737 ix86_expand_strlensi_unroll_1 (out, align);
7739 /* strlensi_unroll_1 returns the address of the zero at the end of
7740 the string, like memchr(), so compute the length by subtracting
7741 the start address. */
7743 emit_insn (gen_subdi3 (out, out, addr));
7745 emit_insn (gen_subsi3 (out, out, addr));
7749 scratch2 = gen_reg_rtx (Pmode);
7750 scratch3 = gen_reg_rtx (Pmode);
7751 scratch4 = force_reg (Pmode, constm1_rtx);
7753 emit_move_insn (scratch3, addr);
7754 eoschar = force_reg (QImode, eoschar);
7756 emit_insn (gen_cld ());
7759 emit_insn (gen_strlenqi_rex_1 (scratch1, scratch3, eoschar,
7760 align, scratch4, scratch3));
7761 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
7762 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
7766 emit_insn (gen_strlenqi_1 (scratch1, scratch3, eoschar,
7767 align, scratch4, scratch3));
7768 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
7769 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
7775 /* Expand the appropriate insns for doing strlen if not just doing
7778 out = result, initialized with the start address
7779 align_rtx = alignment of the address.
7780 scratch = scratch register, initialized with the startaddress when
7781 not aligned, otherwise undefined
7783 This is just the body. It needs the initialisations mentioned above and
7784 some address computing at the end. These things are done in i386.md. */
7787 ix86_expand_strlensi_unroll_1 (out, align_rtx)
7792 rtx align_2_label = NULL_RTX;
7793 rtx align_3_label = NULL_RTX;
7794 rtx align_4_label = gen_label_rtx ();
7795 rtx end_0_label = gen_label_rtx ();
7797 rtx tmpreg = gen_reg_rtx (SImode);
7798 rtx scratch = gen_reg_rtx (SImode);
7801 if (GET_CODE (align_rtx) == CONST_INT)
7802 align = INTVAL (align_rtx);
7804 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
7806 /* Is there a known alignment and is it less than 4? */
7809 rtx scratch1 = gen_reg_rtx (Pmode);
7810 emit_move_insn (scratch1, out);
7811 /* Is there a known alignment and is it not 2? */
7814 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
7815 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
7817 /* Leave just the 3 lower bits. */
7818 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
7819 NULL_RTX, 0, OPTAB_WIDEN);
7821 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
7822 Pmode, 1, 0, align_4_label);
7823 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), EQ, NULL,
7824 Pmode, 1, 0, align_2_label);
7825 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), GTU, NULL,
7826 Pmode, 1, 0, align_3_label);
7830 /* Since the alignment is 2, we have to check 2 or 0 bytes;
7831 check if is aligned to 4 - byte. */
7833 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (2),
7834 NULL_RTX, 0, OPTAB_WIDEN);
7836 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
7837 Pmode, 1, 0, align_4_label);
7840 mem = gen_rtx_MEM (QImode, out);
7842 /* Now compare the bytes. */
7844 /* Compare the first n unaligned byte on a byte per byte basis. */
7845 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
7846 QImode, 1, 0, end_0_label);
7848 /* Increment the address. */
7850 emit_insn (gen_adddi3 (out, out, const1_rtx));
7852 emit_insn (gen_addsi3 (out, out, const1_rtx));
7854 /* Not needed with an alignment of 2 */
7857 emit_label (align_2_label);
7859 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
7860 QImode, 1, 0, end_0_label);
7863 emit_insn (gen_adddi3 (out, out, const1_rtx));
7865 emit_insn (gen_addsi3 (out, out, const1_rtx));
7867 emit_label (align_3_label);
7870 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
7871 QImode, 1, 0, end_0_label);
7874 emit_insn (gen_adddi3 (out, out, const1_rtx));
7876 emit_insn (gen_addsi3 (out, out, const1_rtx));
7879 /* Generate loop to check 4 bytes at a time. It is not a good idea to
7880 align this loop. It gives only huge programs, but does not help to
7882 emit_label (align_4_label);
7884 mem = gen_rtx_MEM (SImode, out);
7885 emit_move_insn (scratch, mem);
7887 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
7889 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
7891 /* This formula yields a nonzero result iff one of the bytes is zero.
7892 This saves three branches inside loop and many cycles. */
7894 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
7895 emit_insn (gen_one_cmplsi2 (scratch, scratch));
7896 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
7897 emit_insn (gen_andsi3 (tmpreg, tmpreg,
7898 GEN_INT (trunc_int_for_mode
7899 (0x80808080, SImode))));
7900 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0,
7901 SImode, 1, 0, align_4_label);
7905 rtx reg = gen_reg_rtx (SImode);
7906 rtx reg2 = gen_reg_rtx (Pmode);
7907 emit_move_insn (reg, tmpreg);
7908 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
7910 /* If zero is not in the first two bytes, move two bytes forward. */
7911 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
7912 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
7913 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
7914 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
7915 gen_rtx_IF_THEN_ELSE (SImode, tmp,
7918 /* Emit lea manually to avoid clobbering of flags. */
7919 emit_insn (gen_rtx_SET (SImode, reg2,
7920 gen_rtx_PLUS (Pmode, out, GEN_INT (2))));
7922 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
7923 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
7924 emit_insn (gen_rtx_SET (VOIDmode, out,
7925 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
7932 rtx end_2_label = gen_label_rtx ();
7933 /* Is zero in the first two bytes? */
7935 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
7936 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
7937 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
7938 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
7939 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
7941 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
7942 JUMP_LABEL (tmp) = end_2_label;
7944 /* Not in the first two. Move two bytes forward. */
7945 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
7947 emit_insn (gen_adddi3 (out, out, GEN_INT (2)));
7949 emit_insn (gen_addsi3 (out, out, GEN_INT (2)));
7951 emit_label (end_2_label);
7955 /* Avoid branch in fixing the byte. */
7956 tmpreg = gen_lowpart (QImode, tmpreg);
7957 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
7959 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3)));
7961 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3)));
7963 emit_label (end_0_label);
7966 /* Clear stack slot assignments remembered from previous functions.
7967 This is called from INIT_EXPANDERS once before RTL is emitted for each
7971 ix86_init_machine_status (p)
7974 p->machine = (struct machine_function *)
7975 xcalloc (1, sizeof (struct machine_function));
7978 /* Mark machine specific bits of P for GC. */
7980 ix86_mark_machine_status (p)
7983 struct machine_function *machine = p->machine;
7984 enum machine_mode mode;
7990 for (mode = VOIDmode; (int) mode < (int) MAX_MACHINE_MODE;
7991 mode = (enum machine_mode) ((int) mode + 1))
7992 for (n = 0; n < MAX_386_STACK_LOCALS; n++)
7993 ggc_mark_rtx (machine->stack_locals[(int) mode][n]);
7997 ix86_free_machine_status (p)
8004 /* Return a MEM corresponding to a stack slot with mode MODE.
8005 Allocate a new slot if necessary.
8007 The RTL for a function can have several slots available: N is
8008 which slot to use. */
8011 assign_386_stack_local (mode, n)
8012 enum machine_mode mode;
8015 if (n < 0 || n >= MAX_386_STACK_LOCALS)
8018 if (ix86_stack_locals[(int) mode][n] == NULL_RTX)
8019 ix86_stack_locals[(int) mode][n]
8020 = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
8022 return ix86_stack_locals[(int) mode][n];
8025 /* Calculate the length of the memory address in the instruction
8026 encoding. Does not include the one-byte modrm, opcode, or prefix. */
8029 memory_address_length (addr)
8032 struct ix86_address parts;
8033 rtx base, index, disp;
8036 if (GET_CODE (addr) == PRE_DEC
8037 || GET_CODE (addr) == POST_INC
8038 || GET_CODE (addr) == PRE_MODIFY
8039 || GET_CODE (addr) == POST_MODIFY)
8042 if (! ix86_decompose_address (addr, &parts))
8046 index = parts.index;
8050 /* Register Indirect. */
8051 if (base && !index && !disp)
8053 /* Special cases: ebp and esp need the two-byte modrm form. */
8054 if (addr == stack_pointer_rtx
8055 || addr == arg_pointer_rtx
8056 || addr == frame_pointer_rtx
8057 || addr == hard_frame_pointer_rtx)
8061 /* Direct Addressing. */
8062 else if (disp && !base && !index)
8067 /* Find the length of the displacement constant. */
8070 if (GET_CODE (disp) == CONST_INT
8071 && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K'))
8077 /* An index requires the two-byte modrm form. */
8085 /* Compute default value for "length_immediate" attribute. When SHORTFORM is set
8086 expect that insn have 8bit immediate alternative. */
8088 ix86_attr_length_immediate_default (insn, shortform)
8094 extract_insn_cached (insn);
8095 for (i = recog_data.n_operands - 1; i >= 0; --i)
8096 if (CONSTANT_P (recog_data.operand[i]))
8101 && GET_CODE (recog_data.operand[i]) == CONST_INT
8102 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
8106 switch (get_attr_mode (insn))
8118 fatal_insn ("Unknown insn mode", insn);
8124 /* Compute default value for "length_address" attribute. */
8126 ix86_attr_length_address_default (insn)
8130 extract_insn_cached (insn);
8131 for (i = recog_data.n_operands - 1; i >= 0; --i)
8132 if (GET_CODE (recog_data.operand[i]) == MEM)
8134 return memory_address_length (XEXP (recog_data.operand[i], 0));
8140 /* Return the maximum number of instructions a cpu can issue. */
8147 case PROCESSOR_PENTIUM:
8151 case PROCESSOR_PENTIUMPRO:
8152 case PROCESSOR_PENTIUM4:
8153 case PROCESSOR_ATHLON:
8161 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
8162 by DEP_INSN and nothing set by DEP_INSN. */
8165 ix86_flags_dependant (insn, dep_insn, insn_type)
8167 enum attr_type insn_type;
8171 /* Simplify the test for uninteresting insns. */
8172 if (insn_type != TYPE_SETCC
8173 && insn_type != TYPE_ICMOV
8174 && insn_type != TYPE_FCMOV
8175 && insn_type != TYPE_IBR)
8178 if ((set = single_set (dep_insn)) != 0)
8180 set = SET_DEST (set);
8183 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
8184 && XVECLEN (PATTERN (dep_insn), 0) == 2
8185 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
8186 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
8188 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
8189 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
8194 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
8197 /* This test is true if the dependant insn reads the flags but
8198 not any other potentially set register. */
8199 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
8202 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
8208 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
8209 address with operands set by DEP_INSN. */
8212 ix86_agi_dependant (insn, dep_insn, insn_type)
8214 enum attr_type insn_type;
8218 if (insn_type == TYPE_LEA)
8220 addr = PATTERN (insn);
8221 if (GET_CODE (addr) == SET)
8223 else if (GET_CODE (addr) == PARALLEL
8224 && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
8225 addr = XVECEXP (addr, 0, 0);
8228 addr = SET_SRC (addr);
8233 extract_insn_cached (insn);
8234 for (i = recog_data.n_operands - 1; i >= 0; --i)
8235 if (GET_CODE (recog_data.operand[i]) == MEM)
8237 addr = XEXP (recog_data.operand[i], 0);
8244 return modified_in_p (addr, dep_insn);
8248 ix86_adjust_cost (insn, link, dep_insn, cost)
8249 rtx insn, link, dep_insn;
8252 enum attr_type insn_type, dep_insn_type;
8253 enum attr_memory memory;
8255 int dep_insn_code_number;
8257 /* Anti and output depenancies have zero cost on all CPUs. */
8258 if (REG_NOTE_KIND (link) != 0)
8261 dep_insn_code_number = recog_memoized (dep_insn);
8263 /* If we can't recognize the insns, we can't really do anything. */
8264 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
8267 insn_type = get_attr_type (insn);
8268 dep_insn_type = get_attr_type (dep_insn);
8270 /* Prologue and epilogue allocators can have a false dependency on ebp.
8271 This results in one cycle extra stall on Pentium prologue scheduling,
8272 so handle this important case manually. */
8273 if (dep_insn_code_number == CODE_FOR_pro_epilogue_adjust_stack
8274 && dep_insn_type == TYPE_ALU
8275 && !reg_mentioned_p (stack_pointer_rtx, insn))
8280 case PROCESSOR_PENTIUM:
8281 /* Address Generation Interlock adds a cycle of latency. */
8282 if (ix86_agi_dependant (insn, dep_insn, insn_type))
8285 /* ??? Compares pair with jump/setcc. */
8286 if (ix86_flags_dependant (insn, dep_insn, insn_type))
8289 /* Floating point stores require value to be ready one cycle ealier. */
8290 if (insn_type == TYPE_FMOV
8291 && get_attr_memory (insn) == MEMORY_STORE
8292 && !ix86_agi_dependant (insn, dep_insn, insn_type))
8296 case PROCESSOR_PENTIUMPRO:
8297 /* Since we can't represent delayed latencies of load+operation,
8298 increase the cost here for non-imov insns. */
8299 if (dep_insn_type != TYPE_IMOV
8300 && dep_insn_type != TYPE_FMOV
8301 && ((memory = get_attr_memory (dep_insn) == MEMORY_LOAD)
8302 || memory == MEMORY_BOTH))
8305 /* INT->FP conversion is expensive. */
8306 if (get_attr_fp_int_src (dep_insn))
8309 /* There is one cycle extra latency between an FP op and a store. */
8310 if (insn_type == TYPE_FMOV
8311 && (set = single_set (dep_insn)) != NULL_RTX
8312 && (set2 = single_set (insn)) != NULL_RTX
8313 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
8314 && GET_CODE (SET_DEST (set2)) == MEM)
8319 /* The esp dependency is resolved before the instruction is really
8321 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
8322 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
8325 /* Since we can't represent delayed latencies of load+operation,
8326 increase the cost here for non-imov insns. */
8327 if ((memory = get_attr_memory (dep_insn) == MEMORY_LOAD)
8328 || memory == MEMORY_BOTH)
8329 cost += (dep_insn_type != TYPE_IMOV) ? 2 : 1;
8331 /* INT->FP conversion is expensive. */
8332 if (get_attr_fp_int_src (dep_insn))
8336 case PROCESSOR_ATHLON:
8337 if ((memory = get_attr_memory (dep_insn)) == MEMORY_LOAD
8338 || memory == MEMORY_BOTH)
8340 if (dep_insn_type == TYPE_IMOV || dep_insn_type == TYPE_FMOV)
8355 struct ppro_sched_data
8358 int issued_this_cycle;
8363 ix86_safe_length (insn)
8366 if (recog_memoized (insn) >= 0)
8367 return get_attr_length(insn);
8373 ix86_safe_length_prefix (insn)
8376 if (recog_memoized (insn) >= 0)
8377 return get_attr_length(insn);
8382 static enum attr_memory
8383 ix86_safe_memory (insn)
8386 if (recog_memoized (insn) >= 0)
8387 return get_attr_memory(insn);
8389 return MEMORY_UNKNOWN;
8392 static enum attr_pent_pair
8393 ix86_safe_pent_pair (insn)
8396 if (recog_memoized (insn) >= 0)
8397 return get_attr_pent_pair(insn);
8399 return PENT_PAIR_NP;
8402 static enum attr_ppro_uops
8403 ix86_safe_ppro_uops (insn)
8406 if (recog_memoized (insn) >= 0)
8407 return get_attr_ppro_uops (insn);
8409 return PPRO_UOPS_MANY;
8413 ix86_dump_ppro_packet (dump)
8416 if (ix86_sched_data.ppro.decode[0])
8418 fprintf (dump, "PPRO packet: %d",
8419 INSN_UID (ix86_sched_data.ppro.decode[0]));
8420 if (ix86_sched_data.ppro.decode[1])
8421 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[1]));
8422 if (ix86_sched_data.ppro.decode[2])
8423 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[2]));
8428 /* We're beginning a new block. Initialize data structures as necessary. */
8431 ix86_sched_init (dump, sched_verbose)
8432 FILE *dump ATTRIBUTE_UNUSED;
8433 int sched_verbose ATTRIBUTE_UNUSED;
8435 memset (&ix86_sched_data, 0, sizeof (ix86_sched_data));
8438 /* Shift INSN to SLOT, and shift everything else down. */
8441 ix86_reorder_insn (insnp, slot)
8448 insnp[0] = insnp[1];
8449 while (++insnp != slot);
8454 /* Find an instruction with given pairability and minimal amount of cycles
8455 lost by the fact that the CPU waits for both pipelines to finish before
8456 reading next instructions. Also take care that both instructions together
8457 can not exceed 7 bytes. */
8460 ix86_pent_find_pair (e_ready, ready, type, first)
8463 enum attr_pent_pair type;
8466 int mincycles, cycles;
8467 enum attr_pent_pair tmp;
8468 enum attr_memory memory;
8469 rtx *insnp, *bestinsnp = NULL;
8471 if (ix86_safe_length (first) > 7 + ix86_safe_length_prefix (first))
8474 memory = ix86_safe_memory (first);
8475 cycles = result_ready_cost (first);
8476 mincycles = INT_MAX;
8478 for (insnp = e_ready; insnp >= ready && mincycles; --insnp)
8479 if ((tmp = ix86_safe_pent_pair (*insnp)) == type
8480 && ix86_safe_length (*insnp) <= 7 + ix86_safe_length_prefix (*insnp))
8482 enum attr_memory second_memory;
8483 int secondcycles, currentcycles;
8485 second_memory = ix86_safe_memory (*insnp);
8486 secondcycles = result_ready_cost (*insnp);
8487 currentcycles = abs (cycles - secondcycles);
8489 if (secondcycles >= 1 && cycles >= 1)
8491 /* Two read/modify/write instructions together takes two
8493 if (memory == MEMORY_BOTH && second_memory == MEMORY_BOTH)
8496 /* Read modify/write instruction followed by read/modify
8497 takes one cycle longer. */
8498 if (memory == MEMORY_BOTH && second_memory == MEMORY_LOAD
8499 && tmp != PENT_PAIR_UV
8500 && ix86_safe_pent_pair (first) != PENT_PAIR_UV)
8503 if (currentcycles < mincycles)
8504 bestinsnp = insnp, mincycles = currentcycles;
8510 /* Subroutines of ix86_sched_reorder. */
8513 ix86_sched_reorder_pentium (ready, e_ready)
8517 enum attr_pent_pair pair1, pair2;
8520 /* This wouldn't be necessary if Haifa knew that static insn ordering
8521 is important to which pipe an insn is issued to. So we have to make
8522 some minor rearrangements. */
8524 pair1 = ix86_safe_pent_pair (*e_ready);
8526 /* If the first insn is non-pairable, let it be. */
8527 if (pair1 == PENT_PAIR_NP)
8530 pair2 = PENT_PAIR_NP;
8533 /* If the first insn is UV or PV pairable, search for a PU
8535 if (pair1 == PENT_PAIR_UV || pair1 == PENT_PAIR_PV)
8537 insnp = ix86_pent_find_pair (e_ready-1, ready,
8538 PENT_PAIR_PU, *e_ready);
8540 pair2 = PENT_PAIR_PU;
8543 /* If the first insn is PU or UV pairable, search for a PV
8545 if (pair2 == PENT_PAIR_NP
8546 && (pair1 == PENT_PAIR_PU || pair1 == PENT_PAIR_UV))
8548 insnp = ix86_pent_find_pair (e_ready-1, ready,
8549 PENT_PAIR_PV, *e_ready);
8551 pair2 = PENT_PAIR_PV;
8554 /* If the first insn is pairable, search for a UV
8556 if (pair2 == PENT_PAIR_NP)
8558 insnp = ix86_pent_find_pair (e_ready-1, ready,
8559 PENT_PAIR_UV, *e_ready);
8561 pair2 = PENT_PAIR_UV;
8564 if (pair2 == PENT_PAIR_NP)
8567 /* Found something! Decide if we need to swap the order. */
8568 if (pair1 == PENT_PAIR_PV || pair2 == PENT_PAIR_PU
8569 || (pair1 == PENT_PAIR_UV && pair2 == PENT_PAIR_UV
8570 && ix86_safe_memory (*e_ready) == MEMORY_BOTH
8571 && ix86_safe_memory (*insnp) == MEMORY_LOAD))
8572 ix86_reorder_insn (insnp, e_ready);
8574 ix86_reorder_insn (insnp, e_ready - 1);
8578 ix86_sched_reorder_ppro (ready, e_ready)
8583 enum attr_ppro_uops cur_uops;
8584 int issued_this_cycle;
8588 /* At this point .ppro.decode contains the state of the three
8589 decoders from last "cycle". That is, those insns that were
8590 actually independent. But here we're scheduling for the
8591 decoder, and we may find things that are decodable in the
8594 memcpy (decode, ix86_sched_data.ppro.decode, sizeof (decode));
8595 issued_this_cycle = 0;
8598 cur_uops = ix86_safe_ppro_uops (*insnp);
8600 /* If the decoders are empty, and we've a complex insn at the
8601 head of the priority queue, let it issue without complaint. */
8602 if (decode[0] == NULL)
8604 if (cur_uops == PPRO_UOPS_MANY)
8610 /* Otherwise, search for a 2-4 uop unsn to issue. */
8611 while (cur_uops != PPRO_UOPS_FEW)
8615 cur_uops = ix86_safe_ppro_uops (*--insnp);
8618 /* If so, move it to the head of the line. */
8619 if (cur_uops == PPRO_UOPS_FEW)
8620 ix86_reorder_insn (insnp, e_ready);
8622 /* Issue the head of the queue. */
8623 issued_this_cycle = 1;
8624 decode[0] = *e_ready--;
8627 /* Look for simple insns to fill in the other two slots. */
8628 for (i = 1; i < 3; ++i)
8629 if (decode[i] == NULL)
8631 if (ready >= e_ready)
8635 cur_uops = ix86_safe_ppro_uops (*insnp);
8636 while (cur_uops != PPRO_UOPS_ONE)
8640 cur_uops = ix86_safe_ppro_uops (*--insnp);
8643 /* Found one. Move it to the head of the queue and issue it. */
8644 if (cur_uops == PPRO_UOPS_ONE)
8646 ix86_reorder_insn (insnp, e_ready);
8647 decode[i] = *e_ready--;
8648 issued_this_cycle++;
8652 /* ??? Didn't find one. Ideally, here we would do a lazy split
8653 of 2-uop insns, issue one and queue the other. */
8657 if (issued_this_cycle == 0)
8658 issued_this_cycle = 1;
8659 ix86_sched_data.ppro.issued_this_cycle = issued_this_cycle;
8662 /* We are about to being issuing insns for this clock cycle.
8663 Override the default sort algorithm to better slot instructions. */
8665 ix86_sched_reorder (dump, sched_verbose, ready, n_ready, clock_var)
8666 FILE *dump ATTRIBUTE_UNUSED;
8667 int sched_verbose ATTRIBUTE_UNUSED;
8670 int clock_var ATTRIBUTE_UNUSED;
8672 rtx *e_ready = ready + n_ready - 1;
8682 case PROCESSOR_PENTIUM:
8683 ix86_sched_reorder_pentium (ready, e_ready);
8686 case PROCESSOR_PENTIUMPRO:
8687 ix86_sched_reorder_ppro (ready, e_ready);
8692 return ix86_issue_rate ();
8695 /* We are about to issue INSN. Return the number of insns left on the
8696 ready queue that can be issued this cycle. */
8699 ix86_variable_issue (dump, sched_verbose, insn, can_issue_more)
8709 return can_issue_more - 1;
8711 case PROCESSOR_PENTIUMPRO:
8713 enum attr_ppro_uops uops = ix86_safe_ppro_uops (insn);
8715 if (uops == PPRO_UOPS_MANY)
8718 ix86_dump_ppro_packet (dump);
8719 ix86_sched_data.ppro.decode[0] = insn;
8720 ix86_sched_data.ppro.decode[1] = NULL;
8721 ix86_sched_data.ppro.decode[2] = NULL;
8723 ix86_dump_ppro_packet (dump);
8724 ix86_sched_data.ppro.decode[0] = NULL;
8726 else if (uops == PPRO_UOPS_FEW)
8729 ix86_dump_ppro_packet (dump);
8730 ix86_sched_data.ppro.decode[0] = insn;
8731 ix86_sched_data.ppro.decode[1] = NULL;
8732 ix86_sched_data.ppro.decode[2] = NULL;
8736 for (i = 0; i < 3; ++i)
8737 if (ix86_sched_data.ppro.decode[i] == NULL)
8739 ix86_sched_data.ppro.decode[i] = insn;
8747 ix86_dump_ppro_packet (dump);
8748 ix86_sched_data.ppro.decode[0] = NULL;
8749 ix86_sched_data.ppro.decode[1] = NULL;
8750 ix86_sched_data.ppro.decode[2] = NULL;
8754 return --ix86_sched_data.ppro.issued_this_cycle;
8758 /* Walk through INSNS and look for MEM references whose address is DSTREG or
8759 SRCREG and set the memory attribute to those of DSTREF and SRCREF, as
8763 ix86_set_move_mem_attrs (insns, dstref, srcref, dstreg, srcreg)
8765 rtx dstref, srcref, dstreg, srcreg;
8769 for (insn = insns; insn != 0 ; insn = NEXT_INSN (insn))
8771 ix86_set_move_mem_attrs_1 (PATTERN (insn), dstref, srcref,
8775 /* Subroutine of above to actually do the updating by recursively walking
8779 ix86_set_move_mem_attrs_1 (x, dstref, srcref, dstreg, srcreg)
8781 rtx dstref, srcref, dstreg, srcreg;
8783 enum rtx_code code = GET_CODE (x);
8784 const char *format_ptr = GET_RTX_FORMAT (code);
8787 if (code == MEM && XEXP (x, 0) == dstreg)
8788 MEM_COPY_ATTRIBUTES (x, dstref);
8789 else if (code == MEM && XEXP (x, 0) == srcreg)
8790 MEM_COPY_ATTRIBUTES (x, srcref);
8792 for (i = 0; i < GET_RTX_LENGTH (code); i++, format_ptr++)
8794 if (*format_ptr == 'e')
8795 ix86_set_move_mem_attrs_1 (XEXP (x, i), dstref, srcref,
8797 else if (*format_ptr == 'E')
8798 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
8799 ix86_set_move_mem_attrs_1 (XVECEXP (x, i, j), dstref, srcref,
8804 /* Compute the alignment given to a constant that is being placed in memory.
8805 EXP is the constant and ALIGN is the alignment that the object would
8807 The value of this function is used instead of that alignment to align
8811 ix86_constant_alignment (exp, align)
8815 if (TREE_CODE (exp) == REAL_CST)
8817 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
8819 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
8822 else if (TREE_CODE (exp) == STRING_CST && TREE_STRING_LENGTH (exp) >= 31
8829 /* Compute the alignment for a static variable.
8830 TYPE is the data type, and ALIGN is the alignment that
8831 the object would ordinarily have. The value of this function is used
8832 instead of that alignment to align the object. */
8835 ix86_data_alignment (type, align)
8839 if (AGGREGATE_TYPE_P (type)
8841 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
8842 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
8843 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
8846 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
8847 to 16byte boundary. */
8850 if (AGGREGATE_TYPE_P (type)
8852 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
8853 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
8854 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
8858 if (TREE_CODE (type) == ARRAY_TYPE)
8860 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
8862 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
8865 else if (TREE_CODE (type) == COMPLEX_TYPE)
8868 if (TYPE_MODE (type) == DCmode && align < 64)
8870 if (TYPE_MODE (type) == XCmode && align < 128)
8873 else if ((TREE_CODE (type) == RECORD_TYPE
8874 || TREE_CODE (type) == UNION_TYPE
8875 || TREE_CODE (type) == QUAL_UNION_TYPE)
8876 && TYPE_FIELDS (type))
8878 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
8880 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
8883 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
8884 || TREE_CODE (type) == INTEGER_TYPE)
8886 if (TYPE_MODE (type) == DFmode && align < 64)
8888 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
8895 /* Compute the alignment for a local variable.
8896 TYPE is the data type, and ALIGN is the alignment that
8897 the object would ordinarily have. The value of this macro is used
8898 instead of that alignment to align the object. */
8901 ix86_local_alignment (type, align)
8905 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
8906 to 16byte boundary. */
8909 if (AGGREGATE_TYPE_P (type)
8911 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
8912 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
8913 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
8916 if (TREE_CODE (type) == ARRAY_TYPE)
8918 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
8920 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
8923 else if (TREE_CODE (type) == COMPLEX_TYPE)
8925 if (TYPE_MODE (type) == DCmode && align < 64)
8927 if (TYPE_MODE (type) == XCmode && align < 128)
8930 else if ((TREE_CODE (type) == RECORD_TYPE
8931 || TREE_CODE (type) == UNION_TYPE
8932 || TREE_CODE (type) == QUAL_UNION_TYPE)
8933 && TYPE_FIELDS (type))
8935 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
8937 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
8940 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
8941 || TREE_CODE (type) == INTEGER_TYPE)
8944 if (TYPE_MODE (type) == DFmode && align < 64)
8946 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
8952 /* Emit RTL insns to initialize the variable parts of a trampoline.
8953 FNADDR is an RTX for the address of the function's pure code.
8954 CXT is an RTX for the static chain value for the function. */
8956 x86_initialize_trampoline (tramp, fnaddr, cxt)
8957 rtx tramp, fnaddr, cxt;
8961 /* Compute offset from the end of the jmp to the target function. */
8962 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
8963 plus_constant (tramp, 10),
8964 NULL_RTX, 1, OPTAB_DIRECT);
8965 emit_move_insn (gen_rtx_MEM (QImode, tramp),
8966 GEN_INT (trunc_int_for_mode (0xb9, QImode)));
8967 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
8968 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
8969 GEN_INT (trunc_int_for_mode (0xe9, QImode)));
8970 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
8975 /* Try to load address using shorter movl instead of movabs.
8976 We may want to support movq for kernel mode, but kernel does not use
8977 trampolines at the moment. */
8978 if (x86_64_zero_extended_value (fnaddr))
8980 fnaddr = copy_to_mode_reg (DImode, fnaddr);
8981 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
8982 GEN_INT (trunc_int_for_mode (0xbb41, HImode)));
8983 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
8984 gen_lowpart (SImode, fnaddr));
8989 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
8990 GEN_INT (trunc_int_for_mode (0xbb49, HImode)));
8991 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
8995 /* Load static chain using movabs to r10. */
8996 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
8997 GEN_INT (trunc_int_for_mode (0xba49, HImode)));
8998 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
9001 /* Jump to the r11 */
9002 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
9003 GEN_INT (trunc_int_for_mode (0xff49, HImode)));
9004 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
9005 GEN_INT (trunc_int_for_mode (0xe3, HImode)));
9007 if (offset > TRAMPOLINE_SIZE)
9012 #define def_builtin(NAME, TYPE, CODE) \
9013 builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, NULL_PTR)
9014 struct builtin_description
9016 enum insn_code icode;
9018 enum ix86_builtins code;
9019 enum rtx_code comparison;
9023 static struct builtin_description bdesc_comi[] =
9025 { CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, EQ, 0 },
9026 { CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, LT, 0 },
9027 { CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, LE, 0 },
9028 { CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, LT, 1 },
9029 { CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, LE, 1 },
9030 { CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, NE, 0 },
9031 { CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, EQ, 0 },
9032 { CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, LT, 0 },
9033 { CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, LE, 0 },
9034 { CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, LT, 1 },
9035 { CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, LE, 1 },
9036 { CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, NE, 0 }
9039 static struct builtin_description bdesc_2arg[] =
9042 { CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
9043 { CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
9044 { CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
9045 { CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
9046 { CODE_FOR_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
9047 { CODE_FOR_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
9048 { CODE_FOR_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
9049 { CODE_FOR_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
9051 { CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
9052 { CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
9053 { CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
9054 { CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 },
9055 { CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 },
9056 { CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
9057 { CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 },
9058 { CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 },
9059 { CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 },
9060 { CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 },
9061 { CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 },
9062 { CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 },
9063 { CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
9064 { CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
9065 { CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
9066 { CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpgtss", IX86_BUILTIN_CMPGTSS, LT, 1 },
9067 { CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpgess", IX86_BUILTIN_CMPGESS, LE, 1 },
9068 { CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
9069 { CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 },
9070 { CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 },
9071 { CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 },
9072 { CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, LT, 1 },
9073 { CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, LE, 1 },
9074 { CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
9076 { CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
9077 { CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
9078 { CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
9079 { CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
9081 { CODE_FOR_sse_andti3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
9082 { CODE_FOR_sse_nandti3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
9083 { CODE_FOR_sse_iorti3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
9084 { CODE_FOR_sse_xorti3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
9086 { CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
9087 { CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
9088 { CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
9089 { CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
9090 { CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
9093 { CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
9094 { CODE_FOR_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
9095 { CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
9096 { CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
9097 { CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
9098 { CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
9100 { CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
9101 { CODE_FOR_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
9102 { CODE_FOR_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
9103 { CODE_FOR_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
9104 { CODE_FOR_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
9105 { CODE_FOR_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
9106 { CODE_FOR_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
9107 { CODE_FOR_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
9109 { CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
9110 { CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
9111 { CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
9113 { CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
9114 { CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
9115 { CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
9116 { CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
9118 { CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
9119 { CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
9121 { CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
9122 { CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
9123 { CODE_FOR_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
9124 { CODE_FOR_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
9125 { CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
9126 { CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
9128 { CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
9129 { CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
9130 { CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
9131 { CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
9133 { CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
9134 { CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
9135 { CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
9136 { CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
9137 { CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
9138 { CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
9141 { CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
9142 { CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
9143 { CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
9145 { CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
9146 { CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
9148 { CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
9149 { CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
9150 { CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
9151 { CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
9152 { CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
9153 { CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
9155 { CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
9156 { CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
9157 { CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
9158 { CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
9159 { CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
9160 { CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
9162 { CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
9163 { CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
9164 { CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
9165 { CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
9167 { CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
9168 { CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 }
9172 static struct builtin_description bdesc_1arg[] =
9174 { CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
9175 { CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
9177 { CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
9178 { CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
9179 { CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
9181 { CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
9182 { CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
9183 { CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
9184 { CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 }
9188 /* Expand all the target specific builtins. This is not called if TARGET_MMX
9189 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
9192 ix86_init_builtins ()
9194 struct builtin_description * d;
9196 tree endlink = void_list_node;
9198 tree pchar_type_node = build_pointer_type (char_type_node);
9199 tree pfloat_type_node = build_pointer_type (float_type_node);
9200 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
9201 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
9204 tree int_ftype_v4sf_v4sf
9205 = build_function_type (integer_type_node,
9206 tree_cons (NULL_TREE, V4SF_type_node,
9207 tree_cons (NULL_TREE,
9210 tree v4si_ftype_v4sf_v4sf
9211 = build_function_type (V4SI_type_node,
9212 tree_cons (NULL_TREE, V4SF_type_node,
9213 tree_cons (NULL_TREE,
9216 /* MMX/SSE/integer conversions. */
9217 tree int_ftype_v4sf_int
9218 = build_function_type (integer_type_node,
9219 tree_cons (NULL_TREE, V4SF_type_node,
9220 tree_cons (NULL_TREE,
9224 = build_function_type (integer_type_node,
9225 tree_cons (NULL_TREE, V4SF_type_node,
9228 = build_function_type (integer_type_node,
9229 tree_cons (NULL_TREE, V8QI_type_node,
9232 = build_function_type (integer_type_node,
9233 tree_cons (NULL_TREE, V2SI_type_node,
9236 = build_function_type (V2SI_type_node,
9237 tree_cons (NULL_TREE, integer_type_node,
9239 tree v4sf_ftype_v4sf_int
9240 = build_function_type (integer_type_node,
9241 tree_cons (NULL_TREE, V4SF_type_node,
9242 tree_cons (NULL_TREE, integer_type_node,
9244 tree v4sf_ftype_v4sf_v2si
9245 = build_function_type (V4SF_type_node,
9246 tree_cons (NULL_TREE, V4SF_type_node,
9247 tree_cons (NULL_TREE, V2SI_type_node,
9249 tree int_ftype_v4hi_int
9250 = build_function_type (integer_type_node,
9251 tree_cons (NULL_TREE, V4HI_type_node,
9252 tree_cons (NULL_TREE, integer_type_node,
9254 tree v4hi_ftype_v4hi_int_int
9255 = build_function_type (V4HI_type_node,
9256 tree_cons (NULL_TREE, V4HI_type_node,
9257 tree_cons (NULL_TREE, integer_type_node,
9258 tree_cons (NULL_TREE,
9261 /* Miscellaneous. */
9262 tree v8qi_ftype_v4hi_v4hi
9263 = build_function_type (V8QI_type_node,
9264 tree_cons (NULL_TREE, V4HI_type_node,
9265 tree_cons (NULL_TREE, V4HI_type_node,
9267 tree v4hi_ftype_v2si_v2si
9268 = build_function_type (V4HI_type_node,
9269 tree_cons (NULL_TREE, V2SI_type_node,
9270 tree_cons (NULL_TREE, V2SI_type_node,
9272 tree v4sf_ftype_v4sf_v4sf_int
9273 = build_function_type (V4SF_type_node,
9274 tree_cons (NULL_TREE, V4SF_type_node,
9275 tree_cons (NULL_TREE, V4SF_type_node,
9276 tree_cons (NULL_TREE,
9279 tree v4hi_ftype_v8qi_v8qi
9280 = build_function_type (V4HI_type_node,
9281 tree_cons (NULL_TREE, V8QI_type_node,
9282 tree_cons (NULL_TREE, V8QI_type_node,
9284 tree v2si_ftype_v4hi_v4hi
9285 = build_function_type (V2SI_type_node,
9286 tree_cons (NULL_TREE, V4HI_type_node,
9287 tree_cons (NULL_TREE, V4HI_type_node,
9289 tree v4hi_ftype_v4hi_int
9290 = build_function_type (V4HI_type_node,
9291 tree_cons (NULL_TREE, V4HI_type_node,
9292 tree_cons (NULL_TREE, integer_type_node,
9294 tree di_ftype_di_int
9295 = build_function_type (long_long_unsigned_type_node,
9296 tree_cons (NULL_TREE, long_long_unsigned_type_node,
9297 tree_cons (NULL_TREE, integer_type_node,
9299 tree v8qi_ftype_v8qi_di
9300 = build_function_type (V8QI_type_node,
9301 tree_cons (NULL_TREE, V8QI_type_node,
9302 tree_cons (NULL_TREE,
9303 long_long_integer_type_node,
9305 tree v4hi_ftype_v4hi_di
9306 = build_function_type (V4HI_type_node,
9307 tree_cons (NULL_TREE, V4HI_type_node,
9308 tree_cons (NULL_TREE,
9309 long_long_integer_type_node,
9311 tree v2si_ftype_v2si_di
9312 = build_function_type (V2SI_type_node,
9313 tree_cons (NULL_TREE, V2SI_type_node,
9314 tree_cons (NULL_TREE,
9315 long_long_integer_type_node,
9317 tree void_ftype_void
9318 = build_function_type (void_type_node, endlink);
9319 tree void_ftype_pchar_int
9320 = build_function_type (void_type_node,
9321 tree_cons (NULL_TREE, pchar_type_node,
9322 tree_cons (NULL_TREE, integer_type_node,
9324 tree void_ftype_unsigned
9325 = build_function_type (void_type_node,
9326 tree_cons (NULL_TREE, unsigned_type_node,
9328 tree unsigned_ftype_void
9329 = build_function_type (unsigned_type_node, endlink);
9331 = build_function_type (long_long_unsigned_type_node, endlink);
9333 = build_function_type (intTI_type_node, endlink);
9334 tree v2si_ftype_v4sf
9335 = build_function_type (V2SI_type_node,
9336 tree_cons (NULL_TREE, V4SF_type_node,
9339 tree maskmovq_args = tree_cons (NULL_TREE, V8QI_type_node,
9340 tree_cons (NULL_TREE, V8QI_type_node,
9341 tree_cons (NULL_TREE,
9344 tree void_ftype_v8qi_v8qi_pchar
9345 = build_function_type (void_type_node, maskmovq_args);
9346 tree v4sf_ftype_pfloat
9347 = build_function_type (V4SF_type_node,
9348 tree_cons (NULL_TREE, pfloat_type_node,
9350 tree v4sf_ftype_float
9351 = build_function_type (V4SF_type_node,
9352 tree_cons (NULL_TREE, float_type_node,
9354 tree v4sf_ftype_float_float_float_float
9355 = build_function_type (V4SF_type_node,
9356 tree_cons (NULL_TREE, float_type_node,
9357 tree_cons (NULL_TREE, float_type_node,
9358 tree_cons (NULL_TREE,
9360 tree_cons (NULL_TREE,
9363 /* @@@ the type is bogus */
9364 tree v4sf_ftype_v4sf_pv2si
9365 = build_function_type (V4SF_type_node,
9366 tree_cons (NULL_TREE, V4SF_type_node,
9367 tree_cons (NULL_TREE, pv2si_type_node,
9369 tree v4sf_ftype_pv2si_v4sf
9370 = build_function_type (V4SF_type_node,
9371 tree_cons (NULL_TREE, V4SF_type_node,
9372 tree_cons (NULL_TREE, pv2si_type_node,
9374 tree void_ftype_pfloat_v4sf
9375 = build_function_type (void_type_node,
9376 tree_cons (NULL_TREE, pfloat_type_node,
9377 tree_cons (NULL_TREE, V4SF_type_node,
9379 tree void_ftype_pdi_di
9380 = build_function_type (void_type_node,
9381 tree_cons (NULL_TREE, pdi_type_node,
9382 tree_cons (NULL_TREE,
9383 long_long_unsigned_type_node,
9385 /* Normal vector unops. */
9386 tree v4sf_ftype_v4sf
9387 = build_function_type (V4SF_type_node,
9388 tree_cons (NULL_TREE, V4SF_type_node,
9391 /* Normal vector binops. */
9392 tree v4sf_ftype_v4sf_v4sf
9393 = build_function_type (V4SF_type_node,
9394 tree_cons (NULL_TREE, V4SF_type_node,
9395 tree_cons (NULL_TREE, V4SF_type_node,
9397 tree v8qi_ftype_v8qi_v8qi
9398 = build_function_type (V8QI_type_node,
9399 tree_cons (NULL_TREE, V8QI_type_node,
9400 tree_cons (NULL_TREE, V8QI_type_node,
9402 tree v4hi_ftype_v4hi_v4hi
9403 = build_function_type (V4HI_type_node,
9404 tree_cons (NULL_TREE, V4HI_type_node,
9405 tree_cons (NULL_TREE, V4HI_type_node,
9407 tree v2si_ftype_v2si_v2si
9408 = build_function_type (V2SI_type_node,
9409 tree_cons (NULL_TREE, V2SI_type_node,
9410 tree_cons (NULL_TREE, V2SI_type_node,
9413 = build_function_type (intTI_type_node,
9414 tree_cons (NULL_TREE, intTI_type_node,
9415 tree_cons (NULL_TREE, intTI_type_node,
9418 = build_function_type (long_long_unsigned_type_node,
9419 tree_cons (NULL_TREE, long_long_unsigned_type_node,
9420 tree_cons (NULL_TREE,
9421 long_long_unsigned_type_node,
9424 /* Add all builtins that are more or less simple operations on two
9426 for (i = 0, d = bdesc_2arg; i < sizeof (bdesc_2arg) / sizeof *d; i++, d++)
9428 /* Use one of the operands; the target can have a different mode for
9429 mask-generating compares. */
9430 enum machine_mode mode;
9435 mode = insn_data[d->icode].operand[1].mode;
9437 if (! TARGET_SSE && ! VALID_MMX_REG_MODE (mode))
9443 type = v4sf_ftype_v4sf_v4sf;
9446 type = v8qi_ftype_v8qi_v8qi;
9449 type = v4hi_ftype_v4hi_v4hi;
9452 type = v2si_ftype_v2si_v2si;
9455 type = ti_ftype_ti_ti;
9458 type = di_ftype_di_di;
9465 /* Override for comparisons. */
9466 if (d->icode == CODE_FOR_maskcmpv4sf3
9467 || d->icode == CODE_FOR_maskncmpv4sf3
9468 || d->icode == CODE_FOR_vmmaskcmpv4sf3
9469 || d->icode == CODE_FOR_vmmaskncmpv4sf3)
9470 type = v4si_ftype_v4sf_v4sf;
9472 def_builtin (d->name, type, d->code);
9475 /* Add the remaining MMX insns with somewhat more complicated types. */
9476 def_builtin ("__builtin_ia32_m_from_int", v2si_ftype_int, IX86_BUILTIN_M_FROM_INT);
9477 def_builtin ("__builtin_ia32_m_to_int", int_ftype_v2si, IX86_BUILTIN_M_TO_INT);
9478 def_builtin ("__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO);
9479 def_builtin ("__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
9480 def_builtin ("__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
9481 def_builtin ("__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
9482 def_builtin ("__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
9483 def_builtin ("__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
9484 def_builtin ("__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
9486 def_builtin ("__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
9487 def_builtin ("__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
9488 def_builtin ("__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
9490 def_builtin ("__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
9491 def_builtin ("__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
9493 def_builtin ("__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
9494 def_builtin ("__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
9496 /* Everything beyond this point is SSE only. */
9500 /* comi/ucomi insns. */
9501 for (i = 0, d = bdesc_comi; i < sizeof (bdesc_comi) / sizeof *d; i++, d++)
9502 def_builtin (d->name, int_ftype_v4sf_v4sf, d->code);
9504 def_builtin ("__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
9505 def_builtin ("__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
9506 def_builtin ("__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
9508 def_builtin ("__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
9509 def_builtin ("__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
9510 def_builtin ("__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
9511 def_builtin ("__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
9512 def_builtin ("__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
9513 def_builtin ("__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
9515 def_builtin ("__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
9516 def_builtin ("__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);
9518 def_builtin ("__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
9520 def_builtin ("__builtin_ia32_loadaps", v4sf_ftype_pfloat, IX86_BUILTIN_LOADAPS);
9521 def_builtin ("__builtin_ia32_loadups", v4sf_ftype_pfloat, IX86_BUILTIN_LOADUPS);
9522 def_builtin ("__builtin_ia32_loadss", v4sf_ftype_pfloat, IX86_BUILTIN_LOADSS);
9523 def_builtin ("__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS);
9524 def_builtin ("__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
9525 def_builtin ("__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS);
9527 def_builtin ("__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
9528 def_builtin ("__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
9529 def_builtin ("__builtin_ia32_storehps", v4sf_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
9530 def_builtin ("__builtin_ia32_storelps", v4sf_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
9532 def_builtin ("__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
9533 def_builtin ("__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
9534 def_builtin ("__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
9535 def_builtin ("__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
9537 def_builtin ("__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
9538 def_builtin ("__builtin_ia32_prefetch", void_ftype_pchar_int, IX86_BUILTIN_PREFETCH);
9540 def_builtin ("__builtin_ia32_psadbw", v4hi_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
9542 def_builtin ("__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
9543 def_builtin ("__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
9544 def_builtin ("__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
9545 def_builtin ("__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
9546 def_builtin ("__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
9547 def_builtin ("__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
9549 def_builtin ("__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
9551 /* Composite intrinsics. */
9552 def_builtin ("__builtin_ia32_setps1", v4sf_ftype_float, IX86_BUILTIN_SETPS1);
9553 def_builtin ("__builtin_ia32_setps", v4sf_ftype_float_float_float_float, IX86_BUILTIN_SETPS);
9554 def_builtin ("__builtin_ia32_setzerops", ti_ftype_void, IX86_BUILTIN_CLRPS);
9555 def_builtin ("__builtin_ia32_loadps1", v4sf_ftype_pfloat, IX86_BUILTIN_LOADPS1);
9556 def_builtin ("__builtin_ia32_loadrps", v4sf_ftype_pfloat, IX86_BUILTIN_LOADRPS);
9557 def_builtin ("__builtin_ia32_storeps1", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREPS1);
9558 def_builtin ("__builtin_ia32_storerps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORERPS);
9561 /* Errors in the source file can cause expand_expr to return const0_rtx
9562 where we expect a vector. To avoid crashing, use one of the vector
9563 clear instructions. */
9565 safe_vector_operand (x, mode)
9567 enum machine_mode mode;
9569 if (x != const0_rtx)
9571 x = gen_reg_rtx (mode);
9573 if (VALID_MMX_REG_MODE (mode))
9574 emit_insn (gen_mmx_clrdi (mode == DImode ? x
9575 : gen_rtx_SUBREG (DImode, x, 0)));
9577 emit_insn (gen_sse_clrti (mode == TImode ? x
9578 : gen_rtx_SUBREG (TImode, x, 0)));
9582 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
9585 ix86_expand_binop_builtin (icode, arglist, target)
9586 enum insn_code icode;
9591 tree arg0 = TREE_VALUE (arglist);
9592 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
9593 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
9594 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
9595 enum machine_mode tmode = insn_data[icode].operand[0].mode;
9596 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
9597 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
9599 if (VECTOR_MODE_P (mode0))
9600 op0 = safe_vector_operand (op0, mode0);
9601 if (VECTOR_MODE_P (mode1))
9602 op1 = safe_vector_operand (op1, mode1);
9605 || GET_MODE (target) != tmode
9606 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
9607 target = gen_reg_rtx (tmode);
9609 /* In case the insn wants input operands in modes different from
9610 the result, abort. */
9611 if (GET_MODE (op0) != mode0 || GET_MODE (op1) != mode1)
9614 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
9615 op0 = copy_to_mode_reg (mode0, op0);
9616 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
9617 op1 = copy_to_mode_reg (mode1, op1);
9619 pat = GEN_FCN (icode) (target, op0, op1);
9626 /* Subroutine of ix86_expand_builtin to take care of stores. */
9629 ix86_expand_store_builtin (icode, arglist, shuffle)
9630 enum insn_code icode;
9635 tree arg0 = TREE_VALUE (arglist);
9636 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
9637 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
9638 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
9639 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
9640 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
9642 if (VECTOR_MODE_P (mode1))
9643 op1 = safe_vector_operand (op1, mode1);
9645 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
9646 if (shuffle >= 0 || ! (*insn_data[icode].operand[1].predicate) (op1, mode1))
9647 op1 = copy_to_mode_reg (mode1, op1);
9649 emit_insn (gen_sse_shufps (op1, op1, op1, GEN_INT (shuffle)));
9650 pat = GEN_FCN (icode) (op0, op1);
9656 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
9659 ix86_expand_unop_builtin (icode, arglist, target, do_load)
9660 enum insn_code icode;
9666 tree arg0 = TREE_VALUE (arglist);
9667 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
9668 enum machine_mode tmode = insn_data[icode].operand[0].mode;
9669 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
9672 || GET_MODE (target) != tmode
9673 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
9674 target = gen_reg_rtx (tmode);
9676 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
9679 if (VECTOR_MODE_P (mode0))
9680 op0 = safe_vector_operand (op0, mode0);
9682 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
9683 op0 = copy_to_mode_reg (mode0, op0);
9686 pat = GEN_FCN (icode) (target, op0);
9693 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
9694 sqrtss, rsqrtss, rcpss. */
9697 ix86_expand_unop1_builtin (icode, arglist, target)
9698 enum insn_code icode;
9703 tree arg0 = TREE_VALUE (arglist);
9704 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
9705 enum machine_mode tmode = insn_data[icode].operand[0].mode;
9706 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
9709 || GET_MODE (target) != tmode
9710 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
9711 target = gen_reg_rtx (tmode);
9713 if (VECTOR_MODE_P (mode0))
9714 op0 = safe_vector_operand (op0, mode0);
9716 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
9717 op0 = copy_to_mode_reg (mode0, op0);
9719 pat = GEN_FCN (icode) (target, op0, op0);
9726 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
9729 ix86_expand_sse_compare (d, arglist, target)
9730 struct builtin_description *d;
9735 tree arg0 = TREE_VALUE (arglist);
9736 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
9737 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
9738 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
9740 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
9741 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
9742 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
9743 enum rtx_code comparison = d->comparison;
9745 if (VECTOR_MODE_P (mode0))
9746 op0 = safe_vector_operand (op0, mode0);
9747 if (VECTOR_MODE_P (mode1))
9748 op1 = safe_vector_operand (op1, mode1);
9750 /* Swap operands if we have a comparison that isn't available in
9754 target = gen_reg_rtx (tmode);
9755 emit_move_insn (target, op1);
9758 comparison = swap_condition (comparison);
9761 || GET_MODE (target) != tmode
9762 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
9763 target = gen_reg_rtx (tmode);
9765 if (! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
9766 op0 = copy_to_mode_reg (mode0, op0);
9767 if (! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
9768 op1 = copy_to_mode_reg (mode1, op1);
9770 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
9771 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
9778 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
9781 ix86_expand_sse_comi (d, arglist, target)
9782 struct builtin_description *d;
9787 tree arg0 = TREE_VALUE (arglist);
9788 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
9789 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
9790 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
9792 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
9793 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
9794 enum rtx_code comparison = d->comparison;
9796 if (VECTOR_MODE_P (mode0))
9797 op0 = safe_vector_operand (op0, mode0);
9798 if (VECTOR_MODE_P (mode1))
9799 op1 = safe_vector_operand (op1, mode1);
9801 /* Swap operands if we have a comparison that isn't available in
9808 comparison = swap_condition (comparison);
9811 target = gen_reg_rtx (SImode);
9812 emit_move_insn (target, const0_rtx);
9813 target = gen_rtx_SUBREG (QImode, target, 0);
9815 if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
9816 op0 = copy_to_mode_reg (mode0, op0);
9817 if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
9818 op1 = copy_to_mode_reg (mode1, op1);
9820 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
9821 pat = GEN_FCN (d->icode) (op0, op1, op2);
9825 emit_insn (gen_setcc_2 (target, op2));
9830 /* Expand an expression EXP that calls a built-in function,
9831 with result going to TARGET if that's convenient
9832 (and in mode MODE if that's convenient).
9833 SUBTARGET may be used as the target for computing one of EXP's operands.
9834 IGNORE is nonzero if the value is to be ignored. */
9837 ix86_expand_builtin (exp, target, subtarget, mode, ignore)
9840 rtx subtarget ATTRIBUTE_UNUSED;
9841 enum machine_mode mode ATTRIBUTE_UNUSED;
9842 int ignore ATTRIBUTE_UNUSED;
9844 struct builtin_description *d;
9846 enum insn_code icode;
9847 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
9848 tree arglist = TREE_OPERAND (exp, 1);
9849 tree arg0, arg1, arg2, arg3;
9850 rtx op0, op1, op2, pat;
9851 enum machine_mode tmode, mode0, mode1, mode2;
9852 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
9856 case IX86_BUILTIN_EMMS:
9857 emit_insn (gen_emms ());
9860 case IX86_BUILTIN_SFENCE:
9861 emit_insn (gen_sfence ());
9864 case IX86_BUILTIN_M_FROM_INT:
9865 target = gen_reg_rtx (DImode);
9866 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
9867 emit_move_insn (gen_rtx_SUBREG (SImode, target, 0), op0);
9870 case IX86_BUILTIN_M_TO_INT:
9871 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
9872 op0 = copy_to_mode_reg (DImode, op0);
9873 target = gen_reg_rtx (SImode);
9874 emit_move_insn (target, gen_rtx_SUBREG (SImode, op0, 0));
9877 case IX86_BUILTIN_PEXTRW:
9878 icode = CODE_FOR_mmx_pextrw;
9879 arg0 = TREE_VALUE (arglist);
9880 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
9881 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
9882 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
9883 tmode = insn_data[icode].operand[0].mode;
9884 mode0 = insn_data[icode].operand[1].mode;
9885 mode1 = insn_data[icode].operand[2].mode;
9887 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
9888 op0 = copy_to_mode_reg (mode0, op0);
9889 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
9891 /* @@@ better error message */
9892 error ("selector must be an immediate");
9896 || GET_MODE (target) != tmode
9897 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
9898 target = gen_reg_rtx (tmode);
9899 pat = GEN_FCN (icode) (target, op0, op1);
9905 case IX86_BUILTIN_PINSRW:
9906 icode = CODE_FOR_mmx_pinsrw;
9907 arg0 = TREE_VALUE (arglist);
9908 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
9909 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
9910 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
9911 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
9912 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
9913 tmode = insn_data[icode].operand[0].mode;
9914 mode0 = insn_data[icode].operand[1].mode;
9915 mode1 = insn_data[icode].operand[2].mode;
9916 mode2 = insn_data[icode].operand[3].mode;
9918 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
9919 op0 = copy_to_mode_reg (mode0, op0);
9920 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
9921 op1 = copy_to_mode_reg (mode1, op1);
9922 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
9924 /* @@@ better error message */
9925 error ("selector must be an immediate");
9929 || GET_MODE (target) != tmode
9930 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
9931 target = gen_reg_rtx (tmode);
9932 pat = GEN_FCN (icode) (target, op0, op1, op2);
9938 case IX86_BUILTIN_MASKMOVQ:
9939 icode = CODE_FOR_mmx_maskmovq;
9940 /* Note the arg order is different from the operand order. */
9941 arg1 = TREE_VALUE (arglist);
9942 arg2 = TREE_VALUE (TREE_CHAIN (arglist));
9943 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
9944 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
9945 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
9946 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
9947 mode0 = insn_data[icode].operand[0].mode;
9948 mode1 = insn_data[icode].operand[1].mode;
9949 mode2 = insn_data[icode].operand[2].mode;
9951 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
9952 op0 = copy_to_mode_reg (mode0, op0);
9953 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
9954 op1 = copy_to_mode_reg (mode1, op1);
9955 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
9956 op2 = copy_to_mode_reg (mode2, op2);
9957 pat = GEN_FCN (icode) (op0, op1, op2);
9963 case IX86_BUILTIN_SQRTSS:
9964 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2, arglist, target);
9965 case IX86_BUILTIN_RSQRTSS:
9966 return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2, arglist, target);
9967 case IX86_BUILTIN_RCPSS:
9968 return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target);
9970 case IX86_BUILTIN_LOADAPS:
9971 return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);
9973 case IX86_BUILTIN_LOADUPS:
9974 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
9976 case IX86_BUILTIN_STOREAPS:
9977 return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist, -1);
9978 case IX86_BUILTIN_STOREUPS:
9979 return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist, -1);
9981 case IX86_BUILTIN_LOADSS:
9982 return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1);
9984 case IX86_BUILTIN_STORESS:
9985 return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist, -1);
9987 case IX86_BUILTIN_LOADHPS:
9988 case IX86_BUILTIN_LOADLPS:
9989 icode = (fcode == IX86_BUILTIN_LOADHPS
9990 ? CODE_FOR_sse_movhps : CODE_FOR_sse_movlps);
9991 arg0 = TREE_VALUE (arglist);
9992 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
9993 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
9994 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
9995 tmode = insn_data[icode].operand[0].mode;
9996 mode0 = insn_data[icode].operand[1].mode;
9997 mode1 = insn_data[icode].operand[2].mode;
9999 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
10000 op0 = copy_to_mode_reg (mode0, op0);
10001 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
10003 || GET_MODE (target) != tmode
10004 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
10005 target = gen_reg_rtx (tmode);
10006 pat = GEN_FCN (icode) (target, op0, op1);
10012 case IX86_BUILTIN_STOREHPS:
10013 case IX86_BUILTIN_STORELPS:
10014 icode = (fcode == IX86_BUILTIN_STOREHPS
10015 ? CODE_FOR_sse_movhps : CODE_FOR_sse_movlps);
10016 arg0 = TREE_VALUE (arglist);
10017 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
10018 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
10019 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
10020 mode0 = insn_data[icode].operand[1].mode;
10021 mode1 = insn_data[icode].operand[2].mode;
10023 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
10024 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
10025 op1 = copy_to_mode_reg (mode1, op1);
10027 pat = GEN_FCN (icode) (op0, op0, op1);
10033 case IX86_BUILTIN_MOVNTPS:
10034 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist, -1);
10035 case IX86_BUILTIN_MOVNTQ:
10036 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist, -1);
10038 case IX86_BUILTIN_LDMXCSR:
10039 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
10040 target = assign_386_stack_local (SImode, 0);
10041 emit_move_insn (target, op0);
10042 emit_insn (gen_ldmxcsr (target));
10045 case IX86_BUILTIN_STMXCSR:
10046 target = assign_386_stack_local (SImode, 0);
10047 emit_insn (gen_stmxcsr (target));
10048 return copy_to_mode_reg (SImode, target);
10050 case IX86_BUILTIN_PREFETCH:
10051 icode = CODE_FOR_prefetch;
10052 arg0 = TREE_VALUE (arglist);
10053 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
10054 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
10055 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
10056 mode0 = insn_data[icode].operand[0].mode;
10057 mode1 = insn_data[icode].operand[1].mode;
10059 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
10061 /* @@@ better error message */
10062 error ("selector must be an immediate");
10066 op0 = copy_to_mode_reg (Pmode, op0);
10067 pat = GEN_FCN (icode) (op0, op1);
10073 case IX86_BUILTIN_SHUFPS:
10074 icode = CODE_FOR_sse_shufps;
10075 arg0 = TREE_VALUE (arglist);
10076 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
10077 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
10078 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
10079 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
10080 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
10081 tmode = insn_data[icode].operand[0].mode;
10082 mode0 = insn_data[icode].operand[1].mode;
10083 mode1 = insn_data[icode].operand[2].mode;
10084 mode2 = insn_data[icode].operand[3].mode;
10086 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
10087 op0 = copy_to_mode_reg (mode0, op0);
10088 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
10089 op1 = copy_to_mode_reg (mode1, op1);
10090 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
10092 /* @@@ better error message */
10093 error ("mask must be an immediate");
10097 || GET_MODE (target) != tmode
10098 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
10099 target = gen_reg_rtx (tmode);
10100 pat = GEN_FCN (icode) (target, op0, op1, op2);
10106 case IX86_BUILTIN_PSHUFW:
10107 icode = CODE_FOR_mmx_pshufw;
10108 arg0 = TREE_VALUE (arglist);
10109 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
10110 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
10111 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
10112 tmode = insn_data[icode].operand[0].mode;
10113 mode0 = insn_data[icode].operand[2].mode;
10114 mode1 = insn_data[icode].operand[3].mode;
10116 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
10117 op0 = copy_to_mode_reg (mode0, op0);
10118 if (! (*insn_data[icode].operand[3].predicate) (op1, mode1))
10120 /* @@@ better error message */
10121 error ("mask must be an immediate");
10125 || GET_MODE (target) != tmode
10126 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
10127 target = gen_reg_rtx (tmode);
10128 pat = GEN_FCN (icode) (target, target, op0, op1);
10134 /* Composite intrinsics. */
10135 case IX86_BUILTIN_SETPS1:
10136 target = assign_386_stack_local (SFmode, 0);
10137 arg0 = TREE_VALUE (arglist);
10138 emit_move_insn (change_address (target, SFmode, XEXP (target, 0)),
10139 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
10140 op0 = gen_reg_rtx (V4SFmode);
10141 emit_insn (gen_sse_loadss (op0, change_address (target, V4SFmode,
10142 XEXP (target, 0))));
10143 emit_insn (gen_sse_shufps (op0, op0, op0, GEN_INT (0)));
10146 case IX86_BUILTIN_SETPS:
10147 target = assign_386_stack_local (V4SFmode, 0);
10148 op0 = change_address (target, SFmode, XEXP (target, 0));
10149 arg0 = TREE_VALUE (arglist);
10150 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
10151 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
10152 arg3 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist))));
10153 emit_move_insn (op0,
10154 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
10155 emit_move_insn (adj_offsettable_operand (op0, 4),
10156 expand_expr (arg1, NULL_RTX, VOIDmode, 0));
10157 emit_move_insn (adj_offsettable_operand (op0, 8),
10158 expand_expr (arg2, NULL_RTX, VOIDmode, 0));
10159 emit_move_insn (adj_offsettable_operand (op0, 12),
10160 expand_expr (arg3, NULL_RTX, VOIDmode, 0));
10161 op0 = gen_reg_rtx (V4SFmode);
10162 emit_insn (gen_sse_movaps (op0, target));
10165 case IX86_BUILTIN_CLRPS:
10166 target = gen_reg_rtx (TImode);
10167 emit_insn (gen_sse_clrti (target));
10170 case IX86_BUILTIN_LOADRPS:
10171 target = ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist,
10172 gen_reg_rtx (V4SFmode), 1);
10173 emit_insn (gen_sse_shufps (target, target, target, GEN_INT (0x1b)));
10176 case IX86_BUILTIN_LOADPS1:
10177 target = ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist,
10178 gen_reg_rtx (V4SFmode), 1);
10179 emit_insn (gen_sse_shufps (target, target, target, const0_rtx));
10182 case IX86_BUILTIN_STOREPS1:
10183 return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist, 0);
10184 case IX86_BUILTIN_STORERPS:
10185 return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist, 0x1B);
10187 case IX86_BUILTIN_MMX_ZERO:
10188 target = gen_reg_rtx (DImode);
10189 emit_insn (gen_mmx_clrdi (target));
10196 for (i = 0, d = bdesc_2arg; i < sizeof (bdesc_2arg) / sizeof *d; i++, d++)
10197 if (d->code == fcode)
10199 /* Compares are treated specially. */
10200 if (d->icode == CODE_FOR_maskcmpv4sf3
10201 || d->icode == CODE_FOR_vmmaskcmpv4sf3
10202 || d->icode == CODE_FOR_maskncmpv4sf3
10203 || d->icode == CODE_FOR_vmmaskncmpv4sf3)
10204 return ix86_expand_sse_compare (d, arglist, target);
10206 return ix86_expand_binop_builtin (d->icode, arglist, target);
10209 for (i = 0, d = bdesc_1arg; i < sizeof (bdesc_1arg) / sizeof *d; i++, d++)
10210 if (d->code == fcode)
10211 return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
10213 for (i = 0, d = bdesc_comi; i < sizeof (bdesc_comi) / sizeof *d; i++, d++)
10214 if (d->code == fcode)
10215 return ix86_expand_sse_comi (d, arglist, target);
10217 /* @@@ Should really do something sensible here. */
10221 /* Store OPERAND to the memory after reload is completed. This means
10222 that we can't easilly use assign_stack_local. */
10224 ix86_force_to_memory (mode, operand)
10225 enum machine_mode mode;
10229 if (!reload_completed)
10231 if (TARGET_64BIT && TARGET_RED_ZONE)
10233 result = gen_rtx_MEM (mode,
10234 gen_rtx_PLUS (Pmode,
10236 GEN_INT (-RED_ZONE_SIZE)));
10237 emit_move_insn (result, operand);
10239 else if (TARGET_64BIT && !TARGET_RED_ZONE)
10245 operand = gen_lowpart (DImode, operand);
10249 gen_rtx_SET (VOIDmode,
10250 gen_rtx_MEM (DImode,
10251 gen_rtx_PRE_DEC (DImode,
10252 stack_pointer_rtx)),
10258 result = gen_rtx_MEM (mode, stack_pointer_rtx);
10267 split_di (&operand, 1, operands, operands + 1);
10269 gen_rtx_SET (VOIDmode,
10270 gen_rtx_MEM (SImode,
10271 gen_rtx_PRE_DEC (Pmode,
10272 stack_pointer_rtx)),
10275 gen_rtx_SET (VOIDmode,
10276 gen_rtx_MEM (SImode,
10277 gen_rtx_PRE_DEC (Pmode,
10278 stack_pointer_rtx)),
10283 /* It is better to store HImodes as SImodes. */
10284 if (!TARGET_PARTIAL_REG_STALL)
10285 operand = gen_lowpart (SImode, operand);
10289 gen_rtx_SET (VOIDmode,
10290 gen_rtx_MEM (GET_MODE (operand),
10291 gen_rtx_PRE_DEC (SImode,
10292 stack_pointer_rtx)),
10298 result = gen_rtx_MEM (mode, stack_pointer_rtx);
10303 /* Free operand from the memory. */
10305 ix86_free_from_memory (mode)
10306 enum machine_mode mode;
10308 if (!TARGET_64BIT || !TARGET_RED_ZONE)
10312 if (mode == DImode || TARGET_64BIT)
10314 else if (mode == HImode && TARGET_PARTIAL_REG_STALL)
10318 /* Use LEA to deallocate stack space. In peephole2 it will be converted
10319 to pop or add instruction if registers are available. */
10320 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10321 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
10326 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
10327 QImode must go into class Q_REGS.
10328 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
10329 movdf to do mem-to-mem moves through integer regs. */
10331 ix86_preferred_reload_class (x, class)
10333 enum reg_class class;
10335 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
10337 /* SSE can't load any constant directly yet. */
10338 if (SSE_CLASS_P (class))
10340 /* Floats can load 0 and 1. */
10341 if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x))
10343 /* Limit class to non-SSE. Use GENERAL_REGS if possible. */
10344 if (MAYBE_SSE_CLASS_P (class))
10345 return (reg_class_subset_p (class, GENERAL_REGS)
10346 ? GENERAL_REGS : FLOAT_REGS);
10350 /* General regs can load everything. */
10351 if (reg_class_subset_p (class, GENERAL_REGS))
10352 return GENERAL_REGS;
10353 /* In case we haven't resolved FLOAT or SSE yet, give up. */
10354 if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
10357 if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
10359 if (GET_MODE (x) == QImode && ! reg_class_subset_p (class, Q_REGS))
10364 /* If we are copying between general and FP registers, we need a memory
10365 location. The same is true for SSE and MMX registers.
10367 The macro can't work reliably when one of the CLASSES is class containing
10368 registers from multiple units (SSE, MMX, integer). We avoid this by never
10369 combining those units in single alternative in the machine description.
10370 Ensure that this constraint holds to avoid unexpected surprises.
10372 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
10373 enforce these sanity checks. */
10375 ix86_secondary_memory_needed (class1, class2, mode, strict)
10376 enum reg_class class1, class2;
10377 enum machine_mode mode;
10380 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
10381 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
10382 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
10383 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
10384 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
10385 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
10392 return (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)
10393 || (SSE_CLASS_P (class1) != SSE_CLASS_P (class2)
10394 && (mode) != SImode)
10395 || (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
10396 && (mode) != SImode));
10398 /* Return the cost of moving data from a register in class CLASS1 to
10399 one in class CLASS2.
10401 It is not required that the cost always equal 2 when FROM is the same as TO;
10402 on some machines it is expensive to move between registers if they are not
10403 general registers. */
10405 ix86_register_move_cost (mode, class1, class2)
10406 enum machine_mode mode;
10407 enum reg_class class1, class2;
10409 /* In case we require secondary memory, compute cost of the store followed
10410 by load. In case of copying from general_purpose_register we may emit
10411 multiple stores followed by single load causing memory size mismatch
10412 stall. Count this as arbitarily high cost of 20. */
10413 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
10416 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
10418 return (MEMORY_MOVE_COST (mode, class1, 0)
10419 + MEMORY_MOVE_COST (mode, class2, 1) + add_cost);
10421 /* Moves between SSE/MMX and integer unit are expensive. */
10422 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
10423 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
10424 return ix86_cost->mmxsse_to_integer;
10425 if (MAYBE_FLOAT_CLASS_P (class1))
10426 return ix86_cost->fp_move;
10427 if (MAYBE_SSE_CLASS_P (class1))
10428 return ix86_cost->sse_move;
10429 if (MAYBE_MMX_CLASS_P (class1))
10430 return ix86_cost->mmx_move;
10434 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
10436 ix86_hard_regno_mode_ok (regno, mode)
10438 enum machine_mode mode;
10440 /* Flags and only flags can only hold CCmode values. */
10441 if (CC_REGNO_P (regno))
10442 return GET_MODE_CLASS (mode) == MODE_CC;
10443 if (GET_MODE_CLASS (mode) == MODE_CC
10444 || GET_MODE_CLASS (mode) == MODE_RANDOM
10445 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
10447 if (FP_REGNO_P (regno))
10448 return VALID_FP_MODE_P (mode);
10449 if (SSE_REGNO_P (regno))
10450 return VALID_SSE_REG_MODE (mode);
10451 if (MMX_REGNO_P (regno))
10452 return VALID_MMX_REG_MODE (mode);
10453 /* We handle both integer and floats in the general purpose registers.
10454 In future we should be able to handle vector modes as well. */
10455 if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode))
10457 /* Take care for QImode values - they can be in non-QI regs, but then
10458 they do cause partial register stalls. */
10459 if (regno < 4 || mode != QImode || TARGET_64BIT)
10461 return reload_in_progress || reload_completed || !TARGET_PARTIAL_REG_STALL;
10464 /* Return the cost of moving data of mode M between a
10465 register and memory. A value of 2 is the default; this cost is
10466 relative to those in `REGISTER_MOVE_COST'.
10468 If moving between registers and memory is more expensive than
10469 between two registers, you should define this macro to express the
10472 Model also increased moving costs of QImode registers in non
10476 ix86_memory_move_cost (mode, class, in)
10477 enum machine_mode mode;
10478 enum reg_class class;
10481 if (FLOAT_CLASS_P (class))
10499 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
10501 if (SSE_CLASS_P (class))
10504 switch (GET_MODE_SIZE (mode))
10518 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
10520 if (MMX_CLASS_P (class))
10523 switch (GET_MODE_SIZE (mode))
10534 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
10536 switch (GET_MODE_SIZE (mode))
10540 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
10541 : ix86_cost->movzbl_load);
10543 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
10544 : ix86_cost->int_store[0] + 4);
10547 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
10549 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
10550 if (mode == TFmode)
10552 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
10553 * (int) GET_MODE_SIZE (mode) / 4);