1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001
3 Free Software Foundation, Inc.
5 This file is part of GNU CC.
7 GNU CC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
12 GNU CC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GNU CC; see the file COPYING. If not, write to
19 the Free Software Foundation, 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
29 #include "hard-reg-set.h"
31 #include "insn-config.h"
32 #include "conditions.h"
34 #include "insn-attr.h"
41 #include "basic-block.h"
44 #ifndef CHECK_STACK_LIMIT
45 #define CHECK_STACK_LIMIT -1
48 /* Processor costs (relative to an add) */
49 struct processor_costs i386_cost = { /* 386 specific costs */
50 1, /* cost of an add instruction */
51 1, /* cost of a lea instruction */
52 3, /* variable shift costs */
53 2, /* constant shift costs */
54 6, /* cost of starting a multiply */
55 1, /* cost of multiply per each bit set */
56 23, /* cost of a divide/mod */
57 15, /* "large" insn */
59 4, /* cost for loading QImode using movzbl */
60 {2, 4, 2}, /* cost of loading integer registers
61 in QImode, HImode and SImode.
62 Relative to reg-reg move (2). */
63 {2, 4, 2}, /* cost of storing integer registers */
64 2, /* cost of reg,reg fld/fst */
65 {8, 8, 8}, /* cost of loading fp registers
66 in SFmode, DFmode and XFmode */
67 {8, 8, 8}, /* cost of loading integer registers */
68 2, /* cost of moving MMX register */
69 {4, 8}, /* cost of loading MMX registers
70 in SImode and DImode */
71 {4, 8}, /* cost of storing MMX registers
72 in SImode and DImode */
73 2, /* cost of moving SSE register */
74 {4, 8, 16}, /* cost of loading SSE registers
75 in SImode, DImode and TImode */
76 {4, 8, 16}, /* cost of storing SSE registers
77 in SImode, DImode and TImode */
78 3, /* MMX or SSE register to integer */
81 struct processor_costs i486_cost = { /* 486 specific costs */
82 1, /* cost of an add instruction */
83 1, /* cost of a lea instruction */
84 3, /* variable shift costs */
85 2, /* constant shift costs */
86 12, /* cost of starting a multiply */
87 1, /* cost of multiply per each bit set */
88 40, /* cost of a divide/mod */
89 15, /* "large" insn */
91 4, /* cost for loading QImode using movzbl */
92 {2, 4, 2}, /* cost of loading integer registers
93 in QImode, HImode and SImode.
94 Relative to reg-reg move (2). */
95 {2, 4, 2}, /* cost of storing integer registers */
96 2, /* cost of reg,reg fld/fst */
97 {8, 8, 8}, /* cost of loading fp registers
98 in SFmode, DFmode and XFmode */
99 {8, 8, 8}, /* cost of loading integer registers */
100 2, /* cost of moving MMX register */
101 {4, 8}, /* cost of loading MMX registers
102 in SImode and DImode */
103 {4, 8}, /* cost of storing MMX registers
104 in SImode and DImode */
105 2, /* cost of moving SSE register */
106 {4, 8, 16}, /* cost of loading SSE registers
107 in SImode, DImode and TImode */
108 {4, 8, 16}, /* cost of storing SSE registers
109 in SImode, DImode and TImode */
110 3 /* MMX or SSE register to integer */
113 struct processor_costs pentium_cost = {
114 1, /* cost of an add instruction */
115 1, /* cost of a lea instruction */
116 4, /* variable shift costs */
117 1, /* constant shift costs */
118 11, /* cost of starting a multiply */
119 0, /* cost of multiply per each bit set */
120 25, /* cost of a divide/mod */
121 8, /* "large" insn */
123 6, /* cost for loading QImode using movzbl */
124 {2, 4, 2}, /* cost of loading integer registers
125 in QImode, HImode and SImode.
126 Relative to reg-reg move (2). */
127 {2, 4, 2}, /* cost of storing integer registers */
128 2, /* cost of reg,reg fld/fst */
129 {2, 2, 6}, /* cost of loading fp registers
130 in SFmode, DFmode and XFmode */
131 {4, 4, 6}, /* cost of loading integer registers */
132 8, /* cost of moving MMX register */
133 {8, 8}, /* cost of loading MMX registers
134 in SImode and DImode */
135 {8, 8}, /* cost of storing MMX registers
136 in SImode and DImode */
137 2, /* cost of moving SSE register */
138 {4, 8, 16}, /* cost of loading SSE registers
139 in SImode, DImode and TImode */
140 {4, 8, 16}, /* cost of storing SSE registers
141 in SImode, DImode and TImode */
142 3 /* MMX or SSE register to integer */
145 struct processor_costs pentiumpro_cost = {
146 1, /* cost of an add instruction */
147 1, /* cost of a lea instruction */
148 1, /* variable shift costs */
149 1, /* constant shift costs */
150 4, /* cost of starting a multiply */
151 0, /* cost of multiply per each bit set */
152 17, /* cost of a divide/mod */
153 8, /* "large" insn */
155 2, /* cost for loading QImode using movzbl */
156 {4, 4, 4}, /* cost of loading integer registers
157 in QImode, HImode and SImode.
158 Relative to reg-reg move (2). */
159 {2, 2, 2}, /* cost of storing integer registers */
160 2, /* cost of reg,reg fld/fst */
161 {2, 2, 6}, /* cost of loading fp registers
162 in SFmode, DFmode and XFmode */
163 {4, 4, 6}, /* cost of loading integer registers */
164 2, /* cost of moving MMX register */
165 {2, 2}, /* cost of loading MMX registers
166 in SImode and DImode */
167 {2, 2}, /* cost of storing MMX registers
168 in SImode and DImode */
169 2, /* cost of moving SSE register */
170 {2, 2, 8}, /* cost of loading SSE registers
171 in SImode, DImode and TImode */
172 {2, 2, 8}, /* cost of storing SSE registers
173 in SImode, DImode and TImode */
174 3 /* MMX or SSE register to integer */
177 struct processor_costs k6_cost = {
178 1, /* cost of an add instruction */
179 2, /* cost of a lea instruction */
180 1, /* variable shift costs */
181 1, /* constant shift costs */
182 3, /* cost of starting a multiply */
183 0, /* cost of multiply per each bit set */
184 18, /* cost of a divide/mod */
185 8, /* "large" insn */
187 3, /* cost for loading QImode using movzbl */
188 {4, 5, 4}, /* cost of loading integer registers
189 in QImode, HImode and SImode.
190 Relative to reg-reg move (2). */
191 {2, 3, 2}, /* cost of storing integer registers */
192 4, /* cost of reg,reg fld/fst */
193 {6, 6, 6}, /* cost of loading fp registers
194 in SFmode, DFmode and XFmode */
195 {4, 4, 4}, /* cost of loading integer registers */
196 2, /* cost of moving MMX register */
197 {2, 2}, /* cost of loading MMX registers
198 in SImode and DImode */
199 {2, 2}, /* cost of storing MMX registers
200 in SImode and DImode */
201 2, /* cost of moving SSE register */
202 {2, 2, 8}, /* cost of loading SSE registers
203 in SImode, DImode and TImode */
204 {2, 2, 8}, /* cost of storing SSE registers
205 in SImode, DImode and TImode */
206 6 /* MMX or SSE register to integer */
209 struct processor_costs athlon_cost = {
210 1, /* cost of an add instruction */
211 2, /* cost of a lea instruction */
212 1, /* variable shift costs */
213 1, /* constant shift costs */
214 5, /* cost of starting a multiply */
215 0, /* cost of multiply per each bit set */
216 42, /* cost of a divide/mod */
217 8, /* "large" insn */
219 4, /* cost for loading QImode using movzbl */
220 {4, 5, 4}, /* cost of loading integer registers
221 in QImode, HImode and SImode.
222 Relative to reg-reg move (2). */
223 {2, 3, 2}, /* cost of storing integer registers */
224 4, /* cost of reg,reg fld/fst */
225 {6, 6, 20}, /* cost of loading fp registers
226 in SFmode, DFmode and XFmode */
227 {4, 4, 16}, /* cost of loading integer registers */
228 2, /* cost of moving MMX register */
229 {2, 2}, /* cost of loading MMX registers
230 in SImode and DImode */
231 {2, 2}, /* cost of storing MMX registers
232 in SImode and DImode */
233 2, /* cost of moving SSE register */
234 {2, 2, 8}, /* cost of loading SSE registers
235 in SImode, DImode and TImode */
236 {2, 2, 8}, /* cost of storing SSE registers
237 in SImode, DImode and TImode */
238 6 /* MMX or SSE register to integer */
241 struct processor_costs pentium4_cost = {
242 1, /* cost of an add instruction */
243 1, /* cost of a lea instruction */
244 8, /* variable shift costs */
245 8, /* constant shift costs */
246 30, /* cost of starting a multiply */
247 0, /* cost of multiply per each bit set */
248 112, /* cost of a divide/mod */
249 16, /* "large" insn */
251 2, /* cost for loading QImode using movzbl */
252 {4, 5, 4}, /* cost of loading integer registers
253 in QImode, HImode and SImode.
254 Relative to reg-reg move (2). */
255 {2, 3, 2}, /* cost of storing integer registers */
256 2, /* cost of reg,reg fld/fst */
257 {2, 2, 6}, /* cost of loading fp registers
258 in SFmode, DFmode and XFmode */
259 {4, 4, 6}, /* cost of loading integer registers */
260 2, /* cost of moving MMX register */
261 {2, 2}, /* cost of loading MMX registers
262 in SImode and DImode */
263 {2, 2}, /* cost of storing MMX registers
264 in SImode and DImode */
265 12, /* cost of moving SSE register */
266 {12, 12, 12}, /* cost of loading SSE registers
267 in SImode, DImode and TImode */
268 {2, 2, 8}, /* cost of storing SSE registers
269 in SImode, DImode and TImode */
270 10, /* MMX or SSE register to integer */
/* Cost table currently in effect.  Statically defaults to the Pentium
   table; override_options replaces it from processor_target_table once
   the target CPU is known.  */
struct processor_costs *ix86_cost = &pentium_cost;
275 /* Processor feature/optimization bitmasks. */
#define m_386 (1<<PROCESSOR_I386)
#define m_486 (1<<PROCESSOR_I486)
#define m_PENT (1<<PROCESSOR_PENTIUM)
#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
#define m_K6 (1<<PROCESSOR_K6)
#define m_ATHLON (1<<PROCESSOR_ATHLON)
#define m_PENT4 (1<<PROCESSOR_PENTIUM4)

/* Each constant below is a mask of the processors for which the named
   code-generation heuristic is considered profitable; the TARGET_*
   predicates in i386.h test these against the bit for ix86_cpu.
   A `~' mask means "everything except the listed processors".  */
const int x86_use_leave = m_386 | m_K6 | m_ATHLON;
const int x86_push_memory = m_386 | m_K6 | m_ATHLON | m_PENT4;
const int x86_zero_extend_with_and = m_486 | m_PENT;
const int x86_movx = m_ATHLON | m_PPRO | m_PENT4 /* m_386 | m_K6 */;
const int x86_double_with_add = ~m_386;
const int x86_use_bit_test = m_386;
const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON | m_K6;
const int x86_cmove = m_PPRO | m_ATHLON | m_PENT4;
const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON | m_PENT4;
const int x86_branch_hints = m_PENT4;
const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4;
const int x86_partial_reg_stall = m_PPRO;
const int x86_use_loop = m_K6;
const int x86_use_fiop = ~(m_PPRO | m_ATHLON | m_PENT);
const int x86_use_mov0 = m_K6;
const int x86_use_cltd = ~(m_PENT | m_K6);
const int x86_read_modify_write = ~m_PENT;
const int x86_read_modify = ~(m_PENT | m_PPRO);
const int x86_split_long_moves = m_PPRO;
const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486;
const int x86_single_stringop = m_386 | m_PENT4;
const int x86_qimode_math = ~(0);
const int x86_promote_qi_regs = 0;
const int x86_himode_math = ~(m_PPRO);
const int x86_promote_hi_regs = m_PPRO;
const int x86_sub_esp_4 = m_ATHLON | m_PPRO | m_PENT4;
const int x86_sub_esp_8 = m_ATHLON | m_PPRO | m_386 | m_486 | m_PENT4;
const int x86_add_esp_4 = m_ATHLON | m_K6 | m_PENT4;
const int x86_add_esp_8 = m_ATHLON | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4;
const int x86_integer_DFmode_moves = ~(m_ATHLON | m_PENT4);
const int x86_partial_reg_dependency = m_ATHLON | m_PENT4;
const int x86_memory_mismatch_stall = m_ATHLON | m_PENT4;
317 #define AT_BP(mode) (gen_rtx_MEM ((mode), hard_frame_pointer_rtx))
/* Register names indexed by hard register number, in word-sized,
   byte-sized, and high-byte forms; the initializer macros come from
   i386.h.  */
const char * const hi_reg_name[] = HI_REGISTER_NAMES;
const char * const qi_reg_name[] = QI_REGISTER_NAMES;
const char * const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
323 /* Array of the smallest class containing reg number REGNO, indexed by
324 REGNO. Used by REGNO_REG_CLASS in i386.h. */
326 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
329 AREG, DREG, CREG, BREG,
331 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
333 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
334 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
337 /* flags, fpsr, dirflag, frame */
338 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
339 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
341 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
343 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
344 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
345 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
349 /* The "default" register map used in 32bit mode. */
351 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
353 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
354 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
355 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
356 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
357 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
358 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
359 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
362 /* The "default" register map used in 64bit mode. */
363 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
365 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
366 33, 34, 35, 36, 37, 38, 39, 40 /* fp regs */
367 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
368 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
369 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
370 8,9,10,11,12,13,14,15, /* extended integer registers */
371 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
374 /* Define the register numbers to be used in Dwarf debugging information.
375 The SVR4 reference port C compiler uses the following register numbers
376 in its Dwarf output code:
377 0 for %eax (gcc regno = 0)
378 1 for %ecx (gcc regno = 2)
379 2 for %edx (gcc regno = 1)
380 3 for %ebx (gcc regno = 3)
381 4 for %esp (gcc regno = 7)
382 5 for %ebp (gcc regno = 6)
383 6 for %esi (gcc regno = 4)
384 7 for %edi (gcc regno = 5)
385 The following three DWARF register numbers are never generated by
386 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
387 believes these numbers have these meanings.
388 8 for %eip (no gcc equivalent)
389 9 for %eflags (gcc regno = 17)
390 10 for %trapno (no gcc equivalent)
391 It is not at all clear how we should number the FP stack registers
392 for the x86 architecture. If the version of SDB on x86/svr4 were
393 a bit less brain dead with respect to floating-point then we would
394 have a precedent to follow with respect to DWARF register numbers
395 for x86 FP registers, but the SDB on x86/svr4 is so completely
396 broken with respect to FP registers that it is hardly worth thinking
397 of it as something to strive for compatibility with.
398 The version of x86/svr4 SDB I have at the moment does (partially)
399 seem to believe that DWARF register number 11 is associated with
400 the x86 register %st(0), but that's about all. Higher DWARF
401 register numbers don't seem to be associated with anything in
402 particular, and even for DWARF regno 11, SDB only seems to under-
403 stand that it should say that a variable lives in %st(0) (when
404 asked via an `=' command) if we said it was in DWARF regno 11,
405 but SDB still prints garbage when asked for the value of the
406 variable in question (via a `/' command).
407 (Also note that the labels SDB prints for various FP stack regs
408 when doing an `x' command are all wrong.)
409 Note that these problems generally don't affect the native SVR4
410 C compiler because it doesn't allow the use of -O with -g and
411 because when it is *not* optimizing, it allocates a memory
412 location for each floating-point variable, and the memory
413 location is what gets described in the DWARF AT_location
414 attribute for the variable in question.
415 Regardless of the severe mental illness of the x86/svr4 SDB, we
416 do something sensible here and we use the following DWARF
417 register numbers. Note that these are all stack-top-relative
419 11 for %st(0) (gcc regno = 8)
420 12 for %st(1) (gcc regno = 9)
421 13 for %st(2) (gcc regno = 10)
422 14 for %st(3) (gcc regno = 11)
423 15 for %st(4) (gcc regno = 12)
424 16 for %st(5) (gcc regno = 13)
425 17 for %st(6) (gcc regno = 14)
426 18 for %st(7) (gcc regno = 15)
428 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
430 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
431 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
432 -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
433 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
434 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
435 -1, -1, -1, -1, -1, -1, -1, -1, /* extemded integer registers */
436 -1, -1, -1, -1, -1, -1, -1, -1, /* extemded SSE registers */
/* Test and compare insns in i386.md store the information needed to
   generate branch and scc insns here.  */

struct rtx_def *ix86_compare_op0 = NULL_RTX;	/* first operand of pending compare */
struct rtx_def *ix86_compare_op1 = NULL_RTX;	/* second operand of pending compare */
/* Number of distinct stack slots cached per machine mode in
   ix86_stack_locals.  */
#define MAX_386_STACK_LOCALS 2
/* Size of the register save area.  */
#define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
/* Define the structure for the machine field in struct function.  */
struct machine_function
/* Cached scratch stack slots, indexed by mode then by slot number.  */
rtx stack_locals[(int) MAX_MACHINE_MODE][MAX_386_STACK_LOCALS];
/* NOTE(review): "varrargs" is misspelled but load-bearing -- the accessor
   macro below spells it identically, so leave both in sync.  */
int save_varrargs_registers;
int accesses_prev_frame;
#define ix86_stack_locals (cfun->machine->stack_locals)
#define ix86_save_varrargs_registers (cfun->machine->save_varrargs_registers)
460 /* Structure describing stack frame layout.
461 Stack grows downward:
467 saved frame pointer if frame_pointer_needed
468 <- HARD_FRAME_POINTER
474 > to_allocate <- FRAME_POINTER
486 int outgoing_arguments_size;
489 HOST_WIDE_INT to_allocate;
490 /* The offsets relative to ARG_POINTER. */
491 HOST_WIDE_INT frame_pointer_offset;
492 HOST_WIDE_INT hard_frame_pointer_offset;
493 HOST_WIDE_INT stack_pointer_offset;
496 /* Code model option as passed by user. */
497 const char *ix86_cmodel_string;
499 enum cmodel ix86_cmodel;
501 /* which cpu are we scheduling for */
502 enum processor_type ix86_cpu;
504 /* which instruction set architecture to use. */
507 /* Strings to hold which cpu and instruction set architecture to use. */
508 const char *ix86_cpu_string; /* for -mcpu=<xxx> */
509 const char *ix86_arch_string; /* for -march=<xxx> */
511 /* # of registers to use to pass arguments. */
512 const char *ix86_regparm_string;
514 /* ix86_regparm_string as a number */
517 /* Alignment to use for loops and jumps: */
519 /* Power of two alignment for loops. */
520 const char *ix86_align_loops_string;
522 /* Power of two alignment for non-loop jumps. */
523 const char *ix86_align_jumps_string;
525 /* Power of two alignment for stack boundary in bytes. */
526 const char *ix86_preferred_stack_boundary_string;
528 /* Preferred alignment for stack boundary in bits. */
529 int ix86_preferred_stack_boundary;
531 /* Values 1-5: see jump.c */
532 int ix86_branch_cost;
533 const char *ix86_branch_cost_string;
535 /* Power of two alignment for functions. */
536 const char *ix86_align_funcs_string;
538 static void output_pic_addr_const PARAMS ((FILE *, rtx, int));
539 static void put_condition_code PARAMS ((enum rtx_code, enum machine_mode,
541 static rtx ix86_expand_int_compare PARAMS ((enum rtx_code, rtx, rtx));
542 static enum rtx_code ix86_prepare_fp_compare_args PARAMS ((enum rtx_code,
544 static rtx gen_push PARAMS ((rtx));
545 static int memory_address_length PARAMS ((rtx addr));
546 static int ix86_flags_dependant PARAMS ((rtx, rtx, enum attr_type));
547 static int ix86_agi_dependant PARAMS ((rtx, rtx, enum attr_type));
548 static int ix86_safe_length PARAMS ((rtx));
549 static enum attr_memory ix86_safe_memory PARAMS ((rtx));
550 static enum attr_pent_pair ix86_safe_pent_pair PARAMS ((rtx));
551 static enum attr_ppro_uops ix86_safe_ppro_uops PARAMS ((rtx));
552 static void ix86_dump_ppro_packet PARAMS ((FILE *));
553 static void ix86_reorder_insn PARAMS ((rtx *, rtx *));
554 static rtx * ix86_pent_find_pair PARAMS ((rtx *, rtx *, enum attr_pent_pair,
556 static void ix86_init_machine_status PARAMS ((struct function *));
557 static void ix86_mark_machine_status PARAMS ((struct function *));
558 static void ix86_free_machine_status PARAMS ((struct function *));
559 static int ix86_split_to_parts PARAMS ((rtx, rtx *, enum machine_mode));
560 static int ix86_safe_length_prefix PARAMS ((rtx));
561 static int ix86_nsaved_regs PARAMS((void));
562 static void ix86_emit_save_regs PARAMS((void));
563 static void ix86_emit_restore_regs_using_mov PARAMS ((rtx, int, int));
564 static void ix86_emit_epilogue_esp_adjustment PARAMS((int));
565 static void ix86_set_move_mem_attrs_1 PARAMS ((rtx, rtx, rtx, rtx, rtx));
566 static void ix86_sched_reorder_pentium PARAMS((rtx *, rtx *));
567 static void ix86_sched_reorder_ppro PARAMS((rtx *, rtx *));
568 static HOST_WIDE_INT ix86_GOT_alias_set PARAMS ((void));
569 static void ix86_adjust_counter PARAMS ((rtx, HOST_WIDE_INT));
570 static rtx ix86_zero_extend_to_Pmode PARAMS ((rtx));
571 static rtx ix86_expand_aligntest PARAMS ((rtx, int));
572 static void ix86_expand_strlensi_unroll_1 PARAMS ((rtx, rtx));
576 rtx base, index, disp;
580 static int ix86_decompose_address PARAMS ((rtx, struct ix86_address *));
582 struct builtin_description;
583 static rtx ix86_expand_sse_comi PARAMS ((struct builtin_description *, tree,
585 static rtx ix86_expand_sse_compare PARAMS ((struct builtin_description *, tree,
587 static rtx ix86_expand_unop1_builtin PARAMS ((enum insn_code, tree, rtx));
588 static rtx ix86_expand_unop_builtin PARAMS ((enum insn_code, tree, rtx, int));
589 static rtx ix86_expand_binop_builtin PARAMS ((enum insn_code, tree, rtx));
590 static rtx ix86_expand_store_builtin PARAMS ((enum insn_code, tree, int));
591 static rtx safe_vector_operand PARAMS ((rtx, enum machine_mode));
592 static enum rtx_code ix86_fp_compare_code_to_integer PARAMS ((enum rtx_code));
593 static void ix86_fp_comparison_codes PARAMS ((enum rtx_code code,
597 static rtx ix86_expand_fp_compare PARAMS ((enum rtx_code, rtx, rtx, rtx,
599 static int ix86_fp_comparison_arithmetics_cost PARAMS ((enum rtx_code code));
600 static int ix86_fp_comparison_fcomi_cost PARAMS ((enum rtx_code code));
601 static int ix86_fp_comparison_sahf_cost PARAMS ((enum rtx_code code));
602 static int ix86_fp_comparison_cost PARAMS ((enum rtx_code code));
603 static int ix86_save_reg PARAMS ((int, int));
604 static void ix86_compute_frame_layout PARAMS ((struct ix86_frame *));
606 /* Sometimes certain combinations of command options do not make
607 sense on a particular target machine. You can define a macro
608 `OVERRIDE_OPTIONS' to take account of this. This macro, if
609 defined, is executed once just after all the command options have
612 Don't use this macro to turn on various extra optimizations for
613 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
619 /* Comes from final.c -- no real reason to change it. */
620 #define MAX_CODE_ALIGN 16
624 struct processor_costs *cost; /* Processor costs */
625 int target_enable; /* Target flags to enable. */
626 int target_disable; /* Target flags to disable. */
627 int align_loop; /* Default alignments. */
632 const processor_target_table[PROCESSOR_max] =
634 {&i386_cost, 0, 0, 2, 2, 2, 1},
635 {&i486_cost, 0, 0, 4, 4, 4, 1},
636 {&pentium_cost, 0, 0, -4, -4, -4, 1},
637 {&pentiumpro_cost, 0, 0, 4, -4, 4, 1},
638 {&k6_cost, 0, 0, -5, -5, 4, 1},
639 {&athlon_cost, 0, 0, 4, -4, 4, 1},
640 {&pentium4_cost, 0, 0, 2, 2, 2, 1}
645 const char *name; /* processor name or nickname. */
646 enum processor_type processor;
648 const processor_alias_table[] =
650 {"i386", PROCESSOR_I386},
651 {"i486", PROCESSOR_I486},
652 {"i586", PROCESSOR_PENTIUM},
653 {"pentium", PROCESSOR_PENTIUM},
654 {"i686", PROCESSOR_PENTIUMPRO},
655 {"pentiumpro", PROCESSOR_PENTIUMPRO},
656 {"k6", PROCESSOR_K6},
657 {"athlon", PROCESSOR_ATHLON},
658 {"pentium4", PROCESSOR_PENTIUM4},
661 int const pta_size = sizeof (processor_alias_table) / sizeof (struct pta);
663 #ifdef SUBTARGET_OVERRIDE_OPTIONS
664 SUBTARGET_OVERRIDE_OPTIONS;
667 ix86_arch = PROCESSOR_I386;
668 ix86_cpu = (enum processor_type) TARGET_CPU_DEFAULT;
670 if (ix86_cmodel_string != 0)
672 if (!strcmp (ix86_cmodel_string, "small"))
673 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
675 sorry ("Code model %s not supported in PIC mode", ix86_cmodel_string);
676 else if (!strcmp (ix86_cmodel_string, "32"))
678 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
679 ix86_cmodel = CM_KERNEL;
680 else if (!strcmp (ix86_cmodel_string, "medium") && !flag_pic)
681 ix86_cmodel = CM_MEDIUM;
682 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
683 ix86_cmodel = CM_LARGE;
685 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
691 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
693 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
694 error ("Code model `%s' not supported in the %s bit mode.",
695 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
696 if (ix86_cmodel == CM_LARGE)
697 sorry ("Code model `large' not supported yet.");
698 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
699 sorry ("%i-bit mode not compiled in.",
700 (target_flags & MASK_64BIT) ? 64 : 32);
702 if (ix86_arch_string != 0)
704 for (i = 0; i < pta_size; i++)
705 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
707 ix86_arch = processor_alias_table[i].processor;
708 /* Default cpu tuning to the architecture. */
709 ix86_cpu = ix86_arch;
714 error ("bad value (%s) for -march= switch", ix86_arch_string);
717 if (ix86_cpu_string != 0)
719 for (i = 0; i < pta_size; i++)
720 if (! strcmp (ix86_cpu_string, processor_alias_table[i].name))
722 ix86_cpu = processor_alias_table[i].processor;
726 error ("bad value (%s) for -mcpu= switch", ix86_cpu_string);
729 ix86_cost = processor_target_table[ix86_cpu].cost;
730 target_flags |= processor_target_table[ix86_cpu].target_enable;
731 target_flags &= ~processor_target_table[ix86_cpu].target_disable;
733 /* Arrange to set up i386_stack_locals for all functions. */
734 init_machine_status = ix86_init_machine_status;
735 mark_machine_status = ix86_mark_machine_status;
736 free_machine_status = ix86_free_machine_status;
738 /* Validate -mregparm= value. */
739 if (ix86_regparm_string)
741 i = atoi (ix86_regparm_string);
742 if (i < 0 || i > REGPARM_MAX)
743 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
749 ix86_regparm = REGPARM_MAX;
751 /* If the user has provided any of the -malign-* options,
752 warn and use that value only if -falign-* is not set.
753 Remove this code in GCC 3.2 or later. */
754 if (ix86_align_loops_string)
756 warning ("-malign-loops is obsolete, use -falign-loops");
757 if (align_loops == 0)
759 i = atoi (ix86_align_loops_string);
760 if (i < 0 || i > MAX_CODE_ALIGN)
761 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
763 align_loops = 1 << i;
767 if (ix86_align_jumps_string)
769 warning ("-malign-jumps is obsolete, use -falign-jumps");
770 if (align_jumps == 0)
772 i = atoi (ix86_align_jumps_string);
773 if (i < 0 || i > MAX_CODE_ALIGN)
774 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
776 align_jumps = 1 << i;
780 if (ix86_align_funcs_string)
782 warning ("-malign-functions is obsolete, use -falign-functions");
783 if (align_functions == 0)
785 i = atoi (ix86_align_funcs_string);
786 if (i < 0 || i > MAX_CODE_ALIGN)
787 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
789 align_functions = 1 << i;
793 /* Default align_* from the processor table. */
/* Local absolute-value helper for the alignment defaults below (negative
   processor_target_table entries encode alignments applied conditionally).
   The argument is fully parenthesized so compound expressions expand
   correctly -- the old body turned abs(a - b) into (a - b < 0 ? -a - b
   : a - b), negating only `a'.  N is still evaluated twice, so the
   argument must be side-effect free.  */
#define abs(n) ((n) < 0 ? -(n) : (n))
795 if (align_loops == 0)
796 align_loops = 1 << abs (processor_target_table[ix86_cpu].align_loop);
797 if (align_jumps == 0)
798 align_jumps = 1 << abs (processor_target_table[ix86_cpu].align_jump);
799 if (align_functions == 0)
800 align_functions = 1 << abs (processor_target_table[ix86_cpu].align_func);
802 /* Validate -mpreferred-stack-boundary= value, or provide default.
803 The default of 128 bits is for Pentium III's SSE __m128. */
804 ix86_preferred_stack_boundary = 128;
805 if (ix86_preferred_stack_boundary_string)
807 i = atoi (ix86_preferred_stack_boundary_string);
808 if (i < (TARGET_64BIT ? 3 : 2) || i > 31)
809 error ("-mpreferred-stack-boundary=%d is not between %d and 31", i,
810 TARGET_64BIT ? 3 : 2);
812 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
815 /* Validate -mbranch-cost= value, or provide default. */
816 ix86_branch_cost = processor_target_table[ix86_cpu].branch_cost;
817 if (ix86_branch_cost_string)
819 i = atoi (ix86_branch_cost_string);
821 error ("-mbranch-cost=%d is not between 0 and 5", i);
823 ix86_branch_cost = i;
826 /* Keep nonleaf frame pointers. */
827 if (TARGET_OMIT_LEAF_FRAME_POINTER)
828 flag_omit_frame_pointer = 1;
830 /* If we're doing fast math, we don't care about comparison order
831 wrt NaNs. This lets us use a shorter comparison sequence. */
832 if (flag_unsafe_math_optimizations)
833 target_flags &= ~MASK_IEEE_FP;
835 /* It makes no sense to ask for just SSE builtins, so MMX is also turned
838 target_flags |= MASK_MMX;
842 optimization_options (level, size)
844 int size ATTRIBUTE_UNUSED;
846 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
847 make the problem with not enough registers even worse. */
848 #ifdef INSN_SCHEDULING
850 flag_schedule_insns = 0;
854 /* Return nonzero if IDENTIFIER with arguments ARGS is a valid machine specific
855 attribute for DECL. The attributes in ATTRIBUTES have previously been
859 ix86_valid_decl_attribute_p (decl, attributes, identifier, args)
860 tree decl ATTRIBUTE_UNUSED;
861 tree attributes ATTRIBUTE_UNUSED;
862 tree identifier ATTRIBUTE_UNUSED;
863 tree args ATTRIBUTE_UNUSED;
868 /* Return nonzero if IDENTIFIER with arguments ARGS is a valid machine specific
869 attribute for TYPE. The attributes in ATTRIBUTES have previously been
873 ix86_valid_type_attribute_p (type, attributes, identifier, args)
875 tree attributes ATTRIBUTE_UNUSED;
879 if (TREE_CODE (type) != FUNCTION_TYPE
880 && TREE_CODE (type) != METHOD_TYPE
881 && TREE_CODE (type) != FIELD_DECL
882 && TREE_CODE (type) != TYPE_DECL)
885 /* Stdcall attribute says callee is responsible for popping arguments
886 if they are not variable. */
887 if (is_attribute_p ("stdcall", identifier)
889 return (args == NULL_TREE);
891 /* Cdecl attribute says the callee is a normal C declaration. */
892 if (is_attribute_p ("cdecl", identifier)
894 return (args == NULL_TREE);
896 /* Regparm attribute specifies how many integer arguments are to be
897 passed in registers. */
898 if (is_attribute_p ("regparm", identifier))
902 if (! args || TREE_CODE (args) != TREE_LIST
903 || TREE_CHAIN (args) != NULL_TREE
904 || TREE_VALUE (args) == NULL_TREE)
907 cst = TREE_VALUE (args);
908 if (TREE_CODE (cst) != INTEGER_CST)
911 if (compare_tree_int (cst, REGPARM_MAX) > 0)
920 /* Return 0 if the attributes for two types are incompatible, 1 if they
921 are compatible, and 2 if they are nearly compatible (which causes a
922 warning to be generated). */
925 ix86_comp_type_attributes (type1, type2)
929 /* Check for mismatch of non-default calling convention. */
930 const char *rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
932 if (TREE_CODE (type1) != FUNCTION_TYPE)
935 /* Check for mismatched return types (cdecl vs stdcall). */
936 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
937 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
942 /* Value is the number of bytes of arguments automatically
943 popped when returning from a subroutine call.
944 FUNDECL is the declaration node of the function (as a tree),
945 FUNTYPE is the data type of the function (as a tree),
946 or for a library call it is an identifier node for the subroutine name.
947 SIZE is the number of bytes of arguments passed on the stack.
949 On the 80386, the RTD insn may be used to pop them if the number
950 of args is fixed, but if the number is variable then the caller
951 must pop them all. RTD can't be used for library calls now
952 because the library is compiled with the Unix compiler.
953 Use of RTD is a selectable option, since it is incompatible with
954 standard Unix calling sequences. If the option is not selected,
955 the caller must always pop the args.
957 The attribute stdcall is equivalent to RTD on a per module basis. */
960 ix86_return_pops_args (fundecl, funtype, size)
965 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
967 /* Cdecl functions override -mrtd, and never pop the stack. */
968 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
970 /* Stdcall functions will pop the stack if not variable args. */
971 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype)))
975 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
976 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
981 /* Lose any fake structure return argument. */
982 if (aggregate_value_p (TREE_TYPE (funtype))
984 return GET_MODE_SIZE (Pmode);
989 /* Argument support functions. */
991 /* Initialize a variable CUM of type CUMULATIVE_ARGS
992 for a call to a function whose data type is FNTYPE.
993 For a library call, FNTYPE is 0. */
996 init_cumulative_args (cum, fntype, libname)
997 CUMULATIVE_ARGS *cum; /* Argument info to initialize */
998 tree fntype; /* tree ptr for function decl */
999 rtx libname; /* SYMBOL_REF of library name or 0 */
1001 static CUMULATIVE_ARGS zero_cum;
1002 tree param, next_param;
1004 if (TARGET_DEBUG_ARG)
1006 fprintf (stderr, "\ninit_cumulative_args (");
1008 fprintf (stderr, "fntype code = %s, ret code = %s",
1009 tree_code_name[(int) TREE_CODE (fntype)],
1010 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
1012 fprintf (stderr, "no fntype");
1015 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
1020 /* Set up the number of registers to use for passing arguments. */
1021 cum->nregs = ix86_regparm;
1024 tree attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (fntype));
1027 cum->nregs = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1030 /* Determine if this function has variable arguments. This is
1031 indicated by the last argument being 'void_type_mode' if there
1032 are no variable arguments. If there are variable arguments, then
1033 we won't pass anything in registers */
1037 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
1038 param != 0; param = next_param)
1040 next_param = TREE_CHAIN (param);
1041 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
1046 if (TARGET_DEBUG_ARG)
1047 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
1052 /* Update the data in CUM to advance over an argument
1053 of mode MODE and data type TYPE.
1054 (TYPE is null for libcalls where that information may not be available.) */
1057 function_arg_advance (cum, mode, type, named)
1058 CUMULATIVE_ARGS *cum; /* current arg information */
1059 enum machine_mode mode; /* current arg mode */
1060 tree type; /* type of the argument or 0 if lib support */
1061 int named; /* whether or not the argument was named */
1064 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1065 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1067 if (TARGET_DEBUG_ARG)
1069 "function_adv (sz=%d, wds=%2d, nregs=%d, mode=%s, named=%d)\n\n",
1070 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
1071 if (TARGET_SSE && mode == TImode)
1073 cum->sse_words += words;
1074 cum->sse_nregs -= 1;
1075 cum->sse_regno += 1;
1076 if (cum->sse_nregs <= 0)
1084 cum->words += words;
1085 cum->nregs -= words;
1086 cum->regno += words;
1088 if (cum->nregs <= 0)
1097 /* Define where to put the arguments to a function.
1098 Value is zero to push the argument on the stack,
1099 or a hard register in which to store the argument.
1101 MODE is the argument's machine mode.
1102 TYPE is the data type of the argument (as a tree).
1103 This is null for libcalls where that information may
1105 CUM is a variable of type CUMULATIVE_ARGS which gives info about
1106 the preceding args and about the function being called.
1107 NAMED is nonzero if this argument is a named parameter
1108 (otherwise it is an extra parameter matching an ellipsis). */
1111 function_arg (cum, mode, type, named)
1112 CUMULATIVE_ARGS *cum; /* current arg information */
1113 enum machine_mode mode; /* current arg mode */
1114 tree type; /* type of the argument or 0 if lib support */
1115 int named; /* != 0 for normal args, == 0 for ... args */
1119 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1120 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1122 if (mode == VOIDmode)
1127 /* For now, pass fp/complex values on the stack. */
1136 if (words <= cum->nregs)
1137 ret = gen_rtx_REG (mode, cum->regno);
1141 ret = gen_rtx_REG (mode, cum->sse_regno);
1145 if (TARGET_DEBUG_ARG)
1148 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d",
1149 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
1152 fprintf (stderr, ", reg=%%e%s", reg_names[ REGNO(ret) ]);
1154 fprintf (stderr, ", stack");
1156 fprintf (stderr, " )\n");
1163 /* Return nonzero if OP is general operand representable on x86_64. */
1166 x86_64_general_operand (op, mode)
1168 enum machine_mode mode;
1171 return general_operand (op, mode);
1172 if (nonimmediate_operand (op, mode))
1174 return x86_64_sign_extended_value (op);
1177 /* Return nonzero if OP is general operand representable on x86_64
1178 as eighter sign extended or zero extended constant. */
1181 x86_64_szext_general_operand (op, mode)
1183 enum machine_mode mode;
1186 return general_operand (op, mode);
1187 if (nonimmediate_operand (op, mode))
1189 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
1192 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
1195 x86_64_nonmemory_operand (op, mode)
1197 enum machine_mode mode;
1200 return nonmemory_operand (op, mode);
1201 if (register_operand (op, mode))
1203 return x86_64_sign_extended_value (op);
1206 /* Return nonzero if OP is nonmemory operand acceptable by movabs patterns. */
1209 x86_64_movabs_operand (op, mode)
1211 enum machine_mode mode;
1213 if (!TARGET_64BIT || !flag_pic)
1214 return nonmemory_operand (op, mode);
1215 if (register_operand (op, mode) || x86_64_sign_extended_value (op))
1217 if (CONSTANT_P (op) && !symbolic_reference_mentioned_p (op))
1222 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
1225 x86_64_szext_nonmemory_operand (op, mode)
1227 enum machine_mode mode;
1230 return nonmemory_operand (op, mode);
1231 if (register_operand (op, mode))
1233 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
1236 /* Return nonzero if OP is immediate operand representable on x86_64. */
1239 x86_64_immediate_operand (op, mode)
1241 enum machine_mode mode;
1244 return immediate_operand (op, mode);
1245 return x86_64_sign_extended_value (op);
1248 /* Return nonzero if OP is immediate operand representable on x86_64. */
1251 x86_64_zext_immediate_operand (op, mode)
1253 enum machine_mode mode ATTRIBUTE_UNUSED;
1255 return x86_64_zero_extended_value (op);
1258 /* Return nonzero if OP is (const_int 1), else return zero. */
1261 const_int_1_operand (op, mode)
1263 enum machine_mode mode ATTRIBUTE_UNUSED;
1265 return (GET_CODE (op) == CONST_INT && INTVAL (op) == 1);
1268 /* Returns 1 if OP is either a symbol reference or a sum of a symbol
1269 reference and a constant. */
1272 symbolic_operand (op, mode)
1274 enum machine_mode mode ATTRIBUTE_UNUSED;
1276 switch (GET_CODE (op))
1284 if (GET_CODE (op) == SYMBOL_REF
1285 || GET_CODE (op) == LABEL_REF
1286 || (GET_CODE (op) == UNSPEC
1287 && XINT (op, 1) >= 6
1288 && XINT (op, 1) <= 7))
1290 if (GET_CODE (op) != PLUS
1291 || GET_CODE (XEXP (op, 1)) != CONST_INT)
1295 if (GET_CODE (op) == SYMBOL_REF
1296 || GET_CODE (op) == LABEL_REF)
1298 /* Only @GOTOFF gets offsets. */
1299 if (GET_CODE (op) != UNSPEC
1300 || XINT (op, 1) != 7)
1303 op = XVECEXP (op, 0, 0);
1304 if (GET_CODE (op) == SYMBOL_REF
1305 || GET_CODE (op) == LABEL_REF)
1314 /* Return true if the operand contains a @GOT or @GOTOFF reference. */
1317 pic_symbolic_operand (op, mode)
1319 enum machine_mode mode ATTRIBUTE_UNUSED;
1321 if (GET_CODE (op) == CONST)
1324 if (GET_CODE (op) == UNSPEC)
1326 if (GET_CODE (op) != PLUS
1327 || GET_CODE (XEXP (op, 1)) != CONST_INT)
1330 if (GET_CODE (op) == UNSPEC)
1336 /* Test for a valid operand for a call instruction. Don't allow the
1337 arg pointer register or virtual regs since they may decay into
1338 reg + const, which the patterns can't handle. */
1341 call_insn_operand (op, mode)
1343 enum machine_mode mode ATTRIBUTE_UNUSED;
1345 /* Disallow indirect through a virtual register. This leads to
1346 compiler aborts when trying to eliminate them. */
1347 if (GET_CODE (op) == REG
1348 && (op == arg_pointer_rtx
1349 || op == frame_pointer_rtx
1350 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
1351 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
1354 /* Disallow `call 1234'. Due to varying assembler lameness this
1355 gets either rejected or translated to `call .+1234'. */
1356 if (GET_CODE (op) == CONST_INT)
1359 /* Explicitly allow SYMBOL_REF even if pic. */
1360 if (GET_CODE (op) == SYMBOL_REF)
1363 /* Half-pic doesn't allow anything but registers and constants.
1364 We've just taken care of the later. */
1366 return register_operand (op, Pmode);
1368 /* Otherwise we can allow any general_operand in the address. */
1369 return general_operand (op, Pmode);
1373 constant_call_address_operand (op, mode)
1375 enum machine_mode mode ATTRIBUTE_UNUSED;
1377 if (GET_CODE (op) == CONST
1378 && GET_CODE (XEXP (op, 0)) == PLUS
1379 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
1380 op = XEXP (XEXP (op, 0), 0);
1381 return GET_CODE (op) == SYMBOL_REF;
1384 /* Match exactly zero and one. */
1387 const0_operand (op, mode)
1389 enum machine_mode mode;
1391 return op == CONST0_RTX (mode);
1395 const1_operand (op, mode)
1397 enum machine_mode mode ATTRIBUTE_UNUSED;
1399 return op == const1_rtx;
1402 /* Match 2, 4, or 8. Used for leal multiplicands. */
1405 const248_operand (op, mode)
1407 enum machine_mode mode ATTRIBUTE_UNUSED;
1409 return (GET_CODE (op) == CONST_INT
1410 && (INTVAL (op) == 2 || INTVAL (op) == 4 || INTVAL (op) == 8));
1413 /* True if this is a constant appropriate for an increment or decremenmt. */
1416 incdec_operand (op, mode)
1418 enum machine_mode mode ATTRIBUTE_UNUSED;
1420 /* On Pentium4, the inc and dec operations causes extra dependancy on flag
1421 registers, since carry flag is not set. */
1422 if (TARGET_PENTIUM4 && !optimize_size)
1424 return op == const1_rtx || op == constm1_rtx;
1427 /* Return nonzero if OP is acceptable as operand of DImode shift
1431 shiftdi_operand (op, mode)
1433 enum machine_mode mode ATTRIBUTE_UNUSED;
1436 return nonimmediate_operand (op, mode);
1438 return register_operand (op, mode);
1441 /* Return false if this is the stack pointer, or any other fake
1442 register eliminable to the stack pointer. Otherwise, this is
1445 This is used to prevent esp from being used as an index reg.
1446 Which would only happen in pathological cases. */
1449 reg_no_sp_operand (op, mode)
1451 enum machine_mode mode;
1454 if (GET_CODE (t) == SUBREG)
1456 if (t == stack_pointer_rtx || t == arg_pointer_rtx || t == frame_pointer_rtx)
1459 return register_operand (op, mode);
1463 mmx_reg_operand (op, mode)
1465 enum machine_mode mode ATTRIBUTE_UNUSED;
1467 return MMX_REG_P (op);
1470 /* Return false if this is any eliminable register. Otherwise
1474 general_no_elim_operand (op, mode)
1476 enum machine_mode mode;
1479 if (GET_CODE (t) == SUBREG)
1481 if (t == arg_pointer_rtx || t == frame_pointer_rtx
1482 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
1483 || t == virtual_stack_dynamic_rtx)
1486 && REGNO (t) >= FIRST_VIRTUAL_REGISTER
1487 && REGNO (t) <= LAST_VIRTUAL_REGISTER)
1490 return general_operand (op, mode);
1493 /* Return false if this is any eliminable register. Otherwise
1494 register_operand or const_int. */
1497 nonmemory_no_elim_operand (op, mode)
1499 enum machine_mode mode;
1502 if (GET_CODE (t) == SUBREG)
1504 if (t == arg_pointer_rtx || t == frame_pointer_rtx
1505 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
1506 || t == virtual_stack_dynamic_rtx)
1509 return GET_CODE (op) == CONST_INT || register_operand (op, mode);
1512 /* Return true if op is a Q_REGS class register. */
1515 q_regs_operand (op, mode)
1517 enum machine_mode mode;
1519 if (mode != VOIDmode && GET_MODE (op) != mode)
1521 if (GET_CODE (op) == SUBREG)
1522 op = SUBREG_REG (op);
1523 return QI_REG_P (op);
1526 /* Return true if op is a NON_Q_REGS class register. */
1529 non_q_regs_operand (op, mode)
1531 enum machine_mode mode;
1533 if (mode != VOIDmode && GET_MODE (op) != mode)
1535 if (GET_CODE (op) == SUBREG)
1536 op = SUBREG_REG (op);
1537 return NON_QI_REG_P (op);
1540 /* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS
1543 sse_comparison_operator (op, mode)
1545 enum machine_mode mode ATTRIBUTE_UNUSED;
1547 enum rtx_code code = GET_CODE (op);
1550 /* Operations supported directly. */
1560 /* These are equivalent to ones above in non-IEEE comparisons. */
1567 return !TARGET_IEEE_FP;
1572 /* Return 1 if OP is a valid comparison operator in valid mode. */
1574 ix86_comparison_operator (op, mode)
1576 enum machine_mode mode;
1578 enum machine_mode inmode;
1579 enum rtx_code code = GET_CODE (op);
1580 if (mode != VOIDmode && GET_MODE (op) != mode)
1582 if (GET_RTX_CLASS (code) != '<')
1584 inmode = GET_MODE (XEXP (op, 0));
1586 if (inmode == CCFPmode || inmode == CCFPUmode)
1588 enum rtx_code second_code, bypass_code;
1589 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
1590 return (bypass_code == NIL && second_code == NIL);
1597 if (inmode == CCmode || inmode == CCGCmode
1598 || inmode == CCGOCmode || inmode == CCNOmode)
1601 case LTU: case GTU: case LEU: case ORDERED: case UNORDERED: case GEU:
1602 if (inmode == CCmode)
1606 if (inmode == CCmode || inmode == CCGCmode || inmode == CCNOmode)
1614 /* Return 1 if OP is a comparison operator that can be issued by fcmov. */
1617 fcmov_comparison_operator (op, mode)
1619 enum machine_mode mode;
1621 enum machine_mode inmode;
1622 enum rtx_code code = GET_CODE (op);
1623 if (mode != VOIDmode && GET_MODE (op) != mode)
1625 if (GET_RTX_CLASS (code) != '<')
1627 inmode = GET_MODE (XEXP (op, 0));
1628 if (inmode == CCFPmode || inmode == CCFPUmode)
1630 enum rtx_code second_code, bypass_code;
1631 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
1632 if (bypass_code != NIL || second_code != NIL)
1634 code = ix86_fp_compare_code_to_integer (code);
1636 /* i387 supports just limited amount of conditional codes. */
1639 case LTU: case GTU: case LEU: case GEU:
1640 if (inmode == CCmode || inmode == CCFPmode || inmode == CCFPUmode)
1643 case ORDERED: case UNORDERED:
1651 /* Return 1 if OP is a binary operator that can be promoted to wider mode. */
1654 promotable_binary_operator (op, mode)
1656 enum machine_mode mode ATTRIBUTE_UNUSED;
1658 switch (GET_CODE (op))
1661 /* Modern CPUs have same latency for HImode and SImode multiply,
1662 but 386 and 486 do HImode multiply faster. */
1663 return ix86_cpu > PROCESSOR_I486;
1675 /* Nearly general operand, but accept any const_double, since we wish
1676 to be able to drop them into memory rather than have them get pulled
1680 cmp_fp_expander_operand (op, mode)
1682 enum machine_mode mode;
1684 if (mode != VOIDmode && mode != GET_MODE (op))
1686 if (GET_CODE (op) == CONST_DOUBLE)
1688 return general_operand (op, mode);
1691 /* Match an SI or HImode register for a zero_extract. */
1694 ext_register_operand (op, mode)
1696 enum machine_mode mode ATTRIBUTE_UNUSED;
1699 if ((!TARGET_64BIT || GET_MODE (op) != DImode)
1700 && GET_MODE (op) != SImode && GET_MODE (op) != HImode)
1703 if (!register_operand (op, VOIDmode))
1706 /* Be curefull to accept only registers having upper parts. */
1707 regno = REG_P (op) ? REGNO (op) : REGNO (SUBREG_REG (op));
1708 return (regno > LAST_VIRTUAL_REGISTER || regno < 4);
1711 /* Return 1 if this is a valid binary floating-point operation.
1712 OP is the expression matched, and MODE is its mode. */
1715 binary_fp_operator (op, mode)
1717 enum machine_mode mode;
1719 if (mode != VOIDmode && mode != GET_MODE (op))
1722 switch (GET_CODE (op))
1728 return GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT;
1736 mult_operator(op, mode)
1738 enum machine_mode mode ATTRIBUTE_UNUSED;
1740 return GET_CODE (op) == MULT;
1744 div_operator(op, mode)
1746 enum machine_mode mode ATTRIBUTE_UNUSED;
1748 return GET_CODE (op) == DIV;
1752 arith_or_logical_operator (op, mode)
1754 enum machine_mode mode;
1756 return ((mode == VOIDmode || GET_MODE (op) == mode)
1757 && (GET_RTX_CLASS (GET_CODE (op)) == 'c'
1758 || GET_RTX_CLASS (GET_CODE (op)) == '2'));
1761 /* Returns 1 if OP is memory operand with a displacement. */
1764 memory_displacement_operand (op, mode)
1766 enum machine_mode mode;
1768 struct ix86_address parts;
1770 if (! memory_operand (op, mode))
1773 if (! ix86_decompose_address (XEXP (op, 0), &parts))
1776 return parts.disp != NULL_RTX;
1779 /* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
1780 re-recognize the operand to avoid a copy_to_mode_reg that will fail.
1782 ??? It seems likely that this will only work because cmpsi is an
1783 expander, and no actual insns use this. */
1786 cmpsi_operand (op, mode)
1788 enum machine_mode mode;
1790 if (general_operand (op, mode))
1793 if (GET_CODE (op) == AND
1794 && GET_MODE (op) == SImode
1795 && GET_CODE (XEXP (op, 0)) == ZERO_EXTRACT
1796 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
1797 && GET_CODE (XEXP (XEXP (op, 0), 2)) == CONST_INT
1798 && INTVAL (XEXP (XEXP (op, 0), 1)) == 8
1799 && INTVAL (XEXP (XEXP (op, 0), 2)) == 8
1800 && GET_CODE (XEXP (op, 1)) == CONST_INT)
1806 /* Returns 1 if OP is memory operand that can not be represented by the
1810 long_memory_operand (op, mode)
1812 enum machine_mode mode;
1814 if (! memory_operand (op, mode))
1817 return memory_address_length (op) != 0;
1820 /* Return nonzero if the rtx is known aligned. */
1823 aligned_operand (op, mode)
1825 enum machine_mode mode;
1827 struct ix86_address parts;
1829 if (!general_operand (op, mode))
1832 /* Registers and immediate operands are always "aligned". */
1833 if (GET_CODE (op) != MEM)
1836 /* Don't even try to do any aligned optimizations with volatiles. */
1837 if (MEM_VOLATILE_P (op))
1842 /* Pushes and pops are only valid on the stack pointer. */
1843 if (GET_CODE (op) == PRE_DEC
1844 || GET_CODE (op) == POST_INC)
1847 /* Decode the address. */
1848 if (! ix86_decompose_address (op, &parts))
1851 /* Look for some component that isn't known to be aligned. */
1855 && REGNO_POINTER_ALIGN (REGNO (parts.index)) < 32)
1860 if (REGNO_POINTER_ALIGN (REGNO (parts.base)) < 32)
1865 if (GET_CODE (parts.disp) != CONST_INT
1866 || (INTVAL (parts.disp) & 3) != 0)
1870 /* Didn't find one -- this must be an aligned address. */
1874 /* Return true if the constant is something that can be loaded with
1875 a special instruction. Only handle 0.0 and 1.0; others are less
1879 standard_80387_constant_p (x)
1882 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
1884 /* Note that on the 80387, other constants, such as pi, that we should support
1885 too. On some machines, these are much slower to load as standard constant,
1886 than to load from doubles in memory. */
1887 if (x == CONST0_RTX (GET_MODE (x)))
1889 if (x == CONST1_RTX (GET_MODE (x)))
1894 /* Return 1 if X is FP constant we can load to SSE register w/o using memory.
1897 standard_sse_constant_p (x)
1900 if (GET_CODE (x) != CONST_DOUBLE)
1902 return (x == CONST0_RTX (GET_MODE (x)));
1905 /* Returns 1 if OP contains a symbol reference */
1908 symbolic_reference_mentioned_p (op)
1911 register const char *fmt;
1914 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
1917 fmt = GET_RTX_FORMAT (GET_CODE (op));
1918 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
1924 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
1925 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
1929 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
1936 /* Return 1 if it is appropriate to emit `ret' instructions in the
1937 body of a function. Do this only if the epilogue is simple, needing a
1938 couple of insns. Prior to reloading, we can't tell how many registers
1939 must be saved, so return 0 then. Return 0 if there is no frame
1940 marker to de-allocate.
1942 If NON_SAVING_SETJMP is defined and true, then it is not possible
1943 for the epilogue to be simple, so return 0. This is a special case
1944 since NON_SAVING_SETJMP will not cause regs_ever_live to change
1945 until final, but jump_optimize may need to know sooner if a
1949 ix86_can_use_return_insn_p ()
1951 struct ix86_frame frame;
1953 #ifdef NON_SAVING_SETJMP
1954 if (NON_SAVING_SETJMP && current_function_calls_setjmp)
1957 #ifdef FUNCTION_BLOCK_PROFILER_EXIT
1958 if (profile_block_flag == 2)
1962 if (! reload_completed || frame_pointer_needed)
1965 /* Don't allow more than 32 pop, since that's all we can do
1966 with one instruction. */
1967 if (current_function_pops_args
1968 && current_function_args_size >= 32768)
1971 ix86_compute_frame_layout (&frame);
1972 return frame.to_allocate == 0 && frame.nregs == 0;
1975 /* Return 1 if VALUE can be stored in the sign extended immediate field. */
1977 x86_64_sign_extended_value (value)
1980 switch (GET_CODE (value))
1982 /* CONST_DOUBLES never match, since HOST_BITS_PER_WIDE_INT is known
1983 to be at least 32 and this all acceptable constants are
1984 represented as CONST_INT. */
1986 if (HOST_BITS_PER_WIDE_INT == 32)
1990 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (value), DImode);
1991 return trunc_int_for_mode (val, SImode) == val;
1995 /* For certain code models, the symbolic references are known to fit. */
1997 return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_KERNEL;
1999 /* For certain code models, the code is near as well. */
2001 return ix86_cmodel != CM_LARGE && ix86_cmodel != CM_SMALL_PIC;
2003 /* We also may accept the offsetted memory references in certain special
2006 if (GET_CODE (XEXP (value, 0)) == UNSPEC
2007 && XVECLEN (XEXP (value, 0), 0) == 1
2008 && XINT (XEXP (value, 0), 1) == 15)
2010 else if (GET_CODE (XEXP (value, 0)) == PLUS)
2012 rtx op1 = XEXP (XEXP (value, 0), 0);
2013 rtx op2 = XEXP (XEXP (value, 0), 1);
2014 HOST_WIDE_INT offset;
2016 if (ix86_cmodel == CM_LARGE)
2018 if (GET_CODE (op2) != CONST_INT)
2020 offset = trunc_int_for_mode (INTVAL (op2), DImode);
2021 switch (GET_CODE (op1))
2024 /* For CM_SMALL assume that latest object is 1MB before
2025 end of 31bits boundary. We may also accept pretty
2026 large negative constants knowing that all objects are
2027 in the positive half of address space. */
2028 if (ix86_cmodel == CM_SMALL
2029 && offset < 1024*1024*1024
2030 && trunc_int_for_mode (offset, SImode) == offset)
2032 /* For CM_KERNEL we know that all object resist in the
2033 negative half of 32bits address space. We may not
2034 accept negative offsets, since they may be just off
2035 and we may accept pretty large possitive ones. */
2036 if (ix86_cmodel == CM_KERNEL
2038 && trunc_int_for_mode (offset, SImode) == offset)
2042 /* These conditions are similar to SYMBOL_REF ones, just the
2043 constraints for code models differ. */
2044 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
2045 && offset < 1024*1024*1024
2046 && trunc_int_for_mode (offset, SImode) == offset)
2048 if (ix86_cmodel == CM_KERNEL
2050 && trunc_int_for_mode (offset, SImode) == offset)
2063 /* Return 1 if VALUE can be stored in the zero extended immediate field. */
2065 x86_64_zero_extended_value (value)
2068 switch (GET_CODE (value))
2071 if (HOST_BITS_PER_WIDE_INT == 32)
2072 return (GET_MODE (value) == VOIDmode
2073 && !CONST_DOUBLE_HIGH (value));
2077 if (HOST_BITS_PER_WIDE_INT == 32)
2078 return INTVAL (value) >= 0;
2080 return !(INTVAL (value) & ~(HOST_WIDE_INT)0xffffffff);
2083 /* For certain code models, the symbolic references are known to fit. */
2085 return ix86_cmodel == CM_SMALL;
2087 /* For certain code models, the code is near as well. */
2089 return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM;
2091 /* We also may accept the offsetted memory references in certain special
2094 if (GET_CODE (XEXP (value, 0)) == PLUS)
2096 rtx op1 = XEXP (XEXP (value, 0), 0);
2097 rtx op2 = XEXP (XEXP (value, 0), 1);
2099 if (ix86_cmodel == CM_LARGE)
2101 switch (GET_CODE (op1))
2105 /* For small code model we may accept pretty large possitive
2106 offsets, since one bit is available for free. Negative
2107 offsets are limited by the size of NULL pointer area
2108 specified by the ABI. */
2109 if (ix86_cmodel == CM_SMALL
2110 && GET_CODE (op2) == CONST_INT
2111 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
2112 && (trunc_int_for_mode (INTVAL (op2), SImode)
2115 /* ??? For the kernel, we may accept adjustment of
2116 -0x10000000, since we know that it will just convert
2117 negative address space to possitive, but perhaps this
2118 is not worthwhile. */
2121 /* These conditions are similar to SYMBOL_REF ones, just the
2122 constraints for code models differ. */
2123 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
2124 && GET_CODE (op2) == CONST_INT
2125 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
2126 && (trunc_int_for_mode (INTVAL (op2), SImode)
2140 /* Value should be nonzero if functions must have frame pointers.
2141 Zero means the frame pointer need not be set up (and parms may
2142 be accessed via the stack pointer) in functions that seem suitable. */
2145 ix86_frame_pointer_required ()
2147 /* If we accessed previous frames, then the generated code expects
2148 to be able to access the saved ebp value in our frame. */
2149 if (cfun->machine->accesses_prev_frame)
2152 /* Several x86 os'es need a frame pointer for other reasons,
2153 usually pertaining to setjmp. */
2154 if (SUBTARGET_FRAME_POINTER_REQUIRED)
2157 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
2158 the frame pointer by default. Turn it back on now if we've not
2159 got a leaf function. */
2160 if (TARGET_OMIT_LEAF_FRAME_POINTER && ! leaf_function_p ())
2166 /* Record that the current function accesses previous call frames. */
2169 ix86_setup_frame_addresses ()
2171 cfun->machine->accesses_prev_frame = 1;
2174 static char pic_label_name[32];
2176 /* This function generates code for -fpic that loads %ebx with
2177 the return address of the caller and then returns. */
2180 ix86_asm_file_end (file)
2185 if (! TARGET_DEEP_BRANCH_PREDICTION || pic_label_name[0] == 0)
2188 /* ??? Binutils 2.10 and earlier has a linkonce elimination bug related
2189 to updating relocations to a section being discarded such that this
2190 doesn't work. Ought to detect this at configure time. */
2191 #if 0 && defined (ASM_OUTPUT_SECTION_NAME)
2192 /* The trick here is to create a linkonce section containing the
2193 pic label thunk, but to refer to it with an internal label.
2194 Because the label is internal, we don't have inter-dso name
2195 binding issues on hosts that don't support ".hidden".
2197 In order to use these macros, however, we must create a fake
2200 tree decl = build_decl (FUNCTION_DECL,
2201 get_identifier ("i686.get_pc_thunk"),
2203 DECL_ONE_ONLY (decl) = 1;
2204 UNIQUE_SECTION (decl, 0);
2205 named_section (decl, NULL, 0);
2211 /* This used to call ASM_DECLARE_FUNCTION_NAME() but since it's an
2212 internal (non-global) label that's being emitted, it didn't make
2213 sense to have .type information for local labels. This caused
2214 the SCO OpenServer 5.0.4 ELF assembler grief (why are you giving
2215 me debug info for a label that you're declaring non-global?) this
2216 was changed to call ASM_OUTPUT_LABEL() instead. */
2218 ASM_OUTPUT_LABEL (file, pic_label_name);
2220 xops[0] = pic_offset_table_rtx;
2221 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
2222 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
2223 output_asm_insn ("ret", xops);
2227 load_pic_register ()
2234 gotsym = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
2236 if (TARGET_DEEP_BRANCH_PREDICTION)
2238 if (! pic_label_name[0])
2239 ASM_GENERATE_INTERNAL_LABEL (pic_label_name, "LPR", 0);
2240 pclab = gen_rtx_MEM (QImode, gen_rtx_SYMBOL_REF (Pmode, pic_label_name));
2244 pclab = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
2247 emit_insn (gen_prologue_get_pc (pic_offset_table_rtx, pclab));
2249 if (! TARGET_DEEP_BRANCH_PREDICTION)
2250 emit_insn (gen_popsi1 (pic_offset_table_rtx));
2252 emit_insn (gen_prologue_set_got (pic_offset_table_rtx, gotsym, pclab));
2255 /* Generate an "push" pattern for input ARG. */
2261 return gen_rtx_SET (VOIDmode,
2263 gen_rtx_PRE_DEC (Pmode,
2264 stack_pointer_rtx)),
2268 /* Return 1 if we need to save REGNO. */
2270 ix86_save_reg (regno, maybe_eh_return)
2272 int maybe_eh_return;
2276 && regno == PIC_OFFSET_TABLE_REGNUM
2277 && (current_function_uses_pic_offset_table
2278 || current_function_uses_const_pool
2279 || current_function_calls_eh_return))
2282 if (current_function_calls_eh_return && maybe_eh_return)
2287 unsigned test = EH_RETURN_DATA_REGNO(i);
2288 if (test == INVALID_REGNUM)
2290 if (test == (unsigned) regno)
2295 return (regs_ever_live[regno]
2296 && !call_used_regs[regno]
2297 && !fixed_regs[regno]
2298 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
2301 /* Return number of registers to be saved on the stack. */
2309 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
2310 if (ix86_save_reg (regno, true))
2315 /* Return the offset between two registers, one to be eliminated, and the other
2316 its replacement, at the start of a routine. */
2319 ix86_initial_elimination_offset (from, to)
2323 struct ix86_frame frame;
2324 ix86_compute_frame_layout (&frame);
2326 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
2327 return frame.hard_frame_pointer_offset;
2328 else if (from == FRAME_POINTER_REGNUM
2329 && to == HARD_FRAME_POINTER_REGNUM)
2330 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
2333 if (to != STACK_POINTER_REGNUM)
2335 else if (from == ARG_POINTER_REGNUM)
2336 return frame.stack_pointer_offset;
2337 else if (from != FRAME_POINTER_REGNUM)
2340 return frame.stack_pointer_offset - frame.frame_pointer_offset;
2344 /* Fill structure ix86_frame about frame of currently computed function.  */
/* Computes, in order: saved-register count, hard-frame-pointer offset,
   alignment sanity checks, register save area, optional x86-64 varargs
   save area, padding1 (aligns the local frame), outgoing argument area,
   padding2 (aligns the stack boundary), final stack-pointer offset, the
   amount the prologue must allocate, and the x86-64 red-zone adjustment.
   NOTE(review): the left-margin numbering jumps, so braces, abort()
   calls, and else-arms from the full source are elided in this listing.  */
2347 ix86_compute_frame_layout (frame)
2348 struct ix86_frame *frame;
2350 HOST_WIDE_INT total_size;
/* Alignments converted from bits to bytes up front.  */
2351 int stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
2353 int preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
2354 HOST_WIDE_INT size = get_frame_size ();
2356 frame->nregs = ix86_nsaved_regs ();
2359 /* Skip return value and save base pointer.  */
2360 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
2362 frame->hard_frame_pointer_offset = offset;
2364 /* Do some sanity checking of stack_alignment_needed and
2365 preferred_alignment, since i386 port is the only one using those features
2366 that may break easily.  */
/* Each failed check presumably aborts; the abort() lines are not shown
   in this listing.  */
2368 if (size && !stack_alignment_needed)
2370 if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
2372 if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
2374 if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
/* Never align the frame to less than the ABI-mandated minimum.  */
2377 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
2378 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
2380 /* Register save area */
2381 offset += frame->nregs * UNITS_PER_WORD;
/* x86-64 varargs: reserve a fixed-size area to spill argument registers.  */
2384 if (ix86_save_varrargs_registers)
2386 offset += X86_64_VARARGS_SIZE;
2387 frame->va_arg_size = X86_64_VARARGS_SIZE;
2390 frame->va_arg_size = 0;
2392 /* Align start of frame for local function.  */
/* Round offset up to stack_alignment_needed; padding1 is the slack added.  */
2393 frame->padding1 = ((offset + stack_alignment_needed - 1)
2394 & -stack_alignment_needed) - offset;
2396 offset += frame->padding1;
2398 /* Frame pointer points here.  */
2399 frame->frame_pointer_offset = offset;
2403 /* Add outgoing arguments area.  */
2404 if (ACCUMULATE_OUTGOING_ARGS)
2406 offset += current_function_outgoing_args_size;
2407 frame->outgoing_arguments_size = current_function_outgoing_args_size;
2410 frame->outgoing_arguments_size = 0;
2412 /* Align stack boundary.  */
2413 frame->padding2 = ((offset + preferred_alignment - 1)
2414 & -preferred_alignment) - offset;
2416 offset += frame->padding2;
2418 /* We've reached end of stack frame.  */
2419 frame->stack_pointer_offset = offset;
2421 /* Size prologue needs to allocate.  */
2422 frame->to_allocate =
2423 (size + frame->padding1 + frame->padding2
2424 + frame->outgoing_arguments_size + frame->va_arg_size);
/* x86-64 leaf functions with an unchanging sp may use the red zone below
   the stack pointer instead of explicitly allocating, capped at
   RED_ZONE_SIZE - RED_ZONE_RESERVE.  */
2426 if (TARGET_64BIT && TARGET_RED_ZONE && current_function_sp_is_unchanging
2427 && current_function_is_leaf)
2429 frame->red_zone_size = frame->to_allocate;
2430 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
2431 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
2434 frame->red_zone_size = 0;
2435 frame->to_allocate -= frame->red_zone_size;
2436 frame->stack_pointer_offset -= frame->red_zone_size;
/* Debug dump of the computed layout -- presumably guarded by a
   condition not visible in this listing.  */
2438 fprintf (stderr, "nregs: %i\n", frame->nregs);
2439 fprintf (stderr, "size: %i\n", size);
2440 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
2441 fprintf (stderr, "padding1: %i\n", frame->padding1);
2442 fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
2443 fprintf (stderr, "padding2: %i\n", frame->padding2);
2444 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
2445 fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
2446 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
2447 fprintf (stderr, "hard_frame_pointer_offset: %i\n",
2448 frame->hard_frame_pointer_offset);
2449 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
2453 /* Emit code to save registers in the prologue.  */
/* Walks hard registers from highest to lowest, pushing each one that
   ix86_save_reg marks as needing a save; every push is flagged
   RTX_FRAME_RELATED_P so unwind/debug info is emitted for it.
   NOTE(review): the declarations of `regno' and `insn' are elided
   from this listing.  */
2456 ix86_emit_save_regs ()
2461 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
2462 if (ix86_save_reg (regno, true))
2464 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
2465 RTX_FRAME_RELATED_P (insn) = 1;
2469 /* Expand the prologue into a bunch of separate insns.  */
/* Sequence: optionally push/set up the frame pointer, push the
   callee-saved registers, then allocate frame.to_allocate bytes --
   either by a direct sp adjustment or, for large probed frames, via a
   call to _alloca -- and finally handle PIC register load and
   profiling-related scheduling barriers.
   NOTE(review): left-margin numbering jumps; braces and some statements
   are elided in this listing.  */
2472 ix86_expand_prologue ()
2475 int pic_reg_used = (flag_pic && (current_function_uses_pic_offset_table
2476 || current_function_uses_const_pool)
2478 struct ix86_frame frame;
2480 ix86_compute_frame_layout (&frame);
2482 /* Note: AT&T enter does NOT have reversed args.  Enter is probably
2483 slower on all targets.  Also sdb doesn't like it.  */
2485 if (frame_pointer_needed)
2487 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
2488 RTX_FRAME_RELATED_P (insn) = 1;
2490 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
2491 RTX_FRAME_RELATED_P (insn) = 1;
2494 ix86_emit_save_regs ();
/* Nothing to allocate: fall through.  Small (or unprobed) frames get a
   direct subtraction from sp; the pro_epilogue pattern ties sp to the
   frame pointer so the scheduler cannot reorder around it.  */
2496 if (frame.to_allocate == 0)
2498 else if (! TARGET_STACK_PROBE || frame.to_allocate < CHECK_STACK_LIMIT)
2500 if (frame_pointer_needed)
2501 insn = emit_insn (gen_pro_epilogue_adjust_stack
2502 (stack_pointer_rtx, stack_pointer_rtx,
2503 GEN_INT (-frame.to_allocate), hard_frame_pointer_rtx));
2506 insn = emit_insn (gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx,
2507 GEN_INT (-frame.to_allocate)));
2509 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
2510 GEN_INT (-frame.to_allocate)));
2511 RTX_FRAME_RELATED_P (insn) = 1;
/* Large frame with stack probing: allocate by calling _alloca with the
   size in register 0 (eax), recording the argument as used so the call
   is not deleted.  */
2515 /* ??? Is this only valid for Win32?  */
2522 arg0 = gen_rtx_REG (SImode, 0);
2523 emit_move_insn (arg0, GEN_INT (frame.to_allocate));
2525 sym = gen_rtx_MEM (FUNCTION_MODE,
2526 gen_rtx_SYMBOL_REF (Pmode, "_alloca"));
2527 insn = emit_call_insn (gen_call (sym, const0_rtx, constm1_rtx));
2529 CALL_INSN_FUNCTION_USAGE (insn)
2530 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_USE (VOIDmode, arg0),
2531 CALL_INSN_FUNCTION_USAGE (insn));
2534 #ifdef SUBTARGET_PROLOGUE
2539 load_pic_register ();
2541 /* If we are profiling, make sure no instructions are scheduled before
2542 the call to mcount.  However, if -fpic, the above call will have
2544 if ((profile_flag || profile_block_flag) && ! pic_reg_used)
2545 emit_insn (gen_blockage ());
2548 /* Emit code to add TSIZE to esp value.  Use POP instruction when
/* Releases TSIZE bytes of stack in the epilogue.  With a frame pointer
   the pro_epilogue pattern is used so sp stays tied to fp for the
   scheduler; otherwise a plain add (DImode or SImode by target).
   NOTE(review): the continuation of the header comment and several
   argument lines are elided in this listing.  */
2552 ix86_emit_epilogue_esp_adjustment (tsize)
2555 /* If a frame pointer is present, we must be sure to tie the sp
2556 to the fp so that we don't mis-schedule.  */
2557 if (frame_pointer_needed)
2558 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
2561 hard_frame_pointer_rtx));
2564 emit_insn (gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx,
2567 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
2571 /* Emit code to restore saved registers using MOV insns.  First register
2572 is restored from POINTER + OFFSET.  */
/* Iterates over all hard registers in ascending order; each one that
   ix86_save_reg (with MAYBE_EH_RETURN) reports saved is reloaded from
   successive word-sized slots at POINTER + OFFSET.
   NOTE(review): the remaining parameter declarations and the full
   adj_offsettable_operand argument list are elided in this listing.  */
2574 ix86_emit_restore_regs_using_mov (pointer, offset, maybe_eh_return)
2577 int maybe_eh_return;
2581 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
2582 if (ix86_save_reg (regno, maybe_eh_return))
2584 emit_move_insn (gen_rtx_REG (Pmode, regno),
2585 adj_offsettable_operand (gen_rtx_MEM (Pmode,
2588 offset += UNITS_PER_WORD;
2592 /* Restore function stack, frame, and registers.  */
/* STYLE appears to distinguish epilogue variants (style == 2 is the
   eh_return path; a sibcall variant omits the return insn -- see the
   comment near the end).  Chooses between a MOV-based restore (ending
   in LEAVE or its discrete equivalent) and a POP-based restore, then
   emits the return, using "ret $N" for callee-pop conventions.
   NOTE(review): left-margin numbering jumps; braces, else-arms and some
   statements are elided in this listing.  */
2595 ix86_expand_epilogue (style)
2599 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
2600 struct ix86_frame frame;
2601 HOST_WIDE_INT offset;
2603 ix86_compute_frame_layout (&frame);
2605 /* Calculate start of saved registers relative to ebp.  Special care
2606 must be taken for the normal return case of a function using
2607 eh_return: the eax and edx registers are marked as saved, but not
2608 restored along this path.  */
2609 offset = frame.nregs;
2610 if (current_function_calls_eh_return && style != 2)
/* Saved registers live below the hard frame pointer, hence negative.  */
2612 offset *= -UNITS_PER_WORD;
2614 #ifdef FUNCTION_BLOCK_PROFILER_EXIT
2615 if (profile_block_flag == 2)
2617 FUNCTION_BLOCK_PROFILER_EXIT;
2621 /* If we're only restoring one register and sp is not valid then
2622 using a move instruction to restore the register since it's
2623 less work than reloading sp and popping the register.
2625 The default code result in stack adjustment using add/lea instruction,
2626 while this code results in LEAVE instruction (or discrete equivalent),
2627 so it is profitable in some other cases as well.  Especially when there
2628 are no registers to restore.  We also use this code when TARGET_USE_LEAVE
2629 and there is exactly one register to pop.  This heuristic may need some
2630 tuning in future.  */
2631 if ((!sp_valid && frame.nregs <= 1)
2632 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
2633 || (frame_pointer_needed && TARGET_USE_LEAVE && !optimize_size
2634 && frame.nregs == 1)
2637 /* Restore registers.  We can use ebp or esp to address the memory
2638 locations.  If both are available, default to ebp, since offsets
2639 are known to be small.  Only exception is esp pointing directly to the
2640 end of block of saved registers, where we may simplify addressing
2643 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
2644 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
2645 frame.to_allocate, style == 2);
2647 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
2648 offset, style == 2);
2650 /* eh_return epilogues need %ecx added to the stack pointer.  */
2653 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
/* With a frame pointer: fold the eh stack adjustment into sa, reload
   the saved frame pointer from memory, then set sp from sa.  */
2655 if (frame_pointer_needed)
2657 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
2658 tmp = plus_constant (tmp, UNITS_PER_WORD);
2659 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
2661 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
2662 emit_move_insn (hard_frame_pointer_rtx, tmp);
2664 emit_insn (gen_pro_epilogue_adjust_stack
2665 (stack_pointer_rtx, sa, const0_rtx,
2666 hard_frame_pointer_rtx));
/* Without a frame pointer: sp += sa + frame size + register save area.  */
2670 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
2671 tmp = plus_constant (tmp, (frame.to_allocate
2672 + frame.nregs * UNITS_PER_WORD));
2673 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
2676 else if (!frame_pointer_needed)
2677 ix86_emit_epilogue_esp_adjustment (frame.to_allocate
2678 + frame.nregs * UNITS_PER_WORD);
2679 /* If not an i386, mov & pop is faster than "leave".  */
2680 else if (TARGET_USE_LEAVE || optimize_size)
2681 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
/* Discrete equivalent of LEAVE: reset sp from fp, then pop fp.  */
2684 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
2685 hard_frame_pointer_rtx,
2687 hard_frame_pointer_rtx));
2689 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
2691 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
/* POP-based restore path.  */
2696 /* First step is to deallocate the stack frame so that we can
2697 pop the registers.  */
2700 if (!frame_pointer_needed)
2702 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
2703 hard_frame_pointer_rtx,
2705 hard_frame_pointer_rtx));
2707 else if (frame.to_allocate)
2708 ix86_emit_epilogue_esp_adjustment (frame.to_allocate);
2710 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
2711 if (ix86_save_reg (regno, false))
2714 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
2716 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
2718 if (frame_pointer_needed)
2721 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
2723 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
2727 /* Sibcall epilogues don't want a return instruction.  */
2731 if (current_function_pops_args && current_function_args_size)
2733 rtx popc = GEN_INT (current_function_pops_args);
2735 /* i386 can only pop 64K bytes.  If asked to pop more, pop
2736 return address, do explicit add, and jump indirectly to the
2739 if (current_function_pops_args >= 65536)
2741 rtx ecx = gen_rtx_REG (SImode, 2);
2743 /* There is no "pascal" calling convention in 64bit ABI.  */
2747 emit_insn (gen_popsi1 (ecx));
2748 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
2749 emit_jump_insn (gen_return_indirect_internal (ecx));
2752 emit_jump_insn (gen_return_pop_internal (popc));
2755 emit_jump_insn (gen_return_internal ());
2758 /* Extract the parts of an RTL expression that is a valid memory address
2759 for an instruction.  Return false if the structure of the address is
/* Decomposes ADDR into the i386 base + index*scale + displacement form,
   filling OUT (struct ix86_address).  Recognizes REG/SUBREG, PLUS
   combinations, MULT (index*scale), and ASHIFT (shift-form scaling as
   produced for lea), then applies i386-specific fixups: no scaled
   sp/fp/arg-pointer index, %ebp/vreg base needs a displacement, the K6
   [%esi] -> [%esi+0] transformation, reg*2 -> reg+reg, and scaling
   requires a base or displacement.
   NOTE(review): braces and the final store into *OUT are elided in this
   listing.  */
2763 ix86_decompose_address (addr, out)
2765 struct ix86_address *out;
2767 rtx base = NULL_RTX;
2768 rtx index = NULL_RTX;
2769 rtx disp = NULL_RTX;
2770 HOST_WIDE_INT scale = 1;
2771 rtx scale_rtx = NULL_RTX;
2773 if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG)
2775 else if (GET_CODE (addr) == PLUS)
2777 rtx op0 = XEXP (addr, 0);
2778 rtx op1 = XEXP (addr, 1);
2779 enum rtx_code code0 = GET_CODE (op0);
2780 enum rtx_code code1 = GET_CODE (op1);
2782 if (code0 == REG || code0 == SUBREG)
2784 if (code1 == REG || code1 == SUBREG)
2785 index = op0, base = op1; /* index + base */
2787 base = op0, disp = op1; /* base + displacement */
2789 else if (code0 == MULT)
2791 index = XEXP (op0, 0);
2792 scale_rtx = XEXP (op0, 1);
2793 if (code1 == REG || code1 == SUBREG)
2794 base = op1; /* index*scale + base */
2796 disp = op1; /* index*scale + disp */
2798 else if (code0 == PLUS && GET_CODE (XEXP (op0, 0)) == MULT)
2800 index = XEXP (XEXP (op0, 0), 0); /* index*scale + base + disp */
2801 scale_rtx = XEXP (XEXP (op0, 0), 1);
2802 base = XEXP (op0, 1);
2805 else if (code0 == PLUS)
2807 index = XEXP (op0, 0); /* index + base + disp */
2808 base = XEXP (op0, 1);
2814 else if (GET_CODE (addr) == MULT)
2816 index = XEXP (addr, 0); /* index*scale */
2817 scale_rtx = XEXP (addr, 1);
2819 else if (GET_CODE (addr) == ASHIFT)
2823 /* We're called for lea too, which implements ashift on occasion.  */
2824 index = XEXP (addr, 0);
2825 tmp = XEXP (addr, 1);
2826 if (GET_CODE (tmp) != CONST_INT)
/* A shift count of 0..3 corresponds to scale 1/2/4/8; anything larger
   cannot be encoded.  (The 1 << scale conversion is elided here.)  */
2828 scale = INTVAL (tmp);
2829 if ((unsigned HOST_WIDE_INT) scale > 3)
2834 disp = addr; /* displacement */
2836 /* Extract the integral value of scale.  */
2839 if (GET_CODE (scale_rtx) != CONST_INT)
2841 scale = INTVAL (scale_rtx);
2844 /* Allow arg pointer and stack pointer as index if there is not scaling */
/* sp (and the soft frame/arg pointers that may become sp) cannot be an
   index register, so swap base and index when scale is 1.  The swap
   itself is elided from this listing.  */
2845 if (base && index && scale == 1
2846 && (index == arg_pointer_rtx || index == frame_pointer_rtx
2847 || index == stack_pointer_rtx))
2854 /* Special case: %ebp cannot be encoded as a base without a displacement.  */
2855 if ((base == hard_frame_pointer_rtx
2856 || base == frame_pointer_rtx
2857 || base == arg_pointer_rtx) && !disp)
2860 /* Special case: on K6, [%esi] makes the instruction vector decoded.
2861 Avoid this by transforming to [%esi+0].  */
2862 if (ix86_cpu == PROCESSOR_K6 && !optimize_size
2863 && base && !index && !disp
2865 && REGNO_REG_CLASS (REGNO (base)) == SIREG)
2868 /* Special case: encode reg+reg instead of reg*2.  */
2869 if (!base && index && scale && scale == 2)
2870 base = index, scale = 1;
2872 /* Special case: scaling cannot be encoded without base or displacement.  */
2873 if (!base && !disp && index && scale != 1)
2884 /* Return cost of the memory address x.
2885 For i386, it is better to use a complex address than let gcc copy
2886 the address into a reg and make a new pseudo.  But not if the address
2887 requires two regs - that would mean more pseudos with longer
/* Heuristic cost: decomposes X via ix86_decompose_address, rewards a
   nonzero displacement, penalizes use of multiple hard registers, and
   adds a K6-specific penalty for addressing modes its predecoder
   handles badly.  NOTE(review): the `cost' accumulator and return
   statements are elided in this listing.  */
2890 ix86_address_cost (x)
2893 struct ix86_address parts;
2896 if (!ix86_decompose_address (x, &parts))
2899 /* More complex memory references are better.  */
2900 if (parts.disp && parts.disp != const0_rtx)
2903 /* Attempt to minimize number of registers in the address.  */
2905 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
2907 && (!REG_P (parts.index)
2908 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
2912 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
2914 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
2915 && parts.base != parts.index)
2918 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
2919 since its predecode logic can't detect the length of instructions
2920 and it degenerates to vector decoded.  Increase cost of such
2921 addresses here.  The penalty is minimally 2 cycles.  It may be worthwhile
2922 to split such addresses or even refuse such addresses at all.
2924 Following addressing modes are affected:
2929 The first and last case may be avoidable by explicitly coding the zero in
2930 memory address, but I don't have AMD-K6 machine handy to check this
2934 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
2935 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
2936 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
2942 /* If X is a machine specific address (i.e. a symbol or label being
2943 referenced as a displacement from the GOT implemented using an
2944 UNSPEC), then return the base term.  Otherwise return X.  */
/* Pattern matched: (plus pic_offset_table_rtx (const ... (unspec [sym] 7)))
   with an optional constant offset; unspec number 7 is the @GOTOFF
   relocation used elsewhere in this file.  */
2947 ix86_find_base_term (x)
2952 if (GET_CODE (x) != PLUS
2953 || XEXP (x, 0) != pic_offset_table_rtx
2954 || GET_CODE (XEXP (x, 1)) != CONST)
2957 term = XEXP (XEXP (x, 1), 0);
/* Strip an outer constant offset, if any.  */
2959 if (GET_CODE (term) == PLUS && GET_CODE (XEXP (term, 1)) == CONST_INT)
2960 term = XEXP (term, 0);
2962 if (GET_CODE (term) != UNSPEC
2963 || XVECLEN (term, 0) != 1
2964 || XINT (term, 1) != 7)
2967 term = XVECEXP (term, 0, 0);
2969 if (GET_CODE (term) != SYMBOL_REF
2970 && GET_CODE (term) != LABEL_REF)
2976 /* Determine if a given CONST RTX is a valid memory displacement
/* Accepts (const (unspec [SYMBOL_REF/LABEL_REF] 6|7)) with an optional
   CONST_INT addend -- i.e. an @GOT (6) or @GOTOFF (7) reference.
   NOTE(review): the `return 0/1' lines are elided in this listing.  */
2980 legitimate_pic_address_disp_p (disp)
2983 if (GET_CODE (disp) != CONST)
2985 disp = XEXP (disp, 0);
/* Strip a constant offset wrapped around the unspec.  */
2987 if (GET_CODE (disp) == PLUS)
2989 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
2991 disp = XEXP (disp, 0);
2994 if (GET_CODE (disp) != UNSPEC
2995 || XVECLEN (disp, 0) != 1)
2998 /* Must be @GOT or @GOTOFF.  */
2999 if (XINT (disp, 1) != 6
3000 && XINT (disp, 1) != 7)
3003 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
3004 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
3010 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
3011 memory address for an instruction.  The MODE argument is the machine mode
3012 for the MEM expression that wants to use this address.
3014 It only recognizes address in canonical form.  LEGITIMIZE_ADDRESS should
3015 convert common non-canonical forms to canonical form so that they will
/* Validates ADDR by decomposing it and then checking each part in turn
   (base, index, scale, displacement); on failure falls through to the
   `report_error' path which prints `reason'/`reason_rtx' when
   TARGET_DEBUG_ADDR.  STRICT selects the strict (post-reload) register
   checks.  NOTE(review): braces, gotos and return statements are elided
   in this listing.  */
3019 legitimate_address_p (mode, addr, strict)
3020 enum machine_mode mode;
3024 struct ix86_address parts;
3025 rtx base, index, disp;
3026 HOST_WIDE_INT scale;
3027 const char *reason = NULL;
3028 rtx reason_rtx = NULL_RTX;
3030 if (TARGET_DEBUG_ADDR)
3033 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
3034 GET_MODE_NAME (mode), strict);
3038 if (! ix86_decompose_address (addr, &parts))
3040 reason = "decomposition failed";
3045 index = parts.index;
3047 scale = parts.scale;
3049 /* Validate base register.
3051 Don't allow SUBREG's here, it can lead to spill failures when the base
3052 is one word out of a two word structure, which is represented internally
3059 if (GET_CODE (base) != REG)
3061 reason = "base is not a register";
3065 if (GET_MODE (base) != Pmode)
3067 reason = "base is not in Pmode";
3071 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (base))
3072 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (base)))
3074 reason = "base is not valid";
3079 /* Validate index register.
3081 Don't allow SUBREG's here, it can lead to spill failures when the index
3082 is one word out of a two word structure, which is represented internally
3089 if (GET_CODE (index) != REG)
3091 reason = "index is not a register";
3095 if (GET_MODE (index) != Pmode)
3097 reason = "index is not in Pmode";
3101 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (index))
3102 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (index)))
3104 reason = "index is not valid";
3109 /* Validate scale factor.  */
3112 reason_rtx = GEN_INT (scale);
3115 reason = "scale without index";
/* SIB encoding only supports scales 1, 2, 4 and 8.  */
3119 if (scale != 2 && scale != 4 && scale != 8)
3121 reason = "scale is not a valid multiplier";
3126 /* Validate displacement.  */
3131 if (!CONSTANT_ADDRESS_P (disp))
3133 reason = "displacement is not constant";
/* x86-64 displacements must fit in a sign-extended 32-bit immediate.  */
3139 if (!x86_64_sign_extended_value (disp))
3141 reason = "displacement is out of range";
3147 if (GET_CODE (disp) == CONST_DOUBLE)
3149 reason = "displacement is a const_double";
3154 if (flag_pic && SYMBOLIC_CONST (disp))
3156 if (TARGET_64BIT && (index || base))
3158 reason = "non-constant pic memory reference";
3161 if (! legitimate_pic_address_disp_p (disp))
3163 reason = "displacement is an invalid pic construct";
3167 /* This code used to verify that a symbolic pic displacement
3168 includes the pic_offset_table_rtx register.
3170 While this is good idea, unfortunately these constructs may
3171 be created by "adds using lea" optimization for incorrect
3180 This code is nonsensical, but results in addressing
3181 GOT table with pic_offset_table_rtx base.  We can't
3182 just refuse it easily, since it gets matched by
3183 "addsi3" pattern, that later gets split to lea in the
3184 case output register differs from input.  While this
3185 can be handled by separate addsi pattern for this case
3186 that never results in lea, this seems to be easier and
3187 correct fix for crash to disable this test.  */
3189 else if (HALF_PIC_P ())
3191 if (! HALF_PIC_ADDRESS_P (disp)
3192 || (base != NULL_RTX || index != NULL_RTX))
3194 reason = "displacement is an invalid half-pic reference";
3200 /* Everything looks valid.  */
3201 if (TARGET_DEBUG_ADDR)
3202 fprintf (stderr, "Success.\n")
3206 if (TARGET_DEBUG_ADDR)
3208 fprintf (stderr, "Error: %s\n", reason);
3209 debug_rtx (reason_rtx);
3214 /* Return an unique alias set for the GOT.  */
/* Lazily allocates the alias set on first call (the -1 sentinel check
   and the return are elided in this listing) and caches it in a
   function-local static.  */
3216 static HOST_WIDE_INT
3217 ix86_GOT_alias_set ()
3219 static HOST_WIDE_INT set = -1;
3221 set = new_alias_set ();
3225 /* Return a legitimate reference for ORIG (an address) using the
3226 register REG.  If REG is 0, a new pseudo is generated.
3228 There are two types of references that must be handled:
3230 1. Global data references must load the address from the GOT, via
3231 the PIC reg.  An insn is emitted to do this load, and the reg is
3234 2. Static data references, constant pool addresses, and code labels
3235 compute the address as an offset from the GOT, whose base is in
3236 the PIC reg.  Static data objects have SYMBOL_REF_FLAG set to
3237 differentiate them from global data objects.  The returned
3238 address is the PIC reg + an unspec constant.
3240 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
3241 reg also appears in the address.  */
/* Unspec numbers used below: 7 = @GOTOFF (pic-relative offset),
   6 = @GOT (load from the Global Offset Table).
   NOTE(review): braces, returns of `new', and the assignment of ORIG
   to `addr' are elided in this listing.  */
3244 legitimize_pic_address (orig, reg)
/* Case 1: label, or local/constant-pool symbol -> pic_reg + @GOTOFF.  */
3252 if (GET_CODE (addr) == LABEL_REF
3253 || (GET_CODE (addr) == SYMBOL_REF
3254 && (CONSTANT_POOL_ADDRESS_P (addr)
3255 || SYMBOL_REF_FLAG (addr))))
3257 /* This symbol may be referenced via a displacement from the PIC
3258 base address (@GOTOFF).  */
3260 current_function_uses_pic_offset_table = 1;
3261 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), 7);
3262 new = gen_rtx_CONST (Pmode, new);
3263 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
3267 emit_move_insn (reg, new);
/* Case 2: global symbol -> load its address from the GOT slot; the MEM
   is marked unchanging and given the GOT's private alias set.  */
3271 else if (GET_CODE (addr) == SYMBOL_REF)
3273 /* This symbol must be referenced via a load from the
3274 Global Offset Table (@GOT).  */
3276 current_function_uses_pic_offset_table = 1;
3277 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), 6);
3278 new = gen_rtx_CONST (Pmode, new);
3279 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
3280 new = gen_rtx_MEM (Pmode, new);
3281 RTX_UNCHANGING_P (new) = 1;
3282 MEM_ALIAS_SET (new) = ix86_GOT_alias_set ();
3285 reg = gen_reg_rtx (Pmode);
3286 emit_move_insn (reg, new);
/* Remaining cases: CONST wrappers and PLUS expressions.  */
3291 if (GET_CODE (addr) == CONST)
3293 addr = XEXP (addr, 0);
3294 if (GET_CODE (addr) == UNSPEC)
3296 /* Check that the unspec is one of the ones we generate?  */
3298 else if (GET_CODE (addr) != PLUS)
3301 if (GET_CODE (addr) == PLUS)
3303 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
3305 /* Check first to see if this is a constant offset from a @GOTOFF
3306 symbol reference.  */
3307 if ((GET_CODE (op0) == LABEL_REF
3308 || (GET_CODE (op0) == SYMBOL_REF
3309 && (CONSTANT_POOL_ADDRESS_P (op0)
3310 || SYMBOL_REF_FLAG (op0))))
3311 && GET_CODE (op1) == CONST_INT)
3313 current_function_uses_pic_offset_table = 1;
3314 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0), 7);
3315 new = gen_rtx_PLUS (Pmode, new, op1);
3316 new = gen_rtx_CONST (Pmode, new);
3317 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
3321 emit_move_insn (reg, new);
/* General PLUS: legitimize both halves recursively, then recombine,
   folding a constant term when possible.  */
3327 base = legitimize_pic_address (XEXP (addr, 0), reg);
3328 new = legitimize_pic_address (XEXP (addr, 1),
3329 base == reg ? NULL_RTX : reg);
3331 if (GET_CODE (new) == CONST_INT)
3332 new = plus_constant (base, INTVAL (new));
3335 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
3337 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
3338 new = XEXP (new, 1);
3340 new = gen_rtx_PLUS (Pmode, base, new);
3348 /* Try machine-dependent ways of modifying an illegitimate address
3349 to be legitimate.  If we find one, return the new, valid address.
3350 This macro is used in only one place: `memory_address' in explow.c.
3352 OLDX is the address as it was before break_out_memory_refs was called.
3353 In some cases it is useful to look at this to decide what needs to be done.
3355 MODE and WIN are passed so that this macro can use
3356 GO_IF_LEGITIMATE_ADDRESS.
3358 It is always safe for this macro to do nothing.  It exists to recognize
3359 opportunities to optimize the output.
3361 For the 80386, we handle X+REG by loading X into a register R and
3362 using R+REG.  R will go in a general reg and indexing will be used.
3363 However, if REG is a broken-out memory address or multiplication,
3364 nothing needs to be done because REG can certainly go in a general reg.
3366 When -fpic is used, special handling is needed for symbolic references.
3367 See comments by legitimize_pic_address in i386.c for details.  */
/* Canonicalization driver: repeatedly rewrites X (tracking `changed',
   whose declaration is elided here) and bails out early through
   legitimate_address_p once the form is acceptable.
   NOTE(review): braces, `changed = 1' markers, and several return
   statements are elided in this listing.  */
3370 legitimize_address (x, oldx, mode)
3372 register rtx oldx ATTRIBUTE_UNUSED;
3373 enum machine_mode mode;
3378 if (TARGET_DEBUG_ADDR)
3380 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
3381 GET_MODE_NAME (mode));
3385 if (flag_pic && SYMBOLIC_CONST (x))
3386 return legitimize_pic_address (x, 0);
3388 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
3389 if (GET_CODE (x) == ASHIFT
3390 && GET_CODE (XEXP (x, 1)) == CONST_INT
3391 && (log = (unsigned)exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
3394 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
3395 GEN_INT (1 << log));
3398 if (GET_CODE (x) == PLUS)
3400 /* Canonicalize shifts by 0, 1, 2, 3 into multiply.  */
3402 if (GET_CODE (XEXP (x, 0)) == ASHIFT
3403 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
3404 && (log = (unsigned)exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
3407 XEXP (x, 0) = gen_rtx_MULT (Pmode,
3408 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
3409 GEN_INT (1 << log));
3412 if (GET_CODE (XEXP (x, 1)) == ASHIFT
3413 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
3414 && (log = (unsigned)exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
3417 XEXP (x, 1) = gen_rtx_MULT (Pmode,
3418 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
3419 GEN_INT (1 << log));
3422 /* Put multiply first if it isn't already.  */
3423 if (GET_CODE (XEXP (x, 1)) == MULT)
3425 rtx tmp = XEXP (x, 0);
3426 XEXP (x, 0) = XEXP (x, 1);
3431 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
3432 into (plus (plus (mult (reg) (const)) (reg)) (const)).  This can be
3433 created by virtual register instantiation, register elimination, and
3434 similar optimizations.  */
3435 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
3438 x = gen_rtx_PLUS (Pmode,
3439 gen_rtx_PLUS (Pmode, XEXP (x, 0),
3440 XEXP (XEXP (x, 1), 0)),
3441 XEXP (XEXP (x, 1), 1));
3445 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
3446 into (plus (plus (mult (reg) (const)) (reg)) (const)).  */
3447 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
3448 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
3449 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
3450 && CONSTANT_P (XEXP (x, 1)))
3453 rtx other = NULL_RTX;
/* Pick whichever of the two constant positions actually holds the
   CONST_INT; the other operand absorbs the remaining term.  */
3455 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
3457 constant = XEXP (x, 1);
3458 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
3460 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
3462 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
3463 other = XEXP (x, 1);
3471 x = gen_rtx_PLUS (Pmode,
3472 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
3473 XEXP (XEXP (XEXP (x, 0), 1), 0)),
3474 plus_constant (other, INTVAL (constant)));
3478 if (changed && legitimate_address_p (mode, x, FALSE))
/* Force any remaining MULT operands into registers.  */
3481 if (GET_CODE (XEXP (x, 0)) == MULT)
3484 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
3487 if (GET_CODE (XEXP (x, 1)) == MULT)
3490 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
3494 && GET_CODE (XEXP (x, 1)) == REG
3495 && GET_CODE (XEXP (x, 0)) == REG
3498 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
3501 x = legitimize_pic_address (x, 0);
3504 if (changed && legitimate_address_p (mode, x, FALSE))
/* Last resort: force the non-register half into a fresh pseudo so the
   address becomes reg+reg.  */
3507 if (GET_CODE (XEXP (x, 0)) == REG)
3509 register rtx temp = gen_reg_rtx (Pmode);
3510 register rtx val = force_operand (XEXP (x, 1), temp);
3512 emit_move_insn (temp, val);
3518 else if (GET_CODE (XEXP (x, 1)) == REG)
3520 register rtx temp = gen_reg_rtx (Pmode);
3521 register rtx val = force_operand (XEXP (x, 0), temp);
3523 emit_move_insn (temp, val);
3533 /* Print an integer constant expression in assembler syntax.  Addition
3534 and subtraction are the only arithmetic that may appear in these
3535 expressions.  FILE is the stdio stream to write to, X is the rtx, and
3536 CODE is the operand print code from the output string.  */
/* Recursive printer dispatching on GET_CODE (x); emits PIC relocation
   suffixes (@GOT / @GOTOFF / @PLT) for the unspec numbers this file
   generates.  NOTE(review): several case labels, breaks and braces are
   elided in this listing.  */
3539 output_pic_addr_const (file, x, code)
3546 switch (GET_CODE (x))
/* SYMBOL_REF: the 'P' print-code asks for a @PLT-decorated reference.  */
3556 assemble_name (file, XSTR (x, 0));
3557 if (code == 'P' && ! SYMBOL_REF_FLAG (x))
3558 fputs ("@PLT", file);
/* CODE_LABEL / LABEL_REF: print the internal label name.  */
3565 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
3566 assemble_name (asm_out_file, buf);
3570 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
3574 /* This used to output parentheses around the expression,
3575 but that does not work on the 386 (either ATT or BSD assembler).  */
3576 output_pic_addr_const (file, XEXP (x, 0), code);
/* CONST_DOUBLE: only integral (VOIDmode) values are printable.  */
3580 if (GET_MODE (x) == VOIDmode)
3582 /* We can use %d if the number is <32 bits and positive.  */
3583 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
3584 fprintf (file, "0x%lx%08lx",
3585 (unsigned long) CONST_DOUBLE_HIGH (x),
3586 (unsigned long) CONST_DOUBLE_LOW (x));
3588 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
3591 /* We can't handle floating point constants;
3592 PRINT_OPERAND must handle them.  */
3593 output_operand_lossage ("floating constant misused");
3597 /* Some assemblers need integer constants to appear first.  */
3598 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
3600 output_pic_addr_const (file, XEXP (x, 0), code);
3602 output_pic_addr_const (file, XEXP (x, 1), code);
3604 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
3606 output_pic_addr_const (file, XEXP (x, 1), code);
3608 output_pic_addr_const (file, XEXP (x, 0), code);
/* MINUS: wrapped in ( ) or [ ] depending on assembler dialect.  */
3615 putc (ASSEMBLER_DIALECT ? '(' : '[', file);
3616 output_pic_addr_const (file, XEXP (x, 0), code);
3618 output_pic_addr_const (file, XEXP (x, 1), code);
3619 putc (ASSEMBLER_DIALECT ? ')' : ']', file);
/* UNSPEC: print the operand, then the relocation suffix chosen by the
   unspec number (6=@GOT, 7=@GOTOFF, presumably 8=@PLT given the case
   ordering -- the case labels themselves are elided here).  */
3623 if (XVECLEN (x, 0) != 1)
3625 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
3626 switch (XINT (x, 1))
3629 fputs ("@GOT", file);
3632 fputs ("@GOTOFF", file);
3635 fputs ("@PLT", file);
3638 output_operand_lossage ("invalid UNSPEC as operand");
3644 output_operand_lossage ("invalid expression as operand");
3648 /* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
3649 We need to handle our special PIC relocations.  */
/* Emits INT_ASM_OP followed by X, routing through output_pic_addr_const
   in one branch and plain output_addr_const in the other (the selecting
   condition -- presumably flag_pic -- is elided in this listing).  */
3652 i386_dwarf_output_addr_const (file, x)
3656 fprintf (file, "%s", INT_ASM_OP);
3658 output_pic_addr_const (file, x, '\0');
3660 output_addr_const (file, x);
3664 /* In the name of slightly smaller debug output, and to cater to
3665 general assembler lossage, recognize PIC+GOTOFF and turn it back
3666 into a direct symbol reference.  */
/* Matches (plus reg (const ...)) where the const wraps an unspec 6/7
   (@GOT/@GOTOFF), with or without a CONST_INT addend, and returns the
   underlying symbol (plus addend).  Otherwise presumably returns
   ORIG_X unchanged -- that return is elided in this listing.  */
3669 i386_simplify_dwarf_addr (orig_x)
3674 if (GET_CODE (x) != PLUS
3675 || GET_CODE (XEXP (x, 0)) != REG
3676 || GET_CODE (XEXP (x, 1)) != CONST)
3679 x = XEXP (XEXP (x, 1), 0);
3680 if (GET_CODE (x) == UNSPEC
3681 && (XINT (x, 1) == 6
3682 || XINT (x, 1) == 7))
3683 return XVECEXP (x, 0, 0);
3685 if (GET_CODE (x) == PLUS
3686 && GET_CODE (XEXP (x, 0)) == UNSPEC
3687 && GET_CODE (XEXP (x, 1)) == CONST_INT
3688 && (XINT (XEXP (x, 0), 1) == 6
3689 || XINT (XEXP (x, 0), 1) == 7))
3690 return gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
/* Prints the condition-code suffix for CODE (e.g. the "a"/"e"/"b" part
   of setcc/jcc/cmov/fcmov) to FILE.  MODE is the CC register's mode;
   REVERSE inverts the condition; FP selects the fcmov-style spellings.
   NOTE(review): the case labels and most suffix assignments are elided
   in this listing; only the structure is visible.  */
3696 put_condition_code (code, mode, reverse, fp, file)
3698 enum machine_mode mode;
/* FP compares: reduce to an integer condition first; if bypass or
   second codes are needed the comparison cannot be expressed as one
   suffix (the abort for that case is elided here).  */
3704 if (mode == CCFPmode || mode == CCFPUmode)
3706 enum rtx_code second_code, bypass_code;
3707 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3708 if (bypass_code != NIL || second_code != NIL)
3710 code = ix86_fp_compare_code_to_integer (code);
3714 code = reverse_condition (code);
3725 if (mode != CCmode && mode != CCNOmode && mode != CCGCmode)
3730 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
3731 Those same assemblers have the same but opposite lossage on cmov.  */
3734 suffix = fp ? "nbe" : "a";
3737 if (mode == CCNOmode || mode == CCGOCmode)
3739 else if (mode == CCmode || mode == CCGCmode)
3750 if (mode == CCNOmode || mode == CCGOCmode)
3752 else if (mode == CCmode || mode == CCGCmode)
3761 suffix = fp ? "nb" : "ae";
3764 if (mode != CCmode && mode != CCGCmode && mode != CCNOmode)
/* Parity-flag conditions ("p"/"np", or "u"/"nu" for fcmov).  */
3774 suffix = fp ? "u" : "p";
3777 suffix = fp ? "nu" : "np";
3782 fputs (suffix, file);
/* Prints the assembler name of register X to FILE.  CODE is the operand
   print-code selecting the width/name variant: 'w' = word (HImode),
   'b' = byte, 'k' = SImode, 'q' = DImode, 'y' = "st(0)" naming,
   'h' = high byte register.  Soft registers (arg/frame pointer, flags,
   fpsr) must not reach here.
   NOTE(review): the code-to-size assignments and several case labels
   are elided in this listing.  */
3786 print_reg (x, code, file)
/* These pseudo registers have no assembler name; presumably aborts.  */
3791 if (REGNO (x) == ARG_POINTER_REGNUM
3792 || REGNO (x) == FRAME_POINTER_REGNUM
3793 || REGNO (x) == FLAGS_REG
3794 || REGNO (x) == FPSR_REG)
/* AT&T dialect (or empty user label prefix): emit the '%' prefix.  */
3797 if (ASSEMBLER_DIALECT == 0 || USER_LABEL_PREFIX[0] == 0)
/* Translate the print-code into a size in bytes (the assigned values
   are elided here); no code means "use the operand's natural mode".  */
3800 if (code == 'w' || MMX_REG_P (x))
3802 else if (code == 'b')
3804 else if (code == 'k')
3806 else if (code == 'q')
3808 else if (code == 'y')
3810 else if (code == 'h')
3813 code = GET_MODE_SIZE (GET_MODE (x));
3815 /* Irritatingly, AMD extended registers use different naming convention
3816 from the normal registers.  */
3817 if (REX_INT_REG_P (x))
3824 error ("Extended registers have no high halves\n");
3827 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
3830 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
3833 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
3836 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
3839 error ("Unsupported operand size for extended register.\n");
/* x87 stack registers: top of stack prints as "st(0)".  */
3847 if (STACK_TOP_P (x))
3849 fputs ("st(0)", file);
/* 32/64-bit integer registers get the 'e'/'r' prefix; FP regs do not.  */
3856 if (! ANY_FP_REG_P (x))
3857 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
3861 fputs (hi_reg_name[REGNO (x)], file);
3864 fputs (qi_reg_name[REGNO (x)], file);
3867 fputs (qi_high_reg_name[REGNO (x)], file);
3875 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
3876 C -- print opcode suffix for set/cmov insn.
3877 c -- like C, but print reversed condition
3878 F,f -- likewise, but for floating-point.
3879 R -- print the prefix for register names.
3880 z -- print the opcode suffix for the size of the current operand.
3881 * -- print a star (in certain assembler syntax)
3882 A -- print an absolute memory reference.
3883 w -- print the operand as if it's a "word" (HImode) even if it isn't.
3884 s -- print a shift double count, followed by the assembler's argument
3886 b -- print the QImode name of the register for the indicated operand.
3887 %b0 would print %al if operands[0] is reg 0.
3888 w -- likewise, print the HImode name of the register.
3889 k -- likewise, print the SImode name of the register.
3890 q -- likewise, print the DImode name of the register.
3891 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
3892 y -- print "st(0)" instead of "st" as a register.
3893 D -- print condition for SSE cmp instruction.
3894 P -- if PIC, print an @PLT suffix.
3895 X -- don't print any sort of PIC '@' suffix for a symbol.
/* Print operand X to FILE, interpreting the punctuation character or
   letter CODE documented in the comment preceding this function.
   NOTE(review): large parts of the dispatch are elided in this chunk.  */
3899 print_operand (file, x, code)
3909 if (ASSEMBLER_DIALECT == 0)
3914 if (ASSEMBLER_DIALECT == 0)
3916 else if (ASSEMBLER_DIALECT == 1)
3918 /* Intel syntax.  For absolute addresses, registers should not
3919 be surrounded by braces.  */
3920 if (GET_CODE (x) != REG)
3923 PRINT_OPERAND (file, x, 0);
3929 PRINT_OPERAND (file, x, 0);
3934 if (ASSEMBLER_DIALECT == 0)
3939 if (ASSEMBLER_DIALECT == 0)
3944 if (ASSEMBLER_DIALECT == 0)
3949 if (ASSEMBLER_DIALECT == 0)
3954 if (ASSEMBLER_DIALECT == 0)
3959 if (ASSEMBLER_DIALECT == 0)
3964 /* 387 opcodes don't get size suffixes if the operands are
3967 if (STACK_REG_P (x))
3970 /* this is the size of op from size of operand */
3971 switch (GET_MODE_SIZE (GET_MODE (x)))
3974 #ifdef HAVE_GAS_FILDS_FISTS
3980 if (GET_MODE (x) == SFmode)
3995 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
3997 #ifdef GAS_MNEMONICS
4023 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
4025 PRINT_OPERAND (file, x, 0);
4031 /* Little bit of braindamage here.  The SSE compare instructions
4032 use completely different names for the comparisons than the
4033 fp conditional moves do.  */
4034 switch (GET_CODE (x))
4049 fputs ("unord", file);
4053 fputs ("neq", file);
4057 fputs ("nlt", file);
4061 fputs ("nle", file);
4064 fputs ("ord", file);
/* 'C'/'F': condition suffix; 'c'/'f': reversed condition.  */
4072 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
4075 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
4078 /* Like above, but reverse condition */
4080 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
4083 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
/* Branch prediction hints: only emitted when optimizing for speed and
   the branch probability note clearly favors one direction.  */
4089 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
4092 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
4095 int pred_val = INTVAL (XEXP (x, 0));
4097 if (pred_val < REG_BR_PROB_BASE * 45 / 100
4098 || pred_val > REG_BR_PROB_BASE * 55 / 100)
4100 int taken = pred_val > REG_BR_PROB_BASE / 2;
4101 int cputaken = final_forward_branch_p (current_output_insn) == 0;
4103 /* Emit hints only in the case default branch prediction
4104 heuristics would fail.  */
4105 if (taken != cputaken)
4107 /* We use 3e (DS) prefix for taken branches and
4108 2e (CS) prefix for not taken branches.  */
4110 fputs ("ds ; ", file);
4112 fputs ("cs ; ", file);
4121 sprintf (str, "invalid operand code `%c'", code);
4122 output_operand_lossage (str);
/* No (or unrecognized) code letter: print the operand itself.  */
4127 if (GET_CODE (x) == REG)
4129 PRINT_REG (x, code, file);
4132 else if (GET_CODE (x) == MEM)
4134 /* No `byte ptr' prefix for call instructions.  */
4135 if (ASSEMBLER_DIALECT != 0 && code != 'X' && code != 'P')
4138 switch (GET_MODE_SIZE (GET_MODE (x)))
4140 case 1: size = "BYTE"; break;
4141 case 2: size = "WORD"; break;
4142 case 4: size = "DWORD"; break;
4143 case 8: size = "QWORD"; break;
4144 case 12: size = "XWORD"; break;
4145 case 16: size = "XMMWORD"; break;
4150 /* Check for explicit size override (codes 'b', 'w' and 'k') */
4153 else if (code == 'w')
4155 else if (code == 'k')
4159 fputs (" PTR ", file);
4163 if (flag_pic && CONSTANT_ADDRESS_P (x))
4164 output_pic_addr_const (file, x, code);
4165 /* Avoid (%rip) for call operands.  */
4166 else if (CONSTANT_ADDRESS_P (x) && code =='P'
4167 && GET_CODE (x) != CONST_INT)
4168 output_addr_const (file, x);
4173 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
4178 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
4179 REAL_VALUE_TO_TARGET_SINGLE (r, l);
4181 if (ASSEMBLER_DIALECT == 0)
4183 fprintf (file, "0x%lx", l);
4186 /* These float cases don't actually occur as immediate operands.  */
4187 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
4192 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
4193 REAL_VALUE_TO_DECIMAL (r, "%.22e", dstr);
4194 fprintf (file, "%s", dstr);
4197 else if (GET_CODE (x) == CONST_DOUBLE
4198 && (GET_MODE (x) == XFmode || GET_MODE (x) == TFmode))
4203 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
4204 REAL_VALUE_TO_DECIMAL (r, "%.22e", dstr);
4205 fprintf (file, "%s", dstr);
/* Immediates: AT&T wants a '$' marker; Intel wants "OFFSET FLAT:" for
   symbolic constants.  */
4211 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
4213 if (ASSEMBLER_DIALECT == 0)
4216 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
4217 || GET_CODE (x) == LABEL_REF)
4219 if (ASSEMBLER_DIALECT == 0)
4222 fputs ("OFFSET FLAT:", file);
4225 if (GET_CODE (x) == CONST_INT)
4226 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
4228 output_pic_addr_const (file, x, code);
4230 output_addr_const (file, x);
4234 /* Print a memory operand whose address is ADDR.  The address is first
   decomposed into base/index/displacement/scale parts; the output form
   then depends on ASSEMBLER_DIALECT (0 = AT&T, otherwise Intel).  */
4237 print_operand_address (file, addr)
4241 struct ix86_address parts;
4242 rtx base, index, disp;
4245 if (! ix86_decompose_address (addr, &parts))
4249 index = parts.index;
4251 scale = parts.scale;
4253 if (!base && !index)
4255 /* Displacement only requires special attention.  */
4257 if (GET_CODE (disp) == CONST_INT)
4259 if (ASSEMBLER_DIALECT != 0)
4261 if (USER_LABEL_PREFIX[0] == 0)
4263 fputs ("ds:", file);
4265 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (addr));
4268 output_pic_addr_const (file, addr, 0);
4270 output_addr_const (file, addr);
4272 /* Use one byte shorter RIP relative addressing for 64bit mode.  */
4273 if (GET_CODE (disp) != CONST_INT && TARGET_64BIT)
4274 fputs ("(%rip)", file);
/* AT&T syntax: disp(base,index,scale).  */
4278 if (ASSEMBLER_DIALECT == 0)
4283 output_pic_addr_const (file, disp, 0);
4284 else if (GET_CODE (disp) == LABEL_REF)
4285 output_asm_label (disp);
4287 output_addr_const (file, disp);
4292 PRINT_REG (base, 0, file);
4296 PRINT_REG (index, 0, file);
4298 fprintf (file, ",%d", scale);
/* Intel syntax: symbol[base+index*scale+offset].  */
4304 rtx offset = NULL_RTX;
4308 /* Pull out the offset of a symbol; print any symbol itself.  */
4309 if (GET_CODE (disp) == CONST
4310 && GET_CODE (XEXP (disp, 0)) == PLUS
4311 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
4313 offset = XEXP (XEXP (disp, 0), 1);
4314 disp = gen_rtx_CONST (VOIDmode,
4315 XEXP (XEXP (disp, 0), 0));
4319 output_pic_addr_const (file, disp, 0);
4320 else if (GET_CODE (disp) == LABEL_REF)
4321 output_asm_label (disp);
4322 else if (GET_CODE (disp) == CONST_INT)
4325 output_addr_const (file, disp);
4331 PRINT_REG (base, 0, file);
4334 if (INTVAL (offset) >= 0)
4336 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
4340 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
4347 PRINT_REG (index, 0, file);
4349 fprintf (file, "*%d", scale);
4356 /* Split one or more DImode RTL references into pairs of SImode
4357 references.  The RTL can be REG, offsettable MEM, integer constant, or
4358 CONST_DOUBLE.  "operands" is a pointer to an array of DImode RTL to
4359 split and "num" is its length.  lo_half and hi_half are output arrays
4360 that parallel "operands".  */
4363 split_di (operands, num, lo_half, hi_half)
4366 rtx lo_half[], hi_half[];
4370 rtx op = operands[num];
4371 if (CONSTANT_P (op))
4372 split_double (op, &lo_half[num], &hi_half[num]);
/* Before reload we may still use (subreg) halves of a pseudo.  */
4373 else if (! reload_completed)
4375 lo_half[num] = gen_lowpart (SImode, op);
4376 hi_half[num] = gen_highpart (SImode, op);
/* After reload: a DImode hard register pair; high half is REGNO+1.  */
4378 else if (GET_CODE (op) == REG)
4382 lo_half[num] = gen_rtx_REG (SImode, REGNO (op));
4383 hi_half[num] = gen_rtx_REG (SImode, REGNO (op) + 1);
/* Offsettable memory: high half lives 4 bytes past the low half.  */
4385 else if (offsettable_memref_p (op))
4387 rtx lo_addr = XEXP (op, 0);
4388 rtx hi_addr = XEXP (adj_offsettable_operand (op, 4), 0);
4389 lo_half[num] = change_address (op, SImode, lo_addr);
4390 hi_half[num] = change_address (op, SImode, hi_addr);
4397 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
4398 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
4399 is the expression of the binary operation. The output may either be
4400 emitted here, or returned to the caller, like all output_* functions.
4402 There is no guarantee that the operands are the same mode, as they
4403 might be within FLOAT or FLOAT_EXTEND expressions. */
4405 #ifndef SYSV386_COMPAT
4406 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
4407 wants to fix the assemblers because that causes incompatibility
4408 with gcc. No-one wants to fix gcc because that causes
4409 incompatibility with assemblers... You can use the option of
4410 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
4411 #define SYSV386_COMPAT 1
/* See the commentary above: return (or emit) the assembler template for
   the 387 or SSE form of the binary FP operation in operands[3] of INSN.
   The template is accumulated in BUF and completed by the suffix P.  */
4415 output_387_binary_op (insn, operands)
4419 static char buf[30];
/* Any SSE register among the operands selects the SSE code path.  */
4422 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]) | SSE_REG_P (operands[2]);
4424 #ifdef ENABLE_CHECKING
4425 /* Even if we do not want to check the inputs, this documents input
4426 constraints.  Which helps in understanding the following code.  */
4427 if (STACK_REG_P (operands[0])
4428 && ((REG_P (operands[1])
4429 && REGNO (operands[0]) == REGNO (operands[1])
4430 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
4431 || (REG_P (operands[2])
4432 && REGNO (operands[0]) == REGNO (operands[2])
4433 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
4434 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
/* First switch: pick the base mnemonic (fi* variants for integer
   operands).  */
4440 switch (GET_CODE (operands[3]))
4443 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
4444 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
4452 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
4453 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
4461 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
4462 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
4470 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
4471 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
/* SSE: scalar single/double suffix and we are done.  */
4485 if (GET_MODE (operands[0]) == SFmode)
4486 strcat (buf, "ss\t{%2, %0|%0, %2}");
4488 strcat (buf, "sd\t{%2, %0|%0, %2}");
/* Second switch: choose operand order / pop suffix for the 387 form.  */
4493 switch (GET_CODE (operands[3]))
/* Commutative ops: canonicalize so operands[0] == operands[1].  */
4497 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
4499 rtx temp = operands[2];
4500 operands[2] = operands[1];
4504 /* know operands[0] == operands[1].  */
4506 if (GET_CODE (operands[2]) == MEM)
4512 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
4514 if (STACK_TOP_P (operands[0]))
4515 /* How is it that we are storing to a dead operand[2]?
4516 Well, presumably operands[1] is dead too.  We can't
4517 store the result to st(0) as st(0) gets popped on this
4518 instruction.  Instead store to operands[2] (which I
4519 think has to be st(1)).  st(1) will be popped later.
4520 gcc <= 2.8.1 didn't have this check and generated
4521 assembly code that the Unixware assembler rejected.  */
4522 p = "p\t{%0, %2|%2, %0}";	/* st(1) = st(0) op st(1); pop */
4524 p = "p\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0); pop */
4528 if (STACK_TOP_P (operands[0]))
4529 p = "\t{%y2, %0|%0, %y2}";	/* st(0) = st(0) op st(r2) */
4531 p = "\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0) */
/* Non-commutative ops (MINUS, DIV): operand order matters.  */
4536 if (GET_CODE (operands[1]) == MEM)
4542 if (GET_CODE (operands[2]) == MEM)
4548 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
4551 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
4552 derived assemblers, confusingly reverse the direction of
4553 the operation for fsub{r} and fdiv{r} when the
4554 destination register is not st(0).  The Intel assembler
4555 doesn't have this brain damage.  Read !SYSV386_COMPAT to
4556 figure out what the hardware really does.  */
4557 if (STACK_TOP_P (operands[0]))
4558 p = "{p\t%0, %2|rp\t%2, %0}";
4560 p = "{rp\t%2, %0|p\t%0, %2}";
4562 if (STACK_TOP_P (operands[0]))
4563 /* As above for fmul/fadd, we can't store to st(0).  */
4564 p = "rp\t{%0, %2|%2, %0}";	/* st(1) = st(0) op st(1); pop */
4566 p = "p\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0); pop */
4571 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
4574 if (STACK_TOP_P (operands[0]))
4575 p = "{rp\t%0, %1|p\t%1, %0}";
4577 p = "{p\t%1, %0|rp\t%0, %1}";
4579 if (STACK_TOP_P (operands[0]))
4580 p = "p\t{%0, %1|%1, %0}";	/* st(1) = st(1) op st(0); pop */
4582 p = "rp\t{%1, %0|%0, %1}";	/* st(r2) = st(0) op st(r2); pop */
4587 if (STACK_TOP_P (operands[0]))
4589 if (STACK_TOP_P (operands[1]))
4590 p = "\t{%y2, %0|%0, %y2}";	/* st(0) = st(0) op st(r2) */
4592 p = "r\t{%y1, %0|%0, %y1}";	/* st(0) = st(r1) op st(0) */
4595 else if (STACK_TOP_P (operands[1]))
4598 p = "{\t%1, %0|r\t%0, %1}";
4600 p = "r\t{%1, %0|%0, %1}";	/* st(r2) = st(0) op st(r2) */
4606 p = "{r\t%2, %0|\t%0, %2}";
4608 p = "\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0) */
4621 /* Output code for INSN to convert a float to a signed int.  OPERANDS
4622 are the insn operands.  The output may be [HSD]Imode and the input
4623 operand may be [SDX]Fmode.  */
4626 output_fix_trunc (insn, operands)
4630 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
4631 int dimode_p = GET_MODE (operands[0]) == DImode;
4634 /* Jump through a hoop or two for DImode, since the hardware has no
4635 non-popping instruction.  We used to do this a different way, but
4636 that was somewhat fragile and broke with post-reload splitters.  */
4637 if (dimode_p && !stack_top_dies)
4638 output_asm_insn ("fld\t%y1", operands);
4640 if (! STACK_TOP_P (operands[1]))
/* Set the x87 rounding control to truncation: store 12 (0x0c) into
   byte 1 of the saved control word, i.e. CW bits 10-11 = round toward
   zero; the original CW is restored afterwards.  */
4643 xops[0] = GEN_INT (12);
4644 xops[1] = adj_offsettable_operand (operands[2], 1);
4645 xops[1] = change_address (xops[1], QImode, NULL_RTX);
4647 xops[2] = operands[0];
4648 if (GET_CODE (operands[0]) != MEM)
4649 xops[2] = operands[3];
4651 output_asm_insn ("fnstcw\t%2", operands);
4652 output_asm_insn ("mov{l}\t{%2, %4|%4, %2}", operands);
4653 output_asm_insn ("mov{b}\t{%0, %1|%1, %0}", xops);
4654 output_asm_insn ("fldcw\t%2", operands);
4655 output_asm_insn ("mov{l}\t{%4, %2|%2, %4}", operands);
/* Popping form when the stack top dies or for DImode (no non-popping
   64-bit store exists).  */
4657 if (stack_top_dies || dimode_p)
4658 output_asm_insn ("fistp%z2\t%2", xops);
4660 output_asm_insn ("fist%z2\t%2", xops);
4662 output_asm_insn ("fldcw\t%2", operands);
/* Result went to a scratch memory slot; copy it into the register
   destination.  */
4664 if (GET_CODE (operands[0]) != MEM)
4668 split_di (operands+0, 1, xops+0, xops+1);
4669 split_di (operands+3, 1, xops+2, xops+3);
4670 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
4671 output_asm_insn ("mov{l}\t{%3, %1|%1, %3}", xops);
4673 else if (GET_MODE (operands[0]) == SImode)
4674 output_asm_insn ("mov{l}\t{%3, %0|%0, %3}", operands);
4676 output_asm_insn ("mov{w}\t{%3, %0|%0, %3}", operands);
4682 /* Output code for INSN to compare OPERANDS.  EFLAGS_P is 1 when fcomi
4683 should be used and 2 when fnstsw should be used.  UNORDERED_P is true
4684 when fucom should be used.  */
4687 output_fp_compare (insn, operands, eflags_p, unordered_p)
4690 int eflags_p, unordered_p;
4693 rtx cmp_op0 = operands[0];
4694 rtx cmp_op1 = operands[1];
/* Either operand in an SSE register selects [u]comiss/[u]comisd.  */
4695 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]);
4700 cmp_op1 = operands[2];
4704 if (GET_MODE (operands[0]) == SFmode)
4706 return "ucomiss\t{%1, %0|%0, %1}";
4708 return "comiss\t{%1, %0|%0, %y}";
4711 return "ucomisd\t{%1, %0|%0, %1}";
4713 return "comisd\t{%1, %0|%0, %y}";
/* 387 compares require the first operand on top of the stack.  */
4716 if (! STACK_TOP_P (cmp_op0))
4719 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
4721 if (STACK_REG_P (cmp_op1)
4723 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
4724 && REGNO (cmp_op1) != FIRST_STACK_REG)
4726 /* If both the top of the 387 stack dies, and the other operand
4727 is also a stack register that dies, then this must be a
4728 `fcompp' float compare */
4732 /* There is no double popping fcomi variant.  Fortunately,
4733 eflags is immune from the fstp's cc clobbering.  */
4735 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
4737 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
4745 return "fucompp\n\tfnstsw\t%0";
4747 return "fcompp\n\tfnstsw\t%0";
4760 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies.  */
4762 static const char * const alt[24] =
4774 "fcomi\t{%y1, %0|%0, %y1}",
4775 "fcomip\t{%y1, %0|%0, %y1}",
4776 "fucomi\t{%y1, %0|%0, %y1}",
4777 "fucomip\t{%y1, %0|%0, %y1}",
4784 "fcom%z2\t%y2\n\tfnstsw\t%0",
4785 "fcomp%z2\t%y2\n\tfnstsw\t%0",
4786 "fucom%z2\t%y2\n\tfnstsw\t%0",
4787 "fucomp%z2\t%y2\n\tfnstsw\t%0",
4789 "ficom%z2\t%y2\n\tfnstsw\t%0",
4790 "ficomp%z2\t%y2\n\tfnstsw\t%0",
/* Build the 4-bit index into ALT from the flags described above.  */
4798 mask  = eflags_p << 3;
4799 mask |= (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT) << 2;
4800 mask |= unordered_p << 1;
4801 mask |= stack_top_dies;
4813 /* Output assembler code to FILE to initialize basic-block profiling.
4815 If profile_block_flag == 2
4817 Output code to call the subroutine `__bb_init_trace_func'
4818 and pass two parameters to it. The first parameter is
4819 the address of a block allocated in the object module.
4820 The second parameter is the number of the first basic block
4823 The name of the block is a local symbol made with this statement:
4825 ASM_GENERATE_INTERNAL_LABEL (BUFFER, "LPBX", 0);
4827 Of course, since you are writing the definition of
4828 `ASM_GENERATE_INTERNAL_LABEL' as well as that of this macro, you
4829 can take a short cut in the definition of this macro and use the
4830 name that you know will result.
4832 The number of the first basic block of the function is
4833 passed to the macro in BLOCK_OR_LABEL.
4835 If described in a virtual assembler language the code to be
4839 parameter2 <- BLOCK_OR_LABEL
4840 call __bb_init_trace_func
4842 else if profile_block_flag != 0
4844 Output code to call the subroutine `__bb_init_func'
4845 and pass one single parameter to it, which is the same
4846 as the first parameter to `__bb_init_trace_func'.
4848 The first word of this parameter is a flag which will be nonzero if
4849 the object module has already been initialized. So test this word
4850 first, and do not call `__bb_init_func' if the flag is nonzero.
4851 Note: When profile_block_flag == 2 the test need not be done
4852 but `__bb_init_trace_func' *must* be called.
4854 BLOCK_OR_LABEL may be used to generate a label number as a
4855 branch destination in case `__bb_init_func' will not be called.
4857 If described in a virtual assembler language the code to be
/* Emit the basic-block-profiling prologue described in the big comment
   above: call __bb_init_trace_func (profile_block_flag == 2) or
   __bb_init_func (other nonzero values), passing the LPBX0 block
   address (and BLOCK_OR_LABEL for the trace variant).  */
4868 ix86_output_function_block_profiler (file, block_or_label)
4872 static int num_func = 0;
4874 char block_table[80], false_label[80];
4876 ASM_GENERATE_INTERNAL_LABEL (block_table, "LPBX", 0);
4878 xops[1] = gen_rtx_SYMBOL_REF (VOIDmode, block_table);
4879 xops[5] = stack_pointer_rtx;
4880 xops[7] = gen_rtx_REG (Pmode, 0);	/* eax */
4882 CONSTANT_POOL_ADDRESS_P (xops[1]) = TRUE;
4884 switch (profile_block_flag)
4887 xops[2] = GEN_INT (block_or_label);
4888 xops[3] = gen_rtx_MEM (Pmode,
4889 gen_rtx_SYMBOL_REF (VOIDmode, "__bb_init_trace_func"));
4890 xops[6] = GEN_INT (8);
4892 output_asm_insn ("push{l}\t%2", xops);
4894 output_asm_insn ("push{l}\t%1", xops);
4897 output_asm_insn ("lea{l}\t{%a1, %7|%7, %a1}", xops);
4898 output_asm_insn ("push{l}\t%7", xops);
4900 output_asm_insn ("call\t%P3", xops);
4901 output_asm_insn ("add{l}\t{%6, %5|%5, %6}", xops);
/* __bb_init_func path: skip the call if the table's flag word says the
   module is already initialized.  */
4905 ASM_GENERATE_INTERNAL_LABEL (false_label, "LPBZ", num_func);
4907 xops[0] = const0_rtx;
4908 xops[2] = gen_rtx_MEM (Pmode,
4909 gen_rtx_SYMBOL_REF (VOIDmode, false_label));
4910 xops[3] = gen_rtx_MEM (Pmode,
4911 gen_rtx_SYMBOL_REF (VOIDmode, "__bb_init_func"));
4912 xops[4] = gen_rtx_MEM (Pmode, xops[1]);
4913 xops[6] = GEN_INT (4);
4915 CONSTANT_POOL_ADDRESS_P (xops[2]) = TRUE;
4917 output_asm_insn ("cmp{l}\t{%0, %4|%4, %0}", xops);
4918 output_asm_insn ("jne\t%2", xops);
4921 output_asm_insn ("push{l}\t%1", xops);
/* NOTE(review): the Intel-syntax alternative here says %a2 while the
   AT&T side says %a1; the equivalent insn above uses %a1 on both sides.
   Looks like a typo — verify against the insn's intended operand.  */
4924 output_asm_insn ("lea{l}\t{%a1, %7|%7, %a2}", xops);
4925 output_asm_insn ("push{l}\t%7", xops);
4927 output_asm_insn ("call\t%P3", xops);
4928 output_asm_insn ("add{l}\t{%6, %5|%5, %6}", xops);
4929 ASM_OUTPUT_INTERNAL_LABEL (file, "LPBZ", num_func);
4935 /* Output assembler code to FILE to increment a counter associated
4936 with basic block number BLOCKNO.
4938 If profile_block_flag == 2
4940 Output code to initialize the global structure `__bb' and
4941 call the function `__bb_trace_func' which will increment the
4944 `__bb' consists of two words. In the first word the number
4945 of the basic block has to be stored. In the second word
4946 the address of a block allocated in the object module
4949 The basic block number is given by BLOCKNO.
4951 The address of the block is given by the label created with
4953 ASM_GENERATE_INTERNAL_LABEL (BUFFER, "LPBX", 0);
4955 by FUNCTION_BLOCK_PROFILER.
4957 Of course, since you are writing the definition of
4958 `ASM_GENERATE_INTERNAL_LABEL' as well as that of this macro, you
4959 can take a short cut in the definition of this macro and use the
4960 name that you know will result.
4962 If described in a virtual assembler language the code to be
4965 move BLOCKNO -> (__bb)
4966 move LPBX0 -> (__bb+4)
4967 call __bb_trace_func
4969 Note that function `__bb_trace_func' must not change the
4970 machine state, especially the flag register. To grant
4971 this, you must output code to save and restore registers
4972 either in this macro or in the macros MACHINE_STATE_SAVE
4973 and MACHINE_STATE_RESTORE. The last two macros will be
4974 used in the function `__bb_trace_func', so you must make
4975 sure that the function prologue does not change any
4976 register prior to saving it with MACHINE_STATE_SAVE.
4978 else if profile_block_flag != 0
4980 Output code to increment the counter directly.
4981 Basic blocks are numbered separately from zero within each
4982 compiled object module. The count associated with block number
4983 BLOCKNO is at index BLOCKNO in an array of words; the name of
4984 this array is a local symbol made with this statement:
4986 ASM_GENERATE_INTERNAL_LABEL (BUFFER, "LPBX", 2);
4988 Of course, since you are writing the definition of
4989 `ASM_GENERATE_INTERNAL_LABEL' as well as that of this macro, you
4990 can take a short cut in the definition of this macro and use the
4991 name that you know will result.
4993 If described in a virtual assembler language the code to be
4996 inc (LPBX2+4*BLOCKNO)
/* Emit per-basic-block profiling code (see the comment above): for
   profile_block_flag == 2 store BLOCKNO and the LPBX0 address into the
   global `__bb' and call __bb_trace_func, preserving eflags via
   pushf/popf; otherwise just increment slot BLOCKNO of the LPBX2
   counter array.  */
5000 ix86_output_block_profiler (file, blockno)
5001 FILE *file ATTRIBUTE_UNUSED;
5004 rtx xops[8], cnt_rtx;
5006 char *block_table = counts;
5008 switch (profile_block_flag)
5011 ASM_GENERATE_INTERNAL_LABEL (block_table, "LPBX", 0);
5013 xops[1] = gen_rtx_SYMBOL_REF (VOIDmode, block_table);
5014 xops[2] = GEN_INT (blockno);
5015 xops[3] = gen_rtx_MEM (Pmode,
5016 gen_rtx_SYMBOL_REF (VOIDmode, "__bb_trace_func"));
5017 xops[4] = gen_rtx_SYMBOL_REF (VOIDmode, "__bb");
5018 xops[5] = plus_constant (xops[4], 4);
5019 xops[0] = gen_rtx_MEM (SImode, xops[4]);
5020 xops[6] = gen_rtx_MEM (SImode, xops[5]);
5022 CONSTANT_POOL_ADDRESS_P (xops[1]) = TRUE;
/* __bb_trace_func must not observe clobbered flags; save them.  */
5024 output_asm_insn ("pushf", xops);
5025 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
5028 xops[7] = gen_rtx_REG (Pmode, 0);	/* eax */
5029 output_asm_insn ("push{l}\t%7", xops);
5030 output_asm_insn ("lea{l}\t{%a1, %7|%7, %a1}", xops);
5031 output_asm_insn ("mov{l}\t{%7, %6|%6, %7}", xops);
5032 output_asm_insn ("pop{l}\t%7", xops);
5035 output_asm_insn ("mov{l}\t{%1, %6|%6, %1}", xops);
5036 output_asm_insn ("call\t%P3", xops);
5037 output_asm_insn ("popf", xops);
/* Simple counter increment at LPBX2 + 4*BLOCKNO (PIC-adjusted when
   needed).  */
5042 ASM_GENERATE_INTERNAL_LABEL (counts, "LPBX", 2);
5043 cnt_rtx = gen_rtx_SYMBOL_REF (VOIDmode, counts);
5044 SYMBOL_REF_FLAG (cnt_rtx) = TRUE;
5047 cnt_rtx = plus_constant (cnt_rtx, blockno*4);
5050 cnt_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, cnt_rtx);
5052 xops[0] = gen_rtx_MEM (SImode, cnt_rtx);
5053 output_asm_insn ("inc{l}\t%0", xops);
/* Expand a MODE move of operands[1] into operands[0]: legitimize PIC
   symbol references, forbid mem-to-mem moves, and force FP constants
   destined for registers into the constant pool.  */
5060 ix86_expand_move (mode, operands)
5061 enum machine_mode mode;
5064 int strict = (reload_in_progress || reload_completed);
5067 if (flag_pic && mode == Pmode && symbolic_operand (operands[1], Pmode))
5069 /* Emit insns to move operands[1] into operands[0].  */
5071 if (GET_CODE (operands[0]) == MEM)
5072 operands[1] = force_reg (Pmode, operands[1]);
5075 rtx temp = operands[0];
5076 if (GET_CODE (temp) != REG)
5077 temp = gen_reg_rtx (Pmode);
5078 temp = legitimize_pic_address (operands[1], temp);
5079 if (temp == operands[0])
/* mem-to-mem moves are not valid on x86; load the source first.  */
5086 if (GET_CODE (operands[0]) == MEM
5087 && (GET_MODE (operands[0]) == QImode
5088 || !push_operand (operands[0], mode))
5089 && GET_CODE (operands[1]) == MEM)
5090 operands[1] = force_reg (mode, operands[1]);
5092 if (push_operand (operands[0], mode)
5093 && ! general_no_elim_operand (operands[1], mode))
5094 operands[1] = copy_to_mode_reg (mode, operands[1]);
5096 if (FLOAT_MODE_P (mode))
5098 /* If we are loading a floating point constant to a register,
5099 force the value to memory now, since we'll get better code
5100 out the back end.  */
5104 else if (GET_CODE (operands[1]) == CONST_DOUBLE
5105 && register_operand (operands[0], mode))
5106 operands[1] = validize_mem (force_const_mem (mode, operands[1]));
5110 insn = gen_rtx_SET (VOIDmode, operands[0], operands[1]);
5115 /* Attempt to expand a binary operator.  Make the expansion closer to the
5116 actual machine, then just general_operand, which will allow 3 separate
5117 memory references (one output, two input) in a single insn.  */
5120 ix86_expand_binary_operator (code, mode, operands)
5122 enum machine_mode mode;
5125 int matching_memory;
5126 rtx src1, src2, dst, op, clob;
5132 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
5133 if (GET_RTX_CLASS (code) == 'c'
5134 && (rtx_equal_p (dst, src2)
5135 || immediate_operand (src1, mode)))
5142 /* If the destination is memory, and we do not have matching source
5143 operands, do things in registers.  */
5144 matching_memory = 0;
5145 if (GET_CODE (dst) == MEM)
5147 if (rtx_equal_p (dst, src1))
5148 matching_memory = 1;
5149 else if (GET_RTX_CLASS (code) == 'c'
5150 && rtx_equal_p (dst, src2))
5151 matching_memory = 2;
5153 dst = gen_reg_rtx (mode);
5156 /* Both source operands cannot be in memory.  */
5157 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
5159 if (matching_memory != 2)
5160 src2 = force_reg (mode, src2);
5162 src1 = force_reg (mode, src1);
5165 /* If the operation is not commutable, source 1 cannot be a constant
5166 or non-matching memory.  */
5167 if ((CONSTANT_P (src1)
5168 || (!matching_memory && GET_CODE (src1) == MEM))
5169 && GET_RTX_CLASS (code) != 'c')
5170 src1 = force_reg (mode, src1);
5172 /* If optimizing, copy to regs to improve CSE */
5173 if (optimize && ! no_new_pseudos)
5175 if (GET_CODE (dst) == MEM)
5176 dst = gen_reg_rtx (mode);
5177 if (GET_CODE (src1) == MEM)
5178 src1 = force_reg (mode, src1);
5179 if (GET_CODE (src2) == MEM)
5180 src2 = force_reg (mode, src2);
5183 /* Emit the instruction.  */
5185 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
5186 if (reload_in_progress)
5188 /* Reload doesn't know about the flags register, and doesn't know that
5189 it doesn't want to clobber it.  We can only do this with PLUS.  */
/* Normal case: wrap the SET in a PARALLEL with a flags clobber, as the
   matching insn patterns expect.  */
5196 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
5197 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
5200 /* Fix up the destination if needed.  */
5201 if (dst != operands[0])
5202 emit_move_insn (operands[0], dst);
5205 /* Return TRUE or FALSE depending on whether the binary operator meets the
5206 appropriate constraints.  */
5209 ix86_binary_operator_ok (code, mode, operands)
5211 enum machine_mode mode ATTRIBUTE_UNUSED;
5214 /* Both source operands cannot be in memory.  */
5215 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
5217 /* If the operation is not commutable, source 1 cannot be a constant.  */
5218 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != 'c')
5220 /* If the destination is memory, we must have a matching source operand.  */
5221 if (GET_CODE (operands[0]) == MEM
5222 && ! (rtx_equal_p (operands[0], operands[1])
5223 || (GET_RTX_CLASS (code) == 'c'
5224 && rtx_equal_p (operands[0], operands[2]))))
5226 /* If the operation is not commutable and the source 1 is memory, we must
5227 have a matching destination.  */
5228 if (GET_CODE (operands[1]) == MEM
5229 && GET_RTX_CLASS (code) != 'c'
5230 && ! rtx_equal_p (operands[0], operands[1]))
5235 /* Attempt to expand a unary operator.  Make the expansion closer to the
5236 actual machine, then just general_operand, which will allow 2 separate
5237 memory references (one output, one input) in a single insn.  */
5240 ix86_expand_unary_operator (code, mode, operands)
5242 enum machine_mode mode;
5245 int matching_memory;
5246 rtx src, dst, op, clob;
5251 /* If the destination is memory, and we do not have matching source
5252 operands, do things in registers.  */
5253 matching_memory = 0;
5254 if (GET_CODE (dst) == MEM)
5256 if (rtx_equal_p (dst, src))
5257 matching_memory = 1;
5259 dst = gen_reg_rtx (mode);
5262 /* When source operand is memory, destination must match.  */
5263 if (!matching_memory && GET_CODE (src) == MEM)
5264 src = force_reg (mode, src);
5266 /* If optimizing, copy to regs to improve CSE */
5267 if (optimize && ! no_new_pseudos)
5269 if (GET_CODE (dst) == MEM)
5270 dst = gen_reg_rtx (mode);
5271 if (GET_CODE (src) == MEM)
5272 src = force_reg (mode, src);
5275 /* Emit the instruction.  */
5277 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
/* NOT does not clobber flags, so it needs no CLOBBER parallel; during
   reload we must not add one either.  */
5278 if (reload_in_progress || code == NOT)
5280 /* Reload doesn't know about the flags register, and doesn't know that
5281 it doesn't want to clobber it.  */
5288 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
5289 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
5292 /* Fix up the destination if needed.  */
5293 if (dst != operands[0])
5294 emit_move_insn (operands[0], dst);
5297 /* Return TRUE or FALSE depending on whether the unary operator meets the
5298 appropriate constraints.  */
5301 ix86_unary_operator_ok (code, mode, operands)
5302 enum rtx_code code ATTRIBUTE_UNUSED;
5303 enum machine_mode mode ATTRIBUTE_UNUSED;
5304 rtx operands[2] ATTRIBUTE_UNUSED;
5306 /* If one of operands is memory, source and destination must match.  */
5307 if ((GET_CODE (operands[0]) == MEM
5308 || GET_CODE (operands[1]) == MEM)
5309 && ! rtx_equal_p (operands[0], operands[1]))
5314 /* Return TRUE or FALSE depending on whether the first SET in INSN
5315 has source and destination with matching CC modes, and that the
5316 CC mode is at least as constrained as REQ_MODE.  */
5319 ix86_match_ccmode (insn, req_mode)
5321 enum machine_mode req_mode;
5324 enum machine_mode set_mode;
5326 set = PATTERN (insn);
5327 if (GET_CODE (set) == PARALLEL)
5328 set = XVECEXP (set, 0, 0);
5329 if (GET_CODE (set) != SET)
5331 if (GET_CODE (SET_SRC (set)) != COMPARE)
5334 set_mode = GET_MODE (SET_DEST (set));
/* Per-mode acceptance rules: a less-constrained REQ_MODE may be
   satisfied by a more-constrained SET_MODE.  */
5338 if (req_mode != CCNOmode
5339 && (req_mode != CCmode
5340 || XEXP (SET_SRC (set), 1) != const0_rtx))
5344 if (req_mode == CCGCmode)
5348 if (req_mode == CCGOCmode || req_mode == CCNOmode)
5352 if (req_mode == CCZmode)
5362 return (GET_MODE (SET_SRC (set)) == set_mode);
5365 /* Generate insn patterns to do an integer compare of OPERANDS. */
/* Emits (set (reg:CCx FLAGS_REG) (compare op0 op1)) and returns the
   condition rtx the flags consumer (bcc/scc/cmov) should use.
   NOTE(review): the return type and local declarations of `flags`/`tmp`
   are elided from this listing. */
5368 ix86_expand_int_compare (code, op0, op1)
5372 enum machine_mode cmpmode;
/* Pick the least constrained CC mode that still encodes CODE. */
5375 cmpmode = SELECT_CC_MODE (code, op0, op1);
5376 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
5378 /* This is very simple, but making the interface the same as in the
5379 FP case makes the rest of the code easier. */
5380 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
5381 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
5383 /* Return the test that should be put into the flags user, i.e.
5384 the bcc, scc, or cmov instruction. */
5385 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
5388 /* Figure out whether to use ordered or unordered fp comparisons.
5389 Return the appropriate mode to use. */
5392 ix86_fp_compare_mode (code)
5393 enum rtx_code code ATTRIBUTE_UNUSED;
5395 /* ??? In order to make all comparisons reversible, we do all comparisons
5396 non-trapping when compiling for IEEE. Once gcc is able to distinguish
5397 all forms trapping and nontrapping comparisons, we can make inequality
5398 comparisons trapping again, since it results in better code when using
5399 FCOM based compares. */
/* CODE is currently ignored (see ??? above): IEEE mode forces the
   unordered (non-trapping) CC mode for every comparison. */
5400 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
/* Select the condition-code mode needed to represent comparison CODE of
   OP0 and OP1.  NOTE(review): elided listing — the return type, the
   switch head, and the `return CC*mode;` statements inside each case
   group are missing from this excerpt; only the case labels survive. */
5404 ix86_cc_mode (code, op0, op1)
5408 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
5409 return ix86_fp_compare_mode (code);
5412 /* Only zero flag is needed. */
5414 case NE: /* ZF!=0 */
5416 /* Codes needing carry flag. */
5417 case GEU: /* CF=0 */
5418 case GTU: /* CF=0 & ZF=0 */
5419 case LTU: /* CF=1 */
5420 case LEU: /* CF=1 | ZF=1 */
5422 /* Codes possibly doable only with sign flag when
5423 comparing against zero. */
5424 case GE: /* SF=OF or SF=0 */
5425 case LT: /* SF<>OF or SF=1 */
5426 if (op1 == const0_rtx)
5429 /* For other cases Carry flag is not required. */
5431 /* Codes doable only with sign flag when comparing
5432 against zero, but we miss jump instruction for it
5433 so we need to use relational tests agains overflow
5434 that thus needs to be zero. */
5435 case GT: /* ZF=0 & SF=OF */
5436 case LE: /* ZF=1 | SF<>OF */
5437 if (op1 == const0_rtx)
5446 /* Return true if we should use an FCOMI instruction for this fp comparison. */
/* True when fcomi is already the cheapest strategy for CODE (or for its
   swapped form, since the expander may swap the operands). */
5449 ix86_use_fcomi_compare (code)
5450 enum rtx_code code ATTRIBUTE_UNUSED;
5452 enum rtx_code swapped_code = swap_condition (code);
5453 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
5454 || (ix86_fp_comparison_cost (swapped_code)
5455 == ix86_fp_comparison_fcomi_cost (swapped_code)));
5458 /* Swap, force into registers, or otherwise massage the two operands
5459 to a fp comparison. The operands are updated in place; the new
5460 comparsion code is returned. */
/* NOTE(review): elided listing — the parameter declarations, several
   closing braces, and the final `return code;` are missing here. */
5462 static enum rtx_code
5463 ix86_prepare_fp_compare_args (code, pop0, pop1)
5467 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
5468 rtx op0 = *pop0, op1 = *pop1;
5469 enum machine_mode op_mode = GET_MODE (op0);
5470 int is_sse = SSE_REG_P (op0) | SSE_REG_P (op1);
5472 /* All of the unordered compare instructions only work on registers.
5473 The same is true of the XFmode compare instructions. The same is
5474 true of the fcomi compare instructions. */
5477 && (fpcmp_mode == CCFPUmode
5478 || op_mode == XFmode
5479 || op_mode == TFmode
5480 || ix86_use_fcomi_compare (code)))
5482 op0 = force_reg (op_mode, op0);
5483 op1 = force_reg (op_mode, op1);
5487 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
5488 things around if they appear profitable, otherwise force op0
/* Swap when op0 is a constant/memory that would be better as op1. */
5491 if (standard_80387_constant_p (op0) == 0
5492 || (GET_CODE (op0) == MEM
5493 && ! (standard_80387_constant_p (op1) == 0
5494 || GET_CODE (op1) == MEM)))
5497 tmp = op0, op0 = op1, op1 = tmp;
5498 code = swap_condition (code);
5501 if (GET_CODE (op0) != REG)
5502 op0 = force_reg (op_mode, op0);
5504 if (CONSTANT_P (op1))
/* 387 built-in constants (0.0/1.0) can live in a register; other
   constants go to the constant pool and are used from memory. */
5506 if (standard_80387_constant_p (op1))
5507 op1 = force_reg (op_mode, op1);
5509 op1 = validize_mem (force_const_mem (op_mode, op1));
5513 /* Try to rearrange the comparison to make it cheaper. */
5514 if (ix86_fp_comparison_cost (code)
5515 > ix86_fp_comparison_cost (swap_condition (code))
5516 && (GET_CODE (op0) == REG || !reload_completed))
5519 tmp = op0, op0 = op1, op1 = tmp;
5520 code = swap_condition (code);
5521 if (GET_CODE (op0) != REG)
5522 op0 = force_reg (op_mode, op0);
5530 /* Convert comparison codes we use to represent FP comparison to integer
5531 code that will result in proper branch. Return UNKNOWN if no such code
/* NOTE(review): the entire body of this function (lines 5535-5563 of the
   original) is elided from this listing; only the signature survives. */
5533 static enum rtx_code
5534 ix86_fp_compare_code_to_integer (code)
5564 /* Split comparison code CODE into comparisons we can do using branch
5565 instructions. BYPASS_CODE is comparison code for branch that will
5566 branch around FIRST_CODE and SECOND_CODE. If some of branches
5567 is not required, set value to NIL.
5568 We never require more than two branches. */
/* NOTE(review): elided listing — the switch head, the `*first_code = ...`
   assignments, `break` statements and closing braces are missing; the
   case labels below document the flag pattern each code maps to. */
5570 ix86_fp_comparison_codes (code, bypass_code, first_code, second_code)
5571 enum rtx_code code, *bypass_code, *first_code, *second_code;
5577 /* The fcomi comparison sets flags as follows:
5587 case GT: /* GTU - CF=0 & ZF=0 */
5588 case GE: /* GEU - CF=0 */
5589 case ORDERED: /* PF=0 */
5590 case UNORDERED: /* PF=1 */
5591 case UNEQ: /* EQ - ZF=1 */
5592 case UNLT: /* LTU - CF=1 */
5593 case UNLE: /* LEU - CF=1 | ZF=1 */
5594 case LTGT: /* EQ - ZF=0 */
5596 case LT: /* LTU - CF=1 - fails on unordered */
5598 *bypass_code = UNORDERED;
5600 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
5602 *bypass_code = UNORDERED;
5604 case EQ: /* EQ - ZF=1 - fails on unordered */
5606 *bypass_code = UNORDERED;
5608 case NE: /* NE - ZF=0 - fails on unordered */
5610 *second_code = UNORDERED;
5612 case UNGE: /* GEU - CF=0 - fails on unordered */
5614 *second_code = UNORDERED;
5616 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
5618 *second_code = UNORDERED;
/* In non-IEEE mode the unordered bypass/second branches are dropped. */
5623 if (!TARGET_IEEE_FP)
5630 /* Return cost of comparison done fcom + arithmetics operations on AX.
5631 All following functions do use number of instructions as an cost metrics.
5632 In future this should be tweaked to compute bytes for optimize_size and
5633 take into account performance of various instructions on various CPUs. */
/* NOTE(review): the per-code cost switch (lines 5641-5667 of the
   original) is elided from this listing. */
5635 ix86_fp_comparison_arithmetics_cost (code)
5638 if (!TARGET_IEEE_FP)
5640 /* The cost of code output by ix86_expand_fp_compare. */
5668 /* Return cost of comparison done using fcomi operation.
5669 See ix86_fp_comparison_arithmetics_cost for the metrics. */
5671 ix86_fp_comparison_fcomi_cost (code)
5674 enum rtx_code bypass_code, first_code, second_code;
5675 /* Return arbitarily high cost when instruction is not supported - this
5676 prevents gcc from using it. */
/* NOTE(review): the TARGET_CMOVE availability check guarded by the
   comment above is elided from this listing. */
5679 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
/* Base cost 2 (fcomi + branch) plus one extra branch if needed. */
5680 return (bypass_code != NIL || second_code != NIL) + 2;
5683 /* Return cost of comparison done using sahf operation.
5684 See ix86_fp_comparison_arithmetics_cost for the metrics. */
5686 ix86_fp_comparison_sahf_cost (code)
5689 enum rtx_code bypass_code, first_code, second_code;
5690 /* Return arbitarily high cost when instruction is not preferred - this
5691 avoids gcc from using it. */
/* NOTE(review): the large-constant `return` under this guard is elided
   from this listing. */
5692 if (!TARGET_USE_SAHF && !optimize_size)
5694 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
/* Base cost 3 (fnstsw + sahf + branch) plus one extra branch if needed. */
5695 return (bypass_code != NIL || second_code != NIL) + 3;
5698 /* Compute cost of the comparison done using any method.
5699 See ix86_fp_comparison_arithmetics_cost for the metrics. */
/* Takes the minimum of the arithmetics / sahf / fcomi strategies.
   NOTE(review): the `min = ...` update statements and the final
   `return min;` are elided from this listing. */
5701 ix86_fp_comparison_cost (code)
5704 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
5707 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
5708 sahf_cost = ix86_fp_comparison_sahf_cost (code);
5710 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
5711 if (min > sahf_cost)
5713 if (min > fcomi_cost)
5718 /* Generate insn patterns to do a floating point compare of OPERANDS. */
/* Emits the fp compare (fcomi, or fnstsw+sahf, or fnstsw+AH bit tests),
   possibly filling *SECOND_TEST / *BYPASS_TEST with extra conditions for
   IEEE unordered handling, and returns the flags-user condition rtx.
   NOTE(review): elided listing — many lines (branch/else structure,
   several emit calls and case labels for the AH twiddling section) are
   missing; comments below are limited to what the surviving lines show. */
5721 ix86_expand_fp_compare (code, op0, op1, scratch, second_test, bypass_test)
5723 rtx op0, op1, scratch;
5727 enum machine_mode fpcmp_mode, intcmp_mode;
5729 int cost = ix86_fp_comparison_cost (code);
5730 enum rtx_code bypass_code, first_code, second_code;
5732 fpcmp_mode = ix86_fp_compare_mode (code);
5733 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
5736 *second_test = NULL_RTX;
5738 *bypass_test = NULL_RTX;
5740 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
5742 /* Do fcomi/sahf based test when profitable. */
5743 if ((bypass_code == NIL || bypass_test)
5744 && (second_code == NIL || second_test)
5745 && ix86_fp_comparison_arithmetics_cost (code) > cost)
/* fcomi path: compare straight into the flags register. */
5749 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
5750 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
/* sahf path: fnstsw into a HImode scratch, then sahf. */
5756 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
5757 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), 9);
5759 scratch = gen_reg_rtx (HImode);
5760 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
5761 emit_insn (gen_x86_sahf_1 (scratch));
5764 /* The FP codes work out to act like unsigned. */
5765 intcmp_mode = fpcmp_mode;
5767 if (bypass_code != NIL)
5768 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
5769 gen_rtx_REG (intcmp_mode, FLAGS_REG),
5771 if (second_code != NIL)
5772 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
5773 gen_rtx_REG (intcmp_mode, FLAGS_REG),
5778 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
5779 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
5780 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), 9);
5782 scratch = gen_reg_rtx (HImode);
5783 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
5785 /* In the unordered case, we have to check C2 for NaN's, which
5786 doesn't happen to work out to anything nice combination-wise.
5787 So do some bit twiddling on the value we've got in AH to come
5788 up with an appropriate set of condition codes. */
5790 intcmp_mode = CCNOmode;
5795 if (code == GT || !TARGET_IEEE_FP)
5797 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)))
5802 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
5803 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
5804 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
5805 intcmp_mode = CCmode;
5811 if (code == LT && TARGET_IEEE_FP)
5813 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
5814 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
5815 intcmp_mode = CCmode;
5820 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
5826 if (code == GE || !TARGET_IEEE_FP)
5828 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
5833 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
5834 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
5841 if (code == LE && TARGET_IEEE_FP)
5843 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
5844 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
5845 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
5846 intcmp_mode = CCmode;
5851 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
5857 if (code == EQ && TARGET_IEEE_FP)
5859 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
5860 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
5861 intcmp_mode = CCmode;
5866 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
5873 if (code == NE && TARGET_IEEE_FP)
5875 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
5876 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
5882 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
5888 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
5892 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
5901 /* Return the test that should be put into the flags user, i.e.
5902 the bcc, scc, or cmov instruction. */
5903 return gen_rtx_fmt_ee (code, VOIDmode,
5904 gen_rtx_REG (intcmp_mode, FLAGS_REG),
/* Expand the compare recorded in ix86_compare_op0/op1 with CODE,
   dispatching to the fp or integer path, and return the flags-user
   condition rtx.  NOTE(review): the return type, parameter declarations
   and the final `return ret;` are elided from this listing. */
5909 ix86_expand_compare (code, second_test, bypass_test)
5911 rtx *second_test, *bypass_test;
5914 op0 = ix86_compare_op0;
5915 op1 = ix86_compare_op1;
5918 *second_test = NULL_RTX;
5920 *bypass_test = NULL_RTX;
5922 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
5923 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
5924 second_test, bypass_test);
5926 ret = ix86_expand_int_compare (code, op0, op1);
/* Expand a conditional branch on CODE (using the globals
   ix86_compare_op0/op1) to LABEL.  Handles QI/HI/SImode directly, the fp
   modes via a combined compare-and-branch pattern, and DImode by
   splitting into word compares.  NOTE(review): elided listing — the
   switch case labels, several `break`s, local declarations and closing
   braces are missing throughout this excerpt. */
5932 ix86_expand_branch (code, label)
5938 switch (GET_MODE (ix86_compare_op0))
/* Integer modes: expand the compare, then a plain conditional jump. */
5944 tmp = ix86_expand_compare (code, NULL, NULL);
5945 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
5946 gen_rtx_LABEL_REF (VOIDmode, label),
5948 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
5955 /* Don't expand the comparison early, so that we get better code
5956 when jump or whoever decides to reverse the comparison. */
5961 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
5964 tmp = gen_rtx_fmt_ee (code, VOIDmode,
5965 ix86_compare_op0, ix86_compare_op1);
5966 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
5967 gen_rtx_LABEL_REF (VOIDmode, label),
5969 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
/* fp path: parallel of jump + clobbers of fpsr regs (17/18), and a
   scratch HImode clobber when fnstsw/sahf (not fcomi) will be used. */
5971 use_fcomi = ix86_use_fcomi_compare (code);
5972 vec = rtvec_alloc (3 + !use_fcomi);
5973 RTVEC_ELT (vec, 0) = tmp;
5975 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
5977 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
5980 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
5982 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
5989 /* Expand DImode branch into multiple compare+branch. */
5991 rtx lo[2], hi[2], label2;
5992 enum rtx_code code1, code2, code3;
/* Canonicalize: keep any constant as the second operand. */
5994 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
5996 tmp = ix86_compare_op0;
5997 ix86_compare_op0 = ix86_compare_op1;
5998 ix86_compare_op1 = tmp;
5999 code = swap_condition (code);
6001 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
6002 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
6004 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
6005 avoid two branches. This costs one extra insn, so disable when
6006 optimizing for size. */
6008 if ((code == EQ || code == NE)
6010 || hi[1] == const0_rtx || lo[1] == const0_rtx))
6015 if (hi[1] != const0_rtx)
6016 xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
6017 NULL_RTX, 0, OPTAB_WIDEN);
6020 if (lo[1] != const0_rtx)
6021 xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
6022 NULL_RTX, 0, OPTAB_WIDEN);
6024 tmp = expand_binop (SImode, ior_optab, xor1, xor0,
6025 NULL_RTX, 0, OPTAB_WIDEN);
/* Recurse: the DImode equality reduces to an SImode compare vs 0. */
6027 ix86_compare_op0 = tmp;
6028 ix86_compare_op1 = const0_rtx;
6029 ix86_expand_branch (code, label);
6033 /* Otherwise, if we are doing less-than or greater-or-equal-than,
6034 op1 is a constant and the low word is zero, then we can just
6035 examine the high word. */
6037 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
6040 case LT: case LTU: case GE: case GEU:
6041 ix86_compare_op0 = hi[0];
6042 ix86_compare_op1 = hi[1];
6043 ix86_expand_branch (code, label);
6049 /* Otherwise, we need two or three jumps. */
6051 label2 = gen_label_rtx ();
6054 code2 = swap_condition (code);
6055 code3 = unsigned_condition (code);
6059 case LT: case GT: case LTU: case GTU:
6062 case LE: code1 = LT; code2 = GT; break;
6063 case GE: code1 = GT; code2 = LT; break;
6064 case LEU: code1 = LTU; code2 = GTU; break;
6065 case GEU: code1 = GTU; code2 = LTU; break;
6067 case EQ: code1 = NIL; code2 = NE; break;
6068 case NE: code2 = NIL; break;
6076 * if (hi(a) < hi(b)) goto true;
6077 * if (hi(a) > hi(b)) goto false;
6078 * if (lo(a) < lo(b)) goto true;
6082 ix86_compare_op0 = hi[0];
6083 ix86_compare_op1 = hi[1];
6086 ix86_expand_branch (code1, label);
6088 ix86_expand_branch (code2, label2);
6090 ix86_compare_op0 = lo[0];
6091 ix86_compare_op1 = lo[1];
6092 ix86_expand_branch (code3, label);
6095 emit_label (label2);
6104 /* Split branch based on floating point condition. */
/* Emits up to three jumps: an optional BYPASS branch around the test, the
   main conditional branch, and an optional SECOND branch for IEEE
   unordered handling.  NOTE(review): elided listing — several
   lines (target swap when target2 != pc_rtx, the second/condition swap
   body, and the trailing emit_label) are missing from this excerpt. */
6106 ix86_split_fp_branch (condition, op1, op2, target1, target2, tmp)
6107 rtx condition, op1, op2, target1, target2, tmp;
6110 rtx label = NULL_RTX;
6111 enum rtx_code code = GET_CODE (condition);
/* Normalize so the fallthrough target is target2; reverse if needed. */
6113 if (target2 != pc_rtx)
6116 code = reverse_condition_maybe_unordered (code);
6121 condition = ix86_expand_fp_compare (code, op1, op2,
6122 tmp, &second, &bypass);
6123 if (bypass != NULL_RTX)
6125 label = gen_label_rtx ();
6126 emit_jump_insn (gen_rtx_SET
6128 gen_rtx_IF_THEN_ELSE (VOIDmode,
6130 gen_rtx_LABEL_REF (VOIDmode,
6134 /* AMD Athlon and probably other CPUs too have fast bypass path between the
6135 comparison and first branch. The second branch takes longer to execute
6136 so place first branch the worse predicable one if possible. */
6137 if (second != NULL_RTX
6138 && (GET_CODE (second) == UNORDERED || GET_CODE (second) == ORDERED)
6140 rtx tmp = condition;
6144 emit_jump_insn (gen_rtx_SET
6146 gen_rtx_IF_THEN_ELSE (VOIDmode,
6147 condition, target1, target2)));
6148 if (second != NULL_RTX)
6149 emit_jump_insn (gen_rtx_SET
6151 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1, target2)));
6152 if (label != NULL_RTX)
/* Expand a setcc of CODE (on ix86_compare_op0/op1) into DEST.
   Returns 1 on success, 0 for FAIL (DImode compares on 32-bit).
   NOTE(review): elided listing — the mode-selection assignments, several
   else branches and the final emit are missing from this excerpt. */
6157 ix86_expand_setcc (code, dest)
6161 rtx ret, tmp, tmpreg;
6162 rtx second_test, bypass_test;
/* DImode compares need the branch splitter; setcc can't handle them. */
6165 if (GET_MODE (ix86_compare_op0) == DImode
6167 return 0; /* FAIL */
6169 /* Three modes of generation:
6170 0 -- destination does not overlap compare sources:
6171 clear dest first, emit strict_low_part setcc.
6172 1 -- destination does overlap compare sources:
6173 emit subreg setcc, zero extend.
6174 2 -- destination is in QImode:
6180 if (GET_MODE (dest) == QImode)
6182 else if (reg_overlap_mentioned_p (dest, ix86_compare_op0)
6183 || reg_overlap_mentioned_p (dest, ix86_compare_op1))
6187 emit_move_insn (dest, const0_rtx);
6189 ret = ix86_expand_compare (code, &second_test, &bypass_test);
6190 PUT_MODE (ret, QImode);
6196 tmp = gen_lowpart (QImode, dest);
6198 tmp = gen_rtx_STRICT_LOW_PART (VOIDmode, tmp);
6202 if (!cse_not_expected)
6203 tmp = gen_reg_rtx (QImode);
6205 tmp = gen_lowpart (QImode, dest);
6209 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
/* IEEE extra tests: combine the second/bypass condition into the result
   with and/or on QImode temporaries. */
6210 if (bypass_test || second_test)
6212 rtx test = second_test;
6214 rtx tmp2 = gen_reg_rtx (QImode);
6221 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
6223 PUT_MODE (test, QImode);
6224 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
6227 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
6229 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
/* Mode 1: widen the QImode result back to DEST with zero_extend,
   clobbering flags. */
6236 tmp = gen_rtx_ZERO_EXTEND (GET_MODE (dest), tmp);
6237 tmp = gen_rtx_SET (VOIDmode, dest, tmp);
6238 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
6239 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
6243 return 1; /* DONE */
/* Expand an integer conditional move described by OPERANDS
   (operands[0] = dest, operands[1] = comparison, operands[2]/[3] = the
   two arms).  Returns 1 for DONE, 0 for FAIL.  Tries branchless sbb/lea
   sequences for constant arms before falling back to cmov.
   NOTE(review): elided listing — many lines are missing throughout
   (locals, else branches, closing braces, and parts of the emitted
   sequences); comments below stick to what the surviving lines show. */
6247 ix86_expand_int_movcc (operands)
6250 enum rtx_code code = GET_CODE (operands[1]), compare_code;
6251 rtx compare_seq, compare_op;
6252 rtx second_test, bypass_test;
6254 /* When the compare code is not LTU or GEU, we can not use sbbl case.
6255 In case comparsion is done with immediate, we can convert it to LTU or
6256 GEU by altering the integer. */
6258 if ((code == LEU || code == GTU)
6259 && GET_CODE (ix86_compare_op1) == CONST_INT
6260 && GET_MODE (operands[0]) != HImode
6261 && (unsigned int)INTVAL (ix86_compare_op1) != 0xffffffff
6262 && GET_CODE (operands[2]) == CONST_INT
6263 && GET_CODE (operands[3]) == CONST_INT)
/* LEU x -> LTU x+1, GTU x -> GEU x+1 (excluded 0xffffffff above). */
6269 ix86_compare_op1 = GEN_INT (INTVAL (ix86_compare_op1) + 1);
6273 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
6274 compare_seq = gen_sequence ();
6277 compare_code = GET_CODE (compare_op);
6279 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
6280 HImode insns, we'd be swallowed in word prefix ops. */
6282 if (GET_MODE (operands[0]) != HImode
6283 && GET_MODE (operands[0]) != DImode
6284 && GET_CODE (operands[2]) == CONST_INT
6285 && GET_CODE (operands[3]) == CONST_INT)
6287 rtx out = operands[0];
6288 HOST_WIDE_INT ct = INTVAL (operands[2]);
6289 HOST_WIDE_INT cf = INTVAL (operands[3]);
/* sbb-based sequences: only valid for carry-flag codes with no IEEE
   second/bypass tests. */
6292 if ((compare_code == LTU || compare_code == GEU)
6293 && !second_test && !bypass_test)
6296 /* Detect overlap between destination and compare sources. */
6299 /* To simplify rest of code, restrict to the GEU case. */
6300 if (compare_code == LTU)
6305 compare_code = reverse_condition (compare_code);
6306 code = reverse_condition (code);
6310 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
6311 || reg_overlap_mentioned_p (out, ix86_compare_op1))
6312 tmp = gen_reg_rtx (SImode);
/* sbb %eax,%eax pattern: tmp becomes 0 or -1 from the carry flag. */
6314 emit_insn (compare_seq);
6315 emit_insn (gen_x86_movsicc_0_m1 (tmp));
6327 emit_insn (gen_addsi3 (tmp, tmp, GEN_INT (ct)));
6338 emit_insn (gen_iorsi3 (tmp, tmp, GEN_INT (ct)));
6340 else if (diff == -1 && ct)
6350 emit_insn (gen_one_cmplsi2 (tmp, tmp));
6352 emit_insn (gen_addsi3 (tmp, tmp, GEN_INT (cf)));
6359 * andl cf - ct, dest
6364 emit_insn (gen_andsi3 (tmp, tmp, GEN_INT (trunc_int_for_mode
6365 (cf - ct, SImode))));
6367 emit_insn (gen_addsi3 (tmp, tmp, GEN_INT (ct)));
6371 emit_move_insn (out, tmp);
6373 return 1; /* DONE */
/* Swap arms (and reverse the condition) so diff = ct - cf fits the
   lea-based encodings below. */
6380 tmp = ct, ct = cf, cf = tmp;
6382 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
6384 /* We may be reversing unordered compare to normal compare, that
6385 is not valid in general (we may convert non-trapping condition
6386 to trapping one), however on i386 we currently emit all
6387 comparisons unordered. */
6388 compare_code = reverse_condition_maybe_unordered (compare_code);
6389 code = reverse_condition_maybe_unordered (code);
6393 compare_code = reverse_condition (compare_code);
6394 code = reverse_condition (code);
/* diff encodable by lea (scale 1/2/4/8, optionally + base). */
6397 if (diff == 1 || diff == 2 || diff == 4 || diff == 8
6398 || diff == 3 || diff == 5 || diff == 9)
6404 * lea cf(dest*(ct-cf)),dest
6408 * This also catches the degenerate setcc-only case.
6414 out = emit_store_flag (out, code, ix86_compare_op0,
6415 ix86_compare_op1, VOIDmode, 0, 1);
6418 /* On x86_64 the lea instruction operates on Pmode, so we need to get arithmetics
6419 done in proper mode to match. */
6422 if (Pmode != SImode)
6423 tmp = gen_lowpart (Pmode, out);
6430 if (Pmode != SImode)
6431 out1 = gen_lowpart (Pmode, out);
6434 tmp = gen_rtx_MULT (Pmode, out1, GEN_INT (diff & ~1));
6438 tmp = gen_rtx_PLUS (Pmode, tmp, out1);
6444 tmp = gen_rtx_PLUS (Pmode, tmp, GEN_INT (cf));
6448 && (GET_CODE (tmp) != SUBREG || SUBREG_REG (tmp) != out))
6450 if (Pmode != SImode)
6451 tmp = gen_rtx_SUBREG (SImode, tmp, 0);
6453 /* ??? We should to take care for outputing non-lea arithmetics
6454 for Pmode != SImode case too, but it is quite tricky and not
6455 too important, since all TARGET_64BIT machines support real
6456 conditional moves. */
6457 if (nops == 1 && Pmode == SImode)
6461 clob = gen_rtx_REG (CCmode, FLAGS_REG);
6462 clob = gen_rtx_CLOBBER (VOIDmode, clob);
6464 tmp = gen_rtx_SET (VOIDmode, out, tmp);
6465 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
6469 emit_insn (gen_rtx_SET (VOIDmode, out, tmp));
6471 if (out != operands[0])
6472 emit_move_insn (operands[0], out);
6474 return 1; /* DONE */
6478 * General case: Jumpful:
6479 * xorl dest,dest cmpl op1, op2
6480 * cmpl op1, op2 movl ct, dest
6482 * decl dest movl cf, dest
6483 * andl (cf-ct),dest 1:
6488 * This is reasonably steep, but branch mispredict costs are
6489 * high on modern cpus, so consider failing only if optimizing
6492 * %%% Parameterize branch_cost on the tuning architecture, then
6493 * use that. The 80386 couldn't care less about mispredicts.
6496 if (!optimize_size && !TARGET_CMOVE)
6502 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
6504 /* We may be reversing unordered compare to normal compare,
6505 that is not valid in general (we may convert non-trapping
6506 condition to trapping one), however on i386 we currently
6507 emit all comparisons unordered. */
6508 compare_code = reverse_condition_maybe_unordered (compare_code);
6509 code = reverse_condition_maybe_unordered (code);
6513 compare_code = reverse_condition (compare_code);
6514 code = reverse_condition (code);
/* setcc; dec; and (cf-ct); add ct — branchless general constant case. */
6518 out = emit_store_flag (out, code, ix86_compare_op0,
6519 ix86_compare_op1, VOIDmode, 0, 1);
6521 emit_insn (gen_addsi3 (out, out, constm1_rtx));
6522 emit_insn (gen_andsi3 (out, out, GEN_INT (trunc_int_for_mode
6523 (cf - ct, SImode))));
6525 emit_insn (gen_addsi3 (out, out, GEN_INT (ct)));
6526 if (out != operands[0])
6527 emit_move_insn (operands[0], out);
6529 return 1; /* DONE */
6535 /* Try a few things more with specific constants and a variable. */
6538 rtx var, orig_out, out, tmp;
6541 return 0; /* FAIL */
6543 /* If one of the two operands is an interesting constant, load a
6544 constant with the above and mask it in with a logical operation. */
6546 if (GET_CODE (operands[2]) == CONST_INT)
6549 if (INTVAL (operands[2]) == 0)
6550 operands[3] = constm1_rtx, op = and_optab;
6551 else if (INTVAL (operands[2]) == -1)
6552 operands[3] = const0_rtx, op = ior_optab;
6554 return 0; /* FAIL */
6556 else if (GET_CODE (operands[3]) == CONST_INT)
6559 if (INTVAL (operands[3]) == 0)
6560 operands[2] = constm1_rtx, op = and_optab;
6561 else if (INTVAL (operands[3]) == -1)
6562 operands[2] = const0_rtx, op = ior_optab;
6564 return 0; /* FAIL */
6567 return 0; /* FAIL */
6569 orig_out = operands[0];
6570 tmp = gen_reg_rtx (GET_MODE (orig_out));
6573 /* Recurse to get the constant loaded. */
6574 if (ix86_expand_int_movcc (operands) == 0)
6575 return 0; /* FAIL */
6577 /* Mask in the interesting variable. */
6578 out = expand_binop (GET_MODE (orig_out), op, var, tmp, orig_out, 0,
6580 if (out != orig_out)
6581 emit_move_insn (orig_out, out);
6583 return 1; /* DONE */
6587 * For comparison with above,
6597 if (! nonimmediate_operand (operands[2], GET_MODE (operands[0])))
6598 operands[2] = force_reg (GET_MODE (operands[0]), operands[2]);
6599 if (! nonimmediate_operand (operands[3], GET_MODE (operands[0])))
6600 operands[3] = force_reg (GET_MODE (operands[0]), operands[3]);
/* Copy arms to fresh regs when the IEEE extra tests would read a value
   the first cmov already overwrote. */
6602 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
6604 rtx tmp = gen_reg_rtx (GET_MODE (operands[0]));
6605 emit_move_insn (tmp, operands[3]);
6608 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
6610 rtx tmp = gen_reg_rtx (GET_MODE (operands[0]));
6611 emit_move_insn (tmp, operands[2]);
6614 if (! register_operand (operands[2], VOIDmode)
6615 && ! register_operand (operands[3], VOIDmode))
6616 operands[2] = force_reg (GET_MODE (operands[0]), operands[2]);
6618 emit_insn (compare_seq);
6619 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
6620 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
6621 compare_op, operands[2],
6624 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
6625 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
6630 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
6631 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
6636 return 1; /* DONE */
/* Expand a floating-point conditional move described by OPERANDS.
   First tries SSE min/max and SSE cmov-style patterns when the compare
   and move modes match; otherwise falls back to fcmov, converting the
   condition through setcc when fcmov cannot express it directly.
   NOTE(review): elided listing — locals, several else branches, returns
   and closing braces are missing throughout this excerpt. */
6640 ix86_expand_fp_movcc (operands)
6645 rtx compare_op, second_test, bypass_test;
6647 /* For SF/DFmode conditional moves based on comparisons
6648 in same mode, we may want to use SSE min/max instructions. */
6649 if (((TARGET_SSE && GET_MODE (operands[0]) == SFmode)
6650 || (TARGET_SSE2 && GET_MODE (operands[0]) == DFmode))
6651 && GET_MODE (ix86_compare_op0) == GET_MODE (operands[0])
6652 /* The SSE comparisons does not support the LTGT/UNEQ pair. */
6654 || (GET_CODE (operands[1]) != LTGT && GET_CODE (operands[1]) != UNEQ))
6655 /* We may be called from the post-reload splitter. */
6656 && (!REG_P (operands[0])
6657 || SSE_REG_P (operands[0])
6658 || REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER))
6660 rtx op0 = ix86_compare_op0, op1 = ix86_compare_op1;
6661 code = GET_CODE (operands[1]);
6663 /* See if we have (cross) match between comparison operands and
6664 conditional move operands. */
6665 if (rtx_equal_p (operands[2], op1))
6670 code = reverse_condition_maybe_unordered (code);
6672 if (rtx_equal_p (operands[2], op0) && rtx_equal_p (operands[3], op1))
6674 /* Check for min operation. */
6677 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
6678 if (memory_operand (op0, VOIDmode))
6679 op0 = force_reg (GET_MODE (operands[0]), op0);
6680 if (GET_MODE (operands[0]) == SFmode)
6681 emit_insn (gen_minsf3 (operands[0], op0, op1));
6683 emit_insn (gen_mindf3 (operands[0], op0, op1));
6686 /* Check for max operation. */
6689 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
6690 if (memory_operand (op0, VOIDmode))
6691 op0 = force_reg (GET_MODE (operands[0]), op0);
6692 if (GET_MODE (operands[0]) == SFmode)
6693 emit_insn (gen_maxsf3 (operands[0], op0, op1));
6695 emit_insn (gen_maxdf3 (operands[0], op0, op1));
6699 /* Manage condition to be sse_comparison_operator. In case we are
6700 in non-ieee mode, try to canonicalize the destination operand
6701 to be first in the comparison - this helps reload to avoid extra
6703 if (!sse_comparison_operator (operands[1], VOIDmode)
6704 || (rtx_equal_p (operands[0], ix86_compare_op1) && !TARGET_IEEE_FP))
6706 rtx tmp = ix86_compare_op0;
6707 ix86_compare_op0 = ix86_compare_op1;
6708 ix86_compare_op1 = tmp;
6709 operands[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands[1])),
6710 VOIDmode, ix86_compare_op0,
6713 /* Similary try to manage result to be first operand of conditional
6714 move. We also don't support the NE comparison on SSE, so try to
6716 if ((rtx_equal_p (operands[0], operands[3])
6717 && (!TARGET_IEEE_FP || GET_CODE (operands[1]) != EQ))
6718 || (GET_CODE (operands[1]) == NE && TARGET_IEEE_FP))
6720 rtx tmp = operands[2];
6721 operands[2] = operands[3];
6723 operands[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
6724 (GET_CODE (operands[1])),
6725 VOIDmode, ix86_compare_op0,
6728 if (GET_MODE (operands[0]) == SFmode)
6729 emit_insn (gen_sse_movsfcc (operands[0], operands[1],
6730 operands[2], operands[3],
6731 ix86_compare_op0, ix86_compare_op1));
6733 emit_insn (gen_sse_movdfcc (operands[0], operands[1],
6734 operands[2], operands[3],
6735 ix86_compare_op0, ix86_compare_op1));
6739 /* The floating point conditional move instructions don't directly
6740 support conditions resulting from a signed integer comparison. */
6742 code = GET_CODE (operands[1]);
6743 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
6745 /* The floating point conditional move instructions don't directly
6746 support signed integer comparisons. */
/* Materialize the condition via setcc, then recompare that QImode
   result against zero — fcmov can handle the resulting codes. */
6748 if (!fcmov_comparison_operator (compare_op, VOIDmode))
6750 if (second_test != NULL || bypass_test != NULL)
6752 tmp = gen_reg_rtx (QImode);
6753 ix86_expand_setcc (code, tmp);
6755 ix86_compare_op0 = tmp;
6756 ix86_compare_op1 = const0_rtx;
6757 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
6759 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
6761 tmp = gen_reg_rtx (GET_MODE (operands[0]));
6762 emit_move_insn (tmp, operands[3]);
6765 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
6767 tmp = gen_reg_rtx (GET_MODE (operands[0]));
6768 emit_move_insn (tmp, operands[2]);
/* Emit the main fcmov, plus extra fcmovs for bypass/second tests. */
6772 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
6773 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
6778 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
6779 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
6784 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
6785 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
6793 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
6794 works for floating pointer parameters and nonoffsetable memories.
6795 For pushes, it returns just stack offsets; the values will be saved
6796 in the right order. Maximally three parts are generated. */
/* Split OPERAND into word-sized pieces stored in PARTS[0..2].
   NOTE(review): this chunk is line-sampled — declarations, braces and
   intervening statements are missing from this view; comments below are
   based only on the visible lines.  */
6799 ix86_split_to_parts (operand, parts, mode)
6802      enum machine_mode mode;
/* Part count: TFmode always needs 3 parts; otherwise derived from the
   mode size (4-byte parts here, 8-byte parts on the 64bit path below).  */
6807     size = mode == TFmode ? 3 : (GET_MODE_SIZE (mode) / 4);
6809     size = (GET_MODE_SIZE (mode) + 4) / 8;
/* MMX registers cannot be split into SImode pieces.  */
6811   if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
6813   if (size < 2 || size > 3)
6816   /* Optimize constant pool reference to immediates.  This is used by fp moves,
6817      that force all constants to memory to allow combining.  */
6819   if (GET_CODE (operand) == MEM
6820       && GET_CODE (XEXP (operand, 0)) == SYMBOL_REF
6821       && CONSTANT_POOL_ADDRESS_P (XEXP (operand, 0)))
6822     operand = get_pool_constant (XEXP (operand, 0));
6824   if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
6826       /* The only non-offsetable memories we handle are pushes.  */
6827       if (! push_operand (operand, VOIDmode))
6830       operand = copy_rtx (operand);
6831       PUT_MODE (operand, Pmode);
/* For a push all parts refer to the same (pre-dec) stack slot; the
   values will be stored in the right order by the caller.  */
6832       parts[0] = parts[1] = parts[2] = operand;
6834   else if (!TARGET_64BIT)
6837 	split_di (&operand, 1, &parts[0], &parts[1]);
6840 	  if (REG_P (operand))
6842 	      if (!reload_completed)
/* Hard/pseudo register: build consecutive SImode registers.  */
6844 	      parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
6845 	      parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
6847 		parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
6849 	  else if (offsettable_memref_p (operand))
/* Offsettable memory: address the pieces at offsets 0, 4, 8.  */
6851 	      operand = change_address (operand, SImode, XEXP (operand, 0));
6853 	      parts[1] = adj_offsettable_operand (operand, 4);
6855 		parts[2] = adj_offsettable_operand (operand, 8);
6857 	  else if (GET_CODE (operand) == CONST_DOUBLE)
6862 	      REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
6867 		  REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
6868 		  parts[2] = GEN_INT (l[2]);
6871 		  REAL_VALUE_TO_TARGET_DOUBLE (r, l);
6876 	      parts[1] = GEN_INT (l[1]);
6877 	      parts[0] = GEN_INT (l[0]);
/* 64bit target: XFmode/TFmode still need a DImode + SImode pair.  */
6885       if (mode == XFmode || mode == TFmode)
6887 	  if (REG_P (operand))
6889 	      if (!reload_completed)
6891 	      parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
6892 	      parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
6894 	  else if (offsettable_memref_p (operand))
6896 	      operand = change_address (operand, DImode, XEXP (operand, 0));
6898 	      parts[1] = adj_offsettable_operand (operand, 8);
6899 	      parts[1] = change_address (parts[1], SImode, XEXP (parts[1], 0));
6901 	  else if (GET_CODE (operand) == CONST_DOUBLE)
6906 	      REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
6907 	      REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
6908 	      /* Do not use shift by 32 to avoid warning on 32bit systems.  */
6909 	      if (HOST_BITS_PER_WIDE_INT >= 64)
6910 		  parts[0] = GEN_INT (l[0] + ((l[1] << 31) << 1));
6912 		parts[0] = immed_double_const (l[0], l[1], DImode);
6913 	      parts[1] = GEN_INT (l[2]);
6923 /* Emit insns to perform a move or push of DI, DF, and XF values.
6924 Return false when normal moves are needed; true when all required
6925 insns have been emitted. Operands 2-4 contain the input values
6926 int the correct order; operands 5-7 contain the output values. */
/* Split a multi-word move (DI/DF/XF/TF) into word-sized moves emitted in
   a collision-safe order.  NOTE(review): this chunk is line-sampled —
   braces, declarations and several statements are not visible; comments
   are based only on the visible lines.  */
6929 ix86_split_long_move (operands)
6936   enum machine_mode mode = GET_MODE (operands[0]);
6938   /* The DFmode expanders may ask us to move double.
6939      For 64bit target this is single move.  By hiding the fact
6940      here we simplify i386.md splitters.  */
6941   if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
6943       /* Optimize constant pool reference to immediates.  This is used by fp moves,
6944 	 that force all constants to memory to allow combining.  */
6946       if (GET_CODE (operands[1]) == MEM
6947 	  && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
6948 	  && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
6949 	operands[1] = get_pool_constant (XEXP (operands[1], 0));
6950       if (push_operand (operands[0], VOIDmode))
6952 	  operands[0] = copy_rtx (operands[0]);
6953 	  PUT_MODE (operands[0], Pmode);
/* 8-byte move on 64bit is a single DImode move; emit it and be done.  */
6956 	operands[0] = gen_lowpart (DImode, operands[0]);
6957       operands[1] = gen_lowpart (DImode, operands[1]);
6958       emit_move_insn (operands[0], operands[1]);
6962   /* The only non-offsettable memory we handle is push.  */
6963   if (push_operand (operands[0], VOIDmode))
6965   else if (GET_CODE (operands[0]) == MEM
6966 	   && ! offsettable_memref_p (operands[0]))
/* Split both sides into parts; the destination's mode decides how.  */
6969   nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
6970   ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
6972   /* When emitting push, take care for source operands on the stack.  */
6973   if (push && GET_CODE (operands[1]) == MEM
6974       && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
6977       part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
6978 				   XEXP (part[1][2], 0));
6979       part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
6980 				   XEXP (part[1][1], 0));
6983   /* We need to do copy in the right order in case an address register
6984      of the source overlaps the destination.  */
6985   if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
/* Count how many destination parts collide with the source address.  */
6987       if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
6989       if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
6992 	  && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
6995       /* Collision in the middle part can be handled by reordering.  */
6996       if (collisions == 1 && nparts == 3
6997 	  && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
7000 	  tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
7001 	  tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
7004       /* If there are more collisions, we can't handle it by reordering.
7005 	 Do an lea to the last part and use only one colliding move.  */
7006       else if (collisions > 1)
7009 	  emit_insn (gen_rtx_SET (VOIDmode, part[0][nparts - 1],
7010 				  XEXP (part[1][0], 0)));
7011 	  part[1][0] = change_address (part[1][0],
7012 				       TARGET_64BIT ? DImode : SImode,
7013 				       part[0][nparts - 1]);
7014 	  part[1][1] = adj_offsettable_operand (part[1][0],
7016 	  part[1][1] = change_address (part[1][1], GET_MODE (part[0][1]),
7017 				       XEXP (part[1][1], 0));
7019 	    part[1][2] = adj_offsettable_operand (part[1][0], 8);
7029 	  /* We use only first 12 bytes of TFmode value, but for pushing we
7030 	     are required to adjust stack as if we were pushing real 16byte
7032 	  if (mode == TFmode && !TARGET_64BIT)
7033 	    emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
7035 	  emit_move_insn (part[0][2], part[1][2]);
7040 	  /* In 64bit mode we don't have 32bit push available.  In case this is
7041 	     register, it is OK - we will just use larger counterpart.  We also
7042 	     retype memory - these comes from attempt to avoid REX prefix on
7043 	     moving of second half of TFmode value.  */
7044 	  if (GET_MODE (part[1][1]) == SImode)
7046 	      if (GET_CODE (part[1][1]) == MEM)
7047 		part[1][1] = change_address (part[1][1], DImode, XEXP (part[1][1], 0));
7048 	      else if (REG_P (part[1][1]))
7049 		part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
7052 	      if (GET_MODE (part[1][0]) == SImode)
7053 		part[1][0] = part[1][1];
/* Pushes are emitted high part first (stack grows downward).  */
7056       emit_move_insn (part[0][1], part[1][1]);
7057       emit_move_insn (part[0][0], part[1][0]);
7061   /* Choose correct order to not overwrite the source before it is copied.  */
7062   if ((REG_P (part[0][0])
7063        && REG_P (part[1][1])
7064        && (REGNO (part[0][0]) == REGNO (part[1][1])
7066 	       && REGNO (part[0][0]) == REGNO (part[1][2]))))
7068 	  && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
/* High-to-low order: destination low part would clobber a source part.  */
7072 	  operands[2] = part[0][2];
7073 	  operands[3] = part[0][1];
7074 	  operands[4] = part[0][0];
7075 	  operands[5] = part[1][2];
7076 	  operands[6] = part[1][1];
7077 	  operands[7] = part[1][0];
7081 	  operands[2] = part[0][1];
7082 	  operands[3] = part[0][0];
7083 	  operands[5] = part[1][1];
7084 	  operands[6] = part[1][0];
/* Low-to-high order is safe otherwise.  */
7091 	  operands[2] = part[0][0];
7092 	  operands[3] = part[0][1];
7093 	  operands[4] = part[0][2];
7094 	  operands[5] = part[1][0];
7095 	  operands[6] = part[1][1];
7096 	  operands[7] = part[1][2];
7100 	  operands[2] = part[0][0];
7101 	  operands[3] = part[0][1];
7102 	  operands[5] = part[1][0];
7103 	  operands[6] = part[1][1];
7106   emit_move_insn (operands[2], operands[5]);
7107   emit_move_insn (operands[3], operands[6]);
7109     emit_move_insn (operands[4], operands[7]);
/* Emit code for a DImode left shift on a 32bit target, splitting the
   value into a low/high SImode pair.  SCRATCH, if non-null, is a spare
   SImode register used on the variable-count CMOV path.
   NOTE(review): line-sampled view — some statements are not visible.  */
7115 ix86_split_ashldi (operands, scratch)
7116      rtx *operands, scratch;
7118   rtx low[2], high[2];
7121   if (GET_CODE (operands[2]) == CONST_INT)
7123       split_di (operands, 2, low, high);
7124       count = INTVAL (operands[2]) & 63;
/* Shift by >= 32: low word moves into high, low becomes zero.  */
7128 	  emit_move_insn (high[0], low[1]);
7129 	  emit_move_insn (low[0], const0_rtx);
7132 	    emit_insn (gen_ashlsi3 (high[0], high[0], GEN_INT (count - 32)));
/* Shift by < 32: shld feeds low bits into the high word.  */
7136 	  if (!rtx_equal_p (operands[0], operands[1]))
7137 	    emit_move_insn (operands[0], operands[1]);
7138 	  emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
7139 	  emit_insn (gen_ashlsi3 (low[0], low[0], GEN_INT (count)));
/* Variable count: shift both words, then fix up when count >= 32.  */
7144       if (!rtx_equal_p (operands[0], operands[1]))
7145 	emit_move_insn (operands[0], operands[1]);
7147       split_di (operands, 1, low, high);
7149       emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
7150       emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
7152       if (TARGET_CMOVE && (! no_new_pseudos || scratch))
7154 	  if (! no_new_pseudos)
7155 	    scratch = force_reg (SImode, const0_rtx);
7157 	    emit_move_insn (scratch, const0_rtx);
7159 	  emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2],
/* No CMOV (or no scratch): use the branching adjustment pattern.  */
7163 	emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
/* Emit code for a DImode arithmetic right shift on a 32bit target.
   SCRATCH, if non-null, holds the sign-fill word on the CMOV path.
   NOTE(review): line-sampled view — some statements are not visible.  */
7168 ix86_split_ashrdi (operands, scratch)
7169      rtx *operands, scratch;
7171   rtx low[2], high[2];
7174   if (GET_CODE (operands[2]) == CONST_INT)
7176       split_di (operands, 2, low, high);
7177       count = INTVAL (operands[2]) & 63;
/* Shift by >= 32: high word moves to low; high becomes sign bits.  */
7181 	  emit_move_insn (low[0], high[1]);
7183 	  if (! reload_completed)
7184 	    emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31)));
/* After reload we cannot make a new pseudo; reuse high as temp.  */
7187 	      emit_move_insn (high[0], low[0]);
7188 	      emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
7192 	    emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
/* Shift by < 32: shrd feeds high bits into the low word.  */
7196 	  if (!rtx_equal_p (operands[0], operands[1]))
7197 	    emit_move_insn (operands[0], operands[1]);
7198 	  emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
7199 	  emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
/* Variable count path.  */
7204       if (!rtx_equal_p (operands[0], operands[1]))
7205 	emit_move_insn (operands[0], operands[1]);
7207       split_di (operands, 1, low, high);
7209       emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
7210       emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));
7212       if (TARGET_CMOVE && (! no_new_pseudos || scratch))
7214 	  if (! no_new_pseudos)
7215 	    scratch = gen_reg_rtx (SImode);
/* scratch = sign extension of the (shifted) high word.  */
7216 	  emit_move_insn (scratch, high[0]);
7217 	  emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
7218 	  emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
7222 	emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
/* Emit code for a DImode logical right shift on a 32bit target.
   SCRATCH, if non-null, supplies the zero-fill word on the CMOV path.
   NOTE(review): line-sampled view — some statements are not visible.  */
7227 ix86_split_lshrdi (operands, scratch)
7228      rtx *operands, scratch;
7230   rtx low[2], high[2];
7233   if (GET_CODE (operands[2]) == CONST_INT)
7235       split_di (operands, 2, low, high);
7236       count = INTVAL (operands[2]) & 63;
/* Shift by >= 32: high word moves to low; high becomes zero.  */
7240 	  emit_move_insn (low[0], high[1]);
7241 	  emit_move_insn (high[0], const0_rtx);
7244 	    emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
/* Shift by < 32: shrd feeds high bits into the low word.  */
7248 	  if (!rtx_equal_p (operands[0], operands[1]))
7249 	    emit_move_insn (operands[0], operands[1]);
7250 	  emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
7251 	  emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
/* Variable count path.  */
7256       if (!rtx_equal_p (operands[0], operands[1]))
7257 	emit_move_insn (operands[0], operands[1]);
7259       split_di (operands, 1, low, high);
7261       emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
7262       emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));
7264       /* Heh.  By reversing the arguments, we can reuse this pattern.  */
7265       if (TARGET_CMOVE && (! no_new_pseudos || scratch))
7267 	  if (! no_new_pseudos)
7268 	    scratch = force_reg (SImode, const0_rtx);
7270 	    emit_move_insn (scratch, const0_rtx);
7272 	  emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
7276 	emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
7280 /* Helper function for the string operations bellow. Dest VARIABLE whether
7281 it is aligned to VALUE bytes. If true, jump to the label. */
/* Emit a test of VARIABLE's low bits against VALUE and a conditional
   jump taken when (VARIABLE & VALUE) == 0, i.e. when the requested
   alignment already holds.  Returns the label to be emitted after the
   fix-up code.  NOTE(review): line-sampled view.  */
7283 ix86_expand_aligntest (variable, value)
7287   rtx label = gen_label_rtx ();
7288   rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
/* Pick the and pattern matching VARIABLE's mode.  */
7289   if (GET_MODE (variable) == DImode)
7290     emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
7292     emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
7293   emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
7298 /* Adjust COUNTER by the VALUE. */
/* Decrement COUNTREG by VALUE, using the add pattern that matches the
   register's mode (DImode on 64bit counters, SImode otherwise).  */
7300 ix86_adjust_counter (countreg, value)
7302      HOST_WIDE_INT value;
7304   if (GET_MODE (countreg) == DImode)
7305     emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
7307     emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
7310 /* Zero extend possibly SImode EXP to Pmode register. */
/* Return EXP as a fresh Pmode register, zero-extending from SImode when
   necessary (the zero_extendsidi2 case below).  VOIDmode constants are
   simply forced into a Pmode register.  */
7312 ix86_zero_extend_to_Pmode (exp)
7316   if (GET_MODE (exp) == VOIDmode)
7317     return force_reg (Pmode, exp);
7318   if (GET_MODE (exp) == Pmode)
7319     return copy_to_mode_reg (Pmode, exp);
/* Here EXP must be SImode with Pmode == DImode; widen it.  */
7320   r = gen_reg_rtx (Pmode);
7321   emit_insn (gen_zero_extendsidi2 (r, exp));
7325 /* Expand string move (memcpy) operation. Use i386 string operations when
7326 profitable. expand_clrstr contains similar code. */
/* Expand an inline memcpy from SRC to DST of COUNT_EXP bytes with known
   alignment ALIGN_EXP, using x86 string instructions where profitable.
   ix86_expand_clrstr below is kept structurally parallel to this.
   NOTE(review): line-sampled view — braces, the FAIL/DONE exits and some
   conditions are not visible; comments describe only the visible lines.  */
7328 ix86_expand_movstr (dst, src, count_exp, align_exp)
7329      rtx dst, src, count_exp, align_exp;
7331   rtx srcreg, destreg, countreg;
7332   enum machine_mode counter_mode;
7333   HOST_WIDE_INT align = 0;
7334   unsigned HOST_WIDE_INT count = 0;
7339   if (GET_CODE (align_exp) == CONST_INT)
7340     align = INTVAL (align_exp);
7342   /* This simple hack avoids all inlining code and simplifies code bellow.  */
7343   if (!TARGET_ALIGN_STRINGOPS)
7346   if (GET_CODE (count_exp) == CONST_INT)
7347     count = INTVAL (count_exp);
7349   /* Figure out proper mode for counter.  For 32bits it is always SImode,
7350      for 64bits use SImode when possible, otherwise DImode.
7351      Set count to number of bytes copied when known at compile time.  */
7352   if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
7353       || x86_64_zero_extended_value (count_exp))
7354     counter_mode = SImode;
7356     counter_mode = DImode;
7358   if (counter_mode != SImode && counter_mode != DImode)
7361   destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
7362   srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
/* String insns require the direction flag cleared.  */
7364   emit_insn (gen_cld ());
7366   /* When optimizing for size emit simple rep ; movsb instruction for
7367      counts not divisible by 4.  */
7369   if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
7371       countreg = ix86_zero_extend_to_Pmode (count_exp);
7373 	emit_insn (gen_rep_movqi_rex64 (destreg, srcreg, countreg,
7374 					destreg, srcreg, countreg));
7376 	emit_insn (gen_rep_movqi (destreg, srcreg, countreg,
7377 				  destreg, srcreg, countreg));
7380   /* For constant aligned (or small unaligned) copies use rep movsl
7381      followed by code copying the rest.  For PentiumPro ensure 8 byte
7382      alignment to allow rep movsl acceleration.  */
7386 	       || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
7387 	       || optimize_size || count < (unsigned int)64))
7389       int size = TARGET_64BIT && !optimize_size ? 8 : 4;
/* Copy the word-divisible prefix with rep movs{l,q}.  */
7390       if (count & ~(size - 1))
7392 	  countreg = copy_to_mode_reg (counter_mode,
7393 				       GEN_INT ((count >> (size == 4 ? 2 : 3))
7394 						& (TARGET_64BIT ? -1 : 0x3fffffff)));
7395 	  countreg = ix86_zero_extend_to_Pmode (countreg);
7399 		emit_insn (gen_rep_movsi_rex64 (destreg, srcreg, countreg,
7400 						destreg, srcreg, countreg));
7402 		emit_insn (gen_rep_movsi (destreg, srcreg, countreg,
7403 					  destreg, srcreg, countreg));
7406 	    emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg,
7407 					    destreg, srcreg, countreg));
/* Copy the remaining 1-7 tail bytes with single string moves.  */
7409       if (size == 8 && (count & 0x04))
7410 	emit_insn (gen_strmovsi (destreg, srcreg));
7412 	emit_insn (gen_strmovhi (destreg, srcreg));
7414 	emit_insn (gen_strmovqi (destreg, srcreg));
7416   /* The generic code based on the glibc implementation:
7417      - align destination to 4 bytes (8 byte alignment is used for PentiumPro
7418      allowing accelerated copying there)
7419      - copy the data using rep movsl
7426       /* In case we don't know anything about the alignment, default to
7427 	 library version, since it is usually equally fast and result in
7429       if (!TARGET_INLINE_ALL_STRINGOPS && align < UNITS_PER_WORD)
7435       if (TARGET_SINGLE_STRINGOP)
7436 	emit_insn (gen_cld ());
7438       countreg2 = gen_reg_rtx (Pmode);
7439       countreg = copy_to_mode_reg (counter_mode, count_exp);
7441       /* We don't use loops to align destination and to copy parts smaller
7442 	 than 4 bytes, because gcc is able to optimize such code better (in
7443 	 the case the destination or the count really is aligned, gcc is often
7444 	 able to predict the branches) and also it is friendlier to the
7445 	 hardware branch prediction.
7447 	 Using loops is benefical for generic case, because we can
7448 	 handle small counts using the loops.  Many CPUs (such as Athlon)
7449 	 have large REP prefix setup costs.
7451 	 This is quite costy.  Maybe we can revisit this decision later or
7452 	 add some customizability to this code.  */
7455 	  && align < (TARGET_PENTIUMPRO && (count == 0
7456 					    || count >= (unsigned int)260)
7457 		      ? 8 : UNITS_PER_WORD))
/* Small counts skip the alignment prologue entirely.  */
7459 	  label = gen_label_rtx ();
7460 	  emit_cmp_and_jump_insns (countreg, GEN_INT (UNITS_PER_WORD - 1),
7461 				   LEU, 0, counter_mode, 1, 0, label);
/* Align destination to 2, then 4 (then 8 on PPro) bytes, one byte or
   halfword at a time, adjusting the remaining count as we go.  */
7465 	  rtx label = ix86_expand_aligntest (destreg, 1);
7466 	  emit_insn (gen_strmovqi (destreg, srcreg));
7467 	  ix86_adjust_counter (countreg, 1);
7469 	  LABEL_NUSES (label) = 1;
7473 	  rtx label = ix86_expand_aligntest (destreg, 2);
7474 	  emit_insn (gen_strmovhi (destreg, srcreg));
7475 	  ix86_adjust_counter (countreg, 2);
7477 	  LABEL_NUSES (label) = 1;
7480 	  && ((TARGET_PENTIUMPRO && (count == 0
7481 				     || count >= (unsigned int)260))
7484 	  rtx label = ix86_expand_aligntest (destreg, 4);
7485 	  emit_insn (gen_strmovsi (destreg, srcreg));
7486 	  ix86_adjust_counter (countreg, 4);
7488 	  LABEL_NUSES (label) = 1;
7491       if (!TARGET_SINGLE_STRINGOP)
7492 	emit_insn (gen_cld ());
/* Bulk copy: countreg2 = byte count / word size, then rep movs.  */
7495 	  emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
7497 	  emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg2,
7498 					  destreg, srcreg, countreg2));
7502 	  emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
7503 	  emit_insn (gen_rep_movsi (destreg, srcreg, countreg2,
7504 				    destreg, srcreg, countreg2));
7510 	  LABEL_NUSES (label) = 1;
/* Epilogue: move the remaining 4/2/1 byte tails, testing countreg's
   low bits at runtime when the count is not known at compile time.  */
7512       if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
7513 	emit_insn (gen_strmovsi (destreg, srcreg));
7514       if ((align <= 4 || count == 0) && TARGET_64BIT)
7516 	  rtx label = ix86_expand_aligntest (countreg, 4);
7517 	  emit_insn (gen_strmovsi (destreg, srcreg));
7519 	  LABEL_NUSES (label) = 1;
7521       if (align > 2 && count != 0 && (count & 2))
7522 	emit_insn (gen_strmovhi (destreg, srcreg));
7523       if (align <= 2 || count == 0)
7525 	  rtx label = ix86_expand_aligntest (countreg, 2);
7526 	  emit_insn (gen_strmovhi (destreg, srcreg));
7528 	  LABEL_NUSES (label) = 1;
7530       if (align > 1 && count != 0 && (count & 1))
7531 	emit_insn (gen_strmovqi (destreg, srcreg));
7532       if (align <= 1 || count == 0)
7534 	  rtx label = ix86_expand_aligntest (countreg, 1);
7535 	  emit_insn (gen_strmovqi (destreg, srcreg));
7537 	  LABEL_NUSES (label) = 1;
/* Propagate memory attributes from DST/SRC onto the emitted insns.  */
7541   insns = get_insns ();
7544   ix86_set_move_mem_attrs (insns, dst, src, destreg, srcreg);
7549 /* Expand string clear operation (bzero). Use i386 string operations when
7550 profitable. expand_movstr contains similar code. */
/* Expand an inline memset-to-zero (bzero) of COUNT_EXP bytes at SRC with
   known alignment ALIGN_EXP, using x86 string store instructions.
   Deliberately mirrors ix86_expand_movstr above.
   NOTE(review): line-sampled view — braces, the FAIL/DONE exits and some
   conditions are not visible; comments describe only the visible lines.  */
7552 ix86_expand_clrstr (src, count_exp, align_exp)
7553      rtx src, count_exp, align_exp;
7555   rtx destreg, zeroreg, countreg;
7556   enum machine_mode counter_mode;
7557   HOST_WIDE_INT align = 0;
7558   unsigned HOST_WIDE_INT count = 0;
7560   if (GET_CODE (align_exp) == CONST_INT)
7561     align = INTVAL (align_exp);
7563   /* This simple hack avoids all inlining code and simplifies code bellow.  */
7564   if (!TARGET_ALIGN_STRINGOPS)
7567   if (GET_CODE (count_exp) == CONST_INT)
7568     count = INTVAL (count_exp);
7569   /* Figure out proper mode for counter.  For 32bits it is always SImode,
7570      for 64bits use SImode when possible, otherwise DImode.
7571      Set count to number of bytes copied when known at compile time.  */
7572   if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
7573       || x86_64_zero_extended_value (count_exp))
7574     counter_mode = SImode;
7576     counter_mode = DImode;
7578   destreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
/* String insns require the direction flag cleared.  */
7580   emit_insn (gen_cld ());
7582   /* When optimizing for size emit simple rep ; movsb instruction for
7583      counts not divisible by 4.  */
7585   if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
7587       countreg = ix86_zero_extend_to_Pmode (count_exp);
7588       zeroreg = copy_to_mode_reg (QImode, const0_rtx);
7590 	emit_insn (gen_rep_stosqi_rex64 (destreg, countreg, zeroreg,
7591 					 destreg, countreg));
7593 	emit_insn (gen_rep_stosqi (destreg, countreg, zeroreg,
7594 				   destreg, countreg));
7598 	       || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
7599 	       || optimize_size || count < (unsigned int)64))
7601       int size = TARGET_64BIT && !optimize_size ? 8 : 4;
7602       zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
/* Store the word-divisible prefix with rep stos{l,q}.  */
7603       if (count & ~(size - 1))
7605 	  countreg = copy_to_mode_reg (counter_mode,
7606 				       GEN_INT ((count >> (size == 4 ? 2 : 3))
7607 						& (TARGET_64BIT ? -1 : 0x3fffffff)));
7608 	  countreg = ix86_zero_extend_to_Pmode (countreg);
7612 		emit_insn (gen_rep_stossi_rex64 (destreg, countreg, zeroreg,
7613 						 destreg, countreg));
7615 		emit_insn (gen_rep_stossi (destreg, countreg, zeroreg,
7616 					   destreg, countreg));
7619 	    emit_insn (gen_rep_stosdi_rex64 (destreg, countreg, zeroreg,
7620 					     destreg, countreg));
/* Store the remaining tail bytes via narrow subregs of zeroreg.  */
7622       if (size == 8 && (count & 0x04))
7623 	emit_insn (gen_strsetsi (destreg,
7624 				 gen_rtx_SUBREG (SImode, zeroreg, 0)));
7626 	emit_insn (gen_strsethi (destreg,
7627 				 gen_rtx_SUBREG (HImode, zeroreg, 0)));
7629 	emit_insn (gen_strsetqi (destreg,
7630 				 gen_rtx_SUBREG (QImode, zeroreg, 0)));
7637       /* In case we don't know anything about the alignment, default to
7638 	 library version, since it is usually equally fast and result in
7640       if (!TARGET_INLINE_ALL_STRINGOPS && align < UNITS_PER_WORD)
7643       if (TARGET_SINGLE_STRINGOP)
7644 	emit_insn (gen_cld ());
7646       countreg2 = gen_reg_rtx (Pmode);
7647       countreg = copy_to_mode_reg (counter_mode, count_exp);
7648       zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
7651 	  && align < (TARGET_PENTIUMPRO && (count == 0
7652 					    || count >= (unsigned int)260)
7653 		      ? 8 : UNITS_PER_WORD))
/* Small counts skip the alignment prologue entirely.  */
7655 	  label = gen_label_rtx ();
7656 	  emit_cmp_and_jump_insns (countreg, GEN_INT (UNITS_PER_WORD - 1),
7657 				   LEU, 0, counter_mode, 1, 0, label);
/* Align destination to 2, then 4 bytes, one byte/halfword at a time.  */
7661 	  rtx label = ix86_expand_aligntest (destreg, 1);
7662 	  emit_insn (gen_strsetqi (destreg,
7663 				   gen_rtx_SUBREG (QImode, zeroreg, 0)));
7664 	  ix86_adjust_counter (countreg, 1);
7666 	  LABEL_NUSES (label) = 1;
7670 	  rtx label = ix86_expand_aligntest (destreg, 2);
7671 	  emit_insn (gen_strsethi (destreg,
7672 				   gen_rtx_SUBREG (HImode, zeroreg, 0)));
7673 	  ix86_adjust_counter (countreg, 2);
7675 	  LABEL_NUSES (label) = 1;
7677       if (align <= 4 && TARGET_PENTIUMPRO && (count == 0
7678 					      || count >= (unsigned int)260))
7680 	  rtx label = ix86_expand_aligntest (destreg, 4);
7681 	  emit_insn (gen_strsetsi (destreg, (TARGET_64BIT
7682 					     ? gen_rtx_SUBREG (SImode, zeroreg, 0)
7684 	  ix86_adjust_counter (countreg, 4);
7686 	  LABEL_NUSES (label) = 1;
7689       if (!TARGET_SINGLE_STRINGOP)
7690 	emit_insn (gen_cld ());
/* Bulk store: countreg2 = byte count / word size, then rep stos.  */
7693 	  emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
7695 	  emit_insn (gen_rep_stosdi_rex64 (destreg, countreg2, zeroreg,
7696 					   destreg, countreg2));
7700 	  emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
7701 	  emit_insn (gen_rep_stossi (destreg, countreg2, zeroreg,
7702 				     destreg, countreg2));
7708 	  LABEL_NUSES (label) = 1;
/* Epilogue: store the remaining 4/2/1 byte tails.
   NOTE(review): the two aligntest calls below pass 2 and 1 against
   destreg where movstr tests countreg — confirm against full source.  */
7710       if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
7711 	emit_insn (gen_strsetsi (destreg,
7712 				 gen_rtx_SUBREG (SImode, zeroreg, 0)));
7713       if (TARGET_64BIT && (align <= 4 || count == 0))
7715 	  rtx label = ix86_expand_aligntest (destreg, 2);
7716 	  emit_insn (gen_strsetsi (destreg,
7717 				   gen_rtx_SUBREG (SImode, zeroreg, 0)));
7719 	  LABEL_NUSES (label) = 1;
7721       if (align > 2 && count != 0 && (count & 2))
7722 	emit_insn (gen_strsethi (destreg,
7723 				 gen_rtx_SUBREG (HImode, zeroreg, 0)));
7724       if (align <= 2 || count == 0)
7726 	  rtx label = ix86_expand_aligntest (destreg, 2);
7727 	  emit_insn (gen_strsethi (destreg,
7728 				   gen_rtx_SUBREG (HImode, zeroreg, 0)));
7730 	  LABEL_NUSES (label) = 1;
7732       if (align > 1 && count != 0 && (count & 1))
7733 	emit_insn (gen_strsetqi (destreg,
7734 				 gen_rtx_SUBREG (QImode, zeroreg, 0)));
7735       if (align <= 1 || count == 0)
7737 	  rtx label = ix86_expand_aligntest (destreg, 1);
7738 	  emit_insn (gen_strsetqi (destreg,
7739 				   gen_rtx_SUBREG (QImode, zeroreg, 0)));
7741 	  LABEL_NUSES (label) = 1;
7746 /* Expand strlen. */
/* Expand strlen of SRC into OUT.  EOSCHAR is the terminator (const0_rtx
   for real strlen), ALIGN the known source alignment.  Uses either the
   unrolled word-at-a-time scheme or a repnz scasb sequence.
   NOTE(review): line-sampled view — some statements are not visible.  */
7748 ix86_expand_strlen (out, src, eoschar, align)
7749      rtx out, src, eoschar, align;
7751   rtx addr, scratch1, scratch2, scratch3, scratch4;
7753   /* The generic case of strlen expander is long.  Avoid it's
7754      expanding unless TARGET_INLINE_ALL_STRINGOPS.  */
7756   if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
7757       && !TARGET_INLINE_ALL_STRINGOPS
7759       && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
7762   addr = force_reg (Pmode, XEXP (src, 0));
7763   scratch1 = gen_reg_rtx (Pmode);
/* Unrolled path: only for true NUL-terminated strlen at -O2+.  */
7765   if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
7768       /* Well it seems that some optimizer does not combine a call like
7769 	 foo(strlen(bar), strlen(bar));
7770 	 when the move and the subtraction is done here.  It does calculate
7771 	 the length just once when these instructions are done inside of
7772 	 output_strlen_unroll().  But I think since &bar[strlen(bar)] is
7773 	 often used and I use one fewer register for the lifetime of
7774 	 output_strlen_unroll() this is better.  */
7776       emit_move_insn (out, addr);
7778       ix86_expand_strlensi_unroll_1 (out, align);
7780       /* strlensi_unroll_1 returns the address of the zero at the end of
7781 	 the string, like memchr(), so compute the length by subtracting
7782 	 the start address.  */
7784 	emit_insn (gen_subdi3 (out, out, addr));
7786 	emit_insn (gen_subsi3 (out, out, addr));
/* scas path: scan for EOSCHAR, then length = ~scan_count - 1.  */
7790       scratch2 = gen_reg_rtx (Pmode);
7791       scratch3 = gen_reg_rtx (Pmode);
7792       scratch4 = force_reg (Pmode, constm1_rtx);
7794       emit_move_insn (scratch3, addr);
7795       eoschar = force_reg (QImode, eoschar);
7797       emit_insn (gen_cld ());
7800 	  emit_insn (gen_strlenqi_rex_1 (scratch1, scratch3, eoschar,
7801 					 align, scratch4, scratch3));
7802 	  emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
7803 	  emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
7807 	  emit_insn (gen_strlenqi_1 (scratch1, scratch3, eoschar,
7808 				     align, scratch4, scratch3));
7809 	  emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
7810 	  emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
7816 /* Expand the appropriate insns for doing strlen if not just doing
7819 out = result, initialized with the start address
7820 align_rtx = alignment of the address.
7821 scratch = scratch register, initialized with the startaddress when
7822 not aligned, otherwise undefined
7824 This is just the body. It needs the initialisations mentioned above and
7825 some address computing at the end. These things are done in i386.md. */
/* Body of the unrolled strlen: on return OUT holds the address of the
   terminating zero byte (memchr-style); the caller subtracts the start
   address.  ALIGN_RTX is the known alignment of OUT on entry.
   NOTE(review): line-sampled view — braces and some statements are not
   visible; comments describe only the visible lines.  */
7828 ix86_expand_strlensi_unroll_1 (out, align_rtx)
7833   rtx align_2_label = NULL_RTX;
7834   rtx align_3_label = NULL_RTX;
7835   rtx align_4_label = gen_label_rtx ();
7836   rtx end_0_label = gen_label_rtx ();
7838   rtx tmpreg = gen_reg_rtx (SImode);
7839   rtx scratch = gen_reg_rtx (SImode);
7842   if (GET_CODE (align_rtx) == CONST_INT)
7843     align = INTVAL (align_rtx);
7845   /* Loop to check 1..3 bytes for null to get an aligned pointer.  */
7847   /* Is there a known alignment and is it less than 4?  */
7850       rtx scratch1 = gen_reg_rtx (Pmode);
7851       emit_move_insn (scratch1, out);
7852       /* Is there a known alignment and is it not 2? */
7855 	  align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
7856 	  align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
7858 	  /* Leave just the 3 lower bits.  */
7859 	  align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
7860 				    NULL_RTX, 0, OPTAB_WIDEN);
/* Dispatch on (address & 3): 0 -> already aligned, 2 -> one halfword
   check needed, 3 -> one byte check needed.  */
7862 	  emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
7863 				   Pmode, 1, 0, align_4_label);
7864 	  emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), EQ, NULL,
7865 				   Pmode, 1, 0, align_2_label);
7866 	  emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), GTU, NULL,
7867 				   Pmode, 1, 0, align_3_label);
7871 	  /* Since the alignment is 2, we have to check 2 or 0 bytes;
7872 	     check if is aligned to 4 - byte.  */
7874 	  align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (2),
7875 				    NULL_RTX, 0, OPTAB_WIDEN);
7877 	  emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
7878 				   Pmode, 1, 0, align_4_label);
7881       mem = gen_rtx_MEM (QImode, out);
7883       /* Now compare the bytes.  */
7885       /* Compare the first n unaligned byte on a byte per byte basis.  */
7886       emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
7887 			       QImode, 1, 0, end_0_label);
7889       /* Increment the address.  */
7891 	emit_insn (gen_adddi3 (out, out, const1_rtx))
7893 	emit_insn (gen_addsi3 (out, out, const1_rtx));
7895       /* Not needed with an alignment of 2 */
7898 	  emit_label (align_2_label);
7900 	  emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
7901 				   QImode, 1, 0, end_0_label);
7904 	    emit_insn (gen_adddi3 (out, out, const1_rtx));
7906 	    emit_insn (gen_addsi3 (out, out, const1_rtx));
7908 	  emit_label (align_3_label);
7911       emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
7912 			       QImode, 1, 0, end_0_label);
7915 	emit_insn (gen_adddi3 (out, out, const1_rtx));
7917 	emit_insn (gen_addsi3 (out, out, const1_rtx));
7920   /* Generate loop to check 4 bytes at a time.  It is not a good idea to
7921      align this loop.  It gives only huge programs, but does not help to
7923   emit_label (align_4_label);
7925   mem = gen_rtx_MEM (SImode, out);
7926   emit_move_insn (scratch, mem);
7928     emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
7930     emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
7932   /* This formula yields a nonzero result iff one of the bytes is zero.
7933      This saves three branches inside loop and many cycles.  */
/* Classic (x - 0x01010101) & ~x & 0x80808080 zero-byte test.  */
7935   emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
7936   emit_insn (gen_one_cmplsi2 (scratch, scratch));
7937   emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
7938   emit_insn (gen_andsi3 (tmpreg, tmpreg,
7939 			 GEN_INT (trunc_int_for_mode
7940 				  (0x80808080, SImode))));
7941   emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0,
7942 			   SImode, 1, 0, align_4_label);
/* Branch-free fixup with CMOV: locate which byte was zero.  */
7946       rtx reg = gen_reg_rtx (SImode);
7947       rtx reg2 = gen_reg_rtx (Pmode);
7948       emit_move_insn (reg, tmpreg);
7949       emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
7951       /* If zero is not in the first two bytes, move two bytes forward.  */
7952       emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
7953       tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
7954       tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
7955       emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
7956 			      gen_rtx_IF_THEN_ELSE (SImode, tmp,
7959       /* Emit lea manually to avoid clobbering of flags.  */
7960       emit_insn (gen_rtx_SET (SImode, reg2,
7961 			      gen_rtx_PLUS (Pmode, out, GEN_INT (2))));
7963       tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
7964       tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
7965       emit_insn (gen_rtx_SET (VOIDmode, out,
7966 			      gen_rtx_IF_THEN_ELSE (Pmode, tmp,
/* No CMOV: branch over the two-byte advance instead.  */
7973       rtx end_2_label = gen_label_rtx ();
7974       /* Is zero in the first two bytes? */
7976       emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
7977       tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
7978       tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
7979       tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
7980 				  gen_rtx_LABEL_REF (VOIDmode, end_2_label),
7982       tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
7983       JUMP_LABEL (tmp) = end_2_label;
7985       /* Not in the first two.  Move two bytes forward.  */
7986       emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
7988 	emit_insn (gen_adddi3 (out, out, GEN_INT (2)));
7990 	emit_insn (gen_addsi3 (out, out, GEN_INT (2)));
7992       emit_label (end_2_label);
7996   /* Avoid branch in fixing the byte.  */
7997   tmpreg = gen_lowpart (QImode, tmpreg);
/* add sets carry from bit 7; sbb-style subtract picks 2 or 3 off OUT
   so it points exactly at the zero byte.  */
7998   emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
8000     emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3)));
8002     emit_insn (gen_subsi3_carry (out, out, GEN_INT (3)));
8004   emit_label (end_0_label);
8007 /* Clear stack slot assignments remembered from previous functions.
8008 This is called from INIT_EXPANDERS once before RTL is emitted for each
/* INIT_EXPANDERS hook: allocate zeroed machine-specific data for
   function P before its RTL is emitted.  */
8012 ix86_init_machine_status (p)
8015   p->machine = (struct machine_function *)
8016     xcalloc (1, sizeof (struct machine_function));
8019 /* Mark machine specific bits of P for GC. */
/* GC hook: mark the per-function stack-local slots of P so the garbage
   collector keeps them alive.  Walks every machine mode times every
   MAX_386_STACK_LOCALS slot.  */
8021 ix86_mark_machine_status (p)
8024   struct machine_function *machine = p->machine;
8025   enum machine_mode mode;
8031   for (mode = VOIDmode; (int) mode < (int) MAX_MACHINE_MODE;
8032        mode = (enum machine_mode) ((int) mode + 1))
8033     for (n = 0; n < MAX_386_STACK_LOCALS; n++)
8034       ggc_mark_rtx (machine->stack_locals[(int) mode][n]);
8038 ix86_free_machine_status (p)
8045 /* Return a MEM corresponding to a stack slot with mode MODE.
8046 Allocate a new slot if necessary.
8048 The RTL for a function can have several slots available: N is
8049 which slot to use. */
assign_386_stack_local (mode, n)
     enum machine_mode mode;
  /* Reject slot numbers outside the per-function cache.  */
  if (n < 0 || n >= MAX_386_STACK_LOCALS)
  /* Create the slot lazily on first request; the MEM is cached per
     (mode, N) so repeated requests share one stack location.  */
  if (ix86_stack_locals[(int) mode][n] == NULL_RTX)
    ix86_stack_locals[(int) mode][n]
      = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
  return ix86_stack_locals[(int) mode][n];
8066 /* Calculate the length of the memory address in the instruction
8067 encoding. Does not include the one-byte modrm, opcode, or prefix. */
memory_address_length (addr)
  struct ix86_address parts;
  rtx base, index, disp;
  /* Autoincrement forms encode no explicit address bytes.  */
  if (GET_CODE (addr) == PRE_DEC
      || GET_CODE (addr) == POST_INC
      || GET_CODE (addr) == PRE_MODIFY
      || GET_CODE (addr) == POST_MODIFY)
  if (! ix86_decompose_address (addr, &parts))
  index = parts.index;
  /* Register Indirect.  */
  if (base && !index && !disp)
      /* Special cases: ebp and esp need the two-byte modrm form.  */
      if (addr == stack_pointer_rtx
	  || addr == arg_pointer_rtx
	  || addr == frame_pointer_rtx
	  || addr == hard_frame_pointer_rtx)
  /* Direct Addressing.  */
  else if (disp && !base && !index)
      /* Find the length of the displacement constant.  */
      /* Constraint letter 'K' matches a signed 8-bit immediate, i.e. a
	 disp8 — NOTE(review): confirm against this port's CONST_OK_FOR_LETTER_P.  */
      if (GET_CODE (disp) == CONST_INT
	  && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K'))
  /* An index requires the two-byte modrm form.  */
8126 /* Compute default value for "length_immediate" attribute. When SHORTFORM is set
8127 expect that insn have 8bit immediate alternative. */
ix86_attr_length_immediate_default (insn, shortform)
  extract_insn_cached (insn);
  /* Scan operands from last to first for the immediate.  */
  for (i = recog_data.n_operands - 1; i >= 0; --i)
    if (CONSTANT_P (recog_data.operand[i]))
	/* With SHORTFORM, a 'K' (signed 8-bit) constant uses the one-byte
	   immediate encoding.  */
	&& GET_CODE (recog_data.operand[i]) == CONST_INT
	&& CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
  /* Otherwise the immediate width follows the insn's mode attribute.  */
  switch (get_attr_mode (insn))
      fatal_insn ("Unknown insn mode", insn);
8165 /* Compute default value for "length_address" attribute. */
ix86_attr_length_address_default (insn)
  extract_insn_cached (insn);
  /* The first MEM operand found determines the address length; at most
     one memory operand exists per x86 insn.  */
  for (i = recog_data.n_operands - 1; i >= 0; --i)
    if (GET_CODE (recog_data.operand[i]) == MEM)
	return memory_address_length (XEXP (recog_data.operand[i], 0));
8181 /* Return the maximum number of instructions a cpu can issue. */
    /* Interior of ix86_issue_rate's switch; the per-case return values
       are elided from this excerpt.  Pentium dual-issues; PPro/P4/Athlon
       are modeled wider — TODO confirm the returned constants.  */
    case PROCESSOR_PENTIUM:
    case PROCESSOR_PENTIUMPRO:
    case PROCESSOR_PENTIUM4:
    case PROCESSOR_ATHLON:
8202 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
8203 by DEP_INSN and nothing set by DEP_INSN. */
8206 ix86_flags_dependant (insn, dep_insn, insn_type)
8208 enum attr_type insn_type;
8212 /* Simplify the test for uninteresting insns. */
8213 if (insn_type != TYPE_SETCC
8214 && insn_type != TYPE_ICMOV
8215 && insn_type != TYPE_FCMOV
8216 && insn_type != TYPE_IBR)
8219 if ((set = single_set (dep_insn)) != 0)
8221 set = SET_DEST (set);
8224 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
8225 && XVECLEN (PATTERN (dep_insn), 0) == 2
8226 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
8227 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
8229 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
8230 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
8235 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
8238 /* This test is true if the dependant insn reads the flags but
8239 not any other potentially set register. */
8240 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
8243 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
8249 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
8250 address with operands set by DEP_INSN. */
ix86_agi_dependant (insn, dep_insn, insn_type)
     enum attr_type insn_type;
  /* For LEA the "memory" operand is really an address computation in
     the pattern's SET_SRC, not a MEM operand.  */
  if (insn_type == TYPE_LEA)
      addr = PATTERN (insn);
      if (GET_CODE (addr) == SET)
      else if (GET_CODE (addr) == PARALLEL
	       && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
	addr = XVECEXP (addr, 0, 0);
      addr = SET_SRC (addr);
  /* For everything else, look for an actual MEM operand.  */
  extract_insn_cached (insn);
  for (i = recog_data.n_operands - 1; i >= 0; --i)
    if (GET_CODE (recog_data.operand[i]) == MEM)
	addr = XEXP (recog_data.operand[i], 0);
  /* Interlock exists iff DEP_INSN writes something the address reads.  */
  return modified_in_p (addr, dep_insn);
8289 ix86_adjust_cost (insn, link, dep_insn, cost)
8290 rtx insn, link, dep_insn;
8293 enum attr_type insn_type, dep_insn_type;
8294 enum attr_memory memory;
8296 int dep_insn_code_number;
8298 /* Anti and output depenancies have zero cost on all CPUs. */
8299 if (REG_NOTE_KIND (link) != 0)
8302 dep_insn_code_number = recog_memoized (dep_insn);
8304 /* If we can't recognize the insns, we can't really do anything. */
8305 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
8308 insn_type = get_attr_type (insn);
8309 dep_insn_type = get_attr_type (dep_insn);
8311 /* Prologue and epilogue allocators can have a false dependency on ebp.
8312 This results in one cycle extra stall on Pentium prologue scheduling,
8313 so handle this important case manually. */
8314 if (dep_insn_code_number == CODE_FOR_pro_epilogue_adjust_stack
8315 && dep_insn_type == TYPE_ALU
8316 && !reg_mentioned_p (stack_pointer_rtx, insn))
8321 case PROCESSOR_PENTIUM:
8322 /* Address Generation Interlock adds a cycle of latency. */
8323 if (ix86_agi_dependant (insn, dep_insn, insn_type))
8326 /* ??? Compares pair with jump/setcc. */
8327 if (ix86_flags_dependant (insn, dep_insn, insn_type))
8330 /* Floating point stores require value to be ready one cycle ealier. */
8331 if (insn_type == TYPE_FMOV
8332 && get_attr_memory (insn) == MEMORY_STORE
8333 && !ix86_agi_dependant (insn, dep_insn, insn_type))
8337 case PROCESSOR_PENTIUMPRO:
8338 /* Since we can't represent delayed latencies of load+operation,
8339 increase the cost here for non-imov insns. */
8340 if (dep_insn_type != TYPE_IMOV
8341 && dep_insn_type != TYPE_FMOV
8342 && ((memory = get_attr_memory (dep_insn) == MEMORY_LOAD)
8343 || memory == MEMORY_BOTH))
8346 /* INT->FP conversion is expensive. */
8347 if (get_attr_fp_int_src (dep_insn))
8350 /* There is one cycle extra latency between an FP op and a store. */
8351 if (insn_type == TYPE_FMOV
8352 && (set = single_set (dep_insn)) != NULL_RTX
8353 && (set2 = single_set (insn)) != NULL_RTX
8354 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
8355 && GET_CODE (SET_DEST (set2)) == MEM)
8360 /* The esp dependency is resolved before the instruction is really
8362 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
8363 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
8366 /* Since we can't represent delayed latencies of load+operation,
8367 increase the cost here for non-imov insns. */
8368 if ((memory = get_attr_memory (dep_insn) == MEMORY_LOAD)
8369 || memory == MEMORY_BOTH)
8370 cost += (dep_insn_type != TYPE_IMOV) ? 2 : 1;
8372 /* INT->FP conversion is expensive. */
8373 if (get_attr_fp_int_src (dep_insn))
8377 case PROCESSOR_ATHLON:
8378 if ((memory = get_attr_memory (dep_insn)) == MEMORY_LOAD
8379 || memory == MEMORY_BOTH)
8381 if (dep_insn_type == TYPE_IMOV || dep_insn_type == TYPE_FMOV)
/* Scheduling bookkeeping for the PPro decoder model: which insns occupy
   the decode slots this cycle and how many issued.  */
struct ppro_sched_data
  int issued_this_cycle;
/* Length of INSN in bytes, with a safe fallback when INSN is not
   recognizable.  */
ix86_safe_length (insn)
  if (recog_memoized (insn) >= 0)
    return get_attr_length(insn);
/* Prefix-byte allowance for INSN when testing Pentium pairability.
   NOTE(review): this returns the FULL insn length (identical to
   ix86_safe_length), which makes the callers' test
   "length <= 7 + ix86_safe_length_prefix (...)" vacuously true.
   Presumably it should return only the prefix bytes; no prefix-length
   attribute is visible here, so the intent cannot be confirmed.  */
ix86_safe_length_prefix (insn)
  if (recog_memoized (insn) >= 0)
    return get_attr_length(insn);
8423 static enum attr_memory
/* Memory attribute of INSN, or MEMORY_UNKNOWN when unrecognizable.  */
ix86_safe_memory (insn)
  if (recog_memoized (insn) >= 0)
    return get_attr_memory(insn);
  return MEMORY_UNKNOWN;
/* Pentium pairability class of INSN, or PENT_PAIR_NP (non-pairable)
   when INSN is unrecognizable.  */
static enum attr_pent_pair
ix86_safe_pent_pair (insn)
  if (recog_memoized (insn) >= 0)
    return get_attr_pent_pair(insn);
  return PENT_PAIR_NP;
/* PPro uop class of INSN; unrecognizable insns conservatively count
   as "many uops" (decoder 0 only).  */
static enum attr_ppro_uops
ix86_safe_ppro_uops (insn)
  if (recog_memoized (insn) >= 0)
    return get_attr_ppro_uops (insn);
  return PPRO_UOPS_MANY;
/* Write the current PPro decode packet (up to three insn UIDs) to the
   scheduling DUMP file.  Nothing is printed when slot 0 is empty.  */
ix86_dump_ppro_packet (dump)
  if (ix86_sched_data.ppro.decode[0])
      fprintf (dump, "PPRO packet: %d",
	       INSN_UID (ix86_sched_data.ppro.decode[0]));
      if (ix86_sched_data.ppro.decode[1])
	fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[1]));
      if (ix86_sched_data.ppro.decode[2])
	fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[2]));
/* We're beginning a new block.  Initialize data structures as necessary.  */
ix86_sched_init (dump, sched_verbose)
     FILE *dump ATTRIBUTE_UNUSED;
     int sched_verbose ATTRIBUTE_UNUSED;
  /* Reset all per-block scheduling state (decode slots, issue counts).  */
  memset (&ix86_sched_data, 0, sizeof (ix86_sched_data));
/* Shift INSN to SLOT, and shift everything else down.  */
ix86_reorder_insn (insnp, slot)
    /* Slide each ready-list entry down one position until SLOT is
       reached; the displaced insn is re-placed at SLOT by elided code.  */
    insnp[0] = insnp[1];
  while (++insnp != slot);
8495 /* Find an instruction with given pairability and minimal amount of cycles
8496 lost by the fact that the CPU waits for both pipelines to finish before
8497 reading next instructions. Also take care that both instructions together
8498 can not exceed 7 bytes. */
ix86_pent_find_pair (e_ready, ready, type, first)
     enum attr_pent_pair type;
  int mincycles, cycles;
  enum attr_pent_pair tmp;
  enum attr_memory memory;
  rtx *insnp, *bestinsnp = NULL;

  /* FIRST itself must fit in 7 bytes plus prefixes, or it cannot pair.  */
  if (ix86_safe_length (first) > 7 + ix86_safe_length_prefix (first))

  memory = ix86_safe_memory (first);
  cycles = result_ready_cost (first);
  mincycles = INT_MAX;

  /* Scan the ready list for a TYPE-pairable partner minimizing the
     cycle imbalance between the two pipes.  */
  for (insnp = e_ready; insnp >= ready && mincycles; --insnp)
    if ((tmp = ix86_safe_pent_pair (*insnp)) == type
	&& ix86_safe_length (*insnp) <= 7 + ix86_safe_length_prefix (*insnp))
	enum attr_memory second_memory;
	int secondcycles, currentcycles;

	second_memory = ix86_safe_memory (*insnp);
	secondcycles = result_ready_cost (*insnp);
	currentcycles = abs (cycles - secondcycles);

	if (secondcycles >= 1 && cycles >= 1)
	    /* Two read/modify/write instructions together takes two
	       cycles longer.  */
	    if (memory == MEMORY_BOTH && second_memory == MEMORY_BOTH)

	    /* Read modify/write instruction followed by read/modify
	       takes one cycle longer.  */
	    if (memory == MEMORY_BOTH && second_memory == MEMORY_LOAD
		&& tmp != PENT_PAIR_UV
		&& ix86_safe_pent_pair (first) != PENT_PAIR_UV)

	if (currentcycles < mincycles)
	  bestinsnp = insnp, mincycles = currentcycles;
/* Subroutines of ix86_sched_reorder.  */

/* Reorder the ready list so the Pentium's U/V pairing rules are
   respected; E_READY is the head (issued first), READY the tail.  */
ix86_sched_reorder_pentium (ready, e_ready)
  enum attr_pent_pair pair1, pair2;

  /* This wouldn't be necessary if Haifa knew that static insn ordering
     is important to which pipe an insn is issued to.  So we have to make
     some minor rearrangements.  */
  pair1 = ix86_safe_pent_pair (*e_ready);

  /* If the first insn is non-pairable, let it be.  */
  if (pair1 == PENT_PAIR_NP)

  pair2 = PENT_PAIR_NP;

  /* If the first insn is UV or PV pairable, search for a PU
     partner.  */
  if (pair1 == PENT_PAIR_UV || pair1 == PENT_PAIR_PV)
      insnp = ix86_pent_find_pair (e_ready-1, ready,
				   PENT_PAIR_PU, *e_ready);
	pair2 = PENT_PAIR_PU;

  /* If the first insn is PU or UV pairable, search for a PV
     partner.  */
  if (pair2 == PENT_PAIR_NP
      && (pair1 == PENT_PAIR_PU || pair1 == PENT_PAIR_UV))
      insnp = ix86_pent_find_pair (e_ready-1, ready,
				   PENT_PAIR_PV, *e_ready);
	pair2 = PENT_PAIR_PV;

  /* If the first insn is pairable, search for a UV
     partner.  */
  if (pair2 == PENT_PAIR_NP)
      insnp = ix86_pent_find_pair (e_ready-1, ready,
				   PENT_PAIR_UV, *e_ready);
	pair2 = PENT_PAIR_UV;

  if (pair2 == PENT_PAIR_NP)

  /* Found something!  Decide if we need to swap the order.  */
  if (pair1 == PENT_PAIR_PV || pair2 == PENT_PAIR_PU
      || (pair1 == PENT_PAIR_UV && pair2 == PENT_PAIR_UV
	  && ix86_safe_memory (*e_ready) == MEMORY_BOTH
	  && ix86_safe_memory (*insnp) == MEMORY_LOAD))
    ix86_reorder_insn (insnp, e_ready);
    ix86_reorder_insn (insnp, e_ready - 1);
/* Reorder the ready list to keep the PPro's 4-1-1 decoders busy:
   one complex insn plus up to two single-uop insns per cycle.  */
ix86_sched_reorder_ppro (ready, e_ready)
  enum attr_ppro_uops cur_uops;
  int issued_this_cycle;

  /* At this point .ppro.decode contains the state of the three
     decoders from last "cycle".  That is, those insns that were
     actually independent.  But here we're scheduling for the
     decoder, and we may find things that are decodable in the
     same cycle.  */
  memcpy (decode, ix86_sched_data.ppro.decode, sizeof (decode));
  issued_this_cycle = 0;

  cur_uops = ix86_safe_ppro_uops (*insnp);

  /* If the decoders are empty, and we've a complex insn at the
     head of the priority queue, let it issue without complaint.  */
  if (decode[0] == NULL)
      if (cur_uops == PPRO_UOPS_MANY)

      /* Otherwise, search for a 2-4 uop unsn to issue.  */
      while (cur_uops != PPRO_UOPS_FEW)
	  cur_uops = ix86_safe_ppro_uops (*--insnp);

      /* If so, move it to the head of the line.  */
      if (cur_uops == PPRO_UOPS_FEW)
	ix86_reorder_insn (insnp, e_ready);

      /* Issue the head of the queue.  */
      issued_this_cycle = 1;
      decode[0] = *e_ready--;

  /* Look for simple insns to fill in the other two slots.  */
  for (i = 1; i < 3; ++i)
    if (decode[i] == NULL)
	if (ready >= e_ready)

	cur_uops = ix86_safe_ppro_uops (*insnp);
	while (cur_uops != PPRO_UOPS_ONE)
	    cur_uops = ix86_safe_ppro_uops (*--insnp);

	/* Found one.  Move it to the head of the queue and issue it.  */
	if (cur_uops == PPRO_UOPS_ONE)
	    ix86_reorder_insn (insnp, e_ready);
	    decode[i] = *e_ready--;
	    issued_this_cycle++;

	/* ??? Didn't find one.  Ideally, here we would do a lazy split
	   of 2-uop insns, issue one and queue the other.  */

  /* Report at least one issue so the scheduler makes progress.  */
  if (issued_this_cycle == 0)
    issued_this_cycle = 1;
  ix86_sched_data.ppro.issued_this_cycle = issued_this_cycle;
/* We are about to being issuing insns for this clock cycle.
   Override the default sort algorithm to better slot instructions.  */
ix86_sched_reorder (dump, sched_verbose, ready, n_ready, clock_var)
     FILE *dump ATTRIBUTE_UNUSED;
     int sched_verbose ATTRIBUTE_UNUSED;
     int clock_var ATTRIBUTE_UNUSED;
  /* E_READY points at the highest-priority (last) ready-list entry.  */
  rtx *e_ready = ready + n_ready - 1;

    /* Dispatch on CPU model; other CPUs use the default order.  */
    case PROCESSOR_PENTIUM:
      ix86_sched_reorder_pentium (ready, e_ready);

    case PROCESSOR_PENTIUMPRO:
      ix86_sched_reorder_ppro (ready, e_ready);

  return ix86_issue_rate ();
/* We are about to issue INSN.  Return the number of insns left on the
   ready queue that can be issued this cycle.  */
ix86_variable_issue (dump, sched_verbose, insn, can_issue_more)
      return can_issue_more - 1;

    case PROCESSOR_PENTIUMPRO:
	enum attr_ppro_uops uops = ix86_safe_ppro_uops (insn);

	/* A many-uop insn monopolizes decoder 0: flush the packet and
	   start a new one containing only INSN.  */
	if (uops == PPRO_UOPS_MANY)
	    ix86_dump_ppro_packet (dump);
	    ix86_sched_data.ppro.decode[0] = insn;
	    ix86_sched_data.ppro.decode[1] = NULL;
	    ix86_sched_data.ppro.decode[2] = NULL;
	    ix86_dump_ppro_packet (dump);
	    ix86_sched_data.ppro.decode[0] = NULL;
	/* A 2-4 uop insn also claims decoder 0, but leaves 1 and 2 free.  */
	else if (uops == PPRO_UOPS_FEW)
	    ix86_dump_ppro_packet (dump);
	    ix86_sched_data.ppro.decode[0] = insn;
	    ix86_sched_data.ppro.decode[1] = NULL;
	    ix86_sched_data.ppro.decode[2] = NULL;
	    /* Single-uop insn: drop it into the first free slot; when the
	       packet fills, dump and reset it.  */
	    for (i = 0; i < 3; ++i)
	      if (ix86_sched_data.ppro.decode[i] == NULL)
		  ix86_sched_data.ppro.decode[i] = insn;
		    ix86_dump_ppro_packet (dump);
		    ix86_sched_data.ppro.decode[0] = NULL;
		    ix86_sched_data.ppro.decode[1] = NULL;
		    ix86_sched_data.ppro.decode[2] = NULL;
      return --ix86_sched_data.ppro.issued_this_cycle;
/* Walk through INSNS and look for MEM references whose address is DSTREG or
   SRCREG and set the memory attribute to those of DSTREF and SRCREF, as
   appropriate, by recursing on each insn's pattern.  */
ix86_set_move_mem_attrs (insns, dstref, srcref, dstreg, srcreg)
     rtx dstref, srcref, dstreg, srcreg;
  for (insn = insns; insn != 0 ; insn = NEXT_INSN (insn))
      ix86_set_move_mem_attrs_1 (PATTERN (insn), dstref, srcref,
/* Subroutine of above to actually do the updating by recursively walking
   the rtx X.  */
ix86_set_move_mem_attrs_1 (x, dstref, srcref, dstreg, srcreg)
     rtx dstref, srcref, dstreg, srcreg;
  enum rtx_code code = GET_CODE (x);
  const char *format_ptr = GET_RTX_FORMAT (code);

  /* A MEM addressed directly by DSTREG/SRCREG inherits the attributes
     of the corresponding reference MEM.  */
  if (code == MEM && XEXP (x, 0) == dstreg)
    MEM_COPY_ATTRIBUTES (x, dstref);
  else if (code == MEM && XEXP (x, 0) == srcreg)
    MEM_COPY_ATTRIBUTES (x, srcref);

  /* Recurse into every rtx ('e') and rtx-vector ('E') operand.  */
  for (i = 0; i < GET_RTX_LENGTH (code); i++, format_ptr++)
      if (*format_ptr == 'e')
	ix86_set_move_mem_attrs_1 (XEXP (x, i), dstref, srcref,
      else if (*format_ptr == 'E')
	for (j = XVECLEN (x, i) - 1; j >= 0; j--)
	  ix86_set_move_mem_attrs_1 (XVECEXP (x, i, j), dstref, srcref,
8845 /* Compute the alignment given to a constant that is being placed in memory.
8846 EXP is the constant and ALIGN is the alignment that the object would
8848 The value of this function is used instead of that alignment to align
ix86_constant_alignment (exp, align)
  /* Doubles want 64-bit alignment; 128-bit modes want 16 bytes.  */
  if (TREE_CODE (exp) == REAL_CST)
      if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
      else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
  /* Long string constants benefit from wider alignment too.  */
  else if (TREE_CODE (exp) == STRING_CST && TREE_STRING_LENGTH (exp) >= 31
/* Compute the alignment for a static variable.
   TYPE is the data type, and ALIGN is the alignment that
   the object would ordinarily have.  The value of this function is used
   instead of that alignment to align the object.  */
ix86_data_alignment (type, align)
  /* Large aggregates (>= 32 bytes) get 256-bit alignment.  */
  if (AGGREGATE_TYPE_P (type)
      && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
      && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
	  || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)

  /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
     to 16byte boundary.  */
    if (AGGREGATE_TYPE_P (type)
	&& TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
	&& (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
	    || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)

  /* Array: align by element mode.  */
  if (TREE_CODE (type) == ARRAY_TYPE)
      if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
      if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
  /* Complex: DCmode wants 64, XCmode wants 128.  */
  else if (TREE_CODE (type) == COMPLEX_TYPE)
      if (TYPE_MODE (type) == DCmode && align < 64)
      if (TYPE_MODE (type) == XCmode && align < 128)
  /* Record/union: judge by the mode of the first field.  */
  else if ((TREE_CODE (type) == RECORD_TYPE
	    || TREE_CODE (type) == UNION_TYPE
	    || TREE_CODE (type) == QUAL_UNION_TYPE)
	   && TYPE_FIELDS (type))
      if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
      if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
  /* Scalars: align by the type's own mode.  */
  else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
	   || TREE_CODE (type) == INTEGER_TYPE)
      if (TYPE_MODE (type) == DFmode && align < 64)
      if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
/* Compute the alignment for a local variable.
   TYPE is the data type, and ALIGN is the alignment that
   the object would ordinarily have.  The value of this macro is used
   instead of that alignment to align the object.  */
ix86_local_alignment (type, align)
  /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
     to 16byte boundary.  */
    if (AGGREGATE_TYPE_P (type)
	&& TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
	&& (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
	    || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)

  /* Same mode-driven rules as ix86_data_alignment below this point.  */
  if (TREE_CODE (type) == ARRAY_TYPE)
      if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
      if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
  else if (TREE_CODE (type) == COMPLEX_TYPE)
      if (TYPE_MODE (type) == DCmode && align < 64)
      if (TYPE_MODE (type) == XCmode && align < 128)
  else if ((TREE_CODE (type) == RECORD_TYPE
	    || TREE_CODE (type) == UNION_TYPE
	    || TREE_CODE (type) == QUAL_UNION_TYPE)
	   && TYPE_FIELDS (type))
      if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
      if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
  else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
	   || TREE_CODE (type) == INTEGER_TYPE)
      if (TYPE_MODE (type) == DFmode && align < 64)
      if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
8993 /* Emit RTL insns to initialize the variable parts of a trampoline.
8994 FNADDR is an RTX for the address of the function's pure code.
8995 CXT is an RTX for the static chain value for the function. */
8997 x86_initialize_trampoline (tramp, fnaddr, cxt)
8998 rtx tramp, fnaddr, cxt;
9002 /* Compute offset from the end of the jmp to the target function. */
9003 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
9004 plus_constant (tramp, 10),
9005 NULL_RTX, 1, OPTAB_DIRECT);
9006 emit_move_insn (gen_rtx_MEM (QImode, tramp),
9007 GEN_INT (trunc_int_for_mode (0xb9, QImode)));
9008 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
9009 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
9010 GEN_INT (trunc_int_for_mode (0xe9, QImode)));
9011 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
9016 /* Try to load address using shorter movl instead of movabs.
9017 We may want to support movq for kernel mode, but kernel does not use
9018 trampolines at the moment. */
9019 if (x86_64_zero_extended_value (fnaddr))
9021 fnaddr = copy_to_mode_reg (DImode, fnaddr);
9022 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
9023 GEN_INT (trunc_int_for_mode (0xbb41, HImode)));
9024 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
9025 gen_lowpart (SImode, fnaddr));
9030 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
9031 GEN_INT (trunc_int_for_mode (0xbb49, HImode)));
9032 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
9036 /* Load static chain using movabs to r10. */
9037 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
9038 GEN_INT (trunc_int_for_mode (0xba49, HImode)));
9039 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
9042 /* Jump to the r11 */
9043 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
9044 GEN_INT (trunc_int_for_mode (0xff49, HImode)));
9045 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
9046 GEN_INT (trunc_int_for_mode (0xe3, HImode)));
9048 if (offset > TRAMPOLINE_SIZE)
9053 #define def_builtin(NAME, TYPE, CODE) \
9054 builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, NULL)
/* One table row describing a target builtin: the insn pattern to emit,
   the builtin's enum code, and (for compare builtins) the rtx comparison
   code to apply.  */
struct builtin_description
  enum insn_code icode;
  enum ix86_builtins code;
  enum rtx_code comparison;
/* SSE scalar compare builtins (comiss/ucomiss).  The final field flags
   operand swapping: GT/GE are implemented as LT/LE with args reversed.  */
static struct builtin_description bdesc_comi[] =
  { CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, EQ, 0 },
  { CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, LT, 0 },
  { CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, LE, 0 },
  { CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, LT, 1 },
  { CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, LE, 1 },
  { CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, NE, 0 },
  { CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, EQ, 0 },
  { CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, LT, 0 },
  { CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, LE, 0 },
  { CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, LT, 1 },
  { CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, LE, 1 },
  { CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, NE, 0 }
/* Two-operand SSE/MMX builtins.  Rows with a zero name are registered
   separately; compare rows carry the rtx code and swap flag.  */
static struct builtin_description bdesc_2arg[] =
  /* SSE packed/scalar arithmetic.  */
  { CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
  { CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
  { CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
  { CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
  { CODE_FOR_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
  { CODE_FOR_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
  { CODE_FOR_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
  { CODE_FOR_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
  /* SSE comparisons; mask vs masknot patterns, GT/GE via swapped LT/LE.  */
  { CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
  { CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
  { CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
  { CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 },
  { CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 },
  { CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
  { CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 },
  { CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 },
  { CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 },
  { CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 },
  { CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 },
  { CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 },
  { CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
  { CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
  { CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
  { CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpgtss", IX86_BUILTIN_CMPGTSS, LT, 1 },
  { CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpgess", IX86_BUILTIN_CMPGESS, LE, 1 },
  { CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
  { CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 },
  { CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 },
  { CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 },
  { CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, LT, 1 },
  { CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, LE, 1 },
  { CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
  /* SSE min/max.  */
  { CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
  { CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
  { CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
  { CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
  /* SSE logicals (operate on the TImode view of the vector).  */
  { CODE_FOR_sse_andti3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
  { CODE_FOR_sse_nandti3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
  { CODE_FOR_sse_iorti3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
  { CODE_FOR_sse_xorti3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
  /* SSE element movement / shuffles.  */
  { CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
  { CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
  { CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
  { CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
  { CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
  /* MMX integer arithmetic.  */
  { CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
  { CODE_FOR_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
  { CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
  { CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
  { CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
  { CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
  /* MMX saturating arithmetic.  */
  { CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
  { CODE_FOR_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
  { CODE_FOR_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
  { CODE_FOR_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
  { CODE_FOR_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
  { CODE_FOR_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
  { CODE_FOR_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
  { CODE_FOR_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
  /* MMX multiplies.  */
  { CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
  { CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
  { CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
  /* MMX logicals.  */
  { CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
  { CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
  { CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
  { CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
  /* MMX averages and compares.  */
  { CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
  { CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
  { CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
  { CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
  { CODE_FOR_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
  { CODE_FOR_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
  { CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
  { CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
  /* MMX min/max.  */
  { CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
  { CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
  { CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
  { CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
  /* MMX unpacks.  */
  { CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
  { CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
  { CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
  { CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
  { CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
  { CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
  /* Packs, conversions and shifts (registered without names here).  */
  { CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
  { CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
  { CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
  { CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
  { CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
  { CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
  { CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
  { CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
  { CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
  { CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
  { CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
  { CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
  { CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
  { CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
  { CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
  { CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
  { CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
  { CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
  { CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
  { CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
  { CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
  { CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
  { CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 }
/* Builtins that take a single vector argument.  Each entry supplies the
   insn code, the builtin name (0 here -- these names are registered by
   hand in ix86_init_builtins), the IX86_BUILTIN_* code, and two trailing
   zeros (their field meaning is not visible in this extract).
   NOTE(review): the initializer's opening "{" and closing "};" are among
   the original lines missing from this extract.  */
9213 static struct builtin_description bdesc_1arg[] =
/* Move-mask extractions (vector -> int bitmask).  */
9215 { CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
9216 { CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
/* Packed-single arithmetic unops.  */
9218 { CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
9219 { CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
9220 { CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
/* SSE <-> MMX/int conversions (truncating and rounding forms).  */
9222 { CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
9223 { CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
9224 { CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
9225 { CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 }
9229 /* Expand all the target specific builtins.  This is not called if TARGET_MMX
9230    is zero.  Otherwise, if TARGET_SSE is not set, only expand the MMX
/* NOTE(review): the tail of the comment above, the function's return type,
   and its opening brace are among the original lines missing from this
   extract.  The function builds the tree FUNCTION_TYPEs for every MMX/SSE
   builtin and registers them with def_builtin; argument lists are built
   inside-out with tree_cons, terminated by "endlink" (void_list_node).  */
9233 ix86_init_builtins ()
9235 struct builtin_description * d;
9237 tree endlink = void_list_node;
/* Pointer types used by the load/store/prefetch builtins.  */
9239 tree pchar_type_node = build_pointer_type (char_type_node);
9240 tree pfloat_type_node = build_pointer_type (float_type_node);
9241 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
9242 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
/* Comparisons.  */
9245 tree int_ftype_v4sf_v4sf
9246 = build_function_type (integer_type_node,
9247 tree_cons (NULL_TREE, V4SF_type_node,
9248 tree_cons (NULL_TREE,
9251 tree v4si_ftype_v4sf_v4sf
9252 = build_function_type (V4SI_type_node,
9253 tree_cons (NULL_TREE, V4SF_type_node,
9254 tree_cons (NULL_TREE,
9257 /* MMX/SSE/integer conversions.  */
9258 tree int_ftype_v4sf_int
9259 = build_function_type (integer_type_node,
9260 tree_cons (NULL_TREE, V4SF_type_node,
9261 tree_cons (NULL_TREE,
9265 = build_function_type (integer_type_node,
9266 tree_cons (NULL_TREE, V4SF_type_node,
9269 = build_function_type (integer_type_node,
9270 tree_cons (NULL_TREE, V8QI_type_node,
9273 = build_function_type (integer_type_node,
9274 tree_cons (NULL_TREE, V2SI_type_node,
9277 = build_function_type (V2SI_type_node,
9278 tree_cons (NULL_TREE, integer_type_node,
9280 tree v4sf_ftype_v4sf_int
/* FIX: was integer_type_node.  This type describes __builtin_ia32_cvtsi2ss
   (registered below), whose insn CODE_FOR_cvtsi2ss produces a V4SF vector;
   the variable's name also says the return type is v4sf.  */
9281 = build_function_type (V4SF_type_node,
9282 tree_cons (NULL_TREE, V4SF_type_node,
9283 tree_cons (NULL_TREE, integer_type_node,
9285 tree v4sf_ftype_v4sf_v2si
9286 = build_function_type (V4SF_type_node,
9287 tree_cons (NULL_TREE, V4SF_type_node,
9288 tree_cons (NULL_TREE, V2SI_type_node,
9290 tree int_ftype_v4hi_int
9291 = build_function_type (integer_type_node,
9292 tree_cons (NULL_TREE, V4HI_type_node,
9293 tree_cons (NULL_TREE, integer_type_node,
9295 tree v4hi_ftype_v4hi_int_int
9296 = build_function_type (V4HI_type_node,
9297 tree_cons (NULL_TREE, V4HI_type_node,
9298 tree_cons (NULL_TREE, integer_type_node,
9299 tree_cons (NULL_TREE,
9302 /* Miscellaneous.  */
9303 tree v8qi_ftype_v4hi_v4hi
9304 = build_function_type (V8QI_type_node,
9305 tree_cons (NULL_TREE, V4HI_type_node,
9306 tree_cons (NULL_TREE, V4HI_type_node,
9308 tree v4hi_ftype_v2si_v2si
9309 = build_function_type (V4HI_type_node,
9310 tree_cons (NULL_TREE, V2SI_type_node,
9311 tree_cons (NULL_TREE, V2SI_type_node,
9313 tree v4sf_ftype_v4sf_v4sf_int
9314 = build_function_type (V4SF_type_node,
9315 tree_cons (NULL_TREE, V4SF_type_node,
9316 tree_cons (NULL_TREE, V4SF_type_node,
9317 tree_cons (NULL_TREE,
9320 tree v4hi_ftype_v8qi_v8qi
9321 = build_function_type (V4HI_type_node,
9322 tree_cons (NULL_TREE, V8QI_type_node,
9323 tree_cons (NULL_TREE, V8QI_type_node,
9325 tree v2si_ftype_v4hi_v4hi
9326 = build_function_type (V2SI_type_node,
9327 tree_cons (NULL_TREE, V4HI_type_node,
9328 tree_cons (NULL_TREE, V4HI_type_node,
9330 tree v4hi_ftype_v4hi_int
9331 = build_function_type (V4HI_type_node,
9332 tree_cons (NULL_TREE, V4HI_type_node,
9333 tree_cons (NULL_TREE, integer_type_node,
9335 tree di_ftype_di_int
9336 = build_function_type (long_long_unsigned_type_node,
9337 tree_cons (NULL_TREE, long_long_unsigned_type_node,
9338 tree_cons (NULL_TREE, integer_type_node,
9340 tree v8qi_ftype_v8qi_di
9341 = build_function_type (V8QI_type_node,
9342 tree_cons (NULL_TREE, V8QI_type_node,
9343 tree_cons (NULL_TREE,
9344 long_long_integer_type_node,
9346 tree v4hi_ftype_v4hi_di
9347 = build_function_type (V4HI_type_node,
9348 tree_cons (NULL_TREE, V4HI_type_node,
9349 tree_cons (NULL_TREE,
9350 long_long_integer_type_node,
9352 tree v2si_ftype_v2si_di
9353 = build_function_type (V2SI_type_node,
9354 tree_cons (NULL_TREE, V2SI_type_node,
9355 tree_cons (NULL_TREE,
9356 long_long_integer_type_node,
9358 tree void_ftype_void
9359 = build_function_type (void_type_node, endlink);
9360 tree void_ftype_pchar_int
9361 = build_function_type (void_type_node,
9362 tree_cons (NULL_TREE, pchar_type_node,
9363 tree_cons (NULL_TREE, integer_type_node,
9365 tree void_ftype_unsigned
9366 = build_function_type (void_type_node,
9367 tree_cons (NULL_TREE, unsigned_type_node,
9369 tree unsigned_ftype_void
9370 = build_function_type (unsigned_type_node, endlink);
9372 = build_function_type (long_long_unsigned_type_node, endlink);
9374 = build_function_type (intTI_type_node, endlink);
9375 tree v2si_ftype_v4sf
9376 = build_function_type (V2SI_type_node,
9377 tree_cons (NULL_TREE, V4SF_type_node,
/* maskmovq takes (data, mask, char *) -- see IX86_BUILTIN_MASKMOVQ.  */
9380 tree maskmovq_args = tree_cons (NULL_TREE, V8QI_type_node,
9381 tree_cons (NULL_TREE, V8QI_type_node,
9382 tree_cons (NULL_TREE,
9385 tree void_ftype_v8qi_v8qi_pchar
9386 = build_function_type (void_type_node, maskmovq_args);
9387 tree v4sf_ftype_pfloat
9388 = build_function_type (V4SF_type_node,
9389 tree_cons (NULL_TREE, pfloat_type_node,
9391 tree v4sf_ftype_float
9392 = build_function_type (V4SF_type_node,
9393 tree_cons (NULL_TREE, float_type_node,
9395 tree v4sf_ftype_float_float_float_float
9396 = build_function_type (V4SF_type_node,
9397 tree_cons (NULL_TREE, float_type_node,
9398 tree_cons (NULL_TREE, float_type_node,
9399 tree_cons (NULL_TREE,
9401 tree_cons (NULL_TREE,
9404 /* @@@ the type is bogus */
9405 tree v4sf_ftype_v4sf_pv2si
9406 = build_function_type (V4SF_type_node,
9407 tree_cons (NULL_TREE, V4SF_type_node,
9408 tree_cons (NULL_TREE, pv2si_type_node,
/* NOTE(review): same argument order as the _v4sf_pv2si type above despite
   the swapped name; the "@@@ the type is bogus" remark appears to cover
   both -- confirm against the movhps/movlps store expanders.  */
9410 tree v4sf_ftype_pv2si_v4sf
9411 = build_function_type (V4SF_type_node,
9412 tree_cons (NULL_TREE, V4SF_type_node,
9413 tree_cons (NULL_TREE, pv2si_type_node,
9415 tree void_ftype_pfloat_v4sf
9416 = build_function_type (void_type_node,
9417 tree_cons (NULL_TREE, pfloat_type_node,
9418 tree_cons (NULL_TREE, V4SF_type_node,
9420 tree void_ftype_pdi_di
9421 = build_function_type (void_type_node,
9422 tree_cons (NULL_TREE, pdi_type_node,
9423 tree_cons (NULL_TREE,
9424 long_long_unsigned_type_node,
9426 /* Normal vector unops.  */
9427 tree v4sf_ftype_v4sf
9428 = build_function_type (V4SF_type_node,
9429 tree_cons (NULL_TREE, V4SF_type_node,
9432 /* Normal vector binops.  */
9433 tree v4sf_ftype_v4sf_v4sf
9434 = build_function_type (V4SF_type_node,
9435 tree_cons (NULL_TREE, V4SF_type_node,
9436 tree_cons (NULL_TREE, V4SF_type_node,
9438 tree v8qi_ftype_v8qi_v8qi
9439 = build_function_type (V8QI_type_node,
9440 tree_cons (NULL_TREE, V8QI_type_node,
9441 tree_cons (NULL_TREE, V8QI_type_node,
9443 tree v4hi_ftype_v4hi_v4hi
9444 = build_function_type (V4HI_type_node,
9445 tree_cons (NULL_TREE, V4HI_type_node,
9446 tree_cons (NULL_TREE, V4HI_type_node,
9448 tree v2si_ftype_v2si_v2si
9449 = build_function_type (V2SI_type_node,
9450 tree_cons (NULL_TREE, V2SI_type_node,
9451 tree_cons (NULL_TREE, V2SI_type_node,
9454 = build_function_type (intTI_type_node,
9455 tree_cons (NULL_TREE, intTI_type_node,
9456 tree_cons (NULL_TREE, intTI_type_node,
9459 = build_function_type (long_long_unsigned_type_node,
9460 tree_cons (NULL_TREE, long_long_unsigned_type_node,
9461 tree_cons (NULL_TREE,
9462 long_long_unsigned_type_node,
9465 /* Add all builtins that are more or less simple operations on two
/* Register every entry of bdesc_2arg, picking the FUNCTION_TYPE from the
   insn's input-operand mode.  */
9467 for (i = 0, d = bdesc_2arg; i < sizeof (bdesc_2arg) / sizeof *d; i++, d++)
9469 /* Use one of the operands; the target can have a different mode for
9470 mask-generating compares.  */
9471 enum machine_mode mode;
9476 mode = insn_data[d->icode].operand[1].mode;
/* Without SSE only MMX-mode builtins are made available.  */
9478 if (! TARGET_SSE && ! VALID_MMX_REG_MODE (mode))
9484 type = v4sf_ftype_v4sf_v4sf;
9487 type = v8qi_ftype_v8qi_v8qi;
9490 type = v4hi_ftype_v4hi_v4hi;
9493 type = v2si_ftype_v2si_v2si;
9496 type = ti_ftype_ti_ti;
9499 type = di_ftype_di_di;
9506 /* Override for comparisons.  */
9507 if (d->icode == CODE_FOR_maskcmpv4sf3
9508 || d->icode == CODE_FOR_maskncmpv4sf3
9509 || d->icode == CODE_FOR_vmmaskcmpv4sf3
9510 || d->icode == CODE_FOR_vmmaskncmpv4sf3)
9511 type = v4si_ftype_v4sf_v4sf;
9513 def_builtin (d->name, type, d->code);
9516 /* Add the remaining MMX insns with somewhat more complicated types.  */
9517 def_builtin ("__builtin_ia32_m_from_int", v2si_ftype_int, IX86_BUILTIN_M_FROM_INT);
9518 def_builtin ("__builtin_ia32_m_to_int", int_ftype_v2si, IX86_BUILTIN_M_TO_INT);
9519 def_builtin ("__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO);
9520 def_builtin ("__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
9521 def_builtin ("__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
9522 def_builtin ("__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
9523 def_builtin ("__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
9524 def_builtin ("__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
9525 def_builtin ("__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
9527 def_builtin ("__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
9528 def_builtin ("__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
9529 def_builtin ("__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
9531 def_builtin ("__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
9532 def_builtin ("__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
9534 def_builtin ("__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
9535 def_builtin ("__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
9537 /* Everything beyond this point is SSE only.  */
9541 /* comi/ucomi insns.  */
9542 for (i = 0, d = bdesc_comi; i < sizeof (bdesc_comi) / sizeof *d; i++, d++)
9543 def_builtin (d->name, int_ftype_v4sf_v4sf, d->code);
9545 def_builtin ("__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
9546 def_builtin ("__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
9547 def_builtin ("__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
9549 def_builtin ("__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
9550 def_builtin ("__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
9551 def_builtin ("__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
9552 def_builtin ("__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
9553 def_builtin ("__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
9554 def_builtin ("__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
9556 def_builtin ("__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
9557 def_builtin ("__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);
9559 def_builtin ("__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
9561 def_builtin ("__builtin_ia32_loadaps", v4sf_ftype_pfloat, IX86_BUILTIN_LOADAPS);
9562 def_builtin ("__builtin_ia32_loadups", v4sf_ftype_pfloat, IX86_BUILTIN_LOADUPS);
9563 def_builtin ("__builtin_ia32_loadss", v4sf_ftype_pfloat, IX86_BUILTIN_LOADSS);
9564 def_builtin ("__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS);
9565 def_builtin ("__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
9566 def_builtin ("__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS);
9568 def_builtin ("__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
9569 def_builtin ("__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
9570 def_builtin ("__builtin_ia32_storehps", v4sf_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
9571 def_builtin ("__builtin_ia32_storelps", v4sf_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
9573 def_builtin ("__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
9574 def_builtin ("__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
9575 def_builtin ("__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
9576 def_builtin ("__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
9578 def_builtin ("__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
9579 def_builtin ("__builtin_ia32_prefetch", void_ftype_pchar_int, IX86_BUILTIN_PREFETCH);
9581 def_builtin ("__builtin_ia32_psadbw", v4hi_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
9583 def_builtin ("__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
9584 def_builtin ("__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
9585 def_builtin ("__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
9586 def_builtin ("__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
9587 def_builtin ("__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
9588 def_builtin ("__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
9590 def_builtin ("__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
9592 /* Composite intrinsics.  */
9593 def_builtin ("__builtin_ia32_setps1", v4sf_ftype_float, IX86_BUILTIN_SETPS1);
9594 def_builtin ("__builtin_ia32_setps", v4sf_ftype_float_float_float_float, IX86_BUILTIN_SETPS);
9595 def_builtin ("__builtin_ia32_setzerops", ti_ftype_void, IX86_BUILTIN_CLRPS);
9596 def_builtin ("__builtin_ia32_loadps1", v4sf_ftype_pfloat, IX86_BUILTIN_LOADPS1);
9597 def_builtin ("__builtin_ia32_loadrps", v4sf_ftype_pfloat, IX86_BUILTIN_LOADRPS);
9598 def_builtin ("__builtin_ia32_storeps1", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREPS1);
9599 def_builtin ("__builtin_ia32_storerps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORERPS);
9602 /* Errors in the source file can cause expand_expr to return const0_rtx
9603 where we expect a vector. To avoid crashing, use one of the vector
9604 clear instructions. */
/* NOTE(review): the extract is gapped here -- the "static rtx" return
   type, the declaration of X, and the early/final "return x;" lines are
   among the original lines missing below.  */
9606 safe_vector_operand (x, mode)
9608 enum machine_mode mode;
/* Non-error operands pass through untouched (return line missing).  */
9610 if (x != const0_rtx)
/* Otherwise materialize a fresh register of MODE and clear it: MMX modes
   via the DImode clear, everything else via the TImode SSE clear, using a
   SUBREG when MODE is narrower than the natural clear mode.  */
9612 x = gen_reg_rtx (mode);
9614 if (VALID_MMX_REG_MODE (mode))
9615 emit_insn (gen_mmx_clrdi (mode == DImode ? x
9616 : gen_rtx_SUBREG (DImode, x, 0)));
9618 emit_insn (gen_sse_clrti (mode == TImode ? x
9619 : gen_rtx_SUBREG (TImode, x, 0)));
9623 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
/* ICODE is the two-input insn to emit, ARGLIST the builtin's argument
   TREE_LIST, TARGET a suggested result rtx (replaced when unsuitable).
   NOTE(review): the function header (return type, remaining parameter
   declarations, opening brace) and the final emit/return lines are
   among the original lines missing from this extract.  */
9626 ix86_expand_binop_builtin (icode, arglist, target)
9627 enum insn_code icode;
/* Expand both builtin arguments to RTL.  */
9632 tree arg0 = TREE_VALUE (arglist);
9633 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
9634 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
9635 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
/* Operand modes come from the insn's own operand table.  */
9636 enum machine_mode tmode = insn_data[icode].operand[0].mode;
9637 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
9638 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
/* Guard against const0_rtx standing in for a vector (see
   safe_vector_operand).  */
9640 if (VECTOR_MODE_P (mode0))
9641 op0 = safe_vector_operand (op0, mode0);
9642 if (VECTOR_MODE_P (mode1))
9643 op1 = safe_vector_operand (op1, mode1);
/* Use TARGET only if it exists, has the result mode, and satisfies the
   destination predicate (the leading "if (! target" line is missing).  */
9646 || GET_MODE (target) != tmode
9647 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
9648 target = gen_reg_rtx (tmode);
9650 /* In case the insn wants input operands in modes different from
9651 the result, abort. */
9652 if (GET_MODE (op0) != mode0 || GET_MODE (op1) != mode1)
/* Force operands into registers when they fail the insn predicates.  */
9655 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
9656 op0 = copy_to_mode_reg (mode0, op0);
9657 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
9658 op1 = copy_to_mode_reg (mode1, op1);
9660 pat = GEN_FCN (icode) (target, op0, op1);
9667 /* Subroutine of ix86_expand_builtin to take care of stores. */
/* ICODE is the store insn, ARGLIST is (pointer, value).  SHUFFLE >= 0
   requests a shufps permutation of the value before the store (used by
   storeps1/storerps); -1 means store as-is.
   NOTE(review): the function header and the trailing emit/return lines
   are among the original lines missing from this extract, as is the
   "if (shuffle >= 0)" guard that precedes the gen_sse_shufps call.  */
9670 ix86_expand_store_builtin (icode, arglist, shuffle)
9671 enum insn_code icode;
9676 tree arg0 = TREE_VALUE (arglist);
9677 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
9678 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
9679 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
9680 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
9681 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
/* Guard against const0_rtx standing in for the stored vector.  */
9683 if (VECTOR_MODE_P (mode1))
9684 op1 = safe_vector_operand (op1, mode1);
/* Destination: a MEM at the pointer argument.  */
9686 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
/* A shuffled store always needs the value in a register.  */
9687 if (shuffle >= 0 || ! (*insn_data[icode].operand[1].predicate) (op1, mode1))
9688 op1 = copy_to_mode_reg (mode1, op1);
9690 emit_insn (gen_sse_shufps (op1, op1, op1, GEN_INT (shuffle)));
9691 pat = GEN_FCN (icode) (op0, op1);
9697 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
/* ICODE is the one-input insn, ARGLIST holds the single argument,
   TARGET a suggested result rtx.  DO_LOAD non-zero means the argument
   is a pointer and the operand is a MEM loaded through it.
   NOTE(review): the function header, the if/else around the do_load
   branch, and the final emit/return lines are among the original lines
   missing from this extract.  */
9700 ix86_expand_unop_builtin (icode, arglist, target, do_load)
9701 enum insn_code icode;
9707 tree arg0 = TREE_VALUE (arglist);
9708 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
9709 enum machine_mode tmode = insn_data[icode].operand[0].mode;
9710 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
/* Use TARGET only if it exists, has the result mode, and satisfies the
   destination predicate.  */
9713 || GET_MODE (target) != tmode
9714 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
9715 target = gen_reg_rtx (tmode);
/* do_load branch: treat OP0 as an address and load from it.  */
9717 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
/* Otherwise use OP0 directly, guarding the const0_rtx error case.  */
9720 if (VECTOR_MODE_P (mode0))
9721 op0 = safe_vector_operand (op0, mode0);
9723 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
9724 op0 = copy_to_mode_reg (mode0, op0);
9727 pat = GEN_FCN (icode) (target, op0);
9734 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
9735 sqrtss, rsqrtss, rcpss. */
/* These vm* patterns take two inputs; passing OP0 twice applies the
   operation to the low element while the remaining elements come from
   the same source.
   NOTE(review): the function header and the final emit/return lines are
   among the original lines missing from this extract.  */
9738 ix86_expand_unop1_builtin (icode, arglist, target)
9739 enum insn_code icode;
9744 tree arg0 = TREE_VALUE (arglist);
9745 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
9746 enum machine_mode tmode = insn_data[icode].operand[0].mode;
9747 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
/* Use TARGET only if it exists, has the result mode, and satisfies the
   destination predicate.  */
9750 || GET_MODE (target) != tmode
9751 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
9752 target = gen_reg_rtx (tmode);
/* Guard against const0_rtx standing in for the vector argument.  */
9754 if (VECTOR_MODE_P (mode0))
9755 op0 = safe_vector_operand (op0, mode0);
9757 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
9758 op0 = copy_to_mode_reg (mode0, op0);
/* Same operand supplied twice -- see the note above.  */
9760 pat = GEN_FCN (icode) (target, op0, op0);
9767 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
/* D describes the compare builtin (insn code plus rtx comparison code),
   ARGLIST its two vector arguments, TARGET a suggested result rtx.
   NOTE(review): the function header, the condition that triggers the
   operand swap, and the final emit/return lines are among the original
   lines missing from this extract.  */
9770 ix86_expand_sse_compare (d, arglist, target)
9771 struct builtin_description *d;
9776 tree arg0 = TREE_VALUE (arglist);
9777 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
9778 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
9779 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
9781 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
9782 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
9783 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
9784 enum rtx_code comparison = d->comparison;
/* Guard against const0_rtx standing in for either vector.  */
9786 if (VECTOR_MODE_P (mode0))
9787 op0 = safe_vector_operand (op0, mode0);
9788 if (VECTOR_MODE_P (mode1))
9789 op1 = safe_vector_operand (op1, mode1);
9791 /* Swap operands if we have a comparison that isn't available in
/* Swap path: route OP1 through a fresh TARGET register and use the
   swapped rtx comparison code instead.  */
9795 target = gen_reg_rtx (tmode);
9796 emit_move_insn (target, op1);
9799 comparison = swap_condition (comparison);
/* Use TARGET only if it exists, has the result mode, and satisfies the
   destination predicate (leading "if (! target" line missing).  */
9802 || GET_MODE (target) != tmode
9803 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
9804 target = gen_reg_rtx (tmode);
9806 if (! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
9807 op0 = copy_to_mode_reg (mode0, op0);
9808 if (! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
9809 op1 = copy_to_mode_reg (mode1, op1);
/* OP2 is the comparison rtx itself, passed as the insn's third input.  */
9811 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
9812 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
9819 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
/* D describes the comi/ucomi builtin (insn code plus rtx comparison
   code), ARGLIST its two vector arguments, TARGET the result rtx.
   The comparison result is produced as a QImode setcc into the low
   byte of a zeroed SImode register.
   NOTE(review): the function header, the swap-condition test, the
   operand-swap statements, and the final emit/return lines are among
   the original lines missing from this extract.  */
9822 ix86_expand_sse_comi (d, arglist, target)
9823 struct builtin_description *d;
9828 tree arg0 = TREE_VALUE (arglist);
9829 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
9830 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
9831 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
9833 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
9834 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
9835 enum rtx_code comparison = d->comparison;
/* Guard against const0_rtx standing in for either vector.  */
9837 if (VECTOR_MODE_P (mode0))
9838 op0 = safe_vector_operand (op0, mode0);
9839 if (VECTOR_MODE_P (mode1))
9840 op1 = safe_vector_operand (op1, mode1);
9842 /* Swap operands if we have a comparison that isn't available in
9849 comparison = swap_condition (comparison);
/* Zero the full SImode result, then write only its low byte.  */
9852 target = gen_reg_rtx (SImode);
9853 emit_move_insn (target, const0_rtx);
9854 target = gen_rtx_SUBREG (QImode, target, 0);
9856 if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
9857 op0 = copy_to_mode_reg (mode0, op0);
9858 if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
9859 op1 = copy_to_mode_reg (mode1, op1);
/* OP2 is the comparison rtx; the comi insn sets the flags, and
   gen_setcc_2 materializes the flag result into TARGET's low byte.  */
9861 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
9862 pat = GEN_FCN (d->icode) (op0, op1, op2);
9866 emit_insn (gen_setcc_2 (target, op2));
9871 /* Expand an expression EXP that calls a built-in function,
9872 with result going to TARGET if that's convenient
9873 (and in mode MODE if that's convenient).
9874 SUBTARGET may be used as the target for computing one of EXP's operands.
9875 IGNORE is nonzero if the value is to be ignored. */
9878 ix86_expand_builtin (exp, target, subtarget, mode, ignore)
9881 rtx subtarget ATTRIBUTE_UNUSED;
9882 enum machine_mode mode ATTRIBUTE_UNUSED;
9883 int ignore ATTRIBUTE_UNUSED;
9885 struct builtin_description *d;
9887 enum insn_code icode;
9888 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
9889 tree arglist = TREE_OPERAND (exp, 1);
9890 tree arg0, arg1, arg2, arg3;
9891 rtx op0, op1, op2, pat;
9892 enum machine_mode tmode, mode0, mode1, mode2;
9893 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
9897 case IX86_BUILTIN_EMMS:
9898 emit_insn (gen_emms ());
9901 case IX86_BUILTIN_SFENCE:
9902 emit_insn (gen_sfence ());
9905 case IX86_BUILTIN_M_FROM_INT:
9906 target = gen_reg_rtx (DImode);
9907 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
9908 emit_move_insn (gen_rtx_SUBREG (SImode, target, 0), op0);
9911 case IX86_BUILTIN_M_TO_INT:
9912 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
9913 op0 = copy_to_mode_reg (DImode, op0);
9914 target = gen_reg_rtx (SImode);
9915 emit_move_insn (target, gen_rtx_SUBREG (SImode, op0, 0));
9918 case IX86_BUILTIN_PEXTRW:
9919 icode = CODE_FOR_mmx_pextrw;
9920 arg0 = TREE_VALUE (arglist);
9921 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
9922 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
9923 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
9924 tmode = insn_data[icode].operand[0].mode;
9925 mode0 = insn_data[icode].operand[1].mode;
9926 mode1 = insn_data[icode].operand[2].mode;
9928 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
9929 op0 = copy_to_mode_reg (mode0, op0);
9930 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
9932 /* @@@ better error message */
9933 error ("selector must be an immediate");
9937 || GET_MODE (target) != tmode
9938 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
9939 target = gen_reg_rtx (tmode);
9940 pat = GEN_FCN (icode) (target, op0, op1);
9946 case IX86_BUILTIN_PINSRW:
9947 icode = CODE_FOR_mmx_pinsrw;
9948 arg0 = TREE_VALUE (arglist);
9949 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
9950 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
9951 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
9952 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
9953 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
9954 tmode = insn_data[icode].operand[0].mode;
9955 mode0 = insn_data[icode].operand[1].mode;
9956 mode1 = insn_data[icode].operand[2].mode;
9957 mode2 = insn_data[icode].operand[3].mode;
9959 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
9960 op0 = copy_to_mode_reg (mode0, op0);
9961 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
9962 op1 = copy_to_mode_reg (mode1, op1);
9963 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
9965 /* @@@ better error message */
9966 error ("selector must be an immediate");
9970 || GET_MODE (target) != tmode
9971 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
9972 target = gen_reg_rtx (tmode);
9973 pat = GEN_FCN (icode) (target, op0, op1, op2);
9979 case IX86_BUILTIN_MASKMOVQ:
9980 icode = CODE_FOR_mmx_maskmovq;
9981 /* Note the arg order is different from the operand order. */
9982 arg1 = TREE_VALUE (arglist);
9983 arg2 = TREE_VALUE (TREE_CHAIN (arglist));
9984 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
9985 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
9986 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
9987 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
9988 mode0 = insn_data[icode].operand[0].mode;
9989 mode1 = insn_data[icode].operand[1].mode;
9990 mode2 = insn_data[icode].operand[2].mode;
9992 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
9993 op0 = copy_to_mode_reg (mode0, op0);
9994 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
9995 op1 = copy_to_mode_reg (mode1, op1);
9996 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
9997 op2 = copy_to_mode_reg (mode2, op2);
9998 pat = GEN_FCN (icode) (op0, op1, op2);
10004 case IX86_BUILTIN_SQRTSS:
10005 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2, arglist, target);
10006 case IX86_BUILTIN_RSQRTSS:
10007 return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2, arglist, target);
10008 case IX86_BUILTIN_RCPSS:
10009 return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target);
10011 case IX86_BUILTIN_LOADAPS:
10012 return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);
10014 case IX86_BUILTIN_LOADUPS:
10015 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
10017 case IX86_BUILTIN_STOREAPS:
10018 return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist, -1);
10019 case IX86_BUILTIN_STOREUPS:
10020 return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist, -1);
10022 case IX86_BUILTIN_LOADSS:
10023 return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1);
10025 case IX86_BUILTIN_STORESS:
10026 return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist, -1);
10028 case IX86_BUILTIN_LOADHPS:
10029 case IX86_BUILTIN_LOADLPS:
10030 icode = (fcode == IX86_BUILTIN_LOADHPS
10031 ? CODE_FOR_sse_movhps : CODE_FOR_sse_movlps);
10032 arg0 = TREE_VALUE (arglist);
10033 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
10034 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
10035 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
10036 tmode = insn_data[icode].operand[0].mode;
10037 mode0 = insn_data[icode].operand[1].mode;
10038 mode1 = insn_data[icode].operand[2].mode;
10040 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
10041 op0 = copy_to_mode_reg (mode0, op0);
10042 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
10044 || GET_MODE (target) != tmode
10045 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
10046 target = gen_reg_rtx (tmode);
10047 pat = GEN_FCN (icode) (target, op0, op1);
10053 case IX86_BUILTIN_STOREHPS:
10054 case IX86_BUILTIN_STORELPS:
10055 icode = (fcode == IX86_BUILTIN_STOREHPS
10056 ? CODE_FOR_sse_movhps : CODE_FOR_sse_movlps);
10057 arg0 = TREE_VALUE (arglist);
10058 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
10059 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
10060 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
10061 mode0 = insn_data[icode].operand[1].mode;
10062 mode1 = insn_data[icode].operand[2].mode;
10064 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
10065 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
10066 op1 = copy_to_mode_reg (mode1, op1);
10068 pat = GEN_FCN (icode) (op0, op0, op1);
10074 case IX86_BUILTIN_MOVNTPS:
10075 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist, -1);
10076 case IX86_BUILTIN_MOVNTQ:
10077 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist, -1);
10079 case IX86_BUILTIN_LDMXCSR:
10080 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
10081 target = assign_386_stack_local (SImode, 0);
10082 emit_move_insn (target, op0);
10083 emit_insn (gen_ldmxcsr (target));
10086 case IX86_BUILTIN_STMXCSR:
10087 target = assign_386_stack_local (SImode, 0);
10088 emit_insn (gen_stmxcsr (target));
10089 return copy_to_mode_reg (SImode, target);
10091 case IX86_BUILTIN_PREFETCH:
10092 icode = CODE_FOR_prefetch;
10093 arg0 = TREE_VALUE (arglist);
10094 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
10095 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
10096 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
10097 mode0 = insn_data[icode].operand[0].mode;
10098 mode1 = insn_data[icode].operand[1].mode;
10100 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
10102 /* @@@ better error message */
10103 error ("selector must be an immediate");
10107 op0 = copy_to_mode_reg (Pmode, op0);
10108 pat = GEN_FCN (icode) (op0, op1);
10114 case IX86_BUILTIN_SHUFPS:
10115 icode = CODE_FOR_sse_shufps;
10116 arg0 = TREE_VALUE (arglist);
10117 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
10118 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
10119 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
10120 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
10121 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
10122 tmode = insn_data[icode].operand[0].mode;
10123 mode0 = insn_data[icode].operand[1].mode;
10124 mode1 = insn_data[icode].operand[2].mode;
10125 mode2 = insn_data[icode].operand[3].mode;
10127 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
10128 op0 = copy_to_mode_reg (mode0, op0);
10129 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
10130 op1 = copy_to_mode_reg (mode1, op1);
10131 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
10133 /* @@@ better error message */
10134 error ("mask must be an immediate");
10138 || GET_MODE (target) != tmode
10139 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
10140 target = gen_reg_rtx (tmode);
10141 pat = GEN_FCN (icode) (target, op0, op1, op2);
10147 case IX86_BUILTIN_PSHUFW:
10148 icode = CODE_FOR_mmx_pshufw;
10149 arg0 = TREE_VALUE (arglist);
10150 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
10151 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
10152 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
10153 tmode = insn_data[icode].operand[0].mode;
10154 mode0 = insn_data[icode].operand[2].mode;
10155 mode1 = insn_data[icode].operand[3].mode;
10157 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
10158 op0 = copy_to_mode_reg (mode0, op0);
10159 if (! (*insn_data[icode].operand[3].predicate) (op1, mode1))
10161 /* @@@ better error message */
10162 error ("mask must be an immediate");
10166 || GET_MODE (target) != tmode
10167 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
10168 target = gen_reg_rtx (tmode);
10169 pat = GEN_FCN (icode) (target, target, op0, op1);
10175 /* Composite intrinsics. */
10176 case IX86_BUILTIN_SETPS1:
10177 target = assign_386_stack_local (SFmode, 0);
10178 arg0 = TREE_VALUE (arglist);
10179 emit_move_insn (change_address (target, SFmode, XEXP (target, 0)),
10180 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
10181 op0 = gen_reg_rtx (V4SFmode);
10182 emit_insn (gen_sse_loadss (op0, change_address (target, V4SFmode,
10183 XEXP (target, 0))));
10184 emit_insn (gen_sse_shufps (op0, op0, op0, GEN_INT (0)));
10187 case IX86_BUILTIN_SETPS:
10188 target = assign_386_stack_local (V4SFmode, 0);
10189 op0 = change_address (target, SFmode, XEXP (target, 0));
10190 arg0 = TREE_VALUE (arglist);
10191 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
10192 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
10193 arg3 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist))));
10194 emit_move_insn (op0,
10195 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
10196 emit_move_insn (adj_offsettable_operand (op0, 4),
10197 expand_expr (arg1, NULL_RTX, VOIDmode, 0));
10198 emit_move_insn (adj_offsettable_operand (op0, 8),
10199 expand_expr (arg2, NULL_RTX, VOIDmode, 0));
10200 emit_move_insn (adj_offsettable_operand (op0, 12),
10201 expand_expr (arg3, NULL_RTX, VOIDmode, 0));
10202 op0 = gen_reg_rtx (V4SFmode);
10203 emit_insn (gen_sse_movaps (op0, target));
10206 case IX86_BUILTIN_CLRPS:
10207 target = gen_reg_rtx (TImode);
10208 emit_insn (gen_sse_clrti (target));
10211 case IX86_BUILTIN_LOADRPS:
10212 target = ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist,
10213 gen_reg_rtx (V4SFmode), 1);
10214 emit_insn (gen_sse_shufps (target, target, target, GEN_INT (0x1b)));
10217 case IX86_BUILTIN_LOADPS1:
10218 target = ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist,
10219 gen_reg_rtx (V4SFmode), 1);
10220 emit_insn (gen_sse_shufps (target, target, target, const0_rtx));
10223 case IX86_BUILTIN_STOREPS1:
10224 return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist, 0);
10225 case IX86_BUILTIN_STORERPS:
10226 return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist, 0x1B);
10228 case IX86_BUILTIN_MMX_ZERO:
10229 target = gen_reg_rtx (DImode);
10230 emit_insn (gen_mmx_clrdi (target));
10237 for (i = 0, d = bdesc_2arg; i < sizeof (bdesc_2arg) / sizeof *d; i++, d++)
10238 if (d->code == fcode)
10240 /* Compares are treated specially. */
10241 if (d->icode == CODE_FOR_maskcmpv4sf3
10242 || d->icode == CODE_FOR_vmmaskcmpv4sf3
10243 || d->icode == CODE_FOR_maskncmpv4sf3
10244 || d->icode == CODE_FOR_vmmaskncmpv4sf3)
10245 return ix86_expand_sse_compare (d, arglist, target);
10247 return ix86_expand_binop_builtin (d->icode, arglist, target);
10250 for (i = 0, d = bdesc_1arg; i < sizeof (bdesc_1arg) / sizeof *d; i++, d++)
10251 if (d->code == fcode)
10252 return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
10254 for (i = 0, d = bdesc_comi; i < sizeof (bdesc_comi) / sizeof *d; i++, d++)
10255 if (d->code == fcode)
10256 return ix86_expand_sse_comi (d, arglist, target);
10258 /* @@@ Should really do something sensible here. */
10262 /* Store OPERAND to the memory after reload is completed. This means
10263 that we can't easilly use assign_stack_local. */
10265 ix86_force_to_memory (mode, operand)
10266 enum machine_mode mode;
10270 if (!reload_completed)
10272 if (TARGET_64BIT && TARGET_RED_ZONE)
10274 result = gen_rtx_MEM (mode,
10275 gen_rtx_PLUS (Pmode,
10277 GEN_INT (-RED_ZONE_SIZE)));
10278 emit_move_insn (result, operand);
10280 else if (TARGET_64BIT && !TARGET_RED_ZONE)
10286 operand = gen_lowpart (DImode, operand);
10290 gen_rtx_SET (VOIDmode,
10291 gen_rtx_MEM (DImode,
10292 gen_rtx_PRE_DEC (DImode,
10293 stack_pointer_rtx)),
10299 result = gen_rtx_MEM (mode, stack_pointer_rtx);
10308 split_di (&operand, 1, operands, operands + 1);
10310 gen_rtx_SET (VOIDmode,
10311 gen_rtx_MEM (SImode,
10312 gen_rtx_PRE_DEC (Pmode,
10313 stack_pointer_rtx)),
10316 gen_rtx_SET (VOIDmode,
10317 gen_rtx_MEM (SImode,
10318 gen_rtx_PRE_DEC (Pmode,
10319 stack_pointer_rtx)),
10324 /* It is better to store HImodes as SImodes. */
10325 if (!TARGET_PARTIAL_REG_STALL)
10326 operand = gen_lowpart (SImode, operand);
10330 gen_rtx_SET (VOIDmode,
10331 gen_rtx_MEM (GET_MODE (operand),
10332 gen_rtx_PRE_DEC (SImode,
10333 stack_pointer_rtx)),
10339 result = gen_rtx_MEM (mode, stack_pointer_rtx);
10344 /* Free operand from the memory. */
10346 ix86_free_from_memory (mode)
10347 enum machine_mode mode;
10349 if (!TARGET_64BIT || !TARGET_RED_ZONE)
10353 if (mode == DImode || TARGET_64BIT)
10355 else if (mode == HImode && TARGET_PARTIAL_REG_STALL)
10359 /* Use LEA to deallocate stack space. In peephole2 it will be converted
10360 to pop or add instruction if registers are available. */
10361 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10362 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
10367 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
10368 QImode must go into class Q_REGS.
10369 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
10370 movdf to do mem-to-mem moves through integer regs. */
10372 ix86_preferred_reload_class (x, class)
10374 enum reg_class class;
10376 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
10378 /* SSE can't load any constant directly yet. */
10379 if (SSE_CLASS_P (class))
10381 /* Floats can load 0 and 1. */
10382 if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x))
10384 /* Limit class to non-SSE. Use GENERAL_REGS if possible. */
10385 if (MAYBE_SSE_CLASS_P (class))
10386 return (reg_class_subset_p (class, GENERAL_REGS)
10387 ? GENERAL_REGS : FLOAT_REGS);
10391 /* General regs can load everything. */
10392 if (reg_class_subset_p (class, GENERAL_REGS))
10393 return GENERAL_REGS;
10394 /* In case we haven't resolved FLOAT or SSE yet, give up. */
10395 if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
10398 if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
10400 if (GET_MODE (x) == QImode && ! reg_class_subset_p (class, Q_REGS))
10405 /* If we are copying between general and FP registers, we need a memory
10406 location. The same is true for SSE and MMX registers.
10408 The macro can't work reliably when one of the CLASSES is class containing
10409 registers from multiple units (SSE, MMX, integer). We avoid this by never
10410 combining those units in single alternative in the machine description.
10411 Ensure that this constraint holds to avoid unexpected surprises.
10413 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
10414 enforce these sanity checks. */
10416 ix86_secondary_memory_needed (class1, class2, mode, strict)
10417 enum reg_class class1, class2;
10418 enum machine_mode mode;
10421 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
10422 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
10423 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
10424 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
10425 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
10426 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
10433 return (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)
10434 || (SSE_CLASS_P (class1) != SSE_CLASS_P (class2)
10435 && (mode) != SImode)
10436 || (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
10437 && (mode) != SImode));
10439 /* Return the cost of moving data from a register in class CLASS1 to
10440 one in class CLASS2.
10442 It is not required that the cost always equal 2 when FROM is the same as TO;
10443 on some machines it is expensive to move between registers if they are not
10444 general registers. */
10446 ix86_register_move_cost (mode, class1, class2)
10447 enum machine_mode mode;
10448 enum reg_class class1, class2;
10450 /* In case we require secondary memory, compute cost of the store followed
10451 by load. In case of copying from general_purpose_register we may emit
10452 multiple stores followed by single load causing memory size mismatch
10453 stall. Count this as arbitarily high cost of 20. */
10454 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
10457 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
10459 return (MEMORY_MOVE_COST (mode, class1, 0)
10460 + MEMORY_MOVE_COST (mode, class2, 1) + add_cost);
10462 /* Moves between SSE/MMX and integer unit are expensive. */
10463 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
10464 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
10465 return ix86_cost->mmxsse_to_integer;
10466 if (MAYBE_FLOAT_CLASS_P (class1))
10467 return ix86_cost->fp_move;
10468 if (MAYBE_SSE_CLASS_P (class1))
10469 return ix86_cost->sse_move;
10470 if (MAYBE_MMX_CLASS_P (class1))
10471 return ix86_cost->mmx_move;
10475 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
10477 ix86_hard_regno_mode_ok (regno, mode)
10479 enum machine_mode mode;
10481 /* Flags and only flags can only hold CCmode values. */
10482 if (CC_REGNO_P (regno))
10483 return GET_MODE_CLASS (mode) == MODE_CC;
10484 if (GET_MODE_CLASS (mode) == MODE_CC
10485 || GET_MODE_CLASS (mode) == MODE_RANDOM
10486 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
10488 if (FP_REGNO_P (regno))
10489 return VALID_FP_MODE_P (mode);
10490 if (SSE_REGNO_P (regno))
10491 return VALID_SSE_REG_MODE (mode);
10492 if (MMX_REGNO_P (regno))
10493 return VALID_MMX_REG_MODE (mode);
10494 /* We handle both integer and floats in the general purpose registers.
10495 In future we should be able to handle vector modes as well. */
10496 if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode))
10498 /* Take care for QImode values - they can be in non-QI regs, but then
10499 they do cause partial register stalls. */
10500 if (regno < 4 || mode != QImode || TARGET_64BIT)
10502 return reload_in_progress || reload_completed || !TARGET_PARTIAL_REG_STALL;
10505 /* Return the cost of moving data of mode M between a
10506 register and memory. A value of 2 is the default; this cost is
10507 relative to those in `REGISTER_MOVE_COST'.
10509 If moving between registers and memory is more expensive than
10510 between two registers, you should define this macro to express the
10513 Model also increased moving costs of QImode registers in non
10517 ix86_memory_move_cost (mode, class, in)
10518 enum machine_mode mode;
10519 enum reg_class class;
10522 if (FLOAT_CLASS_P (class))
10540 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
10542 if (SSE_CLASS_P (class))
10545 switch (GET_MODE_SIZE (mode))
10559 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
10561 if (MMX_CLASS_P (class))
10564 switch (GET_MODE_SIZE (mode))
10575 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
10577 switch (GET_MODE_SIZE (mode))
10581 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
10582 : ix86_cost->movzbl_load);
10584 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
10585 : ix86_cost->int_store[0] + 4);
10588 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
10590 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
10591 if (mode == TFmode)
10593 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
10594 * (int) GET_MODE_SIZE (mode) / 4);