1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001
3 Free Software Foundation, Inc.
5 This file is part of GNU CC.
7 GNU CC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
12 GNU CC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GNU CC; see the file COPYING. If not, write to
19 the Free Software Foundation, 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
28 #include "hard-reg-set.h"
30 #include "insn-config.h"
31 #include "conditions.h"
33 #include "insn-attr.h"
41 #include "basic-block.h"
44 #include "target-def.h"
46 #ifndef CHECK_STACK_LIMIT
47 #define CHECK_STACK_LIMIT -1
50 /* Processor costs (relative to an add) */
52 struct processor_costs size_cost = { /* costs for tunning for size */
53 2, /* cost of an add instruction */
54 3, /* cost of a lea instruction */
55 2, /* variable shift costs */
56 3, /* constant shift costs */
57 3, /* cost of starting a multiply */
58 0, /* cost of multiply per each bit set */
59 3, /* cost of a divide/mod */
60 3, /* cost of movsx */
61 3, /* cost of movzx */
64 2, /* cost for loading QImode using movzbl */
65 {2, 2, 2}, /* cost of loading integer registers
66 in QImode, HImode and SImode.
67 Relative to reg-reg move (2). */
68 {2, 2, 2}, /* cost of storing integer registers */
69 2, /* cost of reg,reg fld/fst */
70 {2, 2, 2}, /* cost of loading fp registers
71 in SFmode, DFmode and XFmode */
72 {2, 2, 2}, /* cost of loading integer registers */
73 3, /* cost of moving MMX register */
74 {3, 3}, /* cost of loading MMX registers
75 in SImode and DImode */
76 {3, 3}, /* cost of storing MMX registers
77 in SImode and DImode */
78 3, /* cost of moving SSE register */
79 {3, 3, 3}, /* cost of loading SSE registers
80 in SImode, DImode and TImode */
81 {3, 3, 3}, /* cost of storing SSE registers
82 in SImode, DImode and TImode */
83 3, /* MMX or SSE register to integer */
85 /* Processor costs (relative to an add) */
87 struct processor_costs i386_cost = { /* 386 specific costs */
88 1, /* cost of an add instruction */
89 1, /* cost of a lea instruction */
90 3, /* variable shift costs */
91 2, /* constant shift costs */
92 6, /* cost of starting a multiply */
93 1, /* cost of multiply per each bit set */
94 23, /* cost of a divide/mod */
95 3, /* cost of movsx */
96 2, /* cost of movzx */
97 15, /* "large" insn */
99 4, /* cost for loading QImode using movzbl */
100 {2, 4, 2}, /* cost of loading integer registers
101 in QImode, HImode and SImode.
102 Relative to reg-reg move (2). */
103 {2, 4, 2}, /* cost of storing integer registers */
104 2, /* cost of reg,reg fld/fst */
105 {8, 8, 8}, /* cost of loading fp registers
106 in SFmode, DFmode and XFmode */
107 {8, 8, 8}, /* cost of loading integer registers */
108 2, /* cost of moving MMX register */
109 {4, 8}, /* cost of loading MMX registers
110 in SImode and DImode */
111 {4, 8}, /* cost of storing MMX registers
112 in SImode and DImode */
113 2, /* cost of moving SSE register */
114 {4, 8, 16}, /* cost of loading SSE registers
115 in SImode, DImode and TImode */
116 {4, 8, 16}, /* cost of storing SSE registers
117 in SImode, DImode and TImode */
118 3, /* MMX or SSE register to integer */
122 struct processor_costs i486_cost = { /* 486 specific costs */
123 1, /* cost of an add instruction */
124 1, /* cost of a lea instruction */
125 3, /* variable shift costs */
126 2, /* constant shift costs */
127 12, /* cost of starting a multiply */
128 1, /* cost of multiply per each bit set */
129 40, /* cost of a divide/mod */
130 3, /* cost of movsx */
131 2, /* cost of movzx */
132 15, /* "large" insn */
134 4, /* cost for loading QImode using movzbl */
135 {2, 4, 2}, /* cost of loading integer registers
136 in QImode, HImode and SImode.
137 Relative to reg-reg move (2). */
138 {2, 4, 2}, /* cost of storing integer registers */
139 2, /* cost of reg,reg fld/fst */
140 {8, 8, 8}, /* cost of loading fp registers
141 in SFmode, DFmode and XFmode */
142 {8, 8, 8}, /* cost of loading integer registers */
143 2, /* cost of moving MMX register */
144 {4, 8}, /* cost of loading MMX registers
145 in SImode and DImode */
146 {4, 8}, /* cost of storing MMX registers
147 in SImode and DImode */
148 2, /* cost of moving SSE register */
149 {4, 8, 16}, /* cost of loading SSE registers
150 in SImode, DImode and TImode */
151 {4, 8, 16}, /* cost of storing SSE registers
152 in SImode, DImode and TImode */
153 3 /* MMX or SSE register to integer */
157 struct processor_costs pentium_cost = {
158 1, /* cost of an add instruction */
159 1, /* cost of a lea instruction */
160 4, /* variable shift costs */
161 1, /* constant shift costs */
162 11, /* cost of starting a multiply */
163 0, /* cost of multiply per each bit set */
164 25, /* cost of a divide/mod */
165 3, /* cost of movsx */
166 2, /* cost of movzx */
167 8, /* "large" insn */
169 6, /* cost for loading QImode using movzbl */
170 {2, 4, 2}, /* cost of loading integer registers
171 in QImode, HImode and SImode.
172 Relative to reg-reg move (2). */
173 {2, 4, 2}, /* cost of storing integer registers */
174 2, /* cost of reg,reg fld/fst */
175 {2, 2, 6}, /* cost of loading fp registers
176 in SFmode, DFmode and XFmode */
177 {4, 4, 6}, /* cost of loading integer registers */
178 8, /* cost of moving MMX register */
179 {8, 8}, /* cost of loading MMX registers
180 in SImode and DImode */
181 {8, 8}, /* cost of storing MMX registers
182 in SImode and DImode */
183 2, /* cost of moving SSE register */
184 {4, 8, 16}, /* cost of loading SSE registers
185 in SImode, DImode and TImode */
186 {4, 8, 16}, /* cost of storing SSE registers
187 in SImode, DImode and TImode */
188 3 /* MMX or SSE register to integer */
192 struct processor_costs pentiumpro_cost = {
193 1, /* cost of an add instruction */
194 1, /* cost of a lea instruction */
195 1, /* variable shift costs */
196 1, /* constant shift costs */
197 4, /* cost of starting a multiply */
198 0, /* cost of multiply per each bit set */
199 17, /* cost of a divide/mod */
200 1, /* cost of movsx */
201 1, /* cost of movzx */
202 8, /* "large" insn */
204 2, /* cost for loading QImode using movzbl */
205 {4, 4, 4}, /* cost of loading integer registers
206 in QImode, HImode and SImode.
207 Relative to reg-reg move (2). */
208 {2, 2, 2}, /* cost of storing integer registers */
209 2, /* cost of reg,reg fld/fst */
210 {2, 2, 6}, /* cost of loading fp registers
211 in SFmode, DFmode and XFmode */
212 {4, 4, 6}, /* cost of loading integer registers */
213 2, /* cost of moving MMX register */
214 {2, 2}, /* cost of loading MMX registers
215 in SImode and DImode */
216 {2, 2}, /* cost of storing MMX registers
217 in SImode and DImode */
218 2, /* cost of moving SSE register */
219 {2, 2, 8}, /* cost of loading SSE registers
220 in SImode, DImode and TImode */
221 {2, 2, 8}, /* cost of storing SSE registers
222 in SImode, DImode and TImode */
223 3 /* MMX or SSE register to integer */
227 struct processor_costs k6_cost = {
228 1, /* cost of an add instruction */
229 2, /* cost of a lea instruction */
230 1, /* variable shift costs */
231 1, /* constant shift costs */
232 3, /* cost of starting a multiply */
233 0, /* cost of multiply per each bit set */
234 18, /* cost of a divide/mod */
235 2, /* cost of movsx */
236 2, /* cost of movzx */
237 8, /* "large" insn */
239 3, /* cost for loading QImode using movzbl */
240 {4, 5, 4}, /* cost of loading integer registers
241 in QImode, HImode and SImode.
242 Relative to reg-reg move (2). */
243 {2, 3, 2}, /* cost of storing integer registers */
244 4, /* cost of reg,reg fld/fst */
245 {6, 6, 6}, /* cost of loading fp registers
246 in SFmode, DFmode and XFmode */
247 {4, 4, 4}, /* cost of loading integer registers */
248 2, /* cost of moving MMX register */
249 {2, 2}, /* cost of loading MMX registers
250 in SImode and DImode */
251 {2, 2}, /* cost of storing MMX registers
252 in SImode and DImode */
253 2, /* cost of moving SSE register */
254 {2, 2, 8}, /* cost of loading SSE registers
255 in SImode, DImode and TImode */
256 {2, 2, 8}, /* cost of storing SSE registers
257 in SImode, DImode and TImode */
258 6 /* MMX or SSE register to integer */
262 struct processor_costs athlon_cost = {
263 1, /* cost of an add instruction */
264 2, /* cost of a lea instruction */
265 1, /* variable shift costs */
266 1, /* constant shift costs */
267 5, /* cost of starting a multiply */
268 0, /* cost of multiply per each bit set */
269 42, /* cost of a divide/mod */
270 1, /* cost of movsx */
271 1, /* cost of movzx */
272 8, /* "large" insn */
274 4, /* cost for loading QImode using movzbl */
275 {4, 5, 4}, /* cost of loading integer registers
276 in QImode, HImode and SImode.
277 Relative to reg-reg move (2). */
278 {2, 3, 2}, /* cost of storing integer registers */
279 4, /* cost of reg,reg fld/fst */
280 {6, 6, 20}, /* cost of loading fp registers
281 in SFmode, DFmode and XFmode */
282 {4, 4, 16}, /* cost of loading integer registers */
283 2, /* cost of moving MMX register */
284 {2, 2}, /* cost of loading MMX registers
285 in SImode and DImode */
286 {2, 2}, /* cost of storing MMX registers
287 in SImode and DImode */
288 2, /* cost of moving SSE register */
289 {2, 2, 8}, /* cost of loading SSE registers
290 in SImode, DImode and TImode */
291 {2, 2, 8}, /* cost of storing SSE registers
292 in SImode, DImode and TImode */
293 6 /* MMX or SSE register to integer */
297 struct processor_costs pentium4_cost = {
298 1, /* cost of an add instruction */
299 1, /* cost of a lea instruction */
300 8, /* variable shift costs */
301 8, /* constant shift costs */
302 30, /* cost of starting a multiply */
303 0, /* cost of multiply per each bit set */
304 112, /* cost of a divide/mod */
305 1, /* cost of movsx */
306 1, /* cost of movzx */
307 16, /* "large" insn */
309 2, /* cost for loading QImode using movzbl */
310 {4, 5, 4}, /* cost of loading integer registers
311 in QImode, HImode and SImode.
312 Relative to reg-reg move (2). */
313 {2, 3, 2}, /* cost of storing integer registers */
314 2, /* cost of reg,reg fld/fst */
315 {2, 2, 6}, /* cost of loading fp registers
316 in SFmode, DFmode and XFmode */
317 {4, 4, 6}, /* cost of loading integer registers */
318 2, /* cost of moving MMX register */
319 {2, 2}, /* cost of loading MMX registers
320 in SImode and DImode */
321 {2, 2}, /* cost of storing MMX registers
322 in SImode and DImode */
323 12, /* cost of moving SSE register */
324 {12, 12, 12}, /* cost of loading SSE registers
325 in SImode, DImode and TImode */
326 {2, 2, 8}, /* cost of storing SSE registers
327 in SImode, DImode and TImode */
328 10, /* MMX or SSE register to integer */
/* Cost table currently in use.  Defaults to the Pentium table; it is
   reassigned during option processing below (to size_cost or to the
   processor_target_table entry for the selected -mcpu).  */
331 const struct processor_costs *ix86_cost = &pentium_cost;
333 /* Processor feature/optimization bitmasks. */
/* Each m_* macro below is a one-hot bitmask keyed by the PROCESSOR_*
   enumeration value, so a tuning flag is written as the OR of the
   processors it applies to; ~mask enables a flag on every processor
   except the listed ones.  */
334 #define m_386 (1<<PROCESSOR_I386)
335 #define m_486 (1<<PROCESSOR_I486)
336 #define m_PENT (1<<PROCESSOR_PENTIUM)
337 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
338 #define m_K6 (1<<PROCESSOR_K6)
339 #define m_ATHLON (1<<PROCESSOR_ATHLON)
340 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
/* Per-processor tuning flags.  A set bit means the named strategy is
   enabled when tuning for that CPU.  NOTE(review): what each flag
   controls is implied by its name and by its consumers elsewhere in
   the x86 backend (i386.h/i386.md) -- confirm there before relying on
   any particular interpretation.  */
342 const int x86_use_leave = m_386 | m_K6 | m_ATHLON;
343 const int x86_push_memory = m_386 | m_K6 | m_ATHLON | m_PENT4;
344 const int x86_zero_extend_with_and = m_486 | m_PENT;
345 const int x86_movx = m_ATHLON | m_PPRO | m_PENT4 /* m_386 | m_K6 */;
346 const int x86_double_with_add = ~m_386;
347 const int x86_use_bit_test = m_386;
348 const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON | m_K6;
349 const int x86_cmove = m_PPRO | m_ATHLON | m_PENT4;
350 const int x86_3dnow_a = m_ATHLON;
351 const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON | m_PENT4;
352 const int x86_branch_hints = m_PENT4;
353 const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4;
354 const int x86_partial_reg_stall = m_PPRO;
355 const int x86_use_loop = m_K6;
356 const int x86_use_fiop = ~(m_PPRO | m_ATHLON | m_PENT);
357 const int x86_use_mov0 = m_K6;
358 const int x86_use_cltd = ~(m_PENT | m_K6);
359 const int x86_read_modify_write = ~m_PENT;
360 const int x86_read_modify = ~(m_PENT | m_PPRO);
361 const int x86_split_long_moves = m_PPRO;
362 const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486;
363 const int x86_single_stringop = m_386 | m_PENT4;
/* ~(0): enabled for every processor.  */
364 const int x86_qimode_math = ~(0);
365 const int x86_promote_qi_regs = 0;
366 const int x86_himode_math = ~(m_PPRO);
367 const int x86_promote_hi_regs = m_PPRO;
368 const int x86_sub_esp_4 = m_ATHLON | m_PPRO | m_PENT4;
369 const int x86_sub_esp_8 = m_ATHLON | m_PPRO | m_386 | m_486 | m_PENT4;
370 const int x86_add_esp_4 = m_ATHLON | m_K6 | m_PENT4;
371 const int x86_add_esp_8 = m_ATHLON | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4;
372 const int x86_integer_DFmode_moves = ~(m_ATHLON | m_PENT4);
373 const int x86_partial_reg_dependency = m_ATHLON | m_PENT4;
374 const int x86_memory_mismatch_stall = m_ATHLON | m_PENT4;
375 const int x86_accumulate_outgoing_args = m_ATHLON | m_PENT4 | m_PPRO;
376 const int x86_prologue_using_move = m_ATHLON | m_PENT4 | m_PPRO;
377 const int x86_epilogue_using_move = m_ATHLON | m_PENT4 | m_PPRO;
378 const int x86_decompose_lea = m_PENT4;
380 /* In case the average insn count for single function invocation is
381 lower than this constant, emit fast (but longer) prologue and
   epilogue code.  */
383 #define FAST_PROLOGUE_INSN_COUNT 30
384 /* Set by prologue expander and used by epilogue expander to determine
   the style used.  */
386 static int use_fast_prologue_epilogue;
388 #define AT_BP(mode) (gen_rtx_MEM ((mode), hard_frame_pointer_rtx))
390 static const char *const hi_reg_name[] = HI_REGISTER_NAMES; /* names for 16 bit regs */
391 static const char *const qi_reg_name[] = QI_REGISTER_NAMES; /* names for 8 bit regs (low) */
392 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES; /* names for 8 bit regs (high) */
394 /* Array of the smallest class containing reg number REGNO, indexed by
395 REGNO. Used by REGNO_REG_CLASS in i386.h. */
397 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
400 AREG, DREG, CREG, BREG,
402 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
404 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
405 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
408 /* flags, fpsr, dirflag, frame */
409 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
410 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
412 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
414 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
415 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
416 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
420 /* The "default" register map used in 32bit mode. */
422 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
424 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
425 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
426 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
427 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
428 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
429 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
430 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
/* gcc hard register numbers of the registers that carry integer
   function arguments, in x86-64 ABI argument order RDI, RSI, RDX,
   RCX, R8, R9 (regno 5 is RDI and 4 is RSI, matching the register
   maps in this file).  */
433 static int x86_64_int_parameter_registers[6] = {5 /*RDI*/, 4 /*RSI*/,
434 1 /*RDX*/, 2 /*RCX*/,
435 FIRST_REX_INT_REG /*R8 */,
436 FIRST_REX_INT_REG + 1 /*R9 */};
/* Registers used for integer return values.  Regno 1 is RDX (not RDI:
   see the parameter table above and the DWARF comment, "2 for %edx
   (gcc regno = 1)"); 5 and 4 are RDI and RSI -- presumably for
   multi-register returns, confirm against construct_container.  */
437 static int x86_64_int_return_registers[4] = {0 /*RAX*/, 1 /*RDX*/, 5, 4};
439 /* The "default" register map used in 64bit mode. */
440 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
442 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
443 33, 34, 35, 36, 37, 38, 39, 40 /* fp regs */
444 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
445 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
446 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
447 8,9,10,11,12,13,14,15, /* extended integer registers */
448 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
451 /* Define the register numbers to be used in Dwarf debugging information.
452 The SVR4 reference port C compiler uses the following register numbers
453 in its Dwarf output code:
454 0 for %eax (gcc regno = 0)
455 1 for %ecx (gcc regno = 2)
456 2 for %edx (gcc regno = 1)
457 3 for %ebx (gcc regno = 3)
458 4 for %esp (gcc regno = 7)
459 5 for %ebp (gcc regno = 6)
460 6 for %esi (gcc regno = 4)
461 7 for %edi (gcc regno = 5)
462 The following three DWARF register numbers are never generated by
463 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
464 believes these numbers have these meanings.
465 8 for %eip (no gcc equivalent)
466 9 for %eflags (gcc regno = 17)
467 10 for %trapno (no gcc equivalent)
468 It is not at all clear how we should number the FP stack registers
469 for the x86 architecture. If the version of SDB on x86/svr4 were
470 a bit less brain dead with respect to floating-point then we would
471 have a precedent to follow with respect to DWARF register numbers
472 for x86 FP registers, but the SDB on x86/svr4 is so completely
473 broken with respect to FP registers that it is hardly worth thinking
474 of it as something to strive for compatibility with.
475 The version of x86/svr4 SDB I have at the moment does (partially)
476 seem to believe that DWARF register number 11 is associated with
477 the x86 register %st(0), but that's about all. Higher DWARF
478 register numbers don't seem to be associated with anything in
479 particular, and even for DWARF regno 11, SDB only seems to under-
480 stand that it should say that a variable lives in %st(0) (when
481 asked via an `=' command) if we said it was in DWARF regno 11,
482 but SDB still prints garbage when asked for the value of the
483 variable in question (via a `/' command).
484 (Also note that the labels SDB prints for various FP stack regs
485 when doing an `x' command are all wrong.)
486 Note that these problems generally don't affect the native SVR4
487 C compiler because it doesn't allow the use of -O with -g and
488 because when it is *not* optimizing, it allocates a memory
489 location for each floating-point variable, and the memory
490 location is what gets described in the DWARF AT_location
491 attribute for the variable in question.
492 Regardless of the severe mental illness of the x86/svr4 SDB, we
493 do something sensible here and we use the following DWARF
494 register numbers. Note that these are all stack-top-relative
496 11 for %st(0) (gcc regno = 8)
497 12 for %st(1) (gcc regno = 9)
498 13 for %st(2) (gcc regno = 10)
499 14 for %st(3) (gcc regno = 11)
500 15 for %st(4) (gcc regno = 12)
501 16 for %st(5) (gcc regno = 13)
502 17 for %st(6) (gcc regno = 14)
503 18 for %st(7) (gcc regno = 15)
*/
/* DWARF register numbers following the SVR4 numbering described in
   the comment above; -1 marks registers with no SVR4 DWARF number.  */
505 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
507 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
508 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
509 -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
510 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
511 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
512 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
513 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
516 /* Test and compare insns in i386.md store the information needed to
517 generate branch and scc insns here. */
519 struct rtx_def *ix86_compare_op0 = NULL_RTX;
520 struct rtx_def *ix86_compare_op1 = NULL_RTX;
522 #define MAX_386_STACK_LOCALS 3
523 /* Size of the register save area. */
524 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
526 /* Define the structure for the machine field in struct function. */
527 struct machine_function
529 rtx stack_locals[(int) MAX_MACHINE_MODE][MAX_386_STACK_LOCALS];
530 int save_varrargs_registers;
531 int accesses_prev_frame;
534 #define ix86_stack_locals (cfun->machine->stack_locals)
535 #define ix86_save_varrargs_registers (cfun->machine->save_varrargs_registers)
537 /* Structure describing stack frame layout.
538 Stack grows downward:
544 saved frame pointer if frame_pointer_needed
545 <- HARD_FRAME_POINTER
551 > to_allocate <- FRAME_POINTER
 */
563 int outgoing_arguments_size;
566 HOST_WIDE_INT to_allocate;
567 /* The offsets relative to ARG_POINTER. */
568 HOST_WIDE_INT frame_pointer_offset;
569 HOST_WIDE_INT hard_frame_pointer_offset;
570 HOST_WIDE_INT stack_pointer_offset;
573 /* Code model option as passed by user. */
574 const char *ix86_cmodel_string;
576 enum cmodel ix86_cmodel;
578 /* which cpu are we scheduling for */
579 enum processor_type ix86_cpu;
581 /* which instruction set architecture to use. */
584 /* Strings to hold which cpu and instruction set architecture to use. */
585 const char *ix86_cpu_string; /* for -mcpu=<xxx> */
586 const char *ix86_arch_string; /* for -march=<xxx> */
588 /* # of registers to use to pass arguments. */
589 const char *ix86_regparm_string;
591 /* ix86_regparm_string as a number */
594 /* Alignment to use for loops and jumps: */
596 /* Power of two alignment for loops. */
597 const char *ix86_align_loops_string;
599 /* Power of two alignment for non-loop jumps. */
600 const char *ix86_align_jumps_string;
602 /* Power of two alignment for stack boundary in bytes. */
603 const char *ix86_preferred_stack_boundary_string;
605 /* Preferred alignment for stack boundary in bits. */
606 int ix86_preferred_stack_boundary;
608 /* Values 1-5: see jump.c */
609 int ix86_branch_cost;
610 const char *ix86_branch_cost_string;
612 /* Power of two alignment for functions. */
613 const char *ix86_align_funcs_string;
615 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
616 static char internal_label_prefix[16];
617 static int internal_label_prefix_len;
619 static int local_symbolic_operand PARAMS ((rtx, enum machine_mode));
620 static void output_pic_addr_const PARAMS ((FILE *, rtx, int));
621 static void put_condition_code PARAMS ((enum rtx_code, enum machine_mode,
623 static rtx ix86_expand_int_compare PARAMS ((enum rtx_code, rtx, rtx));
624 static enum rtx_code ix86_prepare_fp_compare_args PARAMS ((enum rtx_code,
626 static rtx gen_push PARAMS ((rtx));
627 static int memory_address_length PARAMS ((rtx addr));
628 static int ix86_flags_dependant PARAMS ((rtx, rtx, enum attr_type));
629 static int ix86_agi_dependant PARAMS ((rtx, rtx, enum attr_type));
630 static int ix86_safe_length PARAMS ((rtx));
631 static enum attr_memory ix86_safe_memory PARAMS ((rtx));
632 static enum attr_pent_pair ix86_safe_pent_pair PARAMS ((rtx));
633 static enum attr_ppro_uops ix86_safe_ppro_uops PARAMS ((rtx));
634 static void ix86_dump_ppro_packet PARAMS ((FILE *));
635 static void ix86_reorder_insn PARAMS ((rtx *, rtx *));
636 static rtx * ix86_pent_find_pair PARAMS ((rtx *, rtx *, enum attr_pent_pair,
638 static void ix86_init_machine_status PARAMS ((struct function *));
639 static void ix86_mark_machine_status PARAMS ((struct function *));
640 static void ix86_free_machine_status PARAMS ((struct function *));
641 static int ix86_split_to_parts PARAMS ((rtx, rtx *, enum machine_mode));
642 static int ix86_safe_length_prefix PARAMS ((rtx));
643 static int ix86_nsaved_regs PARAMS((void));
644 static void ix86_emit_save_regs PARAMS((void));
645 static void ix86_emit_save_regs_using_mov PARAMS ((rtx, HOST_WIDE_INT));
646 static void ix86_emit_restore_regs_using_mov PARAMS ((rtx, int, int));
647 static void ix86_set_move_mem_attrs_1 PARAMS ((rtx, rtx, rtx, rtx, rtx));
648 static void ix86_sched_reorder_pentium PARAMS((rtx *, rtx *));
649 static void ix86_sched_reorder_ppro PARAMS((rtx *, rtx *));
650 static HOST_WIDE_INT ix86_GOT_alias_set PARAMS ((void));
651 static void ix86_adjust_counter PARAMS ((rtx, HOST_WIDE_INT));
652 static rtx ix86_expand_aligntest PARAMS ((rtx, int));
653 static void ix86_expand_strlensi_unroll_1 PARAMS ((rtx, rtx));
654 static int ix86_issue_rate PARAMS ((void));
655 static int ix86_adjust_cost PARAMS ((rtx, rtx, rtx, int));
656 static void ix86_sched_init PARAMS ((FILE *, int, int));
657 static int ix86_sched_reorder PARAMS ((FILE *, int, rtx *, int *, int));
658 static int ix86_variable_issue PARAMS ((FILE *, int, rtx, int));
662 rtx base, index, disp;
666 static int ix86_decompose_address PARAMS ((rtx, struct ix86_address *));
668 struct builtin_description;
669 static rtx ix86_expand_sse_comi PARAMS ((const struct builtin_description *,
671 static rtx ix86_expand_sse_compare PARAMS ((const struct builtin_description *,
673 static rtx ix86_expand_unop1_builtin PARAMS ((enum insn_code, tree, rtx));
674 static rtx ix86_expand_unop_builtin PARAMS ((enum insn_code, tree, rtx, int));
675 static rtx ix86_expand_binop_builtin PARAMS ((enum insn_code, tree, rtx));
676 static rtx ix86_expand_store_builtin PARAMS ((enum insn_code, tree, int));
677 static rtx safe_vector_operand PARAMS ((rtx, enum machine_mode));
678 static enum rtx_code ix86_fp_compare_code_to_integer PARAMS ((enum rtx_code));
679 static void ix86_fp_comparison_codes PARAMS ((enum rtx_code code,
683 static rtx ix86_expand_fp_compare PARAMS ((enum rtx_code, rtx, rtx, rtx,
685 static int ix86_fp_comparison_arithmetics_cost PARAMS ((enum rtx_code code));
686 static int ix86_fp_comparison_fcomi_cost PARAMS ((enum rtx_code code));
687 static int ix86_fp_comparison_sahf_cost PARAMS ((enum rtx_code code));
688 static int ix86_fp_comparison_cost PARAMS ((enum rtx_code code));
689 static int ix86_save_reg PARAMS ((int, int));
690 static void ix86_compute_frame_layout PARAMS ((struct ix86_frame *));
691 static int ix86_comp_type_attributes PARAMS ((tree, tree));
692 const struct attribute_spec ix86_attribute_table[];
693 static tree ix86_handle_cdecl_attribute PARAMS ((tree *, tree, tree, int, bool *));
694 static tree ix86_handle_regparm_attribute PARAMS ((tree *, tree, tree, int, bool *));
696 #ifdef DO_GLOBAL_CTORS_BODY
697 static void ix86_svr3_asm_out_constructor PARAMS ((rtx, int));
700 /* Register class used for passing given 64bit part of the argument.
701 These represent classes as documented by the PS ABI, with the exception
702 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
703 use SF or DFmode move instead of DImode to avoid reformatting penalties.
705 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
706 whenever possible (upper half does contain padding).  */
708 enum x86_64_reg_class
711 X86_64_INTEGER_CLASS,
712 X86_64_INTEGERSI_CLASS,
721 const char * const x86_64_reg_class_name[] =
722 {"no", "integer", "integerSI", "sse", "sseSF", "sseDF", "sseup", "x87", "x87up", "no"};
724 #define MAX_CLASSES 4
725 static int classify_argument PARAMS ((enum machine_mode, tree,
726 enum x86_64_reg_class [MAX_CLASSES],
728 static int examine_argument PARAMS ((enum machine_mode, tree, int, int *,
730 static rtx construct_container PARAMS ((enum machine_mode, tree, int, int, int,
732 static enum x86_64_reg_class merge_classes PARAMS ((enum x86_64_reg_class,
733 enum x86_64_reg_class));
735 /* Initialize the GCC target structure. */
736 #undef TARGET_ATTRIBUTE_TABLE
737 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
738 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
739 # undef TARGET_MERGE_DECL_ATTRIBUTES
740 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
743 #undef TARGET_COMP_TYPE_ATTRIBUTES
744 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
746 #undef TARGET_INIT_BUILTINS
747 #define TARGET_INIT_BUILTINS ix86_init_builtins
749 #undef TARGET_EXPAND_BUILTIN
750 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
752 #if defined (OSF_OS) || defined (TARGET_OSF1ELF)
753 static void ix86_osf_output_function_prologue PARAMS ((FILE *,
755 # undef TARGET_ASM_FUNCTION_PROLOGUE
756 # define TARGET_ASM_FUNCTION_PROLOGUE ix86_osf_output_function_prologue
759 #undef TARGET_ASM_OPEN_PAREN
760 #define TARGET_ASM_OPEN_PAREN ""
761 #undef TARGET_ASM_CLOSE_PAREN
762 #define TARGET_ASM_CLOSE_PAREN ""
764 #undef TARGET_SCHED_ADJUST_COST
765 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
766 #undef TARGET_SCHED_ISSUE_RATE
767 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
768 #undef TARGET_SCHED_VARIABLE_ISSUE
769 #define TARGET_SCHED_VARIABLE_ISSUE ix86_variable_issue
770 #undef TARGET_SCHED_INIT
771 #define TARGET_SCHED_INIT ix86_sched_init
772 #undef TARGET_SCHED_REORDER
773 #define TARGET_SCHED_REORDER ix86_sched_reorder
775 struct gcc_target targetm = TARGET_INITIALIZER;
777 /* Sometimes certain combinations of command options do not make
778 sense on a particular target machine. You can define a macro
779 `OVERRIDE_OPTIONS' to take account of this. This macro, if
780 defined, is executed once just after all the command options have
783 Don't use this macro to turn on various extra optimizations for
784 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
790 /* Comes from final.c -- no real reason to change it. */
791 #define MAX_CODE_ALIGN 16
795 const struct processor_costs *cost; /* Processor costs */
796 const int target_enable; /* Target flags to enable. */
797 const int target_disable; /* Target flags to disable. */
798 const int align_loop; /* Default alignments. */
799 const int align_loop_max_skip;
800 const int align_jump;
801 const int align_jump_max_skip;
802 const int align_func;
803 const int branch_cost;
805 const processor_target_table[PROCESSOR_max] =
807 {&i386_cost, 0, 0, 4, 3, 4, 3, 4, 1},
808 {&i486_cost, 0, 0, 16, 15, 16, 15, 16, 1},
809 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16, 1},
810 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16, 1},
811 {&k6_cost, 0, 0, 32, 7, 32, 7, 32, 1},
812 {&athlon_cost, 0, 0, 16, 7, 64, 7, 16, 1},
813 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0, 1}
818 const char *const name; /* processor name or nickname. */
819 const enum processor_type processor;
821 const processor_alias_table[] =
823 {"i386", PROCESSOR_I386},
824 {"i486", PROCESSOR_I486},
825 {"i586", PROCESSOR_PENTIUM},
826 {"pentium", PROCESSOR_PENTIUM},
827 {"i686", PROCESSOR_PENTIUMPRO},
828 {"pentiumpro", PROCESSOR_PENTIUMPRO},
829 {"k6", PROCESSOR_K6},
830 {"athlon", PROCESSOR_ATHLON},
831 {"pentium4", PROCESSOR_PENTIUM4},
834 int const pta_size = sizeof (processor_alias_table) / sizeof (struct pta);
836 #ifdef SUBTARGET_OVERRIDE_OPTIONS
837 SUBTARGET_OVERRIDE_OPTIONS;
840 ix86_arch = PROCESSOR_I386;
841 ix86_cpu = (enum processor_type) TARGET_CPU_DEFAULT;
843 if (ix86_cmodel_string != 0)
845 if (!strcmp (ix86_cmodel_string, "small"))
846 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
848 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
849 else if (!strcmp (ix86_cmodel_string, "32"))
851 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
852 ix86_cmodel = CM_KERNEL;
853 else if (!strcmp (ix86_cmodel_string, "medium") && !flag_pic)
854 ix86_cmodel = CM_MEDIUM;
855 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
856 ix86_cmodel = CM_LARGE;
858 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
864 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
866 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
867 error ("code model `%s' not supported in the %s bit mode",
868 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
869 if (ix86_cmodel == CM_LARGE)
870 sorry ("code model `large' not supported yet");
871 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
872 sorry ("%i-bit mode not compiled in",
873 (target_flags & MASK_64BIT) ? 64 : 32);
875 if (ix86_arch_string != 0)
877 for (i = 0; i < pta_size; i++)
878 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
880 ix86_arch = processor_alias_table[i].processor;
881 /* Default cpu tuning to the architecture. */
882 ix86_cpu = ix86_arch;
887 error ("bad value (%s) for -march= switch", ix86_arch_string);
890 if (ix86_cpu_string != 0)
892 for (i = 0; i < pta_size; i++)
893 if (! strcmp (ix86_cpu_string, processor_alias_table[i].name))
895 ix86_cpu = processor_alias_table[i].processor;
899 error ("bad value (%s) for -mcpu= switch", ix86_cpu_string);
903 ix86_cost = &size_cost;
905 ix86_cost = processor_target_table[ix86_cpu].cost;
906 target_flags |= processor_target_table[ix86_cpu].target_enable;
907 target_flags &= ~processor_target_table[ix86_cpu].target_disable;
909 /* Arrange to set up i386_stack_locals for all functions. */
910 init_machine_status = ix86_init_machine_status;
911 mark_machine_status = ix86_mark_machine_status;
912 free_machine_status = ix86_free_machine_status;
914 /* Validate -mregparm= value. */
915 if (ix86_regparm_string)
917 i = atoi (ix86_regparm_string);
918 if (i < 0 || i > REGPARM_MAX)
919 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
925 ix86_regparm = REGPARM_MAX;
927 /* If the user has provided any of the -malign-* options,
928 warn and use that value only if -falign-* is not set.
929 Remove this code in GCC 3.2 or later. */
930 if (ix86_align_loops_string)
932 warning ("-malign-loops is obsolete, use -falign-loops");
933 if (align_loops == 0)
935 i = atoi (ix86_align_loops_string);
936 if (i < 0 || i > MAX_CODE_ALIGN)
937 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
939 align_loops = 1 << i;
943 if (ix86_align_jumps_string)
945 warning ("-malign-jumps is obsolete, use -falign-jumps");
946 if (align_jumps == 0)
948 i = atoi (ix86_align_jumps_string);
949 if (i < 0 || i > MAX_CODE_ALIGN)
950 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
952 align_jumps = 1 << i;
956 if (ix86_align_funcs_string)
958 warning ("-malign-functions is obsolete, use -falign-functions");
959 if (align_functions == 0)
961 i = atoi (ix86_align_funcs_string);
962 if (i < 0 || i > MAX_CODE_ALIGN)
963 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
965 align_functions = 1 << i;
969 /* Default align_* from the processor table. */
970 #define abs(n) (n < 0 ? -n : n)
971 if (align_loops == 0)
973 align_loops = processor_target_table[ix86_cpu].align_loop;
974 align_loops_max_skip = processor_target_table[ix86_cpu].align_loop_max_skip;
976 if (align_jumps == 0)
978 align_jumps = processor_target_table[ix86_cpu].align_jump;
979 align_jumps_max_skip = processor_target_table[ix86_cpu].align_jump_max_skip;
981 if (align_functions == 0)
983 align_functions = processor_target_table[ix86_cpu].align_func;
986 /* Validate -mpreferred-stack-boundary= value, or provide default.
987 The default of 128 bits is for Pentium III's SSE __m128, but we
988 don't want additional code to keep the stack aligned when
989 optimizing for code size. */
990 ix86_preferred_stack_boundary = (optimize_size
991 ? TARGET_64BIT ? 64 : 32
993 if (ix86_preferred_stack_boundary_string)
995 i = atoi (ix86_preferred_stack_boundary_string);
996 if (i < (TARGET_64BIT ? 3 : 2) || i > 12)
997 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
998 TARGET_64BIT ? 3 : 2);
1000 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
1003 /* Validate -mbranch-cost= value, or provide default. */
1004 ix86_branch_cost = processor_target_table[ix86_cpu].branch_cost;
1005 if (ix86_branch_cost_string)
1007 i = atoi (ix86_branch_cost_string);
1009 error ("-mbranch-cost=%d is not between 0 and 5", i);
1011 ix86_branch_cost = i;
1014 /* Keep nonleaf frame pointers. */
1015 if (TARGET_OMIT_LEAF_FRAME_POINTER)
1016 flag_omit_frame_pointer = 1;
1018 /* If we're doing fast math, we don't care about comparison order
1019 wrt NaNs. This lets us use a shorter comparison sequence. */
1020 if (flag_unsafe_math_optimizations)
1021 target_flags &= ~MASK_IEEE_FP;
1025 if (TARGET_ALIGN_DOUBLE)
1026 error ("-malign-double makes no sense in the 64bit mode");
1028 error ("-mrtd calling convention not supported in the 64bit mode");
1029 /* Enable by default the SSE and MMX builtins. */
1030 target_flags |= MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE;
1033 /* It makes no sense to ask for just SSE builtins, so MMX is also turned
1036 target_flags |= MASK_MMX;
1038 /* If it has 3DNow! it also has MMX so MMX is also turned on by -m3dnow */
1041 target_flags |= MASK_MMX;
 1042 /* If we are targeting the Athlon architecture, enable the 3Dnow/MMX
1043 extensions it adds. */
1044 if (x86_3dnow_a & (1 << ix86_arch))
1045 target_flags |= MASK_3DNOW_A;
1047 if ((x86_accumulate_outgoing_args & CPUMASK)
1048 && !(target_flags & MASK_NO_ACCUMULATE_OUTGOING_ARGS)
1050 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
1052 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
1055 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
1056 p = strchr (internal_label_prefix, 'X');
1057 internal_label_prefix_len = p - internal_label_prefix;
1063 optimization_options (level, size)
1065 int size ATTRIBUTE_UNUSED;
1067 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
1068 make the problem with not enough registers even worse. */
1069 #ifdef INSN_SCHEDULING
1071 flag_schedule_insns = 0;
1073 if (TARGET_64BIT && optimize >= 1)
1074 flag_omit_frame_pointer = 1;
1077 flag_pcc_struct_return = 0;
1078 flag_asynchronous_unwind_tables = 1;
1082 /* Table of valid machine attributes. */
1083 const struct attribute_spec ix86_attribute_table[] =
1085 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
1086 /* Stdcall attribute says callee is responsible for popping arguments
1087 if they are not variable. */
1088 { "stdcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1089 /* Cdecl attribute says the callee is a normal C declaration */
1090 { "cdecl", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1091 /* Regparm attribute specifies how many integer arguments are to be
1092 passed in registers. */
1093 { "regparm", 1, 1, false, true, true, ix86_handle_regparm_attribute },
1094 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
1095 { "dllimport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1096 { "dllexport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1097 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
1099 { NULL, 0, 0, false, false, false, NULL }
1102 /* Handle a "cdecl" or "stdcall" attribute;
1103 arguments as in struct attribute_spec.handler. */
1105 ix86_handle_cdecl_attribute (node, name, args, flags, no_add_attrs)
1108 tree args ATTRIBUTE_UNUSED;
1109 int flags ATTRIBUTE_UNUSED;
1112 if (TREE_CODE (*node) != FUNCTION_TYPE
1113 && TREE_CODE (*node) != METHOD_TYPE
1114 && TREE_CODE (*node) != FIELD_DECL
1115 && TREE_CODE (*node) != TYPE_DECL)
1117 warning ("`%s' attribute only applies to functions",
1118 IDENTIFIER_POINTER (name));
1119 *no_add_attrs = true;
1124 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
1125 *no_add_attrs = true;
1131 /* Handle a "regparm" attribute;
1132 arguments as in struct attribute_spec.handler. */
1134 ix86_handle_regparm_attribute (node, name, args, flags, no_add_attrs)
1138 int flags ATTRIBUTE_UNUSED;
1141 if (TREE_CODE (*node) != FUNCTION_TYPE
1142 && TREE_CODE (*node) != METHOD_TYPE
1143 && TREE_CODE (*node) != FIELD_DECL
1144 && TREE_CODE (*node) != TYPE_DECL)
1146 warning ("`%s' attribute only applies to functions",
1147 IDENTIFIER_POINTER (name));
1148 *no_add_attrs = true;
1154 cst = TREE_VALUE (args);
1155 if (TREE_CODE (cst) != INTEGER_CST)
1157 warning ("`%s' attribute requires an integer constant argument",
1158 IDENTIFIER_POINTER (name));
1159 *no_add_attrs = true;
1161 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
1163 warning ("argument to `%s' attribute larger than %d",
1164 IDENTIFIER_POINTER (name), REGPARM_MAX);
1165 *no_add_attrs = true;
1172 #if defined (OSF_OS) || defined (TARGET_OSF1ELF)
1174 /* Generate the assembly code for function entry. FILE is a stdio
1175 stream to output the code to. SIZE is an int: how many units of
1176 temporary storage to allocate.
1178 Refer to the array `regs_ever_live' to determine which registers to
1179 save; `regs_ever_live[I]' is nonzero if register number I is ever
1180 used in the function. This function is responsible for knowing
1181 which registers should not be saved even if used.
1183 We override it here to allow for the new profiling code to go before
1184 the prologue and the old mcount code to go after the prologue (and
1185 after %ebx has been set up for ELF shared library support). */
1188 ix86_osf_output_function_prologue (file, size)
1192 const char *prefix = "";
1193 const char *const lprefix = LPREFIX;
1194 int labelno = profile_label_no;
1198 if (TARGET_UNDERSCORES)
1201 if (profile_flag && OSF_PROFILE_BEFORE_PROLOGUE)
1203 if (!flag_pic && !HALF_PIC_P ())
1205 fprintf (file, "\tmovl $%sP%d,%%edx\n", lprefix, labelno);
1206 fprintf (file, "\tcall *%s_mcount_ptr\n", prefix);
1209 else if (HALF_PIC_P ())
1213 HALF_PIC_EXTERNAL ("_mcount_ptr");
1214 symref = HALF_PIC_PTR (gen_rtx_SYMBOL_REF (Pmode,
1217 fprintf (file, "\tmovl $%sP%d,%%edx\n", lprefix, labelno);
1218 fprintf (file, "\tmovl %s%s,%%eax\n", prefix,
1220 fprintf (file, "\tcall *(%%eax)\n");
1225 static int call_no = 0;
1227 fprintf (file, "\tcall %sPc%d\n", lprefix, call_no);
1228 fprintf (file, "%sPc%d:\tpopl %%eax\n", lprefix, call_no);
1229 fprintf (file, "\taddl $_GLOBAL_OFFSET_TABLE_+[.-%sPc%d],%%eax\n",
1230 lprefix, call_no++);
1231 fprintf (file, "\tleal %sP%d@GOTOFF(%%eax),%%edx\n",
1233 fprintf (file, "\tmovl %s_mcount_ptr@GOT(%%eax),%%eax\n",
1235 fprintf (file, "\tcall *(%%eax)\n");
1241 if (profile_flag && OSF_PROFILE_BEFORE_PROLOGUE)
1245 fprintf (file, "\tmovl $%sP%d,%%edx\n", lprefix, labelno);
1246 fprintf (file, "\tcall *%s_mcount_ptr\n", prefix);
1251 static int call_no = 0;
1253 fprintf (file, "\tcall %sPc%d\n", lprefix, call_no);
1254 fprintf (file, "%sPc%d:\tpopl %%eax\n", lprefix, call_no);
1255 fprintf (file, "\taddl $_GLOBAL_OFFSET_TABLE_+[.-%sPc%d],%%eax\n",
1256 lprefix, call_no++);
1257 fprintf (file, "\tleal %sP%d@GOTOFF(%%eax),%%edx\n",
1259 fprintf (file, "\tmovl %s_mcount_ptr@GOT(%%eax),%%eax\n",
1261 fprintf (file, "\tcall *(%%eax)\n");
1264 #endif /* !OSF_OS */
1266 function_prologue (file, size);
1269 #endif /* OSF_OS || TARGET_OSF1ELF */
1271 /* Return 0 if the attributes for two types are incompatible, 1 if they
1272 are compatible, and 2 if they are nearly compatible (which causes a
1273 warning to be generated). */
1276 ix86_comp_type_attributes (type1, type2)
1280 /* Check for mismatch of non-default calling convention. */
1281 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
1283 if (TREE_CODE (type1) != FUNCTION_TYPE)
1286 /* Check for mismatched return types (cdecl vs stdcall). */
1287 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
1288 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
1293 /* Value is the number of bytes of arguments automatically
1294 popped when returning from a subroutine call.
1295 FUNDECL is the declaration node of the function (as a tree),
1296 FUNTYPE is the data type of the function (as a tree),
1297 or for a library call it is an identifier node for the subroutine name.
1298 SIZE is the number of bytes of arguments passed on the stack.
1300 On the 80386, the RTD insn may be used to pop them if the number
1301 of args is fixed, but if the number is variable then the caller
1302 must pop them all. RTD can't be used for library calls now
1303 because the library is compiled with the Unix compiler.
1304 Use of RTD is a selectable option, since it is incompatible with
1305 standard Unix calling sequences. If the option is not selected,
1306 the caller must always pop the args.
1308 The attribute stdcall is equivalent to RTD on a per module basis. */
1311 ix86_return_pops_args (fundecl, funtype, size)
1316 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
1318 /* Cdecl functions override -mrtd, and never pop the stack. */
1319 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
1321 /* Stdcall functions will pop the stack if not variable args. */
1322 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype)))
1326 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
1327 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
1328 == void_type_node)))
1332 /* Lose any fake structure return argument. */
1333 if (aggregate_value_p (TREE_TYPE (funtype))
1335 return GET_MODE_SIZE (Pmode);
1340 /* Argument support functions. */
1342 /* Return true when register may be used to pass function parameters. */
1344 ix86_function_arg_regno_p (regno)
1349 return regno < REGPARM_MAX || (TARGET_SSE && SSE_REGNO_P (regno));
1350 if (SSE_REGNO_P (regno) && TARGET_SSE)
1352 /* RAX is used as hidden argument to va_arg functions. */
1355 for (i = 0; i < REGPARM_MAX; i++)
1356 if (regno == x86_64_int_parameter_registers[i])
1361 /* Initialize a variable CUM of type CUMULATIVE_ARGS
1362 for a call to a function whose data type is FNTYPE.
1363 For a library call, FNTYPE is 0. */
1366 init_cumulative_args (cum, fntype, libname)
1367 CUMULATIVE_ARGS *cum; /* Argument info to initialize */
1368 tree fntype; /* tree ptr for function decl */
1369 rtx libname; /* SYMBOL_REF of library name or 0 */
1371 static CUMULATIVE_ARGS zero_cum;
1372 tree param, next_param;
1374 if (TARGET_DEBUG_ARG)
1376 fprintf (stderr, "\ninit_cumulative_args (");
1378 fprintf (stderr, "fntype code = %s, ret code = %s",
1379 tree_code_name[(int) TREE_CODE (fntype)],
1380 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
1382 fprintf (stderr, "no fntype");
1385 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
1390 /* Set up the number of registers to use for passing arguments. */
1391 cum->nregs = ix86_regparm;
1392 cum->sse_nregs = SSE_REGPARM_MAX;
1393 if (fntype && !TARGET_64BIT)
1395 tree attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (fntype));
1398 cum->nregs = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1400 cum->maybe_vaarg = false;
1402 /* Determine if this function has variable arguments. This is
1403 indicated by the last argument being 'void_type_mode' if there
1404 are no variable arguments. If there are variable arguments, then
1405 we won't pass anything in registers */
1409 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
1410 param != 0; param = next_param)
1412 next_param = TREE_CHAIN (param);
1413 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
1417 cum->maybe_vaarg = true;
1421 if ((!fntype && !libname)
1422 || (fntype && !TYPE_ARG_TYPES (fntype)))
1423 cum->maybe_vaarg = 1;
1425 if (TARGET_DEBUG_ARG)
1426 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
 1431 /* x86-64 register passing implementation.  See x86-64 ABI for details.  Goal
1432 of this code is to classify each 8bytes of incoming argument by the register
1433 class and assign registers accordingly. */
1435 /* Return the union class of CLASS1 and CLASS2.
1436 See the x86-64 PS ABI for details. */
1438 static enum x86_64_reg_class
1439 merge_classes (class1, class2)
1440 enum x86_64_reg_class class1, class2;
1442 /* Rule #1: If both classes are equal, this is the resulting class. */
1443 if (class1 == class2)
1446 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
1448 if (class1 == X86_64_NO_CLASS)
1450 if (class2 == X86_64_NO_CLASS)
1453 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
1454 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
1455 return X86_64_MEMORY_CLASS;
1457 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
1458 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
1459 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
1460 return X86_64_INTEGERSI_CLASS;
1461 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
1462 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
1463 return X86_64_INTEGER_CLASS;
1465 /* Rule #5: If one of the classes is X87 or X87UP class, MEMORY is used. */
1466 if (class1 == X86_64_X87_CLASS || class1 == X86_64_X87UP_CLASS
1467 || class2 == X86_64_X87_CLASS || class2 == X86_64_X87UP_CLASS)
1468 return X86_64_MEMORY_CLASS;
1470 /* Rule #6: Otherwise class SSE is used. */
1471 return X86_64_SSE_CLASS;
1474 /* Classify the argument of type TYPE and mode MODE.
1475 CLASSES will be filled by the register class used to pass each word
1476 of the operand. The number of words is returned. In case the parameter
1477 should be passed in memory, 0 is returned. As a special case for zero
1478 sized containers, classes[0] will be NO_CLASS and 1 is returned.
1480 BIT_OFFSET is used internally for handling records and specifies offset
1481 of the offset in bits modulo 256 to avoid overflow cases.
1483 See the x86-64 PS ABI for details.
1487 classify_argument (mode, type, classes, bit_offset)
1488 enum machine_mode mode;
1490 enum x86_64_reg_class classes[MAX_CLASSES];
1494 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1495 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1497 if (type && AGGREGATE_TYPE_P (type))
1501 enum x86_64_reg_class subclasses[MAX_CLASSES];
1503 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
1507 for (i = 0; i < words; i++)
1508 classes[i] = X86_64_NO_CLASS;
1510 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
 1511 signal the memory class, so handle it as a special case.  */
1514 classes[0] = X86_64_NO_CLASS;
1518 /* Classify each field of record and merge classes. */
1519 if (TREE_CODE (type) == RECORD_TYPE)
1521 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
1523 if (TREE_CODE (field) == FIELD_DECL)
1527 /* Bitfields are always classified as integer. Handle them
1528 early, since later code would consider them to be
1529 misaligned integers. */
1530 if (DECL_BIT_FIELD (field))
1532 for (i = int_bit_position (field) / 8 / 8;
1533 i < (int_bit_position (field)
1534 + tree_low_cst (DECL_SIZE (field), 0)
1537 merge_classes (X86_64_INTEGER_CLASS,
1542 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
1543 TREE_TYPE (field), subclasses,
1544 (int_bit_position (field)
1545 + bit_offset) % 256);
1548 for (i = 0; i < num; i++)
1551 (int_bit_position (field) + bit_offset) / 8 / 8;
1553 merge_classes (subclasses[i], classes[i + pos]);
1559 /* Arrays are handled as small records. */
1560 else if (TREE_CODE (type) == ARRAY_TYPE)
1563 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
1564 TREE_TYPE (type), subclasses, bit_offset);
1568 /* The partial classes are now full classes. */
1569 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
1570 subclasses[0] = X86_64_SSE_CLASS;
1571 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
1572 subclasses[0] = X86_64_INTEGER_CLASS;
1574 for (i = 0; i < words; i++)
1575 classes[i] = subclasses[i % num];
1577 /* Unions are similar to RECORD_TYPE but offset is always 0. */
1578 else if (TREE_CODE (type) == UNION_TYPE)
1580 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
1582 if (TREE_CODE (field) == FIELD_DECL)
1585 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
1586 TREE_TYPE (field), subclasses,
1590 for (i = 0; i < num; i++)
1591 classes[i] = merge_classes (subclasses[i], classes[i]);
1598 /* Final merger cleanup. */
1599 for (i = 0; i < words; i++)
1601 /* If one class is MEMORY, everything should be passed in
1603 if (classes[i] == X86_64_MEMORY_CLASS)
 1606 /* The X86_64_SSEUP_CLASS should always be preceded by
1607 X86_64_SSE_CLASS. */
1608 if (classes[i] == X86_64_SSEUP_CLASS
1609 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
1610 classes[i] = X86_64_SSE_CLASS;
 1612 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS.  */
1613 if (classes[i] == X86_64_X87UP_CLASS
1614 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
1615 classes[i] = X86_64_SSE_CLASS;
1620 /* Compute alignment needed. We align all types to natural boundaries with
1621 exception of XFmode that is aligned to 64bits. */
1622 if (mode != VOIDmode && mode != BLKmode)
1624 int mode_alignment = GET_MODE_BITSIZE (mode);
1627 mode_alignment = 128;
1628 else if (mode == XCmode)
1629 mode_alignment = 256;
1630 /* Misaligned fields are always returned in memory. */
1631 if (bit_offset % mode_alignment)
1635 /* Classification of atomic types. */
1645 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
1646 classes[0] = X86_64_INTEGERSI_CLASS;
1648 classes[0] = X86_64_INTEGER_CLASS;
1652 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
1655 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
1656 classes[2] = classes[3] = X86_64_INTEGER_CLASS;
1659 if (!(bit_offset % 64))
1660 classes[0] = X86_64_SSESF_CLASS;
1662 classes[0] = X86_64_SSE_CLASS;
1665 classes[0] = X86_64_SSEDF_CLASS;
1668 classes[0] = X86_64_X87_CLASS;
1669 classes[1] = X86_64_X87UP_CLASS;
1672 classes[0] = X86_64_X87_CLASS;
1673 classes[1] = X86_64_X87UP_CLASS;
1674 classes[2] = X86_64_X87_CLASS;
1675 classes[3] = X86_64_X87UP_CLASS;
1678 classes[0] = X86_64_SSEDF_CLASS;
1679 classes[1] = X86_64_SSEDF_CLASS;
1682 classes[0] = X86_64_SSE_CLASS;
1691 /* Examine the argument and return set number of register required in each
1692 class. Return 0 iff parameter should be passed in memory. */
1694 examine_argument (mode, type, in_return, int_nregs, sse_nregs)
1695 enum machine_mode mode;
1697 int *int_nregs, *sse_nregs;
1700 enum x86_64_reg_class class[MAX_CLASSES];
1701 int n = classify_argument (mode, type, class, 0);
1707 for (n--; n >= 0; n--)
1710 case X86_64_INTEGER_CLASS:
1711 case X86_64_INTEGERSI_CLASS:
1714 case X86_64_SSE_CLASS:
1715 case X86_64_SSESF_CLASS:
1716 case X86_64_SSEDF_CLASS:
1719 case X86_64_NO_CLASS:
1720 case X86_64_SSEUP_CLASS:
1722 case X86_64_X87_CLASS:
1723 case X86_64_X87UP_CLASS:
1727 case X86_64_MEMORY_CLASS:
1732 /* Construct container for the argument used by GCC interface. See
1733 FUNCTION_ARG for the detailed description. */
1735 construct_container (mode, type, in_return, nintregs, nsseregs, intreg, sse_regno)
1736 enum machine_mode mode;
1739 int nintregs, nsseregs;
1740 int *intreg, sse_regno;
1742 enum machine_mode tmpmode;
1744 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1745 enum x86_64_reg_class class[MAX_CLASSES];
1749 int needed_sseregs, needed_intregs;
1750 rtx exp[MAX_CLASSES];
1753 n = classify_argument (mode, type, class, 0);
1754 if (TARGET_DEBUG_ARG)
1757 fprintf (stderr, "Memory class\n");
1760 fprintf (stderr, "Classes:");
1761 for (i = 0; i < n; i++)
1763 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
1765 fprintf (stderr, "\n");
1770 if (!examine_argument (mode, type, in_return, &needed_intregs, &needed_sseregs))
1772 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
1775 /* First construct simple cases. Avoid SCmode, since we want to use
1776 single register to pass this type. */
1777 if (n == 1 && mode != SCmode)
1780 case X86_64_INTEGER_CLASS:
1781 case X86_64_INTEGERSI_CLASS:
1782 return gen_rtx_REG (mode, intreg[0]);
1783 case X86_64_SSE_CLASS:
1784 case X86_64_SSESF_CLASS:
1785 case X86_64_SSEDF_CLASS:
1786 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
1787 case X86_64_X87_CLASS:
1788 return gen_rtx_REG (mode, FIRST_STACK_REG);
1789 case X86_64_NO_CLASS:
1790 /* Zero sized array, struct or class. */
1795 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS)
1796 return gen_rtx_REG (TImode, SSE_REGNO (sse_regno));
1798 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
1799 return gen_rtx_REG (TFmode, FIRST_STACK_REG);
1800 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
1801 && class[1] == X86_64_INTEGER_CLASS
1802 && (mode == CDImode || mode == TImode)
1803 && intreg[0] + 1 == intreg[1])
1804 return gen_rtx_REG (mode, intreg[0]);
1806 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS
1807 && class[2] == X86_64_X87_CLASS && class[3] == X86_64_X87UP_CLASS)
1808 return gen_rtx_REG (TCmode, FIRST_STACK_REG);
1810 /* Otherwise figure out the entries of the PARALLEL. */
1811 for (i = 0; i < n; i++)
1815 case X86_64_NO_CLASS:
1817 case X86_64_INTEGER_CLASS:
1818 case X86_64_INTEGERSI_CLASS:
 1819 /* Merge TImodes on aligned occasions here too.  */
1820 if (i * 8 + 8 > bytes)
1821 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
1822 else if (class[i] == X86_64_INTEGERSI_CLASS)
1826 /* We've requested 24 bytes we don't have mode for. Use DImode. */
1827 if (tmpmode == BLKmode)
1829 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
1830 gen_rtx_REG (tmpmode, *intreg),
1834 case X86_64_SSESF_CLASS:
1835 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
1836 gen_rtx_REG (SFmode,
1837 SSE_REGNO (sse_regno)),
1841 case X86_64_SSEDF_CLASS:
1842 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
1843 gen_rtx_REG (DFmode,
1844 SSE_REGNO (sse_regno)),
1848 case X86_64_SSE_CLASS:
1849 if (i < n && class[i + 1] == X86_64_SSEUP_CLASS)
1850 tmpmode = TImode, i++;
1853 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
1854 gen_rtx_REG (tmpmode,
1855 SSE_REGNO (sse_regno)),
1863 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
1864 for (i = 0; i < nexps; i++)
1865 XVECEXP (ret, 0, i) = exp [i];
1869 /* Update the data in CUM to advance over an argument
1870 of mode MODE and data type TYPE.
1871 (TYPE is null for libcalls where that information may not be available.) */
1874 function_arg_advance (cum, mode, type, named)
1875 CUMULATIVE_ARGS *cum; /* current arg information */
1876 enum machine_mode mode; /* current arg mode */
1877 tree type; /* type of the argument or 0 if lib support */
1878 int named; /* whether or not the argument was named */
1881 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1882 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1884 if (TARGET_DEBUG_ARG)
1886 "function_adv (sz=%d, wds=%2d, nregs=%d, mode=%s, named=%d)\n\n",
1887 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
1890 int int_nregs, sse_nregs;
1891 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
1892 cum->words += words;
1893 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
1895 cum->nregs -= int_nregs;
1896 cum->sse_nregs -= sse_nregs;
1897 cum->regno += int_nregs;
1898 cum->sse_regno += sse_nregs;
1901 cum->words += words;
1905 if (TARGET_SSE && mode == TImode)
1907 cum->sse_words += words;
1908 cum->sse_nregs -= 1;
1909 cum->sse_regno += 1;
1910 if (cum->sse_nregs <= 0)
1918 cum->words += words;
1919 cum->nregs -= words;
1920 cum->regno += words;
1922 if (cum->nregs <= 0)
1932 /* Define where to put the arguments to a function.
1933 Value is zero to push the argument on the stack,
1934 or a hard register in which to store the argument.
1936 MODE is the argument's machine mode.
1937 TYPE is the data type of the argument (as a tree).
1938 This is null for libcalls where that information may
1940 CUM is a variable of type CUMULATIVE_ARGS which gives info about
1941 the preceding args and about the function being called.
1942 NAMED is nonzero if this argument is a named parameter
1943 (otherwise it is an extra parameter matching an ellipsis). */
1946 function_arg (cum, mode, type, named)
1947 CUMULATIVE_ARGS *cum; /* current arg information */
1948 enum machine_mode mode; /* current arg mode */
1949 tree type; /* type of the argument or 0 if lib support */
1950 int named; /* != 0 for normal args, == 0 for ... args */
1954 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1955 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
 1957 /* Handle a hidden AL argument containing the number of registers for varargs
1958 x86-64 functions. For i386 ABI just return constm1_rtx to avoid
1960 if (mode == VOIDmode)
1963 return GEN_INT (cum->maybe_vaarg
1964 ? (cum->sse_nregs < 0
1972 ret = construct_container (mode, type, 0, cum->nregs, cum->sse_nregs,
1973 &x86_64_int_parameter_registers [cum->regno],
1978 /* For now, pass fp/complex values on the stack. */
1987 if (words <= cum->nregs)
1988 ret = gen_rtx_REG (mode, cum->regno);
1992 ret = gen_rtx_REG (mode, cum->sse_regno);
1996 if (TARGET_DEBUG_ARG)
1999 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d",
2000 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2003 fprintf (stderr, ", reg=%%e%s", reg_names[ REGNO(ret) ]);
2005 fprintf (stderr, ", stack");
2007 fprintf (stderr, " )\n");
2013 /* Gives the alignment boundary, in bits, of an argument with the specified mode
2017 ix86_function_arg_boundary (mode, type)
2018 enum machine_mode mode;
2023 return PARM_BOUNDARY;
2025 align = TYPE_ALIGN (type);
2027 align = GET_MODE_ALIGNMENT (mode);
2028 if (align < PARM_BOUNDARY)
2029 align = PARM_BOUNDARY;
2035 /* Return true if N is a possible register number of function value. */
2037 ix86_function_value_regno_p (regno)
2042 return ((regno) == 0
2043 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
2044 || ((regno) == FIRST_SSE_REG && TARGET_SSE));
2046 return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
2047 || ((regno) == FIRST_SSE_REG && TARGET_SSE)
2048 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
2051 /* Define how to find the value returned by a function.
2052 VALTYPE is the data type of the value (as a tree).
2053 If the precise function being called is known, FUNC is its FUNCTION_DECL;
2054 otherwise, FUNC is 0. */
2056 ix86_function_value (valtype)
2061 rtx ret = construct_container (TYPE_MODE (valtype), valtype, 1,
2062 REGPARM_MAX, SSE_REGPARM_MAX,
2063 x86_64_int_return_registers, 0);
 2064 /* For zero sized structures, construct_container returns NULL, but we need
 2065 to keep the rest of the compiler happy by returning a meaningful value.  */
2067 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
2071 return gen_rtx_REG (TYPE_MODE (valtype), VALUE_REGNO (TYPE_MODE (valtype)));
2074 /* Return false iff type is returned in memory. */
2076 ix86_return_in_memory (type)
2079 int needed_intregs, needed_sseregs;
2082 return !examine_argument (TYPE_MODE (type), type, 1,
2083 &needed_intregs, &needed_sseregs);
2087 if (TYPE_MODE (type) == BLKmode
2088 || (VECTOR_MODE_P (TYPE_MODE (type))
2089 && int_size_in_bytes (type) == 8)
2090 || (int_size_in_bytes (type) > 12 && TYPE_MODE (type) != TImode
2091 && TYPE_MODE (type) != TFmode
2092 && !VECTOR_MODE_P (TYPE_MODE (type))))
2098 /* Define how to find the value returned by a library function
2099 assuming the value has mode MODE. */
2101 ix86_libcall_value (mode)
2102 enum machine_mode mode;
2112 return gen_rtx_REG (mode, FIRST_SSE_REG);
2115 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
2117 return gen_rtx_REG (mode, 0);
2121 return gen_rtx_REG (mode, VALUE_REGNO (mode));
2124 /* Create the va_list data type. */
2127 ix86_build_va_list ()
2129 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
2131 /* For i386 we use plain pointer to argument area. */
2133 return build_pointer_type (char_type_node);
2135 record = make_lang_type (RECORD_TYPE);
2136 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
2138 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
2139 unsigned_type_node);
2140 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
2141 unsigned_type_node);
2142 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
2144 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
2147 DECL_FIELD_CONTEXT (f_gpr) = record;
2148 DECL_FIELD_CONTEXT (f_fpr) = record;
2149 DECL_FIELD_CONTEXT (f_ovf) = record;
2150 DECL_FIELD_CONTEXT (f_sav) = record;
2152 TREE_CHAIN (record) = type_decl;
2153 TYPE_NAME (record) = type_decl;
2154 TYPE_FIELDS (record) = f_gpr;
2155 TREE_CHAIN (f_gpr) = f_fpr;
2156 TREE_CHAIN (f_fpr) = f_ovf;
2157 TREE_CHAIN (f_ovf) = f_sav;
2159 layout_type (record);
2161 /* The correct type is an array type of one element. */
2162 return build_array_type (record, build_index_type (size_zero_node));
/* NOTE(review): this listing has extraction line numbers fused into every
   line and several structural lines elided (declarations, braces, the
   TARGET_64BIT and no_rtl guards).  Code left byte-identical; restore
   missing lines from pristine GCC sources before compiling.  */
2165 /* Perform any needed actions needed for a function that is receiving a
2166 variable number of arguments.
2170 MODE and TYPE are the mode and type of the current parameter.
2172 PRETEND_SIZE is a variable that should be set to the amount of stack
2173 that must be pushed by the prolog to pretend that our caller pushed
2176 Normally, this macro will push all remaining incoming registers on the
2177 stack and set PRETEND_SIZE to the length of the registers pushed. */
2180 ix86_setup_incoming_varargs (cum, mode, type, pretend_size, no_rtl)
2181 CUMULATIVE_ARGS *cum;
2182 enum machine_mode mode;
2184 int *pretend_size ATTRIBUTE_UNUSED;
2188 CUMULATIVE_ARGS next_cum;
2189 rtx save_area = NULL_RTX, mem;
2202 /* Indicate to allocate space on the stack for varargs save area. */
2203 ix86_save_varrargs_registers = 1;
2205 fntype = TREE_TYPE (current_function_decl);
2206 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
2207 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
2208 != void_type_node));
2210 /* For varargs, we do not want to skip the dummy va_dcl argument.
2211 For stdargs, we do want to skip the last named argument. */
2214 function_arg_advance (&next_cum, mode, type, 1);
2217 save_area = frame_pointer_rtx;
2219 set = get_varargs_alias_set ();
/* Spill every unused integer parameter register into the save area so
   va_arg can fetch it later.  */
2221 for (i = next_cum.regno; i < ix86_regparm; i++)
2223 mem = gen_rtx_MEM (Pmode,
2224 plus_constant (save_area, i * UNITS_PER_WORD));
2225 set_mem_alias_set (mem, set);
2226 emit_move_insn (mem, gen_rtx_REG (Pmode,
2227 x86_64_int_parameter_registers[i]));
2230 if (next_cum.sse_nregs)
2232 /* Now emit code to save SSE registers. The AX parameter contains number
2233 of SSE parameter registers used to call this function. We use
2234 sse_prologue_save insn template that produces computed jump across
2235 SSE saves. We need some preparation work to get this working. */
2237 label = gen_label_rtx ();
2238 label_ref = gen_rtx_LABEL_REF (Pmode, label);
2240 /* Compute address to jump to :
2241 label - 5*eax + nnamed_sse_arguments*5 */
2242 tmp_reg = gen_reg_rtx (Pmode);
2243 nsse_reg = gen_reg_rtx (Pmode);
/* %al holds the number of vector registers actually used by the caller.  */
2244 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
2245 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
2246 gen_rtx_MULT (Pmode, nsse_reg,
2248 if (next_cum.sse_regno)
2251 gen_rtx_CONST (DImode,
2252 gen_rtx_PLUS (DImode,
2254 GEN_INT (next_cum.sse_regno * 4))));
2256 emit_move_insn (nsse_reg, label_ref);
2257 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
2259 /* Compute address of memory block we save into. We always use pointer
2260 pointing 127 bytes after first byte to store - this is needed to keep
2261 instruction size limited by 4 bytes. */
2262 tmp_reg = gen_reg_rtx (Pmode);
2263 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
2264 plus_constant (save_area,
2265 8 * REGPARM_MAX + 127)));
2266 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
2267 set_mem_alias_set (mem, set);
2268 set_mem_align (mem, BITS_PER_WORD);
2270 /* And finally do the dirty job! */
2271 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
2272 GEN_INT (next_cum.sse_regno), label));
2277 /* Implement va_start. */
2280 ix86_va_start (stdarg_p, valist, nextarg)
2285 HOST_WIDE_INT words, n_gpr, n_fpr;
2286 tree f_gpr, f_fpr, f_ovf, f_sav;
2287 tree gpr, fpr, ovf, sav, t;
2289 /* Only 64bit target needs something special. */
2292 std_expand_builtin_va_start (stdarg_p, valist, nextarg);
2296 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
2297 f_fpr = TREE_CHAIN (f_gpr);
2298 f_ovf = TREE_CHAIN (f_fpr);
2299 f_sav = TREE_CHAIN (f_ovf);
2301 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
2302 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
2303 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
2304 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
2305 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
2307 /* Count number of gp and fp argument registers used. */
2308 words = current_function_args_info.words;
2309 n_gpr = current_function_args_info.regno;
2310 n_fpr = current_function_args_info.sse_regno;
2312 if (TARGET_DEBUG_ARG)
2313 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
2314 (int)words, (int)n_gpr, (int)n_fpr);
2316 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
2317 build_int_2 (n_gpr * 8, 0));
2318 TREE_SIDE_EFFECTS (t) = 1;
2319 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2321 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
2322 build_int_2 (n_fpr * 16 + 8*REGPARM_MAX, 0));
2323 TREE_SIDE_EFFECTS (t) = 1;
2324 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2326 /* Find the overflow area. */
2327 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
2329 t = build (PLUS_EXPR, TREE_TYPE (ovf), t,
2330 build_int_2 (words * UNITS_PER_WORD, 0));
2331 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
2332 TREE_SIDE_EFFECTS (t) = 1;
2333 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2335 /* Find the register save area.
2336 Prologue of the function save it right above stack frame. */
2337 t = make_tree (TREE_TYPE (sav), frame_pointer_rtx);
2338 t = build (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
2339 TREE_SIDE_EFFECTS (t) = 1;
2340 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
/* NOTE(review): this listing has extraction line numbers fused into every
   line and many structural lines elided (function header types, braces,
   if/else arms, declarations).  Code left byte-identical; restore the
   missing lines from pristine GCC sources before compiling.  The visible
   logic implements x86-64 va_arg: try the register save area first
   (gp_offset/fp_offset), falling back to the overflow area on stack.  */
2343 /* Implement va_arg. */
2345 ix86_va_arg (valist, type)
2348 static int intreg[6] = { 0, 1, 2, 3, 4, 5 };
2349 tree f_gpr, f_fpr, f_ovf, f_sav;
2350 tree gpr, fpr, ovf, sav, t;
2352 rtx lab_false, lab_over = NULL_RTX;
2356 /* Only 64bit target needs something special. */
2359 return std_expand_builtin_va_arg (valist, type);
2362 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
2363 f_fpr = TREE_CHAIN (f_gpr);
2364 f_ovf = TREE_CHAIN (f_fpr);
2365 f_sav = TREE_CHAIN (f_ovf);
2367 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
2368 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
2369 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
2370 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
2371 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
2373 size = int_size_in_bytes (type);
2374 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
/* Classify TYPE per the psABI to see which registers (if any) carry it.  */
2376 container = construct_container (TYPE_MODE (type), type, 0,
2377 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
2379 * Pull the value out of the saved registers ...
2382 addr_rtx = gen_reg_rtx (Pmode);
2386 rtx int_addr_rtx, sse_addr_rtx;
2387 int needed_intregs, needed_sseregs;
2390 lab_over = gen_label_rtx ();
2391 lab_false = gen_label_rtx ();
2393 examine_argument (TYPE_MODE (type), type, 0,
2394 &needed_intregs, &needed_sseregs);
/* Over-aligned values cannot be read straight out of the save area.  */
2397 need_temp = ((needed_intregs && TYPE_ALIGN (type) > 64)
2398 || TYPE_ALIGN (type) > 128);
2400 /* In case we are passing structure, verify that it is consecutive block
2401 on the register save area. If not we need to do moves. */
2402 if (!need_temp && !REG_P (container))
2404 /* Verify that all registers are strictly consecutive */
2405 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
2409 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
2411 rtx slot = XVECEXP (container, 0, i);
2412 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int)i
2413 || INTVAL (XEXP (slot, 1)) != i * 16)
2421 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
2423 rtx slot = XVECEXP (container, 0, i);
2424 if (REGNO (XEXP (slot, 0)) != (unsigned int)i
2425 || INTVAL (XEXP (slot, 1)) != i * 8)
2432 int_addr_rtx = addr_rtx;
2433 sse_addr_rtx = addr_rtx;
2437 int_addr_rtx = gen_reg_rtx (Pmode);
2438 sse_addr_rtx = gen_reg_rtx (Pmode);
2440 /* First ensure that we fit completely in registers. */
/* gp_offset/fp_offset past their limits means the argument spilled to
   the overflow area; branch to lab_false in that case.  */
2443 emit_cmp_and_jump_insns (expand_expr
2444 (gpr, NULL_RTX, SImode, EXPAND_NORMAL),
2445 GEN_INT ((REGPARM_MAX - needed_intregs +
2446 1) * 8), GE, const1_rtx, SImode,
2451 emit_cmp_and_jump_insns (expand_expr
2452 (fpr, NULL_RTX, SImode, EXPAND_NORMAL),
2453 GEN_INT ((SSE_REGPARM_MAX -
2454 needed_sseregs + 1) * 16 +
2455 REGPARM_MAX * 8), GE, const1_rtx,
2456 SImode, 1, lab_false);
2459 /* Compute index to start of area used for integer regs. */
2462 t = build (PLUS_EXPR, ptr_type_node, sav, gpr);
2463 r = expand_expr (t, int_addr_rtx, Pmode, EXPAND_NORMAL);
2464 if (r != int_addr_rtx)
2465 emit_move_insn (int_addr_rtx, r);
2469 t = build (PLUS_EXPR, ptr_type_node, sav, fpr);
2470 r = expand_expr (t, sse_addr_rtx, Pmode, EXPAND_NORMAL);
2471 if (r != sse_addr_rtx)
2472 emit_move_insn (sse_addr_rtx, r);
2479 /* Never use the memory itself, as it has the alias set. */
2480 addr_rtx = XEXP (assign_temp (type, 0, 1, 0), 0);
2481 mem = gen_rtx_MEM (BLKmode, addr_rtx);
2482 set_mem_alias_set (mem, get_varargs_alias_set ());
2483 set_mem_align (mem, BITS_PER_UNIT);
/* Copy each piece out of the save area into the temporary.  */
2485 for (i = 0; i < XVECLEN (container, 0); i++)
2487 rtx slot = XVECEXP (container, 0, i);
2488 rtx reg = XEXP (slot, 0);
2489 enum machine_mode mode = GET_MODE (reg);
2495 if (SSE_REGNO_P (REGNO (reg)))
2497 src_addr = sse_addr_rtx;
2498 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
2502 src_addr = int_addr_rtx;
2503 src_offset = REGNO (reg) * 8;
2505 src_mem = gen_rtx_MEM (mode, src_addr);
2506 set_mem_alias_set (src_mem, get_varargs_alias_set ());
2507 src_mem = adjust_address (src_mem, mode, src_offset);
2508 dest_mem = adjust_address (mem, mode, INTVAL (XEXP (slot, 1)));
2509 emit_move_insn (dest_mem, src_mem);
/* Advance gp_offset / fp_offset past the registers just consumed.  */
2516 build (PLUS_EXPR, TREE_TYPE (gpr), gpr,
2517 build_int_2 (needed_intregs * 8, 0));
2518 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
2519 TREE_SIDE_EFFECTS (t) = 1;
2520 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2525 build (PLUS_EXPR, TREE_TYPE (fpr), fpr,
2526 build_int_2 (needed_sseregs * 16, 0));
2527 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
2528 TREE_SIDE_EFFECTS (t) = 1;
2529 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2532 emit_jump_insn (gen_jump (lab_over));
2534 emit_label (lab_false);
2537 /* ... otherwise out of the overflow area. */
2539 /* Care for on-stack alignment if needed. */
2540 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64)
2544 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
2545 t = build (PLUS_EXPR, TREE_TYPE (ovf), ovf, build_int_2 (align - 1, 0));
2546 t = build (BIT_AND_EXPR, TREE_TYPE (t), t, build_int_2 (-align, -1));
2550 r = expand_expr (t, addr_rtx, Pmode, EXPAND_NORMAL);
2552 emit_move_insn (addr_rtx, r);
/* Bump overflow_arg_area past the argument just fetched.  */
2555 build (PLUS_EXPR, TREE_TYPE (t), t,
2556 build_int_2 (rsize * UNITS_PER_WORD, 0));
2557 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
2558 TREE_SIDE_EFFECTS (t) = 1;
2559 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2562 emit_label (lab_over);
2567 /* Return nonzero if OP is general operand representable on x86_64. */
2570 x86_64_general_operand (op, mode)
2572 enum machine_mode mode;
2575 return general_operand (op, mode);
2576 if (nonimmediate_operand (op, mode))
2578 return x86_64_sign_extended_value (op);
2581 /* Return nonzero if OP is general operand representable on x86_64
2582 as eighter sign extended or zero extended constant. */
2585 x86_64_szext_general_operand (op, mode)
2587 enum machine_mode mode;
2590 return general_operand (op, mode);
2591 if (nonimmediate_operand (op, mode))
2593 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
2596 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
2599 x86_64_nonmemory_operand (op, mode)
2601 enum machine_mode mode;
2604 return nonmemory_operand (op, mode);
2605 if (register_operand (op, mode))
2607 return x86_64_sign_extended_value (op);
2610 /* Return nonzero if OP is nonmemory operand acceptable by movabs patterns. */
2613 x86_64_movabs_operand (op, mode)
2615 enum machine_mode mode;
2617 if (!TARGET_64BIT || !flag_pic)
2618 return nonmemory_operand (op, mode);
2619 if (register_operand (op, mode) || x86_64_sign_extended_value (op))
2621 if (CONSTANT_P (op) && !symbolic_reference_mentioned_p (op))
2626 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
2629 x86_64_szext_nonmemory_operand (op, mode)
2631 enum machine_mode mode;
2634 return nonmemory_operand (op, mode);
2635 if (register_operand (op, mode))
2637 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
2640 /* Return nonzero if OP is immediate operand representable on x86_64. */
2643 x86_64_immediate_operand (op, mode)
2645 enum machine_mode mode;
2648 return immediate_operand (op, mode);
2649 return x86_64_sign_extended_value (op);
2652 /* Return nonzero if OP is immediate operand representable on x86_64. */
2655 x86_64_zext_immediate_operand (op, mode)
2657 enum machine_mode mode ATTRIBUTE_UNUSED;
2659 return x86_64_zero_extended_value (op);
2662 /* Return nonzero if OP is (const_int 1), else return zero. */
2665 const_int_1_operand (op, mode)
2667 enum machine_mode mode ATTRIBUTE_UNUSED;
2669 return (GET_CODE (op) == CONST_INT && INTVAL (op) == 1);
2672 /* Returns 1 if OP is either a symbol reference or a sum of a symbol
2673 reference and a constant. */
2676 symbolic_operand (op, mode)
2678 enum machine_mode mode ATTRIBUTE_UNUSED;
2680 switch (GET_CODE (op))
2688 if (GET_CODE (op) == SYMBOL_REF
2689 || GET_CODE (op) == LABEL_REF
2690 || (GET_CODE (op) == UNSPEC
2691 && (XINT (op, 1) == 6
2692 || XINT (op, 1) == 7
2693 || XINT (op, 1) == 15)))
2695 if (GET_CODE (op) != PLUS
2696 || GET_CODE (XEXP (op, 1)) != CONST_INT)
2700 if (GET_CODE (op) == SYMBOL_REF
2701 || GET_CODE (op) == LABEL_REF)
2703 /* Only @GOTOFF gets offsets. */
2704 if (GET_CODE (op) != UNSPEC
2705 || XINT (op, 1) != 7)
2708 op = XVECEXP (op, 0, 0);
2709 if (GET_CODE (op) == SYMBOL_REF
2710 || GET_CODE (op) == LABEL_REF)
2719 /* Return true if the operand contains a @GOT or @GOTOFF reference. */
2722 pic_symbolic_operand (op, mode)
2724 enum machine_mode mode ATTRIBUTE_UNUSED;
2726 if (GET_CODE (op) != CONST)
2731 if (GET_CODE (XEXP (op, 0)) == UNSPEC)
2736 if (GET_CODE (op) == UNSPEC)
2738 if (GET_CODE (op) != PLUS
2739 || GET_CODE (XEXP (op, 1)) != CONST_INT)
2742 if (GET_CODE (op) == UNSPEC)
2748 /* Return true if OP is a symbolic operand that resolves locally. */
2751 local_symbolic_operand (op, mode)
2753 enum machine_mode mode ATTRIBUTE_UNUSED;
2755 if (GET_CODE (op) == LABEL_REF)
2758 if (GET_CODE (op) == CONST
2759 && GET_CODE (XEXP (op, 0)) == PLUS
2760 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
2761 op = XEXP (XEXP (op, 0), 0);
2763 if (GET_CODE (op) != SYMBOL_REF)
2766 /* These we've been told are local by varasm and encode_section_info
2768 if (CONSTANT_POOL_ADDRESS_P (op) || SYMBOL_REF_FLAG (op))
2771 /* There is, however, a not insubstantial body of code in the rest of
2772 the compiler that assumes it can just stick the results of
2773 ASM_GENERATE_INTERNAL_LABEL in a symbol_ref and have done. */
2774 /* ??? This is a hack. Should update the body of the compiler to
2775 always create a DECL an invoke ENCODE_SECTION_INFO. */
2776 if (strncmp (XSTR (op, 0), internal_label_prefix,
2777 internal_label_prefix_len) == 0)
2783 /* Test for a valid operand for a call instruction. Don't allow the
2784 arg pointer register or virtual regs since they may decay into
2785 reg + const, which the patterns can't handle. */
2788 call_insn_operand (op, mode)
2790 enum machine_mode mode ATTRIBUTE_UNUSED;
2792 /* Disallow indirect through a virtual register. This leads to
2793 compiler aborts when trying to eliminate them. */
2794 if (GET_CODE (op) == REG
2795 && (op == arg_pointer_rtx
2796 || op == frame_pointer_rtx
2797 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
2798 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
2801 /* Disallow `call 1234'. Due to varying assembler lameness this
2802 gets either rejected or translated to `call .+1234'. */
2803 if (GET_CODE (op) == CONST_INT)
2806 /* Explicitly allow SYMBOL_REF even if pic. */
2807 if (GET_CODE (op) == SYMBOL_REF)
2810 /* Half-pic doesn't allow anything but registers and constants.
2811 We've just taken care of the later. */
2813 return register_operand (op, Pmode);
2815 /* Otherwise we can allow any general_operand in the address. */
2816 return general_operand (op, Pmode);
2820 constant_call_address_operand (op, mode)
2822 enum machine_mode mode ATTRIBUTE_UNUSED;
2824 if (GET_CODE (op) == CONST
2825 && GET_CODE (XEXP (op, 0)) == PLUS
2826 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
2827 op = XEXP (XEXP (op, 0), 0);
2828 return GET_CODE (op) == SYMBOL_REF;
2831 /* Match exactly zero and one. */
2834 const0_operand (op, mode)
2836 enum machine_mode mode;
2838 return op == CONST0_RTX (mode);
2842 const1_operand (op, mode)
2844 enum machine_mode mode ATTRIBUTE_UNUSED;
2846 return op == const1_rtx;
2849 /* Match 2, 4, or 8. Used for leal multiplicands. */
2852 const248_operand (op, mode)
2854 enum machine_mode mode ATTRIBUTE_UNUSED;
2856 return (GET_CODE (op) == CONST_INT
2857 && (INTVAL (op) == 2 || INTVAL (op) == 4 || INTVAL (op) == 8));
2860 /* True if this is a constant appropriate for an increment or decremenmt. */
2863 incdec_operand (op, mode)
2865 enum machine_mode mode ATTRIBUTE_UNUSED;
2867 /* On Pentium4, the inc and dec operations causes extra dependency on flag
2868 registers, since carry flag is not set. */
2869 if (TARGET_PENTIUM4 && !optimize_size)
2871 return op == const1_rtx || op == constm1_rtx;
2874 /* Return nonzero if OP is acceptable as operand of DImode shift
2878 shiftdi_operand (op, mode)
2880 enum machine_mode mode ATTRIBUTE_UNUSED;
2883 return nonimmediate_operand (op, mode);
2885 return register_operand (op, mode);
2888 /* Return false if this is the stack pointer, or any other fake
2889 register eliminable to the stack pointer. Otherwise, this is
2892 This is used to prevent esp from being used as an index reg.
2893 Which would only happen in pathological cases. */
2896 reg_no_sp_operand (op, mode)
2898 enum machine_mode mode;
2901 if (GET_CODE (t) == SUBREG)
2903 if (t == stack_pointer_rtx || t == arg_pointer_rtx || t == frame_pointer_rtx)
2906 return register_operand (op, mode);
2910 mmx_reg_operand (op, mode)
2912 enum machine_mode mode ATTRIBUTE_UNUSED;
2914 return MMX_REG_P (op);
2917 /* Return false if this is any eliminable register. Otherwise
2921 general_no_elim_operand (op, mode)
2923 enum machine_mode mode;
2926 if (GET_CODE (t) == SUBREG)
2928 if (t == arg_pointer_rtx || t == frame_pointer_rtx
2929 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
2930 || t == virtual_stack_dynamic_rtx)
2933 && REGNO (t) >= FIRST_VIRTUAL_REGISTER
2934 && REGNO (t) <= LAST_VIRTUAL_REGISTER)
2937 return general_operand (op, mode);
2940 /* Return false if this is any eliminable register. Otherwise
2941 register_operand or const_int. */
2944 nonmemory_no_elim_operand (op, mode)
2946 enum machine_mode mode;
2949 if (GET_CODE (t) == SUBREG)
2951 if (t == arg_pointer_rtx || t == frame_pointer_rtx
2952 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
2953 || t == virtual_stack_dynamic_rtx)
2956 return GET_CODE (op) == CONST_INT || register_operand (op, mode);
2959 /* Return true if op is a Q_REGS class register. */
2962 q_regs_operand (op, mode)
2964 enum machine_mode mode;
2966 if (mode != VOIDmode && GET_MODE (op) != mode)
2968 if (GET_CODE (op) == SUBREG)
2969 op = SUBREG_REG (op);
2970 return QI_REG_P (op);
2973 /* Return true if op is a NON_Q_REGS class register. */
2976 non_q_regs_operand (op, mode)
2978 enum machine_mode mode;
2980 if (mode != VOIDmode && GET_MODE (op) != mode)
2982 if (GET_CODE (op) == SUBREG)
2983 op = SUBREG_REG (op);
2984 return NON_QI_REG_P (op);
2987 /* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS
2990 sse_comparison_operator (op, mode)
2992 enum machine_mode mode ATTRIBUTE_UNUSED;
2994 enum rtx_code code = GET_CODE (op);
2997 /* Operations supported directly. */
3007 /* These are equivalent to ones above in non-IEEE comparisons. */
3014 return !TARGET_IEEE_FP;
3019 /* Return 1 if OP is a valid comparison operator in valid mode. */
3021 ix86_comparison_operator (op, mode)
3023 enum machine_mode mode;
3025 enum machine_mode inmode;
3026 enum rtx_code code = GET_CODE (op);
3027 if (mode != VOIDmode && GET_MODE (op) != mode)
3029 if (GET_RTX_CLASS (code) != '<')
3031 inmode = GET_MODE (XEXP (op, 0));
3033 if (inmode == CCFPmode || inmode == CCFPUmode)
3035 enum rtx_code second_code, bypass_code;
3036 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3037 return (bypass_code == NIL && second_code == NIL);
3044 if (inmode == CCmode || inmode == CCGCmode
3045 || inmode == CCGOCmode || inmode == CCNOmode)
3048 case LTU: case GTU: case LEU: case ORDERED: case UNORDERED: case GEU:
3049 if (inmode == CCmode)
3053 if (inmode == CCmode || inmode == CCGCmode || inmode == CCNOmode)
3061 /* Return 1 if OP is a comparison operator that can be issued by fcmov. */
3064 fcmov_comparison_operator (op, mode)
3066 enum machine_mode mode;
3068 enum machine_mode inmode;
3069 enum rtx_code code = GET_CODE (op);
3070 if (mode != VOIDmode && GET_MODE (op) != mode)
3072 if (GET_RTX_CLASS (code) != '<')
3074 inmode = GET_MODE (XEXP (op, 0));
3075 if (inmode == CCFPmode || inmode == CCFPUmode)
3077 enum rtx_code second_code, bypass_code;
3078 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3079 if (bypass_code != NIL || second_code != NIL)
3081 code = ix86_fp_compare_code_to_integer (code);
3083 /* i387 supports just limited amount of conditional codes. */
3086 case LTU: case GTU: case LEU: case GEU:
3087 if (inmode == CCmode || inmode == CCFPmode || inmode == CCFPUmode)
3090 case ORDERED: case UNORDERED:
3098 /* Return 1 if OP is a binary operator that can be promoted to wider mode. */
3101 promotable_binary_operator (op, mode)
3103 enum machine_mode mode ATTRIBUTE_UNUSED;
3105 switch (GET_CODE (op))
3108 /* Modern CPUs have same latency for HImode and SImode multiply,
3109 but 386 and 486 do HImode multiply faster. */
3110 return ix86_cpu > PROCESSOR_I486;
3122 /* Nearly general operand, but accept any const_double, since we wish
3123 to be able to drop them into memory rather than have them get pulled
3127 cmp_fp_expander_operand (op, mode)
3129 enum machine_mode mode;
3131 if (mode != VOIDmode && mode != GET_MODE (op))
3133 if (GET_CODE (op) == CONST_DOUBLE)
3135 return general_operand (op, mode);
3138 /* Match an SI or HImode register for a zero_extract. */
3141 ext_register_operand (op, mode)
3143 enum machine_mode mode ATTRIBUTE_UNUSED;
3146 if ((!TARGET_64BIT || GET_MODE (op) != DImode)
3147 && GET_MODE (op) != SImode && GET_MODE (op) != HImode)
3150 if (!register_operand (op, VOIDmode))
3153 /* Be curefull to accept only registers having upper parts. */
3154 regno = REG_P (op) ? REGNO (op) : REGNO (SUBREG_REG (op));
3155 return (regno > LAST_VIRTUAL_REGISTER || regno < 4);
3158 /* Return 1 if this is a valid binary floating-point operation.
3159 OP is the expression matched, and MODE is its mode. */
3162 binary_fp_operator (op, mode)
3164 enum machine_mode mode;
3166 if (mode != VOIDmode && mode != GET_MODE (op))
3169 switch (GET_CODE (op))
3175 return GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT;
3183 mult_operator(op, mode)
3185 enum machine_mode mode ATTRIBUTE_UNUSED;
3187 return GET_CODE (op) == MULT;
3191 div_operator(op, mode)
3193 enum machine_mode mode ATTRIBUTE_UNUSED;
3195 return GET_CODE (op) == DIV;
3199 arith_or_logical_operator (op, mode)
3201 enum machine_mode mode;
3203 return ((mode == VOIDmode || GET_MODE (op) == mode)
3204 && (GET_RTX_CLASS (GET_CODE (op)) == 'c'
3205 || GET_RTX_CLASS (GET_CODE (op)) == '2'));
3208 /* Returns 1 if OP is memory operand with a displacement. */
3211 memory_displacement_operand (op, mode)
3213 enum machine_mode mode;
3215 struct ix86_address parts;
3217 if (! memory_operand (op, mode))
3220 if (! ix86_decompose_address (XEXP (op, 0), &parts))
3223 return parts.disp != NULL_RTX;
3226 /* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
3227 re-recognize the operand to avoid a copy_to_mode_reg that will fail.
3229 ??? It seems likely that this will only work because cmpsi is an
3230 expander, and no actual insns use this. */
3233 cmpsi_operand (op, mode)
3235 enum machine_mode mode;
3237 if (nonimmediate_operand (op, mode))
3240 if (GET_CODE (op) == AND
3241 && GET_MODE (op) == SImode
3242 && GET_CODE (XEXP (op, 0)) == ZERO_EXTRACT
3243 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
3244 && GET_CODE (XEXP (XEXP (op, 0), 2)) == CONST_INT
3245 && INTVAL (XEXP (XEXP (op, 0), 1)) == 8
3246 && INTVAL (XEXP (XEXP (op, 0), 2)) == 8
3247 && GET_CODE (XEXP (op, 1)) == CONST_INT)
3253 /* Returns 1 if OP is memory operand that can not be represented by the
3257 long_memory_operand (op, mode)
3259 enum machine_mode mode;
3261 if (! memory_operand (op, mode))
3264 return memory_address_length (op) != 0;
3267 /* Return nonzero if the rtx is known aligned. */
3270 aligned_operand (op, mode)
3272 enum machine_mode mode;
3274 struct ix86_address parts;
3276 if (!general_operand (op, mode))
3279 /* Registers and immediate operands are always "aligned". */
3280 if (GET_CODE (op) != MEM)
3283 /* Don't even try to do any aligned optimizations with volatiles. */
3284 if (MEM_VOLATILE_P (op))
3289 /* Pushes and pops are only valid on the stack pointer. */
3290 if (GET_CODE (op) == PRE_DEC
3291 || GET_CODE (op) == POST_INC)
3294 /* Decode the address. */
3295 if (! ix86_decompose_address (op, &parts))
3298 /* Look for some component that isn't known to be aligned. */
3302 && REGNO_POINTER_ALIGN (REGNO (parts.index)) < 32)
3307 if (REGNO_POINTER_ALIGN (REGNO (parts.base)) < 32)
3312 if (GET_CODE (parts.disp) != CONST_INT
3313 || (INTVAL (parts.disp) & 3) != 0)
3317 /* Didn't find one -- this must be an aligned address. */
3321 /* Return true if the constant is something that can be loaded with
3322 a special instruction. Only handle 0.0 and 1.0; others are less
3326 standard_80387_constant_p (x)
3329 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
3331 /* Note that on the 80387, other constants, such as pi, that we should support
3332 too. On some machines, these are much slower to load as standard constant,
3333 than to load from doubles in memory. */
3334 if (x == CONST0_RTX (GET_MODE (x)))
3336 if (x == CONST1_RTX (GET_MODE (x)))
3341 /* Return 1 if X is FP constant we can load to SSE register w/o using memory.
3344 standard_sse_constant_p (x)
3347 if (GET_CODE (x) != CONST_DOUBLE)
3349 return (x == CONST0_RTX (GET_MODE (x)));
3352 /* Returns 1 if OP contains a symbol reference */
3355 symbolic_reference_mentioned_p (op)
3358 register const char *fmt;
3361 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
3364 fmt = GET_RTX_FORMAT (GET_CODE (op));
3365 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
3371 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
3372 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
3376 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
3383 /* Return 1 if it is appropriate to emit `ret' instructions in the
3384 body of a function. Do this only if the epilogue is simple, needing a
3385 couple of insns. Prior to reloading, we can't tell how many registers
3386 must be saved, so return 0 then. Return 0 if there is no frame
3387 marker to de-allocate.
3389 If NON_SAVING_SETJMP is defined and true, then it is not possible
3390 for the epilogue to be simple, so return 0. This is a special case
3391 since NON_SAVING_SETJMP will not cause regs_ever_live to change
3392 until final, but jump_optimize may need to know sooner if a
3396 ix86_can_use_return_insn_p ()
3398 struct ix86_frame frame;
3400 #ifdef NON_SAVING_SETJMP
3401 if (NON_SAVING_SETJMP && current_function_calls_setjmp)
3405 if (! reload_completed || frame_pointer_needed)
3408 /* Don't allow more than 32 pop, since that's all we can do
3409 with one instruction. */
3410 if (current_function_pops_args
3411 && current_function_args_size >= 32768)
3414 ix86_compute_frame_layout (&frame);
3415 return frame.to_allocate == 0 && frame.nregs == 0;
3418 /* Return 1 if VALUE can be stored in the sign extended immediate field. */
3420 x86_64_sign_extended_value (value)
3423 switch (GET_CODE (value))
3425 /* CONST_DOUBLES never match, since HOST_BITS_PER_WIDE_INT is known
3426 to be at least 32 and this all acceptable constants are
3427 represented as CONST_INT. */
3429 if (HOST_BITS_PER_WIDE_INT == 32)
3433 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (value), DImode);
3434 return trunc_int_for_mode (val, SImode) == val;
3438 /* For certain code models, the symbolic references are known to fit. */
3440 return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_KERNEL;
3442 /* For certain code models, the code is near as well. */
3444 return ix86_cmodel != CM_LARGE && ix86_cmodel != CM_SMALL_PIC;
3446 /* We also may accept the offsetted memory references in certain special
3449 if (GET_CODE (XEXP (value, 0)) == UNSPEC
3450 && XVECLEN (XEXP (value, 0), 0) == 1
3451 && XINT (XEXP (value, 0), 1) == 15)
3453 else if (GET_CODE (XEXP (value, 0)) == PLUS)
3455 rtx op1 = XEXP (XEXP (value, 0), 0);
3456 rtx op2 = XEXP (XEXP (value, 0), 1);
3457 HOST_WIDE_INT offset;
3459 if (ix86_cmodel == CM_LARGE)
3461 if (GET_CODE (op2) != CONST_INT)
3463 offset = trunc_int_for_mode (INTVAL (op2), DImode);
3464 switch (GET_CODE (op1))
3467 /* For CM_SMALL assume that latest object is 1MB before
3468 end of 31bits boundary. We may also accept pretty
3469 large negative constants knowing that all objects are
3470 in the positive half of address space. */
3471 if (ix86_cmodel == CM_SMALL
3472 && offset < 1024*1024*1024
3473 && trunc_int_for_mode (offset, SImode) == offset)
3475 /* For CM_KERNEL we know that all object resist in the
3476 negative half of 32bits address space. We may not
3477 accept negative offsets, since they may be just off
3478 and we may accept pretty large possitive ones. */
3479 if (ix86_cmodel == CM_KERNEL
3481 && trunc_int_for_mode (offset, SImode) == offset)
3485 /* These conditions are similar to SYMBOL_REF ones, just the
3486 constraints for code models differ. */
3487 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
3488 && offset < 1024*1024*1024
3489 && trunc_int_for_mode (offset, SImode) == offset)
3491 if (ix86_cmodel == CM_KERNEL
3493 && trunc_int_for_mode (offset, SImode) == offset)
3506 /* Return 1 if VALUE can be stored in the zero extended immediate field. */
3508 x86_64_zero_extended_value (value)
3511 switch (GET_CODE (value))
3514 if (HOST_BITS_PER_WIDE_INT == 32)
3515 return (GET_MODE (value) == VOIDmode
3516 && !CONST_DOUBLE_HIGH (value));
3520 if (HOST_BITS_PER_WIDE_INT == 32)
3521 return INTVAL (value) >= 0;
3523 return !(INTVAL (value) & ~(HOST_WIDE_INT)0xffffffff);
3526 /* For certain code models, the symbolic references are known to fit. */
3528 return ix86_cmodel == CM_SMALL;
3530 /* For certain code models, the code is near as well. */
3532 return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM;
3534 /* We also may accept the offsetted memory references in certain special
3537 if (GET_CODE (XEXP (value, 0)) == PLUS)
3539 rtx op1 = XEXP (XEXP (value, 0), 0);
3540 rtx op2 = XEXP (XEXP (value, 0), 1);
3542 if (ix86_cmodel == CM_LARGE)
3544 switch (GET_CODE (op1))
3548 /* For small code model we may accept pretty large possitive
3549 offsets, since one bit is available for free. Negative
3550 offsets are limited by the size of NULL pointer area
3551 specified by the ABI. */
3552 if (ix86_cmodel == CM_SMALL
3553 && GET_CODE (op2) == CONST_INT
3554 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
3555 && (trunc_int_for_mode (INTVAL (op2), SImode)
3558 /* ??? For the kernel, we may accept adjustment of
3559 -0x10000000, since we know that it will just convert
3560 negative address space to possitive, but perhaps this
3561 is not worthwhile. */
3564 /* These conditions are similar to SYMBOL_REF ones, just the
3565 constraints for code models differ. */
3566 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
3567 && GET_CODE (op2) == CONST_INT
3568 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
3569 && (trunc_int_for_mode (INTVAL (op2), SImode)
3583 /* Value should be nonzero if functions must have frame pointers.
3584 Zero means the frame pointer need not be set up (and parms may
3585 be accessed via the stack pointer) in functions that seem suitable. */
3588 ix86_frame_pointer_required ()
3590 /* If we accessed previous frames, then the generated code expects
3591 to be able to access the saved ebp value in our frame. */
3592 if (cfun->machine->accesses_prev_frame)
3595 /* Several x86 OSes need a frame pointer for other reasons,
3596 usually pertaining to setjmp. */
3597 if (SUBTARGET_FRAME_POINTER_REQUIRED)
3600 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
3601 the frame pointer by default. Turn it back on now if we've not
3602 got a leaf function. */
3603 if (TARGET_OMIT_LEAF_FRAME_POINTER && ! leaf_function_p ())
3609 /* Record that the current function accesses previous call frames.
   The flag set here is read by ix86_frame_pointer_required to force
   a frame pointer for this function.  */
3612 ix86_setup_frame_addresses ()
3614 cfun->machine->accesses_prev_frame = 1;
3617 static char pic_label_name[32];
/* ^ Name of the internal label for the -fpic PC thunk; generated lazily
   in load_pic_register and emitted here at end of assembly output.  */
3619 /* This function generates code for -fpic that loads %ebx with
3620 the return address of the caller and then returns. */
3623 ix86_asm_file_end (file)
3628 if (! TARGET_DEEP_BRANCH_PREDICTION || pic_label_name[0] == 0)
3631 /* ??? Binutils 2.10 and earlier have a linkonce elimination bug related
3632 to updating relocations to a section being discarded such that this
3633 doesn't work. Ought to detect this at configure time. */
3635 /* The trick here is to create a linkonce section containing the
3636 pic label thunk, but to refer to it with an internal label.
3637 Because the label is internal, we don't have inter-dso name
3638 binding issues on hosts that don't support ".hidden".
3640 In order to use these macros, however, we must create a fake
3642 if (targetm.have_named_sections)
3644 tree decl = build_decl (FUNCTION_DECL,
3645 get_identifier ("i686.get_pc_thunk"),
3647 DECL_ONE_ONLY (decl) = 1;
3648 UNIQUE_SECTION (decl, 0);
3649 named_section (decl, NULL);
3656 /* This used to call ASM_DECLARE_FUNCTION_NAME() but since it's an
3657 internal (non-global) label that's being emitted, it didn't make
3658 sense to have .type information for local labels. This caused
3659 the SCO OpenServer 5.0.4 ELF assembler grief (why are you giving
3660 me debug info for a label that you're declaring non-global?) this
3661 was changed to call ASM_OUTPUT_LABEL() instead. */
3663 ASM_OUTPUT_LABEL (file, pic_label_name)
3665 xops[0] = pic_offset_table_rtx;
3666 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
3667 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
3668 output_asm_insn ("ret", xops);
/* Emit insns that load the PIC register (GOT pointer) in the prologue.
   With deep-branch-prediction tuning, the PC is fetched via a call to the
   i686.get_pc_thunk-style label; otherwise via a call/pop sequence.  */
3672 load_pic_register ()
3679 gotsym = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
3681 if (TARGET_DEEP_BRANCH_PREDICTION)
3683 if (! pic_label_name[0])
3684 ASM_GENERATE_INTERNAL_LABEL (pic_label_name, "LPR", 0);
3685 pclab = gen_rtx_MEM (QImode, gen_rtx_SYMBOL_REF (Pmode, pic_label_name));
3689 pclab = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
3692 emit_insn (gen_prologue_get_pc (pic_offset_table_rtx, pclab));
3694 if (! TARGET_DEEP_BRANCH_PREDICTION)
3695 emit_insn (gen_popsi1 (pic_offset_table_rtx));
3697 emit_insn (gen_prologue_set_got (pic_offset_table_rtx, gotsym, pclab));
3700 /* Generate a "push" pattern for input ARG: a SET of the pre-decremented
   stack slot from ARG.  */
3706 return gen_rtx_SET (VOIDmode,
3708 gen_rtx_PRE_DEC (Pmode,
3709 stack_pointer_rtx)),
3713 /* Return 1 if we need to save REGNO in the current function's prologue.
   When MAYBE_EH_RETURN is nonzero, the eh_return data registers also
   count as needing a save.  */
3715 ix86_save_reg (regno, maybe_eh_return)
3717 int maybe_eh_return;
3721 && regno == PIC_OFFSET_TABLE_REGNUM
3722 && (current_function_uses_pic_offset_table
3723 || current_function_uses_const_pool
3724 || current_function_calls_eh_return))
3727 if (current_function_calls_eh_return && maybe_eh_return)
3732 unsigned test = EH_RETURN_DATA_REGNO(i);
3733 if (test == INVALID_REGNUM)
3735 if (test == (unsigned) regno)
3740 return (regs_ever_live[regno]
3741 && !call_used_regs[regno]
3742 && !fixed_regs[regno]
3743 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
3746 /* Return number of registers to be saved on the stack
   (counts every regno for which ix86_save_reg is true).  */
3754 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
3755 if (ix86_save_reg (regno, true))
3760 /* Return the offset between two registers, one to be eliminated, and the other
3761 its replacement, at the start of a routine.  Offsets come from the
   frame layout computed by ix86_compute_frame_layout.  */
3764 ix86_initial_elimination_offset (from, to)
3768 struct ix86_frame frame;
3769 ix86_compute_frame_layout (&frame);
3771 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
3772 return frame.hard_frame_pointer_offset;
3773 else if (from == FRAME_POINTER_REGNUM
3774 && to == HARD_FRAME_POINTER_REGNUM)
3775 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
3778 if (to != STACK_POINTER_REGNUM)
3780 else if (from == ARG_POINTER_REGNUM)
3781 return frame.stack_pointer_offset;
3782 else if (from != FRAME_POINTER_REGNUM)
3785 return frame.stack_pointer_offset - frame.frame_pointer_offset;
3789 /* Fill structure ix86_frame about frame of currently computed function. */
3792 ix86_compute_frame_layout (frame)
3793 struct ix86_frame *frame;
3795 HOST_WIDE_INT total_size;
3796 int stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
3798 int preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
3799 HOST_WIDE_INT size = get_frame_size ();
3801 frame->nregs = ix86_nsaved_regs ();
3804 /* Skip return value and save base pointer. */
3805 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
3807 frame->hard_frame_pointer_offset = offset;
3809 /* Do some sanity checking of stack_alignment_needed and
3810 preferred_alignment, since i386 port is the only one using those features
3811 that may break easily. */
3813 if (size && !stack_alignment_needed)
3815 if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
3817 if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
3819 if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
3822 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
3823 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
3825 /* Register save area */
3826 offset += frame->nregs * UNITS_PER_WORD;
3829 if (ix86_save_varrargs_registers)
3831 offset += X86_64_VARARGS_SIZE;
3832 frame->va_arg_size = X86_64_VARARGS_SIZE;
3835 frame->va_arg_size = 0;
3837 /* Align start of frame for local function. */
3838 frame->padding1 = ((offset + stack_alignment_needed - 1)
3839 & -stack_alignment_needed) - offset;
3841 offset += frame->padding1;
3843 /* Frame pointer points here. */
3844 frame->frame_pointer_offset = offset;
3848 /* Add outgoing arguments area. */
3849 if (ACCUMULATE_OUTGOING_ARGS)
3851 offset += current_function_outgoing_args_size;
3852 frame->outgoing_arguments_size = current_function_outgoing_args_size;
3855 frame->outgoing_arguments_size = 0;
3857 /* Align stack boundary. */
3858 frame->padding2 = ((offset + preferred_alignment - 1)
3859 & -preferred_alignment) - offset;
3861 offset += frame->padding2;
3863 /* We've reached end of stack frame. */
3864 frame->stack_pointer_offset = offset;
3866 /* Size prologue needs to allocate. */
3867 frame->to_allocate =
3868 (size + frame->padding1 + frame->padding2
3869 + frame->outgoing_arguments_size + frame->va_arg_size);
3871 if (TARGET_64BIT && TARGET_RED_ZONE && current_function_sp_is_unchanging
3872 && current_function_is_leaf)
3874 frame->red_zone_size = frame->to_allocate;
3875 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
3876 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
3879 frame->red_zone_size = 0;
3880 frame->to_allocate -= frame->red_zone_size;
3881 frame->stack_pointer_offset -= frame->red_zone_size;
/* NOTE(review): the debug dump below is presumably guarded by an elided
   condition/#if in the original source — confirm before editing.  */
3883 fprintf (stderr, "nregs: %i\n", frame->nregs);
3884 fprintf (stderr, "size: %i\n", size);
3885 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
3886 fprintf (stderr, "padding1: %i\n", frame->padding1);
3887 fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
3888 fprintf (stderr, "padding2: %i\n", frame->padding2);
3889 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
3890 fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
3891 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
3892 fprintf (stderr, "hard_frame_pointer_offset: %i\n",
3893 frame->hard_frame_pointer_offset);
3894 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
3898 /* Emit code to save registers in the prologue, using push insns.
   Each push is marked frame-related so unwind info is generated.  */
3901 ix86_emit_save_regs ()
3906 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
3907 if (ix86_save_reg (regno, true))
3909 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
3910 RTX_FRAME_RELATED_P (insn) = 1;
3914 /* Emit code to save registers using MOV insns. First register
3915 is stored at POINTER + OFFSET; subsequent ones at successive words.  */
3917 ix86_emit_save_regs_using_mov (pointer, offset)
3919 HOST_WIDE_INT offset;
3924 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
3925 if (ix86_save_reg (regno, true))
3927 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
3929 gen_rtx_REG (Pmode, regno));
3930 RTX_FRAME_RELATED_P (insn) = 1;
3931 offset += UNITS_PER_WORD;
3935 /* Expand the prologue into a bunch of separate insns: frame pointer
   setup, register saves (pushes or movs), stack allocation, and PIC
   register load when needed.  */
3938 ix86_expand_prologue ()
3941 int pic_reg_used = (flag_pic && (current_function_uses_pic_offset_table
3942 || current_function_uses_const_pool)
3944 struct ix86_frame frame;
3946 HOST_WIDE_INT allocate;
3950 use_fast_prologue_epilogue
3951 = !expensive_function_p (FAST_PROLOGUE_INSN_COUNT);
3952 if (TARGET_PROLOGUE_USING_MOVE)
3953 use_mov = use_fast_prologue_epilogue;
3955 ix86_compute_frame_layout (&frame);
3957 /* Note: AT&T enter does NOT have reversed args. Enter is probably
3958 slower on all targets. Also sdb doesn't like it. */
3960 if (frame_pointer_needed)
3962 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
3963 RTX_FRAME_RELATED_P (insn) = 1;
3965 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
3966 RTX_FRAME_RELATED_P (insn) = 1;
3969 allocate = frame.to_allocate;
3970 /* In case we are dealing only with single register and empty frame,
3971 push is equivalent of the mov+add sequence. */
3972 if (allocate == 0 && frame.nregs <= 1)
3976 ix86_emit_save_regs ();
3978 allocate += frame.nregs * UNITS_PER_WORD;
3982 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
3984 insn = emit_insn (gen_pro_epilogue_adjust_stack
3985 (stack_pointer_rtx, stack_pointer_rtx,
3986 GEN_INT (-allocate)));
3987 RTX_FRAME_RELATED_P (insn) = 1;
3991 /* ??? Is this only valid for Win32?  Large allocations go through
   the _alloca helper so the stack gets probed page by page.  */
3998 arg0 = gen_rtx_REG (SImode, 0);
3999 emit_move_insn (arg0, GEN_INT (allocate));
4001 sym = gen_rtx_MEM (FUNCTION_MODE,
4002 gen_rtx_SYMBOL_REF (Pmode, "_alloca"));
4003 insn = emit_call_insn (gen_call (sym, const0_rtx, constm1_rtx));
4005 CALL_INSN_FUNCTION_USAGE (insn)
4006 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_USE (VOIDmode, arg0),
4007 CALL_INSN_FUNCTION_USAGE (insn));
4011 if (!frame_pointer_needed || !frame.to_allocate)
4012 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
4014 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
4015 -frame.nregs * UNITS_PER_WORD);
4018 #ifdef SUBTARGET_PROLOGUE
4023 load_pic_register ();
4025 /* If we are profiling, make sure no instructions are scheduled before
4026 the call to mcount. However, if -fpic, the above call will have
4028 if (profile_flag && ! pic_reg_used)
4029 emit_insn (gen_blockage ());
4032 /* Emit code to restore saved registers using MOV insns. First register
4033 is restored from POINTER + OFFSET; subsequent ones from successive
   words.  MAYBE_EH_RETURN selects which registers count as saved.  */
4035 ix86_emit_restore_regs_using_mov (pointer, offset, maybe_eh_return)
4038 int maybe_eh_return;
4042 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4043 if (ix86_save_reg (regno, maybe_eh_return))
4045 emit_move_insn (gen_rtx_REG (Pmode, regno),
4046 adjust_address (gen_rtx_MEM (Pmode, pointer),
4048 offset += UNITS_PER_WORD;
4052 /* Restore function stack, frame, and registers. */
4055 ix86_expand_epilogue (style)
4059 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
4060 struct ix86_frame frame;
4061 HOST_WIDE_INT offset;
4063 ix86_compute_frame_layout (&frame);
4065 /* Calculate start of saved registers relative to ebp. Special care
4066 must be taken for the normal return case of a function using
4067 eh_return: the eax and edx registers are marked as saved, but not
4068 restored along this path. */
4069 offset = frame.nregs;
4070 if (current_function_calls_eh_return && style != 2)
4072 offset *= -UNITS_PER_WORD;
4074 /* If we're only restoring one register and sp is not valid then
4075 using a move instruction to restore the register since it's
4076 less work than reloading sp and popping the register.
4078 The default code results in stack adjustment using add/lea instruction,
4079 while this code results in LEAVE instruction (or discrete equivalent),
4080 so it is profitable in some other cases as well. Especially when there
4081 are no registers to restore. We also use this code when TARGET_USE_LEAVE
4082 and there is exactly one register to pop. This heuristic may need some
4083 tuning in future. */
4084 if ((!sp_valid && frame.nregs <= 1)
4085 || (TARGET_EPILOGUE_USING_MOVE
4086 && use_fast_prologue_epilogue
4087 && (frame.nregs > 1 || frame.to_allocate))
4088 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
4089 || (frame_pointer_needed && TARGET_USE_LEAVE
4090 && use_fast_prologue_epilogue && frame.nregs == 1)
4091 || current_function_calls_eh_return)
4093 /* Restore registers. We can use ebp or esp to address the memory
4094 locations. If both are available, default to ebp, since offsets
4095 are known to be small. Only exception is esp pointing directly to the
4096 end of block of saved registers, where we may simplify addressing
4099 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
4100 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
4101 frame.to_allocate, style == 2);
4103 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
4104 offset, style == 2);
4106 /* eh_return epilogues need %ecx added to the stack pointer. */
4109 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
4111 if (frame_pointer_needed)
4113 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
4114 tmp = plus_constant (tmp, UNITS_PER_WORD);
4115 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
4117 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
4118 emit_move_insn (hard_frame_pointer_rtx, tmp);
4120 emit_insn (gen_pro_epilogue_adjust_stack
4121 (stack_pointer_rtx, sa, const0_rtx));
4125 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
4126 tmp = plus_constant (tmp, (frame.to_allocate
4127 + frame.nregs * UNITS_PER_WORD));
4128 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
4131 else if (!frame_pointer_needed)
4132 emit_insn (gen_pro_epilogue_adjust_stack
4133 (stack_pointer_rtx, stack_pointer_rtx,
4134 GEN_INT (frame.to_allocate
4135 + frame.nregs * UNITS_PER_WORD)));
4136 /* If not an i386, mov & pop is faster than "leave". */
4137 else if (TARGET_USE_LEAVE || optimize_size || !use_fast_prologue_epilogue)
4138 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
4141 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
4142 hard_frame_pointer_rtx,
4145 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
4147 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
4152 /* First step is to deallocate the stack frame so that we can
4153 pop the registers. */
4156 if (!frame_pointer_needed)
4158 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
4159 hard_frame_pointer_rtx,
4162 else if (frame.to_allocate)
4163 emit_insn (gen_pro_epilogue_adjust_stack
4164 (stack_pointer_rtx, stack_pointer_rtx,
4165 GEN_INT (frame.to_allocate)));
4167 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4168 if (ix86_save_reg (regno, false))
4171 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
4173 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
4175 if (frame_pointer_needed)
4177 /* Leave results in shorter dependency chains on CPUs that are
4178 able to grok it fast. */
4179 if (TARGET_USE_LEAVE)
4180 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
4181 else if (TARGET_64BIT)
4182 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
4184 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
4188 /* Sibcall epilogues don't want a return instruction. */
4192 if (current_function_pops_args && current_function_args_size)
4194 rtx popc = GEN_INT (current_function_pops_args);
4196 /* i386 can only pop 64K bytes. If asked to pop more, pop
4197 return address, do explicit add, and jump indirectly to the
4200 if (current_function_pops_args >= 65536)
4202 rtx ecx = gen_rtx_REG (SImode, 2);
4204 /* There is no "pascal" calling convention in 64bit ABI. */
4208 emit_insn (gen_popsi1 (ecx));
4209 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
4210 emit_jump_insn (gen_return_indirect_internal (ecx));
4213 emit_jump_insn (gen_return_pop_internal (popc));
4216 emit_jump_insn (gen_return_internal ());
4219 /* Extract the parts of an RTL expression that is a valid memory address
4220 for an instruction. Return false if the structure of the address is
   not base + index*scale + disp, filling OUT otherwise.  */
4224 ix86_decompose_address (addr, out)
4226 struct ix86_address *out;
4228 rtx base = NULL_RTX;
4229 rtx index = NULL_RTX;
4230 rtx disp = NULL_RTX;
4231 HOST_WIDE_INT scale = 1;
4232 rtx scale_rtx = NULL_RTX;
4234 if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG)
4236 else if (GET_CODE (addr) == PLUS)
4238 rtx op0 = XEXP (addr, 0);
4239 rtx op1 = XEXP (addr, 1);
4240 enum rtx_code code0 = GET_CODE (op0);
4241 enum rtx_code code1 = GET_CODE (op1);
4243 if (code0 == REG || code0 == SUBREG)
4245 if (code1 == REG || code1 == SUBREG)
4246 index = op0, base = op1; /* index + base */
4248 base = op0, disp = op1; /* base + displacement */
4250 else if (code0 == MULT)
4252 index = XEXP (op0, 0);
4253 scale_rtx = XEXP (op0, 1);
4254 if (code1 == REG || code1 == SUBREG)
4255 base = op1; /* index*scale + base */
4257 disp = op1; /* index*scale + disp */
4259 else if (code0 == PLUS && GET_CODE (XEXP (op0, 0)) == MULT)
4261 index = XEXP (XEXP (op0, 0), 0); /* index*scale + base + disp */
4262 scale_rtx = XEXP (XEXP (op0, 0), 1);
4263 base = XEXP (op0, 1);
4266 else if (code0 == PLUS)
4268 index = XEXP (op0, 0); /* index + base + disp */
4269 base = XEXP (op0, 1);
4275 else if (GET_CODE (addr) == MULT)
4277 index = XEXP (addr, 0); /* index*scale */
4278 scale_rtx = XEXP (addr, 1);
4280 else if (GET_CODE (addr) == ASHIFT)
4284 /* We're called for lea too, which implements ashift on occasion. */
4285 index = XEXP (addr, 0);
4286 tmp = XEXP (addr, 1);
4287 if (GET_CODE (tmp) != CONST_INT)
4289 scale = INTVAL (tmp);
4290 if ((unsigned HOST_WIDE_INT) scale > 3)
4295 disp = addr; /* displacement */
4297 /* Extract the integral value of scale. */
4300 if (GET_CODE (scale_rtx) != CONST_INT)
4302 scale = INTVAL (scale_rtx);
4305 /* Allow arg pointer and stack pointer as index if there is no scaling */
4306 if (base && index && scale == 1
4307 && (index == arg_pointer_rtx || index == frame_pointer_rtx
4308 || index == stack_pointer_rtx))
4315 /* Special case: %ebp cannot be encoded as a base without a displacement. */
4316 if ((base == hard_frame_pointer_rtx
4317 || base == frame_pointer_rtx
4318 || base == arg_pointer_rtx) && !disp)
4321 /* Special case: on K6, [%esi] makes the instruction vector decoded.
4322 Avoid this by transforming to [%esi+0]. */
4323 if (ix86_cpu == PROCESSOR_K6 && !optimize_size
4324 && base && !index && !disp
4326 && REGNO_REG_CLASS (REGNO (base)) == SIREG)
4329 /* Special case: encode reg+reg instead of reg*2. */
4330 if (!base && index && scale && scale == 2)
4331 base = index, scale = 1;
4333 /* Special case: scaling cannot be encoded without base or displacement. */
4334 if (!base && !disp && index && scale != 1)
4345 /* Return cost of the memory address x.
4346 For i386, it is better to use a complex address than let gcc copy
4347 the address into a reg and make a new pseudo. But not if the address
4348 requires two regs - that would mean more pseudos with longer
   lifetimes.  */
4351 ix86_address_cost (x)
4354 struct ix86_address parts;
4357 if (!ix86_decompose_address (x, &parts))
4360 /* More complex memory references are better. */
4361 if (parts.disp && parts.disp != const0_rtx)
4364 /* Attempt to minimize number of registers in the address. */
4366 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
4368 && (!REG_P (parts.index)
4369 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
4373 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
4375 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
4376 && parts.base != parts.index)
4379 /* AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
4380 since its predecode logic can't detect the length of instructions
4381 and it degenerates to vector decoded. Increase cost of such
4382 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
4383 to split such addresses or even refuse such addresses at all.
4385 Following addressing modes are affected:
4390 The first and last case may be avoidable by explicitly coding the zero in
4391 memory address, but I don't have AMD-K6 machine handy to check this
4395 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
4396 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
4397 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
4403 /* If X is a machine specific address (i.e. a symbol or label being
4404 referenced as a displacement from the GOT implemented using an
4405 UNSPEC), then return the base term. Otherwise return X. */
4408 ix86_find_base_term (x)
4415 if (GET_CODE (x) != CONST)
4418 if (GET_CODE (term) == PLUS
4419 && (GET_CODE (XEXP (term, 1)) == CONST_INT
4420 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
4421 term = XEXP (term, 0);
/* UNSPEC 15 is the form generated by legitimize_pic_address for
   symbol refs — presumably the 64-bit GOT-relative variant; confirm
   against the unspec definitions elsewhere in the port.  */
4422 if (GET_CODE (term) != UNSPEC
4423 || XVECLEN (term, 0) != 1
4424 || XINT (term, 1) != 15)
4427 term = XVECEXP (term, 0, 0);
4429 if (GET_CODE (term) != SYMBOL_REF
4430 && GET_CODE (term) != LABEL_REF)
4436 if (GET_CODE (x) != PLUS
4437 || XEXP (x, 0) != pic_offset_table_rtx
4438 || GET_CODE (XEXP (x, 1)) != CONST)
4441 term = XEXP (XEXP (x, 1), 0);
/* UNSPEC 7 is @GOTOFF — see legitimize_pic_address.  */
4443 if (GET_CODE (term) == PLUS && GET_CODE (XEXP (term, 1)) == CONST_INT)
4444 term = XEXP (term, 0);
4446 if (GET_CODE (term) != UNSPEC
4447 || XVECLEN (term, 0) != 1
4448 || XINT (term, 1) != 7)
4451 term = XVECEXP (term, 0, 0);
4453 if (GET_CODE (term) != SYMBOL_REF
4454 && GET_CODE (term) != LABEL_REF)
4460 /* Determine if a given CONST RTX is a valid memory displacement
   in PIC mode.  */
4464 legitimate_pic_address_disp_p (disp)
4467 /* In 64bit mode we can allow direct addresses of symbols and labels
4468 when they are not dynamic symbols. */
4472 if (GET_CODE (disp) == CONST)
4474 /* ??? Handle PIC code models */
4475 if (GET_CODE (x) == PLUS
4476 && (GET_CODE (XEXP (x, 1)) == CONST_INT
4477 && ix86_cmodel == CM_SMALL_PIC
4478 && INTVAL (XEXP (x, 1)) < 1024*1024*1024
4479 && INTVAL (XEXP (x, 1)) > -1024*1024*1024))
4481 if (local_symbolic_operand (x, Pmode))
4484 if (GET_CODE (disp) != CONST)
4486 disp = XEXP (disp, 0);
4490 /* It is unsafe to allow PLUS expressions. This limit allowed distance
4491 of GOT tables. We should not need these anyway. */
4492 if (GET_CODE (disp) != UNSPEC
4493 || XVECLEN (disp, 0) != 1
4494 || XINT (disp, 1) != 15)
4497 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
4498 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
4503 if (GET_CODE (disp) == PLUS)
4505 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
4507 disp = XEXP (disp, 0);
4510 if (GET_CODE (disp) != UNSPEC
4511 || XVECLEN (disp, 0) != 1)
4514 /* Must be @GOT or @GOTOFF. */
4515 switch (XINT (disp, 1))
4518 return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
4520 case 7: /* @GOTOFF */
4521 return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
4527 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
4528 memory address for an instruction. The MODE argument is the machine mode
4529 for the MEM expression that wants to use this address.
4531 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
4532 convert common non-canonical forms to canonical form so that they will
   be recognized.  */
4536 legitimate_address_p (mode, addr, strict)
4537 enum machine_mode mode;
4541 struct ix86_address parts;
4542 rtx base, index, disp;
4543 HOST_WIDE_INT scale;
4544 const char *reason = NULL;
4545 rtx reason_rtx = NULL_RTX;
4547 if (TARGET_DEBUG_ADDR)
4550 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
4551 GET_MODE_NAME (mode), strict);
4555 if (! ix86_decompose_address (addr, &parts))
4557 reason = "decomposition failed";
4562 index = parts.index;
4564 scale = parts.scale;
4566 /* Validate base register.
4568 Don't allow SUBREG's here, it can lead to spill failures when the base
4569 is one word out of a two word structure, which is represented internally
   as a DImode int.  */
4576 if (GET_CODE (base) != REG)
4578 reason = "base is not a register";
4582 if (GET_MODE (base) != Pmode)
4584 reason = "base is not in Pmode";
4588 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (base))
4589 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (base)))
4591 reason = "base is not valid";
4596 /* Validate index register.
4598 Don't allow SUBREG's here, it can lead to spill failures when the index
4599 is one word out of a two word structure, which is represented internally
   as a DImode int.  */
4606 if (GET_CODE (index) != REG)
4608 reason = "index is not a register";
4612 if (GET_MODE (index) != Pmode)
4614 reason = "index is not in Pmode";
4618 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (index))
4619 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (index)))
4621 reason = "index is not valid";
4626 /* Validate scale factor. */
4629 reason_rtx = GEN_INT (scale);
4632 reason = "scale without index";
4636 if (scale != 2 && scale != 4 && scale != 8)
4638 reason = "scale is not a valid multiplier";
4643 /* Validate displacement. */
4648 if (!CONSTANT_ADDRESS_P (disp))
4650 reason = "displacement is not constant";
4656 if (!x86_64_sign_extended_value (disp))
4658 reason = "displacement is out of range";
4664 if (GET_CODE (disp) == CONST_DOUBLE)
4666 reason = "displacement is a const_double";
4671 if (flag_pic && SYMBOLIC_CONST (disp))
4673 if (TARGET_64BIT && (index || base))
4675 reason = "non-constant pic memory reference";
4678 if (! legitimate_pic_address_disp_p (disp))
4680 reason = "displacement is an invalid pic construct";
4684 /* This code used to verify that a symbolic pic displacement
4685 includes the pic_offset_table_rtx register.
4687 While this is a good idea, unfortunately these constructs may
4688 be created by "adds using lea" optimization for incorrect
   code like:
4697 This code is nonsensical, but results in addressing
4698 GOT table with pic_offset_table_rtx base. We can't
4699 just refuse it easily, since it gets matched by
4700 "addsi3" pattern, that later gets split to lea in the
4701 case output register differs from input. While this
4702 can be handled by separate addsi pattern for this case
4703 that never results in lea, this seems to be easier and
4704 correct fix for crash to disable this test. */
4706 else if (HALF_PIC_P ())
4708 if (! HALF_PIC_ADDRESS_P (disp)
4709 || (base != NULL_RTX || index != NULL_RTX))
4711 reason = "displacement is an invalid half-pic reference";
4717 /* Everything looks valid. */
4718 if (TARGET_DEBUG_ADDR)
4719 fprintf (stderr, "Success.\n");
4723 if (TARGET_DEBUG_ADDR)
4725 fprintf (stderr, "Error: %s\n", reason);
4726 debug_rtx (reason_rtx);
4731 /* Return a unique alias set for the GOT (lazily created on first call). */
4733 static HOST_WIDE_INT
4734 ix86_GOT_alias_set ()
4736 static HOST_WIDE_INT set = -1;
4738 set = new_alias_set ();
4742 /* Return a legitimate reference for ORIG (an address) using the
4743 register REG. If REG is 0, a new pseudo is generated.
4745 There are two types of references that must be handled:
4747 1. Global data references must load the address from the GOT, via
4748 the PIC reg. An insn is emitted to do this load, and the reg is
   returned.
4751 2. Static data references, constant pool addresses, and code labels
4752 compute the address as an offset from the GOT, whose base is in
4753 the PIC reg. Static data objects have SYMBOL_REF_FLAG set to
4754 differentiate them from global data objects. The returned
4755 address is the PIC reg + an unspec constant.
4757 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
4758 reg also appears in the address. */
4761 legitimize_pic_address (orig, reg)
4769 if (local_symbolic_operand (addr, Pmode))
4771 /* In 64bit mode we can address such objects directly. */
4776 /* This symbol may be referenced via a displacement from the PIC
4777 base address (@GOTOFF). */
4779 current_function_uses_pic_offset_table = 1;
4780 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), 7);
4781 new = gen_rtx_CONST (Pmode, new);
4782 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
4786 emit_move_insn (reg, new);
4791 else if (GET_CODE (addr) == SYMBOL_REF)
4795 current_function_uses_pic_offset_table = 1;
4796 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), 15);
4797 new = gen_rtx_CONST (Pmode, new);
4798 new = gen_rtx_MEM (Pmode, new);
4799 RTX_UNCHANGING_P (new) = 1;
4800 set_mem_alias_set (new, ix86_GOT_alias_set ());
4803 reg = gen_reg_rtx (Pmode);
4804 /* Use directly gen_movsi, otherwise the address is loaded
4805 into register for CSE. We don't want to CSE these addresses,
4806 instead we CSE addresses from the GOT table, so skip this. */
4807 emit_insn (gen_movsi (reg, new));
4812 /* This symbol must be referenced via a load from the
4813 Global Offset Table (@GOT). */
4815 current_function_uses_pic_offset_table = 1;
4816 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), 6);
4817 new = gen_rtx_CONST (Pmode, new);
4818 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
4819 new = gen_rtx_MEM (Pmode, new);
4820 RTX_UNCHANGING_P (new) = 1;
4821 set_mem_alias_set (new, ix86_GOT_alias_set ());
4824 reg = gen_reg_rtx (Pmode);
4825 emit_move_insn (reg, new);
4831 if (GET_CODE (addr) == CONST)
4833 addr = XEXP (addr, 0);
4834 if (GET_CODE (addr) == UNSPEC)
4836 /* Check that the unspec is one of the ones we generate? */
4838 else if (GET_CODE (addr) != PLUS)
4841 if (GET_CODE (addr) == PLUS)
4843 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
4845 /* Check first to see if this is a constant offset from a @GOTOFF
4846 symbol reference. */
4847 if (local_symbolic_operand (op0, Pmode)
4848 && GET_CODE (op1) == CONST_INT
4852 current_function_uses_pic_offset_table = 1;
4853 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0), 7);
4854 new = gen_rtx_PLUS (Pmode, new, op1);
4855 new = gen_rtx_CONST (Pmode, new);
4856 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
4860 emit_move_insn (reg, new);
4866 /* ??? We need to limit offsets here. */
4871 base = legitimize_pic_address (XEXP (addr, 0), reg);
4872 new = legitimize_pic_address (XEXP (addr, 1),
4873 base == reg ? NULL_RTX : reg);
4875 if (GET_CODE (new) == CONST_INT)
4876 new = plus_constant (base, INTVAL (new));
4879 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
4881 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
4882 new = XEXP (new, 1);
4884 new = gen_rtx_PLUS (Pmode, base, new);
4892 /* Try machine-dependent ways of modifying an illegitimate address
4893 to be legitimate. If we find one, return the new, valid address.
4894 This macro is used in only one place: `memory_address' in explow.c.
4896 OLDX is the address as it was before break_out_memory_refs was called.
4897 In some cases it is useful to look at this to decide what needs to be done.
4899 MODE and WIN are passed so that this macro can use
4900 GO_IF_LEGITIMATE_ADDRESS.
4902 It is always safe for this macro to do nothing. It exists to recognize
4903 opportunities to optimize the output.
4905 For the 80386, we handle X+REG by loading X into a register R and
4906 using R+REG. R will go in a general reg and indexing will be used.
4907 However, if REG is a broken-out memory address or multiplication,
4908 nothing needs to be done because REG can certainly go in a general reg.
4910 When -fpic is used, special handling is needed for symbolic references.
4911 See comments by legitimize_pic_address in i386.c for details. */
/* Rewrite address X into a form valid for the i386, for access in MODE.
   Symbolic PIC references are delegated to legitimize_pic_address; small
   left shifts are canonicalized into multiplies; PLUS trees are
   re-associated toward base + index*scale + disp form.  OLDX is the
   pre-break_out_memory_refs address and is unused here.
   NOTE(review): this view has extraction gaps (braces/declarations are
   missing between numbered lines); comments below are hedged accordingly.  */
4914 legitimize_address (x, oldx, mode)
4916      register rtx oldx ATTRIBUTE_UNUSED;
4917      enum machine_mode mode;
4922   if (TARGET_DEBUG_ADDR)
4924       fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
4925 	       GET_MODE_NAME (mode));
/* PIC symbolic constants need the GOT/GOTOFF treatment.  */
4929   if (flag_pic && SYMBOLIC_CONST (x))
4930     return legitimize_pic_address (x, 0);
4932   /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
4933   if (GET_CODE (x) == ASHIFT
4934       && GET_CODE (XEXP (x, 1)) == CONST_INT
4935       && (log = (unsigned)exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
4938       x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
4939 			GEN_INT (1 << log));
4942   if (GET_CODE (x) == PLUS)
4944       /* Canonicalize shifts by 0, 1, 2, 3 into multiply.  */
4946       if (GET_CODE (XEXP (x, 0)) == ASHIFT
4947 	  && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
4948 	  && (log = (unsigned)exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
4951 	  XEXP (x, 0) = gen_rtx_MULT (Pmode,
4952 				      force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
4953 				      GEN_INT (1 << log));
4956       if (GET_CODE (XEXP (x, 1)) == ASHIFT
4957 	  && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
4958 	  && (log = (unsigned)exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
4961 	  XEXP (x, 1) = gen_rtx_MULT (Pmode,
4962 				      force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
4963 				      GEN_INT (1 << log));
4966       /* Put multiply first if it isn't already.  */
4967       if (GET_CODE (XEXP (x, 1)) == MULT)
4969 	  rtx tmp = XEXP (x, 0);
4970 	  XEXP (x, 0) = XEXP (x, 1);
4975       /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
4976 	 into (plus (plus (mult (reg) (const)) (reg)) (const)).  This can be
4977 	 created by virtual register instantiation, register elimination, and
4978 	 similar optimizations.  */
4979       if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
4982 	  x = gen_rtx_PLUS (Pmode,
4983 			    gen_rtx_PLUS (Pmode, XEXP (x, 0),
4984 					  XEXP (XEXP (x, 1), 0)),
4985 			    XEXP (XEXP (x, 1), 1));
4989 	      (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
4990 	 into (plus (plus (mult (reg) (const)) (reg)) (const)).  */
4991       else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
4992 	       && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
4993 	       && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
4994 	       && CONSTANT_P (XEXP (x, 1)))
4997 	  rtx other = NULL_RTX;
/* Pick whichever of the two constants is a CONST_INT; fold it into
   the remaining (symbolic or integer) term below.  */
4999 	  if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5001 	      constant = XEXP (x, 1);
5002 	      other = XEXP (XEXP (XEXP (x, 0), 1), 1);
5004 	  else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
5006 	      constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
5007 	      other = XEXP (x, 1);
5015 	  x = gen_rtx_PLUS (Pmode,
5016 			    gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
5017 					  XEXP (XEXP (XEXP (x, 0), 1), 0)),
5018 			    plus_constant (other, INTVAL (constant)));
/* If the re-association already produced a valid address, stop early.  */
5022       if (changed && legitimate_address_p (mode, x, FALSE))
/* Force multiplies into registers so they can serve as index terms.  */
5025       if (GET_CODE (XEXP (x, 0)) == MULT)
5028 	  XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
5031       if (GET_CODE (XEXP (x, 1)) == MULT)
5034 	  XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
5038 	  && GET_CODE (XEXP (x, 1)) == REG
5039 	  && GET_CODE (XEXP (x, 0)) == REG)
5042       if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
5045 	  x = legitimize_pic_address (x, 0);
5048       if (changed && legitimate_address_p (mode, x, FALSE))
/* Last resort: force one side into a fresh register.  */
5051       if (GET_CODE (XEXP (x, 0)) == REG)
5053 	  register rtx temp = gen_reg_rtx (Pmode);
5054 	  register rtx val = force_operand (XEXP (x, 1), temp);
5056 	    emit_move_insn (temp, val);
5062       else if (GET_CODE (XEXP (x, 1)) == REG)
5064 	  register rtx temp = gen_reg_rtx (Pmode);
5065 	  register rtx val = force_operand (XEXP (x, 0), temp);
5067 	    emit_move_insn (temp, val);
5077 /* Print an integer constant expression in assembler syntax. Addition
5078 and subtraction are the only arithmetic that may appear in these
5079 expressions. FILE is the stdio stream to write to, X is the rtx, and
5080 CODE is the operand print code from the output string. */
/* Print integer constant expression X (SYMBOL_REF, LABEL_REF, CONST_INT,
   CONST_DOUBLE, PLUS, MINUS, or UNSPEC wrappers thereof) to FILE in
   assembler syntax, emitting the PIC relocation suffixes (@GOT, @GOTOFF,
   @PLT, @GOTPCREL) demanded by any UNSPEC wrapper.  CODE is the operand
   print code ('P' requests an @PLT suffix on plain symbols).
   NOTE(review): case labels and braces are missing from this extracted
   view; comments are placed on the surviving lines.  */
5083 output_pic_addr_const (file, x, code)
5090   switch (GET_CODE (x))
5100       assemble_name (file, XSTR (x, 0));
5101       if (code == 'P' && ! SYMBOL_REF_FLAG (x))
5102 	fputs ("@PLT", file);
5109       ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
5110       assemble_name (asm_out_file, buf);
5114       fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
5118       /* This used to output parentheses around the expression,
5119 	 but that does not work on the 386 (either ATT or BSD assembler).  */
5120       output_pic_addr_const (file, XEXP (x, 0), code);
5124       if (GET_MODE (x) == VOIDmode)
5126 	  /* We can use %d if the number is <32 bits and positive.  */
5127 	  if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
5128 	    fprintf (file, "0x%lx%08lx",
5129 		     (unsigned long) CONST_DOUBLE_HIGH (x),
5130 		     (unsigned long) CONST_DOUBLE_LOW (x));
5132 	    fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
5135 	/* We can't handle floating point constants;
5136 	   PRINT_OPERAND must handle them.  */
5137 	output_operand_lossage ("floating constant misused");
5141       /* Some assemblers need integer constants to appear first.  */
5142       if (GET_CODE (XEXP (x, 0)) == CONST_INT)
5144 	  output_pic_addr_const (file, XEXP (x, 0), code);
5146 	  output_pic_addr_const (file, XEXP (x, 1), code);
5148       else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5150 	  output_pic_addr_const (file, XEXP (x, 1), code);
5152 	  output_pic_addr_const (file, XEXP (x, 0), code);
/* MINUS: bracket the difference; dialect selects ()/[] grouping.  */
5159       putc (ASSEMBLER_DIALECT ? '(' : '[', file);
5160       output_pic_addr_const (file, XEXP (x, 0), code);
5162       output_pic_addr_const (file, XEXP (x, 1), code);
5163       putc (ASSEMBLER_DIALECT ? ')' : ']', file);
/* UNSPEC: single operand plus a relocation-kind tag in XINT (x, 1).  */
5167       if (XVECLEN (x, 0) != 1)
5169       output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
5170       switch (XINT (x, 1))
5173 	  fputs ("@GOT", file);
5176 	  fputs ("@GOTOFF", file);
5179 	  fputs ("@PLT", file);
5182 	  fputs ("@GOTPCREL(%RIP)", file);
5185 	  output_operand_lossage ("invalid UNSPEC as operand");
5191       output_operand_lossage ("invalid expression as operand");
5195 /* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
5196 We need to handle our special PIC relocations. */
/* Emit a data directive (quad on 64-bit, long otherwise) followed by the
   address constant X, routing through output_pic_addr_const when PIC
   relocations may be needed.  Called via ASM_OUTPUT_DWARF_ADDR_CONST.  */
5199 i386_dwarf_output_addr_const (file, x)
5204   fprintf (file, "%s", TARGET_64BIT ? ASM_QUAD : ASM_LONG);
5208   fprintf (file, "%s", ASM_LONG);
5211     output_pic_addr_const (file, x, '\0');
5213     output_addr_const (file, x);
5217 /* In the name of slightly smaller debug output, and to cater to
5218 general assembler lossage, recognize PIC+GOTOFF and turn it back
5219 into a direct symbol reference. */
/* Recognize PIC-register-relative forms (pic_reg + gotoff-style CONST)
   in ORIG_X and strip them back to a direct symbol reference, to shrink
   debug output and avoid assembler lossage.  Returns the bare symbol
   (possibly plus an integer offset) when the pattern matches.
   NOTE(review): the magic UNSPEC tags here are raw integers; 7 appears
   to be the GOTOFF wrapper (cf. legitimize_pic_address above), 6 and 15
   presumably GOT/GOTPCREL variants -- verify against the UNSPEC
   definitions elsewhere in this file.  */
5222 i386_simplify_dwarf_addr (orig_x)
5229   if (GET_CODE (x) != CONST
5230       || GET_CODE (XEXP (x, 0)) != UNSPEC
5231       || XINT (XEXP (x, 0), 1) != 15)
5233   return XVECEXP (XEXP (x, 0), 0, 0);
5236   if (GET_CODE (x) != PLUS
5237       || GET_CODE (XEXP (x, 0)) != REG
5238       || GET_CODE (XEXP (x, 1)) != CONST)
5241   x = XEXP (XEXP (x, 1), 0);
5242   if (GET_CODE (x) == UNSPEC
5243       && (XINT (x, 1) == 6
5244 	  || XINT (x, 1) == 7))
5245     return XVECEXP (x, 0, 0);
5247   if (GET_CODE (x) == PLUS
5248       && GET_CODE (XEXP (x, 0)) == UNSPEC
5249       && GET_CODE (XEXP (x, 1)) == CONST_INT
5250       && (XINT (XEXP (x, 0), 1) == 6
5251 	  || XINT (XEXP (x, 0), 1) == 7))
5252     return gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
/* Write to FILE the condition-code suffix (e.g. "e", "a", "np") for
   comparison CODE in condition-code mode MODE.  REVERSE inverts the
   condition; FP selects the fcmov-style spelling where it differs.
   FP comparisons are first mapped to an integer-flag condition.  */
5258 put_condition_code (code, mode, reverse, fp, file)
5260      enum machine_mode mode;
5266   if (mode == CCFPmode || mode == CCFPUmode)
5268       enum rtx_code second_code, bypass_code;
5269       ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
/* A comparison needing a bypass or second jump cannot be expressed as a
   single suffix here.  */
5270       if (bypass_code != NIL || second_code != NIL)
5272       code = ix86_fp_compare_code_to_integer (code);
5276     code = reverse_condition (code);
5287       if (mode != CCmode && mode != CCNOmode && mode != CCGCmode)
5292       /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
5293 	 Those same assemblers have the same but opposite lossage on cmov.  */
5296       suffix = fp ? "nbe" : "a";
5299       if (mode == CCNOmode || mode == CCGOCmode)
5301       else if (mode == CCmode || mode == CCGCmode)
5312       if (mode == CCNOmode || mode == CCGOCmode)
5314       else if (mode == CCmode || mode == CCGCmode)
5323       suffix = fp ? "nb" : "ae";
5326       if (mode != CCmode && mode != CCGCmode && mode != CCNOmode)
5336       suffix = fp ? "u" : "p";
5339       suffix = fp ? "nu" : "np";
5344   fputs (suffix, file);
/* Print register X to FILE under size/print code CODE ('w', 'b', 'k',
   'q', 'y', 'h' force a particular width name; otherwise the width is
   taken from X's mode).  Handles the AMD64 REX registers' divergent
   r8..r15 naming and the st(0) special case for the FP stack top.  */
5348 print_reg (x, code, file)
/* These registers should never reach the assembler printer.  */
5353   if (REGNO (x) == ARG_POINTER_REGNUM
5354       || REGNO (x) == FRAME_POINTER_REGNUM
5355       || REGNO (x) == FLAGS_REG
5356       || REGNO (x) == FPSR_REG)
5359   if (ASSEMBLER_DIALECT == 0 || USER_LABEL_PREFIX[0] == 0)
/* Translate the print code into a width in bytes, stored back in CODE.  */
5362   if (code == 'w' || MMX_REG_P (x))
5364   else if (code == 'b')
5366   else if (code == 'k')
5368   else if (code == 'q')
5370   else if (code == 'y')
5372   else if (code == 'h')
5375     code = GET_MODE_SIZE (GET_MODE (x));
5377   /* Irritatingly, AMD extended registers use different naming convention
5378      from the normal registers.  */
5379   if (REX_INT_REG_P (x))
5386 	  error ("extended registers have no high halves");
5389 	  fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
5392 	  fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
5395 	  fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
5398 	  fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
5401 	  error ("unsupported operand size for extended register");
5409       if (STACK_TOP_P (x))
5411 	  fputs ("st(0)", file);
/* 4- and 8-byte integer registers get the 'e'/'r' prefix (eax/rax).  */
5418       if (! ANY_FP_REG_P (x))
5419 	putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
5423       fputs (hi_reg_name[REGNO (x)], file);
5426       fputs (qi_reg_name[REGNO (x)], file);
5429       fputs (qi_high_reg_name[REGNO (x)], file);
5437 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
5438 C -- print opcode suffix for set/cmov insn.
5439 c -- like C, but print reversed condition
5440 F,f -- likewise, but for floating-point.
5441 R -- print the prefix for register names.
5442 z -- print the opcode suffix for the size of the current operand.
5443 * -- print a star (in certain assembler syntax)
5444 A -- print an absolute memory reference.
5445 w -- print the operand as if it's a "word" (HImode) even if it isn't.
5446 s -- print a shift double count, followed by the assembler's argument
5448 b -- print the QImode name of the register for the indicated operand.
5449 %b0 would print %al if operands[0] is reg 0.
5450 w -- likewise, print the HImode name of the register.
5451 k -- likewise, print the SImode name of the register.
5452 q -- likewise, print the DImode name of the register.
5453 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
5454 y -- print "st(0)" instead of "st" as a register.
5455 D -- print condition for SSE cmp instruction.
5456 P -- if PIC, print an @PLT suffix.
5457 X -- don't print any sort of PIC '@' suffix for a symbol.
/* Master operand printer for i386 asm output: print operand X to FILE
   under print code CODE (see the code table in the comment above).
   Dispatches on CODE first, then on the rtx class of X (REG, MEM,
   CONST_DOUBLE, constants).  NOTE(review): many case labels and braces
   are missing from this extracted view.  */
5461 print_operand (file, x, code)
5471 	  if (ASSEMBLER_DIALECT == 0)
5476 	  if (ASSEMBLER_DIALECT == 0)
5478 	  else if (ASSEMBLER_DIALECT == 1)
5480 	      /* Intel syntax.  For absolute addresses, registers should not
5481 		 be surrounded by braces.  */
5482 	      if (GET_CODE (x) != REG)
5485 		  PRINT_OPERAND (file, x, 0);
5491 	  PRINT_OPERAND (file, x, 0);
5496 	  if (ASSEMBLER_DIALECT == 0)
5501 	  if (ASSEMBLER_DIALECT == 0)
5506 	  if (ASSEMBLER_DIALECT == 0)
5511 	  if (ASSEMBLER_DIALECT == 0)
5516 	  if (ASSEMBLER_DIALECT == 0)
5521 	  if (ASSEMBLER_DIALECT == 0)
5526 	  /* 387 opcodes don't get size suffixes if the operands are
5529 	  if (STACK_REG_P (x))
5532 	  /* this is the size of op from size of operand */
5533 	  switch (GET_MODE_SIZE (GET_MODE (x)))
5536 #ifdef HAVE_GAS_FILDS_FISTS
5542 	      if (GET_MODE (x) == SFmode)
5557 	      if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
5559 #ifdef GAS_MNEMONICS
5585 	  if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
5587 	      PRINT_OPERAND (file, x, 0);
5593 	  /* Little bit of braindamage here.  The SSE compare instructions
5594 	     use completely different names for the comparisons than the
5595 	     fp conditional moves do.  */
5596 	  switch (GET_CODE (x))
5611 	      fputs ("unord", file);
5615 	      fputs ("neq", file);
5619 	      fputs ("nlt", file);
5623 	      fputs ("nle", file);
5626 	      fputs ("ord", file);
5634 	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
5637 	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
5640 	  /* Like above, but reverse condition */
5642 	  /* Check to see if argument to %c is really a constant
5643 	     and not a condition code which needs to be reversed.  */
5644 	  if (GET_RTX_CLASS (GET_CODE (x)) != '<')
5646 	      output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
5649 	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
5652 	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
/* Branch-prediction hint prefixes, gated on optimization settings.  */
5658 	    if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
5661 	    x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
5664 		int pred_val = INTVAL (XEXP (x, 0));
/* Only hint when the note says the branch is clearly biased.  */
5666 		if (pred_val < REG_BR_PROB_BASE * 45 / 100
5667 		    || pred_val > REG_BR_PROB_BASE * 55 / 100)
5669 		    int taken = pred_val > REG_BR_PROB_BASE / 2;
5670 		    int cputaken = final_forward_branch_p (current_output_insn) == 0;
5672 		    /* Emit hints only in the case default branch prediction
5673 		       heuristics would fail.  */
5674 		    if (taken != cputaken)
5676 			/* We use 3e (DS) prefix for taken branches and
5677 			   2e (CS) prefix for not taken branches.  */
5679 			  fputs ("ds ; ", file);
5681 			  fputs ("cs ; ", file);
5690 	    sprintf (str, "invalid operand code `%c'", code);
5691 	    output_operand_lossage (str);
/* No (or consumed) print code: dispatch on the operand itself.  */
5696   if (GET_CODE (x) == REG)
5698       PRINT_REG (x, code, file);
5701   else if (GET_CODE (x) == MEM)
5703       /* No `byte ptr' prefix for call instructions.  */
5704       if (ASSEMBLER_DIALECT != 0 && code != 'X' && code != 'P')
5707 	  switch (GET_MODE_SIZE (GET_MODE (x)))
5709 	    case 1: size = "BYTE"; break;
5710 	    case 2: size = "WORD"; break;
5711 	    case 4: size = "DWORD"; break;
5712 	    case 8: size = "QWORD"; break;
5713 	    case 12: size = "XWORD"; break;
5714 	    case 16: size = "XMMWORD"; break;
5719 	  /* Check for explicit size override (codes 'b', 'w' and 'k') */
5722 	  else if (code == 'w')
5724 	  else if (code == 'k')
5728 	  fputs (" PTR ", file);
5732       if (flag_pic && CONSTANT_ADDRESS_P (x))
5733 	output_pic_addr_const (file, x, code);
5734       /* Avoid (%rip) for call operands.  */
5735       else if (CONSTANT_ADDRESS_P (x) && code =='P'
5736 	       && GET_CODE (x) != CONST_INT)
5737 	output_addr_const (file, x);
5742   else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
5747       REAL_VALUE_FROM_CONST_DOUBLE (r, x);
5748       REAL_VALUE_TO_TARGET_SINGLE (r, l);
5750       if (ASSEMBLER_DIALECT == 0)
5752       fprintf (file, "0x%lx", l);
5755   /* These float cases don't actually occur as immediate operands.  */
5756   else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
5761       REAL_VALUE_FROM_CONST_DOUBLE (r, x);
5762       REAL_VALUE_TO_DECIMAL (r, "%.22e", dstr);
5763       fprintf (file, "%s", dstr);
5766   else if (GET_CODE (x) == CONST_DOUBLE
5767 	   && (GET_MODE (x) == XFmode || GET_MODE (x) == TFmode))
5772       REAL_VALUE_FROM_CONST_DOUBLE (r, x);
5773       REAL_VALUE_TO_DECIMAL (r, "%.22e", dstr);
5774       fprintf (file, "%s", dstr);
5780       if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
5782 	  if (ASSEMBLER_DIALECT == 0)
5785       else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
5786 	       || GET_CODE (x) == LABEL_REF)
5788 	  if (ASSEMBLER_DIALECT == 0)
5791 	    fputs ("OFFSET FLAT:", file);
5794       if (GET_CODE (x) == CONST_INT)
5795 	fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
5797 	output_pic_addr_const (file, x, code);
5799 	output_addr_const (file, x);
5803 /* Print a memory operand whose address is ADDR. */
/* Print a memory operand's address ADDR to FILE, after decomposing it
   into base/index/displacement/scale via ix86_decompose_address.
   Emits AT&T syntax (disp(base,index,scale)) or Intel syntax
   ([base+index*scale+disp]) depending on ASSEMBLER_DIALECT.  */
5806 print_operand_address (file, addr)
5810   struct ix86_address parts;
5811   rtx base, index, disp;
5814   if (! ix86_decompose_address (addr, &parts))
5818   index = parts.index;
5820   scale = parts.scale;
/* Pure-displacement addresses need special handling (ds: override in
   Intel syntax, %rip-relative form on 64-bit).  */
5822   if (!base && !index)
5824       /* Displacement only requires special attention.  */
5826       if (GET_CODE (disp) == CONST_INT)
5828 	  if (ASSEMBLER_DIALECT != 0)
5830 	      if (USER_LABEL_PREFIX[0] == 0)
5832 	      fputs ("ds:", file);
5834 	  fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (addr));
5837 	output_pic_addr_const (file, addr, 0);
5839 	output_addr_const (file, addr);
5841       /* Use one byte shorter RIP relative addressing for 64bit mode.  */
5842       if (GET_CODE (disp) != CONST_INT && TARGET_64BIT)
5843 	fputs ("(%rip)", file);
/* AT&T syntax: disp(base,index,scale).  */
5847       if (ASSEMBLER_DIALECT == 0)
5852 	      output_pic_addr_const (file, disp, 0);
5853 	    else if (GET_CODE (disp) == LABEL_REF)
5854 	      output_asm_label (disp);
5856 	      output_addr_const (file, disp);
5861 	      PRINT_REG (base, 0, file);
5865 	      PRINT_REG (index, 0, file);
5867 		fprintf (file, ",%d", scale);
/* Intel syntax: [base+index*scale+disp], symbol printed first.  */
5873 	  rtx offset = NULL_RTX;
5877 	      /* Pull out the offset of a symbol; print any symbol itself.  */
5878 	      if (GET_CODE (disp) == CONST
5879 		  && GET_CODE (XEXP (disp, 0)) == PLUS
5880 		  && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
5882 		  offset = XEXP (XEXP (disp, 0), 1);
5883 		  disp = gen_rtx_CONST (VOIDmode,
5884 					XEXP (XEXP (disp, 0), 0));
5888 		output_pic_addr_const (file, disp, 0);
5889 	      else if (GET_CODE (disp) == LABEL_REF)
5890 		output_asm_label (disp);
5891 	      else if (GET_CODE (disp) == CONST_INT)
5894 		output_addr_const (file, disp);
5900 	      PRINT_REG (base, 0, file);
5903 		  if (INTVAL (offset) >= 0)
5905 		  fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
5909 		fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
5916 	      PRINT_REG (index, 0, file);
5918 		fprintf (file, "*%d", scale);
5925 /* Split one or more DImode RTL references into pairs of SImode
5926 references. The RTL can be REG, offsettable MEM, integer constant, or
5927 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
5928 split and "num" is its length. lo_half and hi_half are output arrays
5929 that parallel "operands". */
/* Split each DImode rtx in OPERANDS[0..num-1] into its SImode low and
   high halves, written into LO_HALF[] and HI_HALF[] in parallel.
   MEM operands are split with adjust_address (offsets 0 and 4) since
   simplify_subreg will not split volatile memory references.  */
5932 split_di (operands, num, lo_half, hi_half)
5935      rtx lo_half[], hi_half[];
5939       rtx op = operands[num];
5941       /* simplify_subreg refuses to split volatile memory addresses,
5942 	 but we still have to handle it.  */
5943       if (GET_CODE (op) == MEM)
5945 	  lo_half[num] = adjust_address (op, SImode, 0);
5946 	  hi_half[num] = adjust_address (op, SImode, 4);
/* VOIDmode covers bare constants, which are treated as DImode.  */
5950 	  lo_half[num] = simplify_gen_subreg (SImode, op,
5951 					      GET_MODE (op) == VOIDmode
5952 					      ? DImode : GET_MODE (op), 0);
5953 	  hi_half[num] = simplify_gen_subreg (SImode, op,
5954 					      GET_MODE (op) == VOIDmode
5955 					      ? DImode : GET_MODE (op), 4);
5959 /* Split one or more TImode RTL references into pairs of SImode
5960 references. The RTL can be REG, offsettable MEM, integer constant, or
5961 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
5962 split and "num" is its length. lo_half and hi_half are output arrays
5963 that parallel "operands". */
/* Split each TImode rtx in OPERANDS[0..num-1] into its DImode low and
   high halves (offsets 0 and 8), mirroring split_di above.  */
5966 split_ti (operands, num, lo_half, hi_half)
5969      rtx lo_half[], hi_half[];
5973       rtx op = operands[num];
5975       /* simplify_subreg refuses to split volatile memory addresses, but we
5976 	 still have to handle it.  */
5977       if (GET_CODE (op) == MEM)
5979 	  lo_half[num] = adjust_address (op, DImode, 0);
5980 	  hi_half[num] = adjust_address (op, DImode, 8);
5984 	  lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
5985 	  hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
5990 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
5991 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
5992 is the expression of the binary operation. The output may either be
5993 emitted here, or returned to the caller, like all output_* functions.
5995 There is no guarantee that the operands are the same mode, as they
5996 might be within FLOAT or FLOAT_EXTEND expressions. */
5998 #ifndef SYSV386_COMPAT
5999 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
6000 wants to fix the assemblers because that causes incompatibility
6001 with gcc. No-one wants to fix gcc because that causes
6002 incompatibility with assemblers... You can use the option of
6003 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
6004 #define SYSV386_COMPAT 1
/* Build (in static BUF) the assembler template for the 387 or SSE binary
   FP operation described by INSN/OPERANDS (operands[3] is PLUS, MINUS,
   MULT or DIV; see the block comment above).  The bulk of the logic
   selects the pop/reverse variant and operand order for the x87 stack.
   NOTE(review): returns a pointer into a static buffer -- not reentrant;
   many case labels/braces are missing from this extracted view.  */
6008 output_387_binary_op (insn, operands)
6012   static char buf[30];
6015   int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]) | SSE_REG_P (operands[2]);
6017 #ifdef ENABLE_CHECKING
6018   /* Even if we do not want to check the inputs, this documents input
6019      constraints.  Which helps in understanding the following code.  */
6020   if (STACK_REG_P (operands[0])
6021       && ((REG_P (operands[1])
6022 	   && REGNO (operands[0]) == REGNO (operands[1])
6023 	   && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
6024 	  || (REG_P (operands[2])
6025 	      && REGNO (operands[0]) == REGNO (operands[2])
6026 	      && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
6027       && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
/* Pick the base mnemonic (fadd/fiadd, fsub, fmul, fdiv ...); integer
   operands select the fi* form.  */
6033   switch (GET_CODE (operands[3]))
6036       if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
6037 	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
6045       if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
6046 	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
6054       if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
6055 	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
6063       if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
6064 	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
/* SSE forms take an ss/sd suffix and are done here.  */
6078       if (GET_MODE (operands[0]) == SFmode)
6079 	strcat (buf, "ss\t{%2, %0|%0, %2}");
6081 	strcat (buf, "sd\t{%2, %0|%0, %2}");
6086   switch (GET_CODE (operands[3]))
/* Commutative ops: canonicalize so operands[0] == operands[1].  */
6090       if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
6092 	  rtx temp = operands[2];
6093 	  operands[2] = operands[1];
6097       /* know operands[0] == operands[1].  */
6099       if (GET_CODE (operands[2]) == MEM)
6105       if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
6107 	  if (STACK_TOP_P (operands[0]))
6108 	    /* How is it that we are storing to a dead operand[2]?
6109 	       Well, presumably operands[1] is dead too.  We can't
6110 	       store the result to st(0) as st(0) gets popped on this
6111 	       instruction.  Instead store to operands[2] (which I
6112 	       think has to be st(1)).  st(1) will be popped later.
6113 	       gcc <= 2.8.1 didn't have this check and generated
6114 	       assembly code that the Unixware assembler rejected.  */
6115 	    p = "p\t{%0, %2|%2, %0}";	/* st(1) = st(0) op st(1); pop */
6117 	    p = "p\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0); pop */
6121       if (STACK_TOP_P (operands[0]))
6122 	p = "\t{%y2, %0|%0, %y2}";	/* st(0) = st(0) op st(r2) */
6124 	p = "\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0) */
/* Non-commutative ops (fsub/fdiv): operand order matters, and the
   reversed (r) and popping (p) variants interact with SYSV386_COMPAT.  */
6129       if (GET_CODE (operands[1]) == MEM)
6135       if (GET_CODE (operands[2]) == MEM)
6141       if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
6144 	  /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
6145 	     derived assemblers, confusingly reverse the direction of
6146 	     the operation for fsub{r} and fdiv{r} when the
6147 	     destination register is not st(0).  The Intel assembler
6148 	     doesn't have this brain damage.  Read !SYSV386_COMPAT to
6149 	     figure out what the hardware really does.  */
6150 	  if (STACK_TOP_P (operands[0]))
6151 	    p = "{p\t%0, %2|rp\t%2, %0}";
6153 	    p = "{rp\t%2, %0|p\t%0, %2}";
6155 	  if (STACK_TOP_P (operands[0]))
6156 	    /* As above for fmul/fadd, we can't store to st(0).  */
6157 	    p = "rp\t{%0, %2|%2, %0}";	/* st(1) = st(0) op st(1); pop */
6159 	    p = "p\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0); pop */
6164       if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
6167 	  if (STACK_TOP_P (operands[0]))
6168 	    p = "{rp\t%0, %1|p\t%1, %0}";
6170 	    p = "{p\t%1, %0|rp\t%0, %1}";
6172 	  if (STACK_TOP_P (operands[0]))
6173 	    p = "p\t{%0, %1|%1, %0}";	/* st(1) = st(1) op st(0); pop */
6175 	    p = "rp\t{%1, %0|%0, %1}";	/* st(r2) = st(0) op st(r2); pop */
6180       if (STACK_TOP_P (operands[0]))
6182 	  if (STACK_TOP_P (operands[1]))
6183 	    p = "\t{%y2, %0|%0, %y2}";	/* st(0) = st(0) op st(r2) */
6185 	    p = "r\t{%y1, %0|%0, %y1}";	/* st(0) = st(r1) op st(0) */
6188       else if (STACK_TOP_P (operands[1]))
6191 	    p = "{\t%1, %0|r\t%0, %1}";
6193 	    p = "r\t{%1, %0|%0, %1}";	/* st(r2) = st(0) op st(r2) */
6199 	    p = "{r\t%2, %0|\t%0, %2}";
6201 	    p = "\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0) */
6214 /* Output code to initialize control word copies used by
6215 trunc?f?i patterns. NORMAL is set to current control word, while ROUND_DOWN
6216 is set to control word rounding downwards. */
/* Emit RTL that stores the current x87 control word into NORMAL and a
   copy with the rounding-control bits forced to round-toward-zero
   ("round down" per the comment above) into ROUND_DOWN, for use by the
   trunc?f?i patterns.  Sets RC bits via an insv of 0xc when partial
   register stalls are not a concern, otherwise by OR-ing 0xc00.  */
6218 emit_i387_cw_initialization (normal, round_down)
6219      rtx normal, round_down;
6221   rtx reg = gen_reg_rtx (HImode);
6223   emit_insn (gen_x86_fnstcw_1 (normal));
6224   emit_move_insn (reg, normal);
6225   if (!TARGET_PARTIAL_REG_STALL && !optimize_size
6227     emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
6229     emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0xc00)));
6230   emit_move_insn (round_down, reg);
6233 /* Output code for INSN to convert a float to a signed int. OPERANDS
6234 are the insn operands. The output may be [HSD]Imode and the input
6235 operand may be [SDX]Fmode. */
/* Emit the assembly for a float->signed-int truncation (INSN/OPERANDS):
   switch the control word to operands[3] (round-down copy), fist(p),
   then restore operands[2].  DImode always uses the popping fistp since
   no non-popping 64-bit store exists; a dup fld keeps the value live
   when the stack top does not die.  */
6238 output_fix_trunc (insn, operands)
6242   int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
6243   int dimode_p = GET_MODE (operands[0]) == DImode;
6245   /* Jump through a hoop or two for DImode, since the hardware has no
6246      non-popping instruction.  We used to do this a different way, but
6247      that was somewhat fragile and broke with post-reload splitters.  */
6248   if (dimode_p && !stack_top_dies)
6249     output_asm_insn ("fld\t%y1", operands);
6251   if (!STACK_TOP_P (operands[1]))
6254   if (GET_CODE (operands[0]) != MEM)
6257   output_asm_insn ("fldcw\t%3", operands);
6258   if (stack_top_dies || dimode_p)
6259     output_asm_insn ("fistp%z0\t%0", operands);
6261     output_asm_insn ("fist%z0\t%0", operands);
6262   output_asm_insn ("fldcw\t%2", operands);
6267 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
6268 should be used and 2 when fnstsw should be used. UNORDERED_P is true
6269 when fucom should be used. */
/* Emit/return the assembly for the FP compare INSN (see the comment
   above: EFLAGS_P selects fcomi (1) or fnstsw (2) style, UNORDERED_P
   selects fucom).  Handles SSE [u]comis[sd], the double-popping fcompp
   case, and otherwise indexes a template table by a 4-bit mask.  */
6272 output_fp_compare (insn, operands, eflags_p, unordered_p)
6275      int eflags_p, unordered_p;
6278   rtx cmp_op0 = operands[0];
6279   rtx cmp_op1 = operands[1];
6280   int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]);
6285     cmp_op1 = operands[2];
6289       if (GET_MODE (operands[0]) == SFmode)
6291 	  return "ucomiss\t{%1, %0|%0, %1}";
/* NOTE(review): the Intel-syntax operand "%y" below looks like a typo
   for "%1" (cf. the ucomiss/ucomisd templates) -- verify.  */
6293 	  return "comiss\t{%1, %0|%0, %y}";
6296 	  return "ucomisd\t{%1, %0|%0, %1}";
6298 	  return "comisd\t{%1, %0|%0, %y}";
6301   if (! STACK_TOP_P (cmp_op0))
6304   stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
6306   if (STACK_REG_P (cmp_op1)
6308       && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
6309       && REGNO (cmp_op1) != FIRST_STACK_REG)
6311       /* If both the top of the 387 stack dies, and the other operand
6312 	 is also a stack register that dies, then this must be a
6313 	 `fcompp' float compare */
6317 	  /* There is no double popping fcomi variant.  Fortunately,
6318 	     eflags is immune from the fstp's cc clobbering.  */
6320 	    output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
6322 	    output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
6330 	    return "fucompp\n\tfnstsw\t%0";
6332 	    return "fcompp\n\tfnstsw\t%0";
6345       /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies.  */
6347       static const char * const alt[24] =
6359 	"fcomi\t{%y1, %0|%0, %y1}",
6360 	"fcomip\t{%y1, %0|%0, %y1}",
6361 	"fucomi\t{%y1, %0|%0, %y1}",
6362 	"fucomip\t{%y1, %0|%0, %y1}",
6369 	"fcom%z2\t%y2\n\tfnstsw\t%0",
6370 	"fcomp%z2\t%y2\n\tfnstsw\t%0",
6371 	"fucom%z2\t%y2\n\tfnstsw\t%0",
6372 	"fucomp%z2\t%y2\n\tfnstsw\t%0",
6374 	"ficom%z2\t%y2\n\tfnstsw\t%0",
6375 	"ficomp%z2\t%y2\n\tfnstsw\t%0",
/* Build the table index: bit 3 = eflags style, bit 2 = integer operand,
   bit 1 = unordered, bit 0 = stack top dies (pop variant).  */
6383       mask  = eflags_p << 3;
6384       mask |= (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT) << 2;
6385       mask |= unordered_p << 1;
6386       mask |= stack_top_dies;
/* Emit one absolute jump-table entry: ".long L<value>" (or ".quad" where
   the 64-bit directive is selected above).  */
6399 ix86_output_addr_vec_elt (file, value)
6403   const char *directive = ASM_LONG;
6408     directive = ASM_QUAD;
6414   fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
/* Emit one relative jump-table entry for label number VALUE against
   anchor label REL: either an explicit label difference, a @GOTOFF
   reference (when the assembler supports GOTOFF in data), or a
   _GLOBAL_OFFSET_TABLE_-relative expression as fallback.  */
6418 ix86_output_addr_diff_elt (file, value, rel)
6423     fprintf (file, "%s%s%d-.+4+(.-%s%d)\n",
6424 	     ASM_LONG, LPREFIX, value, LPREFIX, rel);
6425   else if (HAVE_AS_GOTOFF_IN_DATA)
6426     fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
6428     asm_fprintf (file, "%s%U_GLOBAL_OFFSET_TABLE_+[.-%s%d]\n",
6429 		 ASM_LONG, LPREFIX, value);
6432 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
/* Emit code to zero register DEST, choosing between "mov $0, reg" and
   the shorter flag-clobbering "xor reg, reg" (see comment above).
   Only valid after reload because of the register-width rewriting.  */
6436 ix86_expand_clear (dest)
6441   /* We play register width games, which are only valid after reload.  */
6442   if (!reload_completed)
6445   /* Avoid HImode and its attendant prefix byte.  */
6446   if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
6447     dest = gen_rtx_REG (SImode, REGNO (dest));
6449   tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
6451   /* This predicate should match that for movsi_xor and movdi_xor_rex64.  */
6452   if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
/* xor clobbers the flags: hard register 17 here is the flags register.  */
6454       rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
6455       tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
/* Expand a move of OPERANDS[1] into OPERANDS[0] in MODE, legitimizing
   PIC symbolic sources, forcing mem->mem moves and awkward push/constant
   sources through registers, and spilling FP constants to memory.  */
6462 ix86_expand_move (mode, operands)
6463      enum machine_mode mode;
6466   int strict = (reload_in_progress || reload_completed);
6469   if (flag_pic && mode == Pmode && symbolic_operand (operands[1], Pmode))
6471       /* Emit insns to move operands[1] into operands[0].  */
6473       if (GET_CODE (operands[0]) == MEM)
6474 	operands[1] = force_reg (Pmode, operands[1]);
6477 	  rtx temp = operands[0];
6478 	  if (GET_CODE (temp) != REG)
6479 	    temp = gen_reg_rtx (Pmode);
6480 	  temp = legitimize_pic_address (operands[1], temp);
6481 	  if (temp == operands[0])
/* Disallow mem->mem moves (except simple pushes, which the hardware
   can do directly).  */
6488       if (GET_CODE (operands[0]) == MEM
6489 	  && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
6490 	      || !push_operand (operands[0], mode))
6491 	  && GET_CODE (operands[1]) == MEM)
6492 	operands[1] = force_reg (mode, operands[1]);
6494       if (push_operand (operands[0], mode)
6495 	  && ! general_no_elim_operand (operands[1], mode))
6496 	operands[1] = copy_to_mode_reg (mode, operands[1]);
6498       /* Force large constants in 64bit compilation into register
6499 	 to get them CSEed.  */
6500       if (TARGET_64BIT && mode == DImode
6501 	  && immediate_operand (operands[1], mode)
6502 	  && !x86_64_zero_extended_value (operands[1])
6503 	  && !register_operand (operands[0], mode)
6504 	  && optimize && !reload_completed && !reload_in_progress)
6505 	operands[1] = copy_to_mode_reg (mode, operands[1]);
6507       if (FLOAT_MODE_P (mode))
6509 	  /* If we are loading a floating point constant to a register,
6510 	     force the value to memory now, since we'll get better code
6511 	     out the back end.  */
6515 	  else if (GET_CODE (operands[1]) == CONST_DOUBLE
6516 		   && register_operand (operands[0], mode))
6517 	    operands[1] = validize_mem (force_const_mem (mode, operands[1]));
6521   insn = gen_rtx_SET (VOIDmode, operands[0], operands[1]);
6526 /* Attempt to expand a binary operator. Make the expansion closer to the
6527 actual machine, then just general_operand, which will allow 3 separate
6528 memory references (one output, two input) in a single insn. */
/* Expand the binary operation CODE in MODE over OPERANDS (dst, src1,
   src2), massaging the operands to fit the machine's two-address form
   (see the comment above): at most one memory operand, constants only
   in src2 for non-commutative ops, result copied back to operands[0]
   if a scratch destination was used.  */
6531 ix86_expand_binary_operator (code, mode, operands)
6533      enum machine_mode mode;
6536   int matching_memory;
6537   rtx src1, src2, dst, op, clob;
6543   /* Recognize <var1> = <value> <op> <var1> for commutative operators */
6544   if (GET_RTX_CLASS (code) == 'c'
6545       && (rtx_equal_p (dst, src2)
6546 	  || immediate_operand (src1, mode)))
6553   /* If the destination is memory, and we do not have matching source
6554      operands, do things in registers.  */
6555   matching_memory = 0;
6556   if (GET_CODE (dst) == MEM)
6558       if (rtx_equal_p (dst, src1))
6559 	matching_memory = 1;
6560       else if (GET_RTX_CLASS (code) == 'c'
6561 	       && rtx_equal_p (dst, src2))
6562 	matching_memory = 2;
6564 	dst = gen_reg_rtx (mode);
6567   /* Both source operands cannot be in memory.  */
6568   if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
6570       if (matching_memory != 2)
6571 	src2 = force_reg (mode, src2);
6573 	src1 = force_reg (mode, src1);
6576   /* If the operation is not commutable, source 1 cannot be a constant
6577      or non-matching memory.  */
6578   if ((CONSTANT_P (src1)
6579        || (!matching_memory && GET_CODE (src1) == MEM))
6580       && GET_RTX_CLASS (code) != 'c')
6581     src1 = force_reg (mode, src1);
6583   /* If optimizing, copy to regs to improve CSE */
6584   if (optimize && ! no_new_pseudos)
6586       if (GET_CODE (dst) == MEM)
6587 	dst = gen_reg_rtx (mode);
6588       if (GET_CODE (src1) == MEM)
6589 	src1 = force_reg (mode, src1);
6590       if (GET_CODE (src2) == MEM)
6591 	src2 = force_reg (mode, src2);
6594   /* Emit the instruction.  */
6596   op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
6597   if (reload_in_progress)
6599       /* Reload doesn't know about the flags register, and doesn't know that
6600          it doesn't want to clobber it.  We can only do this with PLUS.  */
6607       clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
6608       emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
6611   /* Fix up the destination if needed.  */
6612   if (dst != operands[0])
6613     emit_move_insn (operands[0], dst);
6616 /* Return TRUE or FALSE depending on whether the binary operator meets the
6617 appropriate constraints. */
/* Predicate counterpart of ix86_expand_binary_operator: checks the same
   operand-position restrictions without emitting anything.  */
6620 ix86_binary_operator_ok (code, mode, operands)
6622 enum machine_mode mode ATTRIBUTE_UNUSED;
6625 /* Both source operands cannot be in memory. */
6626 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
6628 /* If the operation is not commutable, source 1 cannot be a constant. */
6629 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != 'c')
6631 /* If the destination is memory, we must have a matching source operand. */
6632 if (GET_CODE (operands[0]) == MEM
6633 && ! (rtx_equal_p (operands[0], operands[1])
6634 || (GET_RTX_CLASS (code) == 'c'
6635 && rtx_equal_p (operands[0], operands[2]))))
6637 /* If the operation is not commutable and source 1 is memory, we must
6638 have a matching destination. */
6639 if (GET_CODE (operands[1]) == MEM
6640 && GET_RTX_CLASS (code) != 'c'
6641 && ! rtx_equal_p (operands[0], operands[1]))
6646 /* Attempt to expand a unary operator.  Make the expansion closer to the
6647 actual machine, then just general_operand, which will allow 2 separate
6648 memory references (one output, one input) in a single insn. */
/* Mirrors ix86_expand_binary_operator for one-source operations:
   operands[0] = dest, operands[1] = src.  */
6651 ix86_expand_unary_operator (code, mode, operands)
6653 enum machine_mode mode;
6656 int matching_memory;
6657 rtx src, dst, op, clob;
6662 /* If the destination is memory, and we do not have matching source
6663 operands, do things in registers. */
6664 matching_memory = 0;
6665 if (GET_CODE (dst) == MEM)
6667 if (rtx_equal_p (dst, src))
6668 matching_memory = 1;
6670 dst = gen_reg_rtx (mode);
6673 /* When source operand is memory, destination must match. */
6674 if (!matching_memory && GET_CODE (src) == MEM)
6675 src = force_reg (mode, src);
6677 /* If optimizing, copy to regs to improve CSE */
6678 if (optimize && ! no_new_pseudos)
6680 if (GET_CODE (dst) == MEM)
6681 dst = gen_reg_rtx (mode);
6682 if (GET_CODE (src) == MEM)
6683 src = force_reg (mode, src);
6686 /* Emit the instruction. */
6688 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
6689 if (reload_in_progress || code == NOT)
6691 /* Reload doesn't know about the flags register, and doesn't know that
6692 it doesn't want to clobber it. */
/* NOT does not touch EFLAGS on x86, hence the code == NOT special case
   above: its pattern needs no FLAGS_REG clobber.  */
6699 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
6700 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
6703 /* Fix up the destination if needed. */
6704 if (dst != operands[0])
6705 emit_move_insn (operands[0], dst);
6708 /* Return TRUE or FALSE depending on whether the unary operator meets the
6709 appropriate constraints. */
6712 ix86_unary_operator_ok (code, mode, operands)
6713 enum rtx_code code ATTRIBUTE_UNUSED;
6714 enum machine_mode mode ATTRIBUTE_UNUSED;
6715 rtx operands[2] ATTRIBUTE_UNUSED;
6717 /* If one of operands is memory, source and destination must match. */
6718 if ((GET_CODE (operands[0]) == MEM
6719 || GET_CODE (operands[1]) == MEM)
6720 && ! rtx_equal_p (operands[0], operands[1]))
6725 /* Return TRUE or FALSE depending on whether the first SET in INSN
6726 has source and destination with matching CC modes, and that the
6727 CC mode is at least as constrained as REQ_MODE. */
6730 ix86_match_ccmode (insn, req_mode)
6732 enum machine_mode req_mode;
6735 enum machine_mode set_mode;
/* Look through a PARALLEL (e.g. a compare with a clobber) to the
   first SET, which must be a COMPARE feeding a CC register.  */
6737 set = PATTERN (insn);
6738 if (GET_CODE (set) == PARALLEL)
6739 set = XVECEXP (set, 0, 0);
6740 if (GET_CODE (set) != SET)
6742 if (GET_CODE (SET_SRC (set)) != COMPARE)
6745 set_mode = GET_MODE (SET_DEST (set));
/* The cascade below accepts set_mode when it is at least as
   constrained as REQ_MODE; CCmode additionally requires a compare
   against zero (per the check here).  */
6749 if (req_mode != CCNOmode
6750 && (req_mode != CCmode
6751 || XEXP (SET_SRC (set), 1) != const0_rtx))
6755 if (req_mode == CCGCmode)
6759 if (req_mode == CCGOCmode || req_mode == CCNOmode)
6763 if (req_mode == CCZmode)
6773 return (GET_MODE (SET_SRC (set)) == set_mode);
6776 /* Generate insn patterns to do an integer compare of OPERANDS. */
/* Emits (set flags (compare op0 op1)) in the CC mode selected for CODE
   and returns the comparison rtx the consumer (bcc/scc/cmov) should use.  */
6779 ix86_expand_int_compare (code, op0, op1)
6783 enum machine_mode cmpmode;
6786 cmpmode = SELECT_CC_MODE (code, op0, op1);
6787 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
6789 /* This is very simple, but making the interface the same as in the
6790 FP case makes the rest of the code easier. */
6791 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
6792 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
6794 /* Return the test that should be put into the flags user, i.e.
6795 the bcc, scc, or cmov instruction. */
6796 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
6799 /* Figure out whether to use ordered or unordered fp comparisons.
6800 Return the appropriate mode to use. */
6803 ix86_fp_compare_mode (code)
6804 enum rtx_code code ATTRIBUTE_UNUSED;
6806 /* ??? In order to make all comparisons reversible, we do all comparisons
6807 non-trapping when compiling for IEEE.  Once gcc is able to distinguish
6808 all forms trapping and nontrapping comparisons, we can make inequality
6809 comparisons trapping again, since it results in better code when using
6810 FCOM based compares. */
6811 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
/* Select the CC mode that captures exactly the flags CODE needs for a
   compare of OP0 and OP1; FP compares defer to ix86_fp_compare_mode.  */
6815 ix86_cc_mode (code, op0, op1)
6819 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
6820 return ix86_fp_compare_mode (code);
6823 /* Only zero flag is needed. */
6825 case NE: /* ZF!=0 */
6827 /* Codes needing carry flag. */
6828 case GEU: /* CF=0 */
6829 case GTU: /* CF=0 & ZF=0 */
6830 case LTU: /* CF=1 */
6831 case LEU: /* CF=1 | ZF=1 */
6833 /* Codes possibly doable only with sign flag when
6834 comparing against zero. */
6835 case GE: /* SF=OF or SF=0 */
6836 case LT: /* SF<>OF or SF=1 */
6837 if (op1 == const0_rtx)
6840 /* For other cases Carry flag is not required. */
6842 /* Codes doable only with sign flag when comparing
6843 against zero, but we miss jump instruction for it
6844 so we need to use relational tests against overflow
6845 that thus needs to be zero. */
6846 case GT: /* ZF=0 & SF=OF */
6847 case LE: /* ZF=1 | SF<>OF */
6848 if (op1 == const0_rtx)
6852 /* strcmp pattern does (use flags) and combine may ask us for proper
6861 /* Return true if we should use an FCOMI instruction for this fp comparison. */
/* True when fcomi is (one of) the cheapest strategies for CODE, in
   either operand order, per the cost functions below.  */
6864 ix86_use_fcomi_compare (code)
6865 enum rtx_code code ATTRIBUTE_UNUSED;
6867 enum rtx_code swapped_code = swap_condition (code);
6868 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
6869 || (ix86_fp_comparison_cost (swapped_code)
6870 == ix86_fp_comparison_fcomi_cost (swapped_code)));
6873 /* Swap, force into registers, or otherwise massage the two operands
6874 to a fp comparison.  The operands are updated in place; the new
6875 comparison code is returned. */
6877 static enum rtx_code
6878 ix86_prepare_fp_compare_args (code, pop0, pop1)
6882 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
6883 rtx op0 = *pop0, op1 = *pop1;
6884 enum machine_mode op_mode = GET_MODE (op0);
6885 int is_sse = SSE_REG_P (op0) | SSE_REG_P (op1);
6887 /* All of the unordered compare instructions only work on registers.
6888 The same is true of the XFmode compare instructions.  The same is
6889 true of the fcomi compare instructions. */
6892 && (fpcmp_mode == CCFPUmode
6893 || op_mode == XFmode
6894 || op_mode == TFmode
6895 || ix86_use_fcomi_compare (code)))
6897 op0 = force_reg (op_mode, op0);
6898 op1 = force_reg (op_mode, op1);
6902 /* %%% We only allow op1 in memory; op0 must be st(0).  So swap
6903 things around if they appear profitable, otherwise force op0
/* Swap if op0 is an 80387 load-constant or a memory ref while op1
   is not (the cheaper operand should end up as op1).  */
6906 if (standard_80387_constant_p (op0) == 0
6907 || (GET_CODE (op0) == MEM
6908 && ! (standard_80387_constant_p (op1) == 0
6909 || GET_CODE (op1) == MEM)))
6912 tmp = op0, op0 = op1, op1 = tmp;
6913 code = swap_condition (code);
6916 if (GET_CODE (op0) != REG)
6917 op0 = force_reg (op_mode, op0);
6919 if (CONSTANT_P (op1))
6921 if (standard_80387_constant_p (op1))
6922 op1 = force_reg (op_mode, op1);
6924 op1 = validize_mem (force_const_mem (op_mode, op1));
6928 /* Try to rearrange the comparison to make it cheaper. */
6929 if (ix86_fp_comparison_cost (code)
6930 > ix86_fp_comparison_cost (swap_condition (code))
6931 && (GET_CODE (op0) == REG || !reload_completed))
6934 tmp = op0, op0 = op1, op1 = tmp;
6935 code = swap_condition (code);
6936 if (GET_CODE (op0) != REG)
6937 op0 = force_reg (op_mode, op0);
6945 /* Convert comparison codes we use to represent FP comparison to integer
6946 code that will result in proper branch.  Return UNKNOWN if no such code
/* NOTE(review): the body is elided in this listing; presumably the
   mapping mirrors the FP->unsigned-flag correspondence documented in
   ix86_fp_comparison_codes below -- confirm against the full source.  */
6948 static enum rtx_code
6949 ix86_fp_compare_code_to_integer (code)
6979 /* Split comparison code CODE into comparisons we can do using branch
6980 instructions.  BYPASS_CODE is comparison code for branch that will
6981 branch around FIRST_CODE and SECOND_CODE.  If one of the branches
6982 is not required, set value to NIL.
6983 We never require more than two branches. */
6985 ix86_fp_comparison_codes (code, bypass_code, first_code, second_code)
6986 enum rtx_code code, *bypass_code, *first_code, *second_code;
6992 /* The fcomi comparison sets flags as follows:
/* First group: codes that map directly to a single flags test after
   the FP flags land in EFLAGS (no bypass/second branch needed).  */
7002 case GT: /* GTU - CF=0 & ZF=0 */
7003 case GE: /* GEU - CF=0 */
7004 case ORDERED: /* PF=0 */
7005 case UNORDERED: /* PF=1 */
7006 case UNEQ: /* EQ - ZF=1 */
7007 case UNLT: /* LTU - CF=1 */
7008 case UNLE: /* LEU - CF=1 | ZF=1 */
7009 case LTGT: /* EQ - ZF=0 */
7011 case LT: /* LTU - CF=1 - fails on unordered */
7013 *bypass_code = UNORDERED;
7015 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
7017 *bypass_code = UNORDERED;
7019 case EQ: /* EQ - ZF=1 - fails on unordered */
7021 *bypass_code = UNORDERED;
7023 case NE: /* NE - ZF=0 - fails on unordered */
7025 *second_code = UNORDERED;
7027 case UNGE: /* GEU - CF=0 - fails on unordered */
7029 *second_code = UNORDERED;
7031 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
7033 *second_code = UNORDERED;
/* When not honoring IEEE NaN semantics, the unordered fixups above
   are unnecessary.  */
7038 if (!TARGET_IEEE_FP)
7045 /* Return cost of comparison done fcom + arithmetics operations on AX.
7046 All following functions do use number of instructions as a cost metric.
7047 In future this should be tweaked to compute bytes for optimize_size and
7048 take into account performance of various instructions on various CPUs. */
7050 ix86_fp_comparison_arithmetics_cost (code)
7053 if (!TARGET_IEEE_FP)
7055 /* The cost of code output by ix86_expand_fp_compare. */
7083 /* Return cost of comparison done using fcomi operation.
7084 See ix86_fp_comparison_arithmetics_cost for the metrics. */
7086 ix86_fp_comparison_fcomi_cost (code)
7089 enum rtx_code bypass_code, first_code, second_code;
7090 /* Return arbitrarily high cost when instruction is not supported - this
7091 prevents gcc from using it. */
/* Base cost 2 (fcomi + jump); +1 when an extra bypass or second
   branch is required for CODE.  */
7094 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
7095 return (bypass_code != NIL || second_code != NIL) + 2;
7098 /* Return cost of comparison done using sahf operation.
7099 See ix86_fp_comparison_arithmetics_cost for the metrics. */
7101 ix86_fp_comparison_sahf_cost (code)
7104 enum rtx_code bypass_code, first_code, second_code;
7105 /* Return arbitrarily high cost when instruction is not preferred - this
7106 prevents gcc from using it. */
7107 if (!TARGET_USE_SAHF && !optimize_size)
/* Base cost 3 (fcom + fnstsw/sahf + jump); +1 for a bypass or second
   branch.  */
7109 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
7110 return (bypass_code != NIL || second_code != NIL) + 3;
7113 /* Compute cost of the comparison done using any method.
7114 See ix86_fp_comparison_arithmetics_cost for the metrics. */
/* Returns the minimum over the fcomi, sahf, and arithmetic strategies.  */
7116 ix86_fp_comparison_cost (code)
7119 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
7122 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
7123 sahf_cost = ix86_fp_comparison_sahf_cost (code);
7125 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
7126 if (min > sahf_cost)
7128 if (min > fcomi_cost)
7133 /* Generate insn patterns to do a floating point compare of OPERANDS. */
/* Emits the cheapest FP compare sequence for CODE (fcomi, fnstsw+sahf,
   or fnstsw plus arithmetic on AH), possibly filling *SECOND_TEST and
   *BYPASS_TEST with extra conditions for IEEE-correct unordered handling.
   Returns the comparison rtx for the flags consumer.  */
7136 ix86_expand_fp_compare (code, op0, op1, scratch, second_test, bypass_test)
7138 rtx op0, op1, scratch;
7142 enum machine_mode fpcmp_mode, intcmp_mode;
7144 int cost = ix86_fp_comparison_cost (code);
7145 enum rtx_code bypass_code, first_code, second_code;
7147 fpcmp_mode = ix86_fp_compare_mode (code);
7148 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
7151 *second_test = NULL_RTX;
7153 *bypass_test = NULL_RTX;
7155 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
7157 /* Do fcomi/sahf based test when profitable. */
7158 if ((bypass_code == NIL || bypass_test)
7159 && (second_code == NIL || second_test)
7160 && ix86_fp_comparison_arithmetics_cost (code) > cost)
7164 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
7165 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
/* sahf path: fnstsw into a HImode scratch, then sahf to move the
   FP status flags into EFLAGS.  (UNSPEC 9 is the fnstsw unspec.)  */
7171 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
7172 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), 9);
7174 scratch = gen_reg_rtx (HImode);
7175 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
7176 emit_insn (gen_x86_sahf_1 (scratch));
7179 /* The FP codes work out to act like unsigned. */
7180 intcmp_mode = fpcmp_mode;
7182 if (bypass_code != NIL)
7183 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
7184 gen_rtx_REG (intcmp_mode, FLAGS_REG),
7186 if (second_code != NIL)
7187 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
7188 gen_rtx_REG (intcmp_mode, FLAGS_REG),
7193 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
7194 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
7195 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), 9);
7197 scratch = gen_reg_rtx (HImode);
7198 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
7200 /* In the unordered case, we have to check C2 for NaN's, which
7201 doesn't happen to work out to anything nice combination-wise.
7202 So do some bit twiddling on the value we've got in AH to come
7203 up with an appropriate set of condition codes. */
/* The masks below select FP status bits in AH: C0=0x01, C2=0x04,
   C3=0x40; 0x45 = C0|C2|C3.  */
7205 intcmp_mode = CCNOmode;
7210 if (code == GT || !TARGET_IEEE_FP)
7212 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
7217 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
7218 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
7219 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
7220 intcmp_mode = CCmode;
7226 if (code == LT && TARGET_IEEE_FP)
7228 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
7229 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
7230 intcmp_mode = CCmode;
7235 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
7241 if (code == GE || !TARGET_IEEE_FP)
7243 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
7248 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
7249 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
7256 if (code == LE && TARGET_IEEE_FP)
7258 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
7259 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
7260 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
7261 intcmp_mode = CCmode;
7266 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
7272 if (code == EQ && TARGET_IEEE_FP)
7274 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
7275 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
7276 intcmp_mode = CCmode;
7281 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
7288 if (code == NE && TARGET_IEEE_FP)
7290 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
7291 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
7297 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
7303 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
7307 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
7316 /* Return the test that should be put into the flags user, i.e.
7317 the bcc, scc, or cmov instruction. */
7318 return gen_rtx_fmt_ee (code, VOIDmode,
7319 gen_rtx_REG (intcmp_mode, FLAGS_REG),
/* Dispatch a compare of ix86_compare_op0/op1 to the FP or integer
   expander depending on the operand mode; returns the flags test.  */
7324 ix86_expand_compare (code, second_test, bypass_test)
7326 rtx *second_test, *bypass_test;
7329 op0 = ix86_compare_op0;
7330 op1 = ix86_compare_op1;
7333 *second_test = NULL_RTX;
7335 *bypass_test = NULL_RTX;
7337 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
7338 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
7339 second_test, bypass_test);
7341 ret = ix86_expand_int_compare (code, op0, op1);
7346 /* Return true if the CODE will result in nontrivial jump sequence. */
/* I.e. the FP branch needs a bypass or second jump, not a single jcc.  */
7348 ix86_fp_jump_nontrivial_p (code)
7351 enum rtx_code bypass_code, first_code, second_code;
7354 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
7355 return bypass_code != NIL || second_code != NIL;
/* Expand a conditional branch on (ix86_compare_op0 CODE ix86_compare_op1)
   to LABEL, dispatching on the comparison mode: plain jcc for integer
   modes, fcomi/sahf sequences for FP, and a multi-branch decomposition
   for DImode on 32-bit targets.  */
7359 ix86_expand_branch (code, label)
7365 switch (GET_MODE (ix86_compare_op0))
7371 tmp = ix86_expand_compare (code, NULL, NULL);
7372 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
7373 gen_rtx_LABEL_REF (VOIDmode, label),
7375 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
7385 enum rtx_code bypass_code, first_code, second_code;
7387 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
7390 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
7392 /* Check whether we will use the natural sequence with one jump.  If
7393 so, we can expand jump early.  Otherwise delay expansion by
7394 creating compound insn to not confuse optimizers. */
7395 if (bypass_code == NIL && second_code == NIL
7398 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
7399 gen_rtx_LABEL_REF (VOIDmode, label),
7404 tmp = gen_rtx_fmt_ee (code, VOIDmode,
7405 ix86_compare_op0, ix86_compare_op1);
7406 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
7407 gen_rtx_LABEL_REF (VOIDmode, label),
7409 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
7411 use_fcomi = ix86_use_fcomi_compare (code);
7412 vec = rtvec_alloc (3 + !use_fcomi);
7413 RTVEC_ELT (vec, 0) = tmp;
/* Hard regs 17/18 are the flags/fpsr registers being clobbered; the
   non-fcomi variant additionally needs a HImode scratch for fnstsw.  */
7415 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
7417 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
7420 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
7422 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
7430 /* Expand DImode branch into multiple compare+branch. */
7432 rtx lo[2], hi[2], label2;
7433 enum rtx_code code1, code2, code3;
7435 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
7437 tmp = ix86_compare_op0;
7438 ix86_compare_op0 = ix86_compare_op1;
7439 ix86_compare_op1 = tmp;
7440 code = swap_condition (code);
7442 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
7443 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
7445 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
7446 avoid two branches.  This costs one extra insn, so disable when
7447 optimizing for size. */
7449 if ((code == EQ || code == NE)
7451 || hi[1] == const0_rtx || lo[1] == const0_rtx))
7456 if (hi[1] != const0_rtx)
7457 xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
7458 NULL_RTX, 0, OPTAB_WIDEN);
7461 if (lo[1] != const0_rtx)
7462 xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
7463 NULL_RTX, 0, OPTAB_WIDEN);
7465 tmp = expand_binop (SImode, ior_optab, xor1, xor0,
7466 NULL_RTX, 0, OPTAB_WIDEN);
7468 ix86_compare_op0 = tmp;
7469 ix86_compare_op1 = const0_rtx;
7470 ix86_expand_branch (code, label);
7474 /* Otherwise, if we are doing less-than or greater-or-equal-than,
7475 op1 is a constant and the low word is zero, then we can just
7476 examine the high word. */
7478 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
7481 case LT: case LTU: case GE: case GEU:
7482 ix86_compare_op0 = hi[0];
7483 ix86_compare_op1 = hi[1];
7484 ix86_expand_branch (code, label);
7490 /* Otherwise, we need two or three jumps. */
7492 label2 = gen_label_rtx ();
7495 code2 = swap_condition (code);
7496 code3 = unsigned_condition (code);
/* code1 branches to LABEL on the high words, code2 branches around
   (to label2) on the reverse, code3 decides on the low words.  */
7500 case LT: case GT: case LTU: case GTU:
7503 case LE: code1 = LT; code2 = GT; break;
7504 case GE: code1 = GT; code2 = LT; break;
7505 case LEU: code1 = LTU; code2 = GTU; break;
7506 case GEU: code1 = GTU; code2 = LTU; break;
7508 case EQ: code1 = NIL; code2 = NE; break;
7509 case NE: code2 = NIL; break;
7517 * if (hi(a) < hi(b)) goto true;
7518 * if (hi(a) > hi(b)) goto false;
7519 * if (lo(a) < lo(b)) goto true;
7523 ix86_compare_op0 = hi[0];
7524 ix86_compare_op1 = hi[1];
7527 ix86_expand_branch (code1, label);
7529 ix86_expand_branch (code2, label2);
7531 ix86_compare_op0 = lo[0];
7532 ix86_compare_op1 = lo[1];
7533 ix86_expand_branch (code3, label);
7536 emit_label (label2);
7545 /* Split branch based on floating point condition. */
/* Emits up to three jumps (bypass, main, second) for an FP branch from
   (op1 CODE op2) to TARGET1, else TARGET2, attaching REG_BR_PROB notes
   when split_branch_probability is known.  */
7547 ix86_split_fp_branch (code, op1, op2, target1, target2, tmp)
7549 rtx op1, op2, target1, target2, tmp;
7552 rtx label = NULL_RTX;
7554 int bypass_probability = -1, second_probability = -1, probability = -1;
7557 if (target2 != pc_rtx)
7560 code = reverse_condition_maybe_unordered (code);
7565 condition = ix86_expand_fp_compare (code, op1, op2,
7566 tmp, &second, &bypass);
7568 if (split_branch_probability >= 0)
7570 /* Distribute the probabilities across the jumps.
7571 Assume the BYPASS and SECOND to be always test
7573 probability = split_branch_probability;
7575 /* Value of 1 is low enough to make no need for probability
7576 to be updated.  Later we may run some experiments and see
7577 if unordered values are more frequent in practice. */
7579 bypass_probability = 1;
7581 second_probability = 1;
7583 if (bypass != NULL_RTX)
7585 label = gen_label_rtx ();
7586 i = emit_jump_insn (gen_rtx_SET
7588 gen_rtx_IF_THEN_ELSE (VOIDmode,
7590 gen_rtx_LABEL_REF (VOIDmode,
7593 if (bypass_probability >= 0)
7595 = gen_rtx_EXPR_LIST (REG_BR_PROB,
7596 GEN_INT (bypass_probability),
7599 i = emit_jump_insn (gen_rtx_SET
7601 gen_rtx_IF_THEN_ELSE (VOIDmode,
7602 condition, target1, target2)));
7603 if (probability >= 0)
7605 = gen_rtx_EXPR_LIST (REG_BR_PROB,
7606 GEN_INT (probability),
7608 if (second != NULL_RTX)
7610 i = emit_jump_insn (gen_rtx_SET
7612 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
7614 if (second_probability >= 0)
7616 = gen_rtx_EXPR_LIST (REG_BR_PROB,
7617 GEN_INT (second_probability),
7620 if (label != NULL_RTX)
/* Expand a setcc of (ix86_compare_op0 CODE ix86_compare_op1) into the
   QImode register DEST.  Returns 1 on success, 0 to make the caller
   FAIL the expansion (e.g. DImode compares on 32-bit).  Extra
   bypass/second tests from the FP compare are merged in with and/or.  */
7625 ix86_expand_setcc (code, dest)
7629 rtx ret, tmp, tmpreg;
7630 rtx second_test, bypass_test;
7632 if (GET_MODE (ix86_compare_op0) == DImode
7634 return 0; /* FAIL */
7636 if (GET_MODE (dest) != QImode)
7639 ret = ix86_expand_compare (code, &second_test, &bypass_test);
7640 PUT_MODE (ret, QImode);
7645 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
7646 if (bypass_test || second_test)
7648 rtx test = second_test;
7650 rtx tmp2 = gen_reg_rtx (QImode);
/* A bypass test is reversed before being combined, then the two
   QImode flags values are and-ed (bypass) or or-ed (second).  */
7657 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
7659 PUT_MODE (test, QImode);
7660 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
7663 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
7665 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
7668 return 1; /* DONE */
/* Expand an integer conditional move: operands[0] = operands[1] (a
   comparison) ? operands[2] : operands[3].  Tries branch-free sbb/lea
   and setcc-arithmetic sequences for constant arms before falling back
   to a real cmov.  Returns 1 for DONE, 0 for FAIL.  */
7672 ix86_expand_int_movcc (operands)
7675 enum rtx_code code = GET_CODE (operands[1]), compare_code;
7676 rtx compare_seq, compare_op;
7677 rtx second_test, bypass_test;
7678 enum machine_mode mode = GET_MODE (operands[0]);
7680 /* When the compare code is not LTU or GEU, we can not use sbbl case.
7681 In case comparison is done with immediate, we can convert it to LTU or
7682 GEU by altering the integer. */
7684 if ((code == LEU || code == GTU)
7685 && GET_CODE (ix86_compare_op1) == CONST_INT
7687 && (unsigned int)INTVAL (ix86_compare_op1) != 0xffffffff
7688 && GET_CODE (operands[2]) == CONST_INT
7689 && GET_CODE (operands[3]) == CONST_INT)
7695 ix86_compare_op1 = GEN_INT (INTVAL (ix86_compare_op1) + 1);
7699 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
7700 compare_seq = gen_sequence ();
7703 compare_code = GET_CODE (compare_op);
7705 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
7706 HImode insns, we'd be swallowed in word prefix ops. */
7709 && (mode != DImode || TARGET_64BIT)
7710 && GET_CODE (operands[2]) == CONST_INT
7711 && GET_CODE (operands[3]) == CONST_INT)
7713 rtx out = operands[0];
7714 HOST_WIDE_INT ct = INTVAL (operands[2]);
7715 HOST_WIDE_INT cf = INTVAL (operands[3]);
/* sbb-based sequence: works only off the carry flag (LTU/GEU).  */
7718 if ((compare_code == LTU || compare_code == GEU)
7719 && !second_test && !bypass_test)
7722 /* Detect overlap between destination and compare sources. */
7725 /* To simplify rest of code, restrict to the GEU case. */
7726 if (compare_code == LTU)
7731 compare_code = reverse_condition (compare_code);
7732 code = reverse_condition (code);
7736 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
7737 || reg_overlap_mentioned_p (out, ix86_compare_op1))
7738 tmp = gen_reg_rtx (mode);
7740 emit_insn (compare_seq);
7742 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp));
7744 emit_insn (gen_x86_movsicc_0_m1 (tmp));
7756 tmp = expand_simple_binop (mode, PLUS,
7758 tmp, 1, OPTAB_DIRECT);
7769 tmp = expand_simple_binop (mode, IOR,
7771 tmp, 1, OPTAB_DIRECT);
7773 else if (diff == -1 && ct)
7783 tmp = expand_simple_unop (mode, NOT, tmp, tmp, 1);
7785 tmp = expand_simple_binop (mode, PLUS,
7787 tmp, 1, OPTAB_DIRECT);
7794 * andl cf - ct, dest
7799 tmp = expand_simple_binop (mode, AND,
7801 GEN_INT (trunc_int_for_mode
7803 tmp, 1, OPTAB_DIRECT);
7805 tmp = expand_simple_binop (mode, PLUS,
7807 tmp, 1, OPTAB_DIRECT);
7811 emit_move_insn (out, tmp);
7813 return 1; /* DONE */
7820 tmp = ct, ct = cf, cf = tmp;
7822 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
7824 /* We may be reversing unordered compare to normal compare, that
7825 is not valid in general (we may convert non-trapping condition
7826 to trapping one), however on i386 we currently emit all
7827 comparisons unordered. */
7828 compare_code = reverse_condition_maybe_unordered (compare_code);
7829 code = reverse_condition_maybe_unordered (code);
7833 compare_code = reverse_condition (compare_code);
7834 code = reverse_condition (code);
/* diff = ct - cf; values reachable by lea scale/offset (1,2,3,4,5,8,9)
   allow a setcc + lea sequence with no branch.  */
7837 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
7838 || diff == 3 || diff == 5 || diff == 9)
7839 && (mode != DImode || x86_64_sign_extended_value (GEN_INT (cf))))
7845 * lea cf(dest*(ct-cf)),dest
7849 * This also catches the degenerate setcc-only case.
7855 out = emit_store_flag (out, code, ix86_compare_op0,
7856 ix86_compare_op1, VOIDmode, 0, 1);
7859 /* On x86_64 the lea instruction operates on Pmode, so we need to get arithmetics
7860 done in proper mode to match. */
7867 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
7871 tmp = gen_rtx_PLUS (mode, tmp, out1);
7877 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
7881 && (GET_CODE (tmp) != SUBREG || SUBREG_REG (tmp) != out))
7887 clob = gen_rtx_REG (CCmode, FLAGS_REG);
7888 clob = gen_rtx_CLOBBER (VOIDmode, clob);
7890 tmp = gen_rtx_SET (VOIDmode, out, tmp);
7891 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
7895 emit_insn (gen_rtx_SET (VOIDmode, out, tmp));
7897 if (out != operands[0])
7898 emit_move_insn (operands[0], out);
7900 return 1; /* DONE */
7904 * General case: Jumpful:
7905 * xorl dest,dest cmpl op1, op2
7906 * cmpl op1, op2 movl ct, dest
7908 * decl dest movl cf, dest
7909 * andl (cf-ct),dest 1:
7914 * This is reasonably steep, but branch mispredict costs are
7915 * high on modern cpus, so consider failing only if optimizing
7918 * %%% Parameterize branch_cost on the tuning architecture, then
7919 * use that.  The 80386 couldn't care less about mispredicts.
7922 if (!optimize_size && !TARGET_CMOVE)
7928 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
7930 /* We may be reversing unordered compare to normal compare,
7931 that is not valid in general (we may convert non-trapping
7932 condition to trapping one), however on i386 we currently
7933 emit all comparisons unordered. */
7934 compare_code = reverse_condition_maybe_unordered (compare_code);
7935 code = reverse_condition_maybe_unordered (code);
7939 compare_code = reverse_condition (compare_code);
7940 code = reverse_condition (code);
7944 out = emit_store_flag (out, code, ix86_compare_op0,
7945 ix86_compare_op1, VOIDmode, 0, 1);
7947 out = expand_simple_binop (mode, PLUS,
7949 out, 1, OPTAB_DIRECT);
7950 out = expand_simple_binop (mode, AND,
7952 GEN_INT (trunc_int_for_mode
7954 out, 1, OPTAB_DIRECT);
7955 out = expand_simple_binop (mode, PLUS,
7957 out, 1, OPTAB_DIRECT);
7958 if (out != operands[0])
7959 emit_move_insn (operands[0], out);
7961 return 1; /* DONE */
7967 /* Try a few things more with specific constants and a variable. */
7970 rtx var, orig_out, out, tmp;
7973 return 0; /* FAIL */
7975 /* If one of the two operands is an interesting constant, load a
7976 constant with the above and mask it in with a logical operation. */
7978 if (GET_CODE (operands[2]) == CONST_INT)
7981 if (INTVAL (operands[2]) == 0)
7982 operands[3] = constm1_rtx, op = and_optab;
7983 else if (INTVAL (operands[2]) == -1)
7984 operands[3] = const0_rtx, op = ior_optab;
7986 return 0; /* FAIL */
7988 else if (GET_CODE (operands[3]) == CONST_INT)
7991 if (INTVAL (operands[3]) == 0)
7992 operands[2] = constm1_rtx, op = and_optab;
7993 else if (INTVAL (operands[3]) == -1)
7994 operands[2] = const0_rtx, op = ior_optab;
7996 return 0; /* FAIL */
7999 return 0; /* FAIL */
8001 orig_out = operands[0];
8002 tmp = gen_reg_rtx (mode);
8005 /* Recurse to get the constant loaded. */
8006 if (ix86_expand_int_movcc (operands) == 0)
8007 return 0; /* FAIL */
8009 /* Mask in the interesting variable. */
8010 out = expand_binop (mode, op, var, tmp, orig_out, 0,
8012 if (out != orig_out)
8013 emit_move_insn (orig_out, out);
8015 return 1; /* DONE */
8019 * For comparison with above,
/* Fallback: a genuine cmov.  Force the arms into cmov-legal operands
   and resolve destination/arm overlaps through temporaries.  */
8029 if (! nonimmediate_operand (operands[2], mode))
8030 operands[2] = force_reg (mode, operands[2]);
8031 if (! nonimmediate_operand (operands[3], mode))
8032 operands[3] = force_reg (mode, operands[3]);
8034 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
8036 rtx tmp = gen_reg_rtx (mode);
8037 emit_move_insn (tmp, operands[3]);
8040 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
8042 rtx tmp = gen_reg_rtx (mode);
8043 emit_move_insn (tmp, operands[2]);
8046 if (! register_operand (operands[2], VOIDmode)
8047 && ! register_operand (operands[3], VOIDmode))
8048 operands[2] = force_reg (mode, operands[2]);
8050 emit_insn (compare_seq);
8051 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
8052 gen_rtx_IF_THEN_ELSE (mode,
8053 compare_op, operands[2],
8056 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
8057 gen_rtx_IF_THEN_ELSE (mode,
8062 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
8063 gen_rtx_IF_THEN_ELSE (mode,
8068 return 1; /* DONE */
/* Expand a floating point conditional move OPERANDS[0] = OPERANDS[1]
   ? OPERANDS[2] : OPERANDS[3].  Two strategies are visible here:
   when the target and operands permit, emit SSE min/max or SSE
   conditional-move patterns; otherwise build an fcmov-style
   IF_THEN_ELSE from the condition produced by ix86_expand_compare.
   Returns 1 when expansion is complete ("DONE").  */
8072 ix86_expand_fp_movcc (operands)
8077 rtx compare_op, second_test, bypass_test;
8079 /* For SF/DFmode conditional moves based on comparisons
8080 in same mode, we may want to use SSE min/max instructions. */
8081 if (((TARGET_SSE && GET_MODE (operands[0]) == SFmode)
8082 || (TARGET_SSE2 && GET_MODE (operands[0]) == DFmode))
8083 && GET_MODE (ix86_compare_op0) == GET_MODE (operands[0])
8084 /* The SSE comparisons do not support the LTGT/UNEQ pair.  */
8086 || (GET_CODE (operands[1]) != LTGT && GET_CODE (operands[1]) != UNEQ))
8087 /* We may be called from the post-reload splitter. */
8088 && (!REG_P (operands[0])
8089 || SSE_REG_P (operands[0])
8090 || REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER))
8092 rtx op0 = ix86_compare_op0, op1 = ix86_compare_op1;
8093 code = GET_CODE (operands[1]);
8095 /* See if we have (cross) match between comparison operands and
8096 conditional move operands. */
8097 if (rtx_equal_p (operands[2], op1))
8102 code = reverse_condition_maybe_unordered (code);
8104 if (rtx_equal_p (operands[2], op0) && rtx_equal_p (operands[3], op1))
8106 /* Check for min operation. */
8109 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
/* minss/minsd cannot take a memory first operand; force it to a reg.  */
8110 if (memory_operand (op0, VOIDmode))
8111 op0 = force_reg (GET_MODE (operands[0]), op0)
8112 if (GET_MODE (operands[0]) == SFmode)
8113 emit_insn (gen_minsf3 (operands[0], op0, op1));
8115 emit_insn (gen_mindf3 (operands[0], op0, op1));
8118 /* Check for max operation. */
8121 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
8122 if (memory_operand (op0, VOIDmode))
8123 op0 = force_reg (GET_MODE (operands[0]), op0);
8124 if (GET_MODE (operands[0]) == SFmode)
8125 emit_insn (gen_maxsf3 (operands[0], op0, op1));
8127 emit_insn (gen_maxdf3 (operands[0], op0, op1));
8131 /* Manage condition to be sse_comparison_operator. In case we are
8132 in non-ieee mode, try to canonicalize the destination operand
8133 to be first in the comparison - this helps reload to avoid extra
/* Swap the comparison operands and condition to satisfy
   sse_comparison_operator; ix86_compare_op0/op1 are globals.  */
8135 if (!sse_comparison_operator (operands[1], VOIDmode)
8136 || (rtx_equal_p (operands[0], ix86_compare_op1) && !TARGET_IEEE_FP))
8138 rtx tmp = ix86_compare_op0;
8139 ix86_compare_op0 = ix86_compare_op1;
8140 ix86_compare_op1 = tmp;
8141 operands[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands[1])),
8142 VOIDmode, ix86_compare_op0,
8145 /* Similarly try to manage result to be first operand of conditional
8146 move. We also don't support the NE comparison on SSE, so try to
8148 if ((rtx_equal_p (operands[0], operands[3])
8149 && (!TARGET_IEEE_FP || GET_CODE (operands[1]) != EQ))
8150 || (GET_CODE (operands[1]) == NE && TARGET_IEEE_FP))
8152 rtx tmp = operands[2];
8153 operands[2] = operands[3];
8155 operands[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
8156 (GET_CODE (operands[1])),
8157 VOIDmode, ix86_compare_op0,
8160 if (GET_MODE (operands[0]) == SFmode)
8161 emit_insn (gen_sse_movsfcc (operands[0], operands[1],
8162 operands[2], operands[3],
8163 ix86_compare_op0, ix86_compare_op1));
8165 emit_insn (gen_sse_movdfcc (operands[0], operands[1],
8166 operands[2], operands[3],
8167 ix86_compare_op0, ix86_compare_op1));
8171 /* The floating point conditional move instructions don't directly
8172 support conditions resulting from a signed integer comparison. */
8174 code = GET_CODE (operands[1]);
8175 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
8177 /* The floating point conditional move instructions don't directly
8178 support signed integer comparisons. */
8180 if (!fcmov_comparison_operator (compare_op, VOIDmode))
/* Materialize the condition into a QImode register with setcc and
   re-compare that register against zero, which fcmov can handle.  */
8182 if (second_test != NULL || bypass_test != NULL)
8184 tmp = gen_reg_rtx (QImode);
8185 ix86_expand_setcc (code, tmp);
8187 ix86_compare_op0 = tmp;
8188 ix86_compare_op1 = const0_rtx;
8189 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
/* Copy a source operand into a fresh register when it overlaps the
   destination, since the destination is written more than once when
   bypass/second tests are needed.  */
8191 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
8193 tmp = gen_reg_rtx (GET_MODE (operands[0]));
8194 emit_move_insn (tmp, operands[3]);
8197 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
8199 tmp = gen_reg_rtx (GET_MODE (operands[0]));
8200 emit_move_insn (tmp, operands[2]);
8204 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
8205 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
8210 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
8211 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
8216 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
8217 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
8225 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
8226 works for floating point parameters and non-offsettable memories.
8227 For pushes, it returns just stack offsets; the values will be saved
8228 in the right order. Maximally three parts are generated. */
/* Split OPERAND into word-sized PARTS (SImode on 32-bit, DImode on
   64-bit targets).  MODE is the mode of the value being split.
   Handles registers, offsettable memory, pushes (non-offsettable
   memory), and CONST_DOUBLE fp constants, which are decomposed into
   target-format integer words.  Returns the number of parts via
   `size` (2 or 3).  */
8231 ix86_split_to_parts (operand, parts, mode)
8234 enum machine_mode mode;
/* 32-bit: TFmode occupies 3 SImode words; otherwise size in SImode
   words.  64-bit: size in DImode words, rounding up.  */
8239 size = mode == TFmode ? 3 : (GET_MODE_SIZE (mode) / 4);
8241 size = (GET_MODE_SIZE (mode) + 4) / 8;
8243 if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
8245 if (size < 2 || size > 3)
8248 /* Optimize constant pool reference to immediates. This is used by fp moves,
8249 that force all constants to memory to allow combining. */
8251 if (GET_CODE (operand) == MEM
8252 && GET_CODE (XEXP (operand, 0)) == SYMBOL_REF
8253 && CONSTANT_POOL_ADDRESS_P (XEXP (operand, 0)))
8254 operand = get_pool_constant (XEXP (operand, 0));
8256 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
8258 /* The only non-offsettable memories we handle are pushes.  */
8259 if (! push_operand (operand, VOIDmode))
/* For a push, return stack offsets only: all parts alias the same
   Pmode push expression.  */
8262 operand = copy_rtx (operand);
8263 PUT_MODE (operand, Pmode);
8264 parts[0] = parts[1] = parts[2] = operand;
8266 else if (!TARGET_64BIT)
8269 split_di (&operand, 1, &parts[0], &parts[1]);
8272 if (REG_P (operand))
/* Hard-register splitting relies on consecutive register numbers;
   only valid before reload completes for pseudos.  */
8274 if (!reload_completed)
8276 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
8277 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
8279 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
8281 else if (offsettable_memref_p (operand))
8283 operand = adjust_address (operand, SImode, 0);
8285 parts[1] = adjust_address (operand, SImode, 4);
8287 parts[2] = adjust_address (operand, SImode, 8);
8289 else if (GET_CODE (operand) == CONST_DOUBLE)
/* Decompose an fp constant into target-format integer words.  */
8294 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
8299 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
8300 parts[2] = GEN_INT (trunc_int_for_mode (l[2], SImode));
8303 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
8308 parts[1] = GEN_INT (trunc_int_for_mode (l[1], SImode));
8309 parts[0] = GEN_INT (trunc_int_for_mode (l[0], SImode));
/* 64-bit target: split into DImode words.  */
8318 split_ti (&operand, 1, &parts[0], &parts[1]);
8319 if (mode == XFmode || mode == TFmode)
8321 if (REG_P (operand))
8323 if (!reload_completed)
8325 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
8326 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
8328 else if (offsettable_memref_p (operand))
8330 operand = adjust_address (operand, DImode, 0);
8332 parts[1] = adjust_address (operand, SImode, 8);
8334 else if (GET_CODE (operand) == CONST_DOUBLE)
8339 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
8340 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
8341 /* Do not use shift by 32 to avoid warning on 32bit systems. */
8342 if (HOST_BITS_PER_WIDE_INT >= 64)
8344 = GEN_INT (trunc_int_for_mode
8345 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
8346 + ((((HOST_WIDE_INT)l[1]) << 31) << 1),
8349 parts[0] = immed_double_const (l[0], l[1], DImode);
8350 parts[1] = GEN_INT (trunc_int_for_mode (l[2], SImode));
8360 /* Emit insns to perform a move or push of DI, DF, and XF values.
8361 Return false when normal moves are needed; true when all required
8362 insns have been emitted. Operands 2-4 contain the input values
8363 in the correct order; operands 5-7 contain the output values. */
/* Split a multi-word move OPERANDS[0] = OPERANDS[1] into word moves.
   Uses ix86_split_to_parts on both operands, then orders the
   individual moves so that no source word is clobbered before it is
   read.  See the comment above for the operands[2..7] convention.  */
8366 ix86_split_long_move (operands)
8373 enum machine_mode mode = GET_MODE (operands[0]);
8375 /* The DFmode expanders may ask us to move double.
8376 For 64bit target this is single move. By hiding the fact
8377 here we simplify i386.md splitters. */
8378 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
8380 /* Optimize constant pool reference to immediates. This is used by fp moves,
8381 that force all constants to memory to allow combining. */
8383 if (GET_CODE (operands[1]) == MEM
8384 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
8385 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
8386 operands[1] = get_pool_constant (XEXP (operands[1], 0));
8387 if (push_operand (operands[0], VOIDmode))
8389 operands[0] = copy_rtx (operands[0]);
8390 PUT_MODE (operands[0], Pmode);
/* 64-bit: an 8-byte value is a single move.  */
8393 operands[0] = gen_lowpart (DImode, operands[0]);
8394 operands[1] = gen_lowpart (DImode, operands[1]);
8395 emit_move_insn (operands[0], operands[1]);
8399 /* The only non-offsettable memory we handle is push. */
8400 if (push_operand (operands[0], VOIDmode))
8402 else if (GET_CODE (operands[0]) == MEM
8403 && ! offsettable_memref_p (operands[0]))
8406 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
8407 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
8409 /* When emitting push, take care for source operands on the stack. */
8410 if (push && GET_CODE (operands[1]) == MEM
8411 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
/* Each push moves %esp, so rebase later source parts on the
   addresses of the preceding parts.  */
8414 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
8415 XEXP (part[1][2], 0));
8416 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
8417 XEXP (part[1][1], 0));
8420 /* We need to do copy in the right order in case an address register
8421 of the source overlaps the destination. */
8422 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
8424 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
8426 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
8429 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
8432 /* Collision in the middle part can be handled by reordering. */
8433 if (collisions == 1 && nparts == 3
8434 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
8437 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
8438 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
8441 /* If there are more collisions, we can't handle it by reordering.
8442 Do an lea to the last part and use only one colliding move. */
8443 else if (collisions > 1)
8446 emit_insn (gen_rtx_SET (VOIDmode, part[0][nparts - 1],
8447 XEXP (part[1][0], 0)));
8448 part[1][0] = change_address (part[1][0],
8449 TARGET_64BIT ? DImode : SImode,
8450 part[0][nparts - 1]);
8451 part[1][1] = adjust_address (part[1][0], VOIDmode, UNITS_PER_WORD);
8453 part[1][2] = adjust_address (part[1][0], VOIDmode, 8);
8463 /* We use only first 12 bytes of TFmode value, but for pushing we
8464 are required to adjust stack as if we were pushing real 16byte
8466 if (mode == TFmode && !TARGET_64BIT)
8467 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
/* Emit push parts in descending order (highest part first).  */
8469 emit_move_insn (part[0][2], part[1][2]);
8474 /* In 64bit mode we don't have 32bit push available. In case this is
8475 register, it is OK - we will just use larger counterpart. We also
8476 retype memory - these comes from attempt to avoid REX prefix on
8477 moving of second half of TFmode value. */
8478 if (GET_MODE (part[1][1]) == SImode)
8480 if (GET_CODE (part[1][1]) == MEM)
8481 part[1][1] = adjust_address (part[1][1], DImode, 0);
8482 else if (REG_P (part[1][1]))
8483 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
8486 if (GET_MODE (part[1][0]) == SImode)
8487 part[1][0] = part[1][1];
8490 emit_move_insn (part[0][1], part[1][1]);
8491 emit_move_insn (part[0][0], part[1][0]);
8495 /* Choose correct order to not overwrite the source before it is copied. */
8496 if ((REG_P (part[0][0])
8497 && REG_P (part[1][1])
8498 && (REGNO (part[0][0]) == REGNO (part[1][1])
8500 && REGNO (part[0][0]) == REGNO (part[1][2]))))
8502 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
/* Reversed order: copy from the highest part down.  */
8506 operands[2] = part[0][2];
8507 operands[3] = part[0][1];
8508 operands[4] = part[0][0];
8509 operands[5] = part[1][2];
8510 operands[6] = part[1][1];
8511 operands[7] = part[1][0];
8515 operands[2] = part[0][1];
8516 operands[3] = part[0][0];
8517 operands[5] = part[1][1];
8518 operands[6] = part[1][0];
/* Normal order: copy from the lowest part up.  */
8525 operands[2] = part[0][0];
8526 operands[3] = part[0][1];
8527 operands[4] = part[0][2];
8528 operands[5] = part[1][0];
8529 operands[6] = part[1][1];
8530 operands[7] = part[1][2];
8534 operands[2] = part[0][0];
8535 operands[3] = part[0][1];
8536 operands[5] = part[1][0];
8537 operands[6] = part[1][1];
8540 emit_move_insn (operands[2], operands[5]);
8541 emit_move_insn (operands[3], operands[6]);
8543 emit_move_insn (operands[4], operands[7]);
/* Split a DImode left shift OPERANDS[0] = OPERANDS[1] << OPERANDS[2]
   into SImode operations.  Constant counts are handled directly;
   variable counts use shld plus a fixup (x86_shift_adj_1 with a
   cmove SCRATCH, or x86_shift_adj_2 with a branch) for counts >= 32.  */
8549 ix86_split_ashldi (operands, scratch)
8550 rtx *operands, scratch;
8552 rtx low[2], high[2];
8555 if (GET_CODE (operands[2]) == CONST_INT)
8557 split_di (operands, 2, low, high);
/* Only the low 6 bits of the count are significant for a 64-bit shift.  */
8558 count = INTVAL (operands[2]) & 63;
/* count >= 32: result high word is the source low word shifted by
   (count - 32); low word becomes zero.  */
8562 emit_move_insn (high[0], low[1]);
8563 emit_move_insn (low[0], const0_rtx);
8566 emit_insn (gen_ashlsi3 (high[0], high[0], GEN_INT (count - 32)));
/* count < 32: shld propagates low-word bits into the high word.  */
8570 if (!rtx_equal_p (operands[0], operands[1]))
8571 emit_move_insn (operands[0], operands[1]);
8572 emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
8573 emit_insn (gen_ashlsi3 (low[0], low[0], GEN_INT (count)));
/* Variable count.  */
8578 if (!rtx_equal_p (operands[0], operands[1]))
8579 emit_move_insn (operands[0], operands[1]);
8581 split_di (operands, 1, low, high);
8583 emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
8584 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
8586 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
8588 if (! no_new_pseudos)
8589 scratch = force_reg (SImode, const0_rtx);
8591 emit_move_insn (scratch, const0_rtx);
8593 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2],
8597 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
/* Split a DImode arithmetic right shift into SImode operations.
   Mirrors ix86_split_ashldi, except the high word is filled with
   sign bits (ashrsi by 31) and the variable-count fixup is
   x86_shift_adj_3.  */
8602 ix86_split_ashrdi (operands, scratch)
8603 rtx *operands, scratch;
8605 rtx low[2], high[2];
8608 if (GET_CODE (operands[2]) == CONST_INT)
8610 split_di (operands, 2, low, high);
8611 count = INTVAL (operands[2]) & 63;
/* count >= 32: low word becomes the source high word; the result
   high word is the sign extension (shift right by 31).  */
8615 emit_move_insn (low[0], high[1]);
8617 if (! reload_completed)
8618 emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31)));
8621 emit_move_insn (high[0], low[0]);
8622 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
8626 emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
/* count < 32: shrd propagates high-word bits into the low word.  */
8630 if (!rtx_equal_p (operands[0], operands[1]))
8631 emit_move_insn (operands[0], operands[1]);
8632 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
8633 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
/* Variable count.  */
8638 if (!rtx_equal_p (operands[0], operands[1]))
8639 emit_move_insn (operands[0], operands[1]);
8641 split_di (operands, 1, low, high);
8643 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
8644 emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));
8646 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
/* SCRATCH holds the sign word for the cmove-based adjustment.  */
8648 if (! no_new_pseudos)
8649 scratch = gen_reg_rtx (SImode);
8650 emit_move_insn (scratch, high[0]);
8651 emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
8652 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
8656 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
/* Split a DImode logical right shift into SImode operations.
   Like ix86_split_ashrdi but zero-fills the high word, so it reuses
   the x86_shift_adj_1/2 patterns with the low/high arguments
   reversed relative to the left-shift case.  */
8661 ix86_split_lshrdi (operands, scratch)
8662 rtx *operands, scratch;
8664 rtx low[2], high[2];
8667 if (GET_CODE (operands[2]) == CONST_INT)
8669 split_di (operands, 2, low, high);
8670 count = INTVAL (operands[2]) & 63;
/* count >= 32: low word becomes the source high word, shifted by
   (count - 32); high word becomes zero.  */
8674 emit_move_insn (low[0], high[1]);
8675 emit_move_insn (high[0], const0_rtx);
8678 emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
/* count < 32.  */
8682 if (!rtx_equal_p (operands[0], operands[1]))
8683 emit_move_insn (operands[0], operands[1]);
8684 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
8685 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
/* Variable count.  */
8690 if (!rtx_equal_p (operands[0], operands[1]))
8691 emit_move_insn (operands[0], operands[1]);
8693 split_di (operands, 1, low, high);
8695 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
8696 emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));
8698 /* Heh. By reversing the arguments, we can reuse this pattern. */
8699 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
8701 if (! no_new_pseudos)
8702 scratch = force_reg (SImode, const0_rtx);
8704 emit_move_insn (scratch, const0_rtx);
8706 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
8710 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
8714 /* Helper function for the string operations below. Test whether VARIABLE
8715 is aligned to VALUE bytes. If so, jump to the label. */
/* Emit code computing VARIABLE & VALUE and branching to a fresh label
   when the result is zero (i.e. the tested alignment bits are clear).
   Returns the label so the caller can emit it after the unaligned
   fixup code.  */
8717 ix86_expand_aligntest (variable, value)
8721 rtx label = gen_label_rtx ();
8722 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
8723 if (GET_MODE (variable) == DImode)
8724 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
8726 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
8727 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
8732 /* Adjust COUNTER by the VALUE. */
/* Decrement COUNTREG by VALUE, using the add pattern matching the
   register's mode (DImode or SImode).  */
8734 ix86_adjust_counter (countreg, value)
8736 HOST_WIDE_INT value;
8738 if (GET_MODE (countreg) == DImode)
8739 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
8741 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
8744 /* Zero extend possibly SImode EXP to Pmode register. */
/* Return a Pmode register holding EXP zero-extended to Pmode.
   Constants (VOIDmode) are forced into a register; values already in
   Pmode are copied; SImode values on a 64-bit target are
   zero-extended via zero_extendsidi2.  */
8746 ix86_zero_extend_to_Pmode (exp)
8750 if (GET_MODE (exp) == VOIDmode)
8751 return force_reg (Pmode, exp);
8752 if (GET_MODE (exp) == Pmode)
8753 return copy_to_mode_reg (Pmode, exp);
8754 r = gen_reg_rtx (Pmode);
8755 emit_insn (gen_zero_extendsidi2 (r, exp));
8759 /* Expand string move (memcpy) operation. Use i386 string operations when
8760 profitable. expand_clrstr contains similar code. */
/* Expand memcpy (DST, SRC, COUNT_EXP) with alignment ALIGN_EXP using
   i386 string instructions when profitable.  Three strategies:
   rep movsb when optimizing for size, rep movsl/movsq with a small
   epilogue for known-aligned or small copies, and a generic
   align-then-rep-then-tail sequence otherwise.  */
8762 ix86_expand_movstr (dst, src, count_exp, align_exp)
8763 rtx dst, src, count_exp, align_exp;
8765 rtx srcreg, destreg, countreg;
8766 enum machine_mode counter_mode;
8767 HOST_WIDE_INT align = 0;
8768 unsigned HOST_WIDE_INT count = 0;
8773 if (GET_CODE (align_exp) == CONST_INT)
8774 align = INTVAL (align_exp);
8776 /* This simple hack avoids all inlining code and simplifies code below. */
8777 if (!TARGET_ALIGN_STRINGOPS)
8780 if (GET_CODE (count_exp) == CONST_INT)
8781 count = INTVAL (count_exp);
8783 /* Figure out proper mode for counter. For 32bits it is always SImode,
8784 for 64bits use SImode when possible, otherwise DImode.
8785 Set count to number of bytes copied when known at compile time. */
8786 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
8787 || x86_64_zero_extended_value (count_exp))
8788 counter_mode = SImode;
8790 counter_mode = DImode;
8792 if (counter_mode != SImode && counter_mode != DImode)
8795 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
8796 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
/* String instructions require the direction flag cleared.  */
8798 emit_insn (gen_cld ());
8800 /* When optimizing for size emit simple rep ; movsb instruction for
8801 counts not divisible by 4. */
8803 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
8805 countreg = ix86_zero_extend_to_Pmode (count_exp);
8807 emit_insn (gen_rep_movqi_rex64 (destreg, srcreg, countreg,
8808 destreg, srcreg, countreg));
8810 emit_insn (gen_rep_movqi (destreg, srcreg, countreg,
8811 destreg, srcreg, countreg));
8814 /* For constant aligned (or small unaligned) copies use rep movsl
8815 followed by code copying the rest. For PentiumPro ensure 8 byte
8816 alignment to allow rep movsl acceleration. */
8820 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
8821 || optimize_size || count < (unsigned int)64)
/* Word size for the rep move: 8 on 64-bit (unless -Os), else 4.  */
8823 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
8824 if (count & ~(size - 1))
8826 countreg = copy_to_mode_reg (counter_mode,
8827 GEN_INT ((count >> (size == 4 ? 2 : 3))
8828 & (TARGET_64BIT ? -1 : 0x3fffffff)));
8829 countreg = ix86_zero_extend_to_Pmode (countreg);
8833 emit_insn (gen_rep_movsi_rex64 (destreg, srcreg, countreg,
8834 destreg, srcreg, countreg));
8836 emit_insn (gen_rep_movsi (destreg, srcreg, countreg,
8837 destreg, srcreg, countreg));
8840 emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg,
8841 destreg, srcreg, countreg));
/* Copy the remaining 1..size-1 bytes with single string moves.  */
8843 if (size == 8 && (count & 0x04))
8844 emit_insn (gen_strmovsi (destreg, srcreg));
8846 emit_insn (gen_strmovhi (destreg, srcreg));
8848 emit_insn (gen_strmovqi (destreg, srcreg));
8850 /* The generic code based on the glibc implementation:
8851 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
8852 allowing accelerated copying there)
8853 - copy the data using rep movsl
8860 /* In case we don't know anything about the alignment, default to
8861 library version, since it is usually equally fast and result in
8863 if (!TARGET_INLINE_ALL_STRINGOPS && align < UNITS_PER_WORD)
8869 if (TARGET_SINGLE_STRINGOP)
8870 emit_insn (gen_cld ());
8872 countreg2 = gen_reg_rtx (Pmode);
8873 countreg = copy_to_mode_reg (counter_mode, count_exp);
8875 /* We don't use loops to align destination and to copy parts smaller
8876 than 4 bytes, because gcc is able to optimize such code better (in
8877 the case the destination or the count really is aligned, gcc is often
8878 able to predict the branches) and also it is friendlier to the
8879 hardware branch prediction.
8881 Using loops is beneficial for generic case, because we can
8882 handle small counts using the loops. Many CPUs (such as Athlon)
8883 have large REP prefix setup costs.
8885 This is quite costly. Maybe we can revisit this decision later or
8886 add some customizability to this code. */
/* Skip the alignment prologue entirely for small copies.  */
8889 && align < (TARGET_PENTIUMPRO && (count == 0
8890 || count >= (unsigned int)260)
8891 ? 8 : UNITS_PER_WORD))
8893 label = gen_label_rtx ();
8894 emit_cmp_and_jump_insns (countreg, GEN_INT (UNITS_PER_WORD - 1),
8895 LEU, 0, counter_mode, 1, label);
/* Align the destination byte-by-byte, adjusting the count.  */
8899 rtx label = ix86_expand_aligntest (destreg, 1);
8900 emit_insn (gen_strmovqi (destreg, srcreg));
8901 ix86_adjust_counter (countreg, 1);
8903 LABEL_NUSES (label) = 1;
8907 rtx label = ix86_expand_aligntest (destreg, 2);
8908 emit_insn (gen_strmovhi (destreg, srcreg));
8909 ix86_adjust_counter (countreg, 2);
8911 LABEL_NUSES (label) = 1;
8914 && ((TARGET_PENTIUMPRO && (count == 0
8915 || count >= (unsigned int)260))
8918 rtx label = ix86_expand_aligntest (destreg, 4);
8919 emit_insn (gen_strmovsi (destreg, srcreg));
8920 ix86_adjust_counter (countreg, 4);
8922 LABEL_NUSES (label) = 1;
8925 if (!TARGET_SINGLE_STRINGOP)
8926 emit_insn (gen_cld ());
/* Bulk copy: shift the byte count down to a word count, then rep move.  */
8929 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
8931 emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg2,
8932 destreg, srcreg, countreg2));
8936 emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
8937 emit_insn (gen_rep_movsi (destreg, srcreg, countreg2,
8938 destreg, srcreg, countreg2));
8944 LABEL_NUSES (label) = 1;
/* Epilogue: copy the remaining bytes.  The aligntest here checks
   bits of COUNTREG (residual byte count), not the address.  */
8946 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
8947 emit_insn (gen_strmovsi (destreg, srcreg));
8948 if ((align <= 4 || count == 0) && TARGET_64BIT)
8950 rtx label = ix86_expand_aligntest (countreg, 4);
8951 emit_insn (gen_strmovsi (destreg, srcreg));
8953 LABEL_NUSES (label) = 1;
8955 if (align > 2 && count != 0 && (count & 2))
8956 emit_insn (gen_strmovhi (destreg, srcreg));
8957 if (align <= 2 || count == 0)
8959 rtx label = ix86_expand_aligntest (countreg, 2);
8960 emit_insn (gen_strmovhi (destreg, srcreg));
8962 LABEL_NUSES (label) = 1;
8964 if (align > 1 && count != 0 && (count & 1))
8965 emit_insn (gen_strmovqi (destreg, srcreg));
8966 if (align <= 1 || count == 0)
8968 rtx label = ix86_expand_aligntest (countreg, 1);
8969 emit_insn (gen_strmovqi (destreg, srcreg));
8971 LABEL_NUSES (label) = 1;
8975 insns = get_insns ();
8978 ix86_set_move_mem_attrs (insns, dst, src, destreg, srcreg);
8983 /* Expand string clear operation (bzero). Use i386 string operations when
8984 profitable. expand_movstr contains similar code. */
/* Expand memset-to-zero (bzero) of SRC for COUNT_EXP bytes with
   alignment ALIGN_EXP using i386 string store instructions.
   Structure parallels ix86_expand_movstr: rep stosb when optimizing
   for size, rep stosl/stosq with an epilogue for aligned or small
   clears, generic align-then-rep-then-tail otherwise.  */
8986 ix86_expand_clrstr (src, count_exp, align_exp)
8987 rtx src, count_exp, align_exp;
8989 rtx destreg, zeroreg, countreg;
8990 enum machine_mode counter_mode;
8991 HOST_WIDE_INT align = 0;
8992 unsigned HOST_WIDE_INT count = 0;
8994 if (GET_CODE (align_exp) == CONST_INT)
8995 align = INTVAL (align_exp);
8997 /* This simple hack avoids all inlining code and simplifies code below. */
8998 if (!TARGET_ALIGN_STRINGOPS)
9001 if (GET_CODE (count_exp) == CONST_INT)
9002 count = INTVAL (count_exp);
9003 /* Figure out proper mode for counter. For 32bits it is always SImode,
9004 for 64bits use SImode when possible, otherwise DImode.
9005 Set count to number of bytes copied when known at compile time. */
9006 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
9007 || x86_64_zero_extended_value (count_exp))
9008 counter_mode = SImode;
9010 counter_mode = DImode;
9012 destreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
/* String instructions require the direction flag cleared.  */
9014 emit_insn (gen_cld ());
9016 /* When optimizing for size emit simple rep ; stosb instruction for
9017 counts not divisible by 4. */
9019 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
9021 countreg = ix86_zero_extend_to_Pmode (count_exp);
9022 zeroreg = copy_to_mode_reg (QImode, const0_rtx);
9024 emit_insn (gen_rep_stosqi_rex64 (destreg, countreg, zeroreg,
9025 destreg, countreg));
9027 emit_insn (gen_rep_stosqi (destreg, countreg, zeroreg,
9028 destreg, countreg));
9032 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
9033 || optimize_size || count < (unsigned int)64)
9035 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
9036 zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
9037 if (count & ~(size - 1))
9039 countreg = copy_to_mode_reg (counter_mode,
9040 GEN_INT ((count >> (size == 4 ? 2 : 3))
9041 & (TARGET_64BIT ? -1 : 0x3fffffff)));
9042 countreg = ix86_zero_extend_to_Pmode (countreg);
9046 emit_insn (gen_rep_stossi_rex64 (destreg, countreg, zeroreg,
9047 destreg, countreg));
9049 emit_insn (gen_rep_stossi (destreg, countreg, zeroreg,
9050 destreg, countreg));
9053 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg, zeroreg,
9054 destreg, countreg));
/* Store the remaining 1..size-1 bytes via narrower subregs of the
   zero register.  */
9056 if (size == 8 && (count & 0x04))
9057 emit_insn (gen_strsetsi (destreg,
9058 gen_rtx_SUBREG (SImode, zeroreg, 0)));
9060 emit_insn (gen_strsethi (destreg,
9061 gen_rtx_SUBREG (HImode, zeroreg, 0)));
9063 emit_insn (gen_strsetqi (destreg,
9064 gen_rtx_SUBREG (QImode, zeroreg, 0)));
9071 /* In case we don't know anything about the alignment, default to
9072 library version, since it is usually equally fast and result in
9074 if (!TARGET_INLINE_ALL_STRINGOPS && align < UNITS_PER_WORD)
9077 if (TARGET_SINGLE_STRINGOP)
9078 emit_insn (gen_cld ());
9080 countreg2 = gen_reg_rtx (Pmode);
9081 countreg = copy_to_mode_reg (counter_mode, count_exp);
9082 zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
9085 && align < (TARGET_PENTIUMPRO && (count == 0
9086 || count >= (unsigned int)260)
9087 ? 8 : UNITS_PER_WORD))
9089 label = gen_label_rtx ();
9090 emit_cmp_and_jump_insns (countreg, GEN_INT (UNITS_PER_WORD - 1),
9091 LEU, 0, counter_mode, 1, label);
/* Align the destination, adjusting the residual count.  */
9095 rtx label = ix86_expand_aligntest (destreg, 1);
9096 emit_insn (gen_strsetqi (destreg,
9097 gen_rtx_SUBREG (QImode, zeroreg, 0)));
9098 ix86_adjust_counter (countreg, 1);
9100 LABEL_NUSES (label) = 1;
9104 rtx label = ix86_expand_aligntest (destreg, 2);
9105 emit_insn (gen_strsethi (destreg,
9106 gen_rtx_SUBREG (HImode, zeroreg, 0)));
9107 ix86_adjust_counter (countreg, 2);
9109 LABEL_NUSES (label) = 1;
9111 if (align <= 4 && TARGET_PENTIUMPRO && (count == 0
9112 || count >= (unsigned int)260))
9114 rtx label = ix86_expand_aligntest (destreg, 4);
9115 emit_insn (gen_strsetsi (destreg, (TARGET_64BIT
9116 ? gen_rtx_SUBREG (SImode, zeroreg, 0)
9118 ix86_adjust_counter (countreg, 4);
9120 LABEL_NUSES (label) = 1;
9123 if (!TARGET_SINGLE_STRINGOP)
9124 emit_insn (gen_cld ());
/* Bulk clear: shift byte count down to a word count, then rep stos.  */
9127 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
9129 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg2, zeroreg,
9130 destreg, countreg2));
9134 emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
9135 emit_insn (gen_rep_stossi (destreg, countreg2, zeroreg,
9136 destreg, countreg2));
9142 LABEL_NUSES (label) = 1;
/* Epilogue: store the remaining bytes.  */
9144 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
9145 emit_insn (gen_strsetsi (destreg,
9146 gen_rtx_SUBREG (SImode, zeroreg, 0)));
/* NOTE(review): this tests alignment bit 2 but stores 4 bytes; the
   movstr counterpart tests 4 here — verify against git history.  */
9147 if (TARGET_64BIT && (align <= 4 || count == 0))
9149 rtx label = ix86_expand_aligntest (destreg, 2);
9150 emit_insn (gen_strsetsi (destreg,
9151 gen_rtx_SUBREG (SImode, zeroreg, 0)));
9153 LABEL_NUSES (label) = 1;
9155 if (align > 2 && count != 0 && (count & 2))
9156 emit_insn (gen_strsethi (destreg,
9157 gen_rtx_SUBREG (HImode, zeroreg, 0)));
9158 if (align <= 2 || count == 0)
9160 rtx label = ix86_expand_aligntest (destreg, 2);
9161 emit_insn (gen_strsethi (destreg,
9162 gen_rtx_SUBREG (HImode, zeroreg, 0)));
9164 LABEL_NUSES (label) = 1;
9166 if (align > 1 && count != 0 && (count & 1))
9167 emit_insn (gen_strsetqi (destreg,
9168 gen_rtx_SUBREG (QImode, zeroreg, 0)));
9169 if (align <= 1 || count == 0)
9171 rtx label = ix86_expand_aligntest (destreg, 1);
9172 emit_insn (gen_strsetqi (destreg,
9173 gen_rtx_SUBREG (QImode, zeroreg, 0)));
9175 LABEL_NUSES (label) = 1;
9180 /* Expand strlen. */
/* Expand strlen: OUT = length of string at SRC terminated by EOSCHAR,
   with known alignment ALIGN.  Uses the unrolled SImode scanner
   (ix86_expand_strlensi_unroll_1) when profitable, otherwise the
   classic repnz scasb sequence: scan with count = -1, complement the
   remaining count and subtract one to obtain the length.  */
9182 ix86_expand_strlen (out, src, eoschar, align)
9183 rtx out, src, eoschar, align;
9185 rtx addr, scratch1, scratch2, scratch3, scratch4;
9187 /* The generic case of strlen expander is long. Avoid its
9188 expansion unless TARGET_INLINE_ALL_STRINGOPS. */
9190 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
9191 && !TARGET_INLINE_ALL_STRINGOPS
9193 && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
9196 addr = force_reg (Pmode, XEXP (src, 0));
9197 scratch1 = gen_reg_rtx (Pmode);
9199 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
9202 /* Well it seems that some optimizer does not combine a call like
9203 foo(strlen(bar), strlen(bar));
9204 when the move and the subtraction is done here. It does calculate
9205 the length just once when these instructions are done inside of
9206 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
9207 often used and I use one fewer register for the lifetime of
9208 output_strlen_unroll() this is better. */
9210 emit_move_insn (out, addr);
9212 ix86_expand_strlensi_unroll_1 (out, align);
9214 /* strlensi_unroll_1 returns the address of the zero at the end of
9215 the string, like memchr(), so compute the length by subtracting
9216 the start address. */
9218 emit_insn (gen_subdi3 (out, out, addr));
9220 emit_insn (gen_subsi3 (out, out, addr));
/* repnz scasb path: scratch4 = -1 (max count), scratch3 = address.  */
9224 scratch2 = gen_reg_rtx (Pmode);
9225 scratch3 = gen_reg_rtx (Pmode);
9226 scratch4 = force_reg (Pmode, constm1_rtx);
9228 emit_move_insn (scratch3, addr);
9229 eoschar = force_reg (QImode, eoschar);
9231 emit_insn (gen_cld ());
9234 emit_insn (gen_strlenqi_rex_1 (scratch1, scratch3, eoschar,
9235 align, scratch4, scratch3));
/* Length = ~remaining_count - 1.  */
9236 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
9237 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
9241 emit_insn (gen_strlenqi_1 (scratch1, scratch3, eoschar,
9242 align, scratch4, scratch3));
9243 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
9244 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
9250 /* Expand the appropriate insns for doing strlen if not just doing
9253 out = result, initialized with the start address
9254 align_rtx = alignment of the address.
9255 scratch = scratch register, initialized with the startaddress when
9256 not aligned, otherwise undefined
9258 This is just the body. It needs the initialisations mentioned above and
9259 some address computing at the end. These things are done in i386.md. */
/* Emit the unrolled strlen body: first step OUT forward one byte at a
   time (up to 3 bytes, driven by the known ALIGN) until it is 4-byte
   aligned, then scan a word per iteration using the zero-byte bit trick
   below.  OUT is left pointing near the terminating NUL; final address
   fixup is done by the caller (see i386.md, per the comment above).
   NOTE(review): this chunk is a lossy extraction -- the "static void"
   line, parameter declarations, braces and several conditionals are
   missing from view; comments below hedge accordingly.  */
9262 ix86_expand_strlensi_unroll_1 (out, align_rtx)
9267 rtx align_2_label = NULL_RTX;
9268 rtx align_3_label = NULL_RTX;
9269 rtx align_4_label = gen_label_rtx ();
9270 rtx end_0_label = gen_label_rtx ();
9272 rtx tmpreg = gen_reg_rtx (SImode);
9273 rtx scratch = gen_reg_rtx (SImode);
/* ALIGN is only known when ALIGN_RTX is a CONST_INT.  */
9276 if (GET_CODE (align_rtx) == CONST_INT)
9277 align = INTVAL (align_rtx);
9279 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
9281 /* Is there a known alignment and is it less than 4? */
9284 rtx scratch1 = gen_reg_rtx (Pmode);
9285 emit_move_insn (scratch1, out);
9286 /* Is there a known alignment and is it not 2? */
9289 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
9290 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
9292 /* Leave just the 3 lower bits. */
9293 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
9294 NULL_RTX, 0, OPTAB_WIDEN);
/* Dispatch on (out & 3): 0 -> already aligned, 2 -> two bytes to
   check, 3 -> one byte, 1 -> fall through and check three bytes.  */
9296 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
9297 Pmode, 1, align_4_label);
9298 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), EQ, NULL,
9299 Pmode, 1, align_2_label);
9300 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), GTU, NULL,
9301 Pmode, 1, align_3_label);
9305 /* Since the alignment is 2, we have to check 2 or 0 bytes;
9306 check if is aligned to 4 - byte. */
9308 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (2),
9309 NULL_RTX, 0, OPTAB_WIDEN);
9311 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
9312 Pmode, 1, align_4_label);
9315 mem = gen_rtx_MEM (QImode, out);
9317 /* Now compare the bytes. */
9319 /* Compare the first n unaligned byte on a byte per byte basis. */
9320 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
9321 QImode, 1, end_0_label);
9323 /* Increment the address. */
/* NOTE(review): each adddi3/addsi3 pair below was presumably guarded
   by an "if (TARGET_64BIT) ... else ..." dropped by the extraction --
   confirm against the original source before editing.  */
9325 emit_insn (gen_adddi3 (out, out, const1_rtx));
9327 emit_insn (gen_addsi3 (out, out, const1_rtx));
9329 /* Not needed with an alignment of 2 */
9332 emit_label (align_2_label);
9334 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
9338 emit_insn (gen_adddi3 (out, out, const1_rtx));
9340 emit_insn (gen_addsi3 (out, out, const1_rtx));
9342 emit_label (align_3_label);
9345 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
9349 emit_insn (gen_adddi3 (out, out, const1_rtx));
9351 emit_insn (gen_addsi3 (out, out, const1_rtx));
9354 /* Generate loop to check 4 bytes at a time. It is not a good idea to
9355 align this loop. It gives only huge programs, but does not help to
9357 emit_label (align_4_label);
9359 mem = gen_rtx_MEM (SImode, out);
9360 emit_move_insn (scratch, mem);
9362 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
9364 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
9366 /* This formula yields a nonzero result iff one of the bytes is zero.
9367 This saves three branches inside loop and many cycles. */
/* tmpreg = (word - 0x01010101) & ~word & 0x80808080 -- a byte's high
   bit survives exactly when that byte borrowed, i.e. was zero.  */
9369 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
9370 emit_insn (gen_one_cmplsi2 (scratch, scratch))
9371 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
9372 emit_insn (gen_andsi3 (tmpreg, tmpreg,
9373 GEN_INT (trunc_int_for_mode
9374 (0x80808080, SImode))));
9375 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
/* A zero byte was found; locate which of the 4 bytes it is.  The two
   code paths below (conditional-move based vs. branch based) were
   presumably selected by a TARGET_CMOVE-style test lost in extraction
   -- TODO confirm.  */
9380 rtx reg = gen_reg_rtx (SImode);
9381 rtx reg2 = gen_reg_rtx (Pmode);
9382 emit_move_insn (reg, tmpreg);
9383 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
9385 /* If zero is not in the first two bytes, move two bytes forward. */
9386 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
9387 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
9388 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
9389 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
9390 gen_rtx_IF_THEN_ELSE (SImode, tmp,
9393 /* Emit lea manually to avoid clobbering of flags. */
9394 emit_insn (gen_rtx_SET (SImode, reg2,
9395 gen_rtx_PLUS (Pmode, out, GEN_INT (2))));
9397 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
9398 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
9399 emit_insn (gen_rtx_SET (VOIDmode, out,
9400 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
9407 rtx end_2_label = gen_label_rtx ();
9408 /* Is zero in the first two bytes? */
9410 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
9411 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
9412 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
9413 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
9414 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
9416 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
9417 JUMP_LABEL (tmp) = end_2_label;
9419 /* Not in the first two. Move two bytes forward. */
9420 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
9422 emit_insn (gen_adddi3 (out, out, GEN_INT (2)));
9424 emit_insn (gen_addsi3 (out, out, GEN_INT (2)));
9426 emit_label (end_2_label);
9430 /* Avoid branch in fixing the byte. */
/* addqi3_cc shifts the low byte's 0x80 flag into the carry; the
   subsequent subtract-with-borrow adjusts OUT by 3 or 4.  */
9431 tmpreg = gen_lowpart (QImode, tmpreg);
9432 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
9434 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3)));
9436 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3)));
9438 emit_label (end_0_label);
9441 /* Clear stack slot assignments remembered from previous functions.
9442 This is called from INIT_EXPANDERS once before RTL is emitted for each
/* Allocate a zeroed, per-function machine_function record on P.
   Zeroing via xcalloc is what "clears" the remembered stack slots.  */
9446 ix86_init_machine_status (p)
9449 p->machine = (struct machine_function *)
9450 xcalloc (1, sizeof (struct machine_function));
9453 /* Mark machine specific bits of P for GC. */
/* Walk every (mode, slot) pair in the per-function stack_locals table
   and mark each rtx live for the garbage collector.  */
9455 ix86_mark_machine_status (p)
9458 struct machine_function *machine = p->machine;
9459 enum machine_mode mode;
/* Iterate all machine modes; enum arithmetic needs the int casts.  */
9465 for (mode = VOIDmode; (int) mode < (int) MAX_MACHINE_MODE;
9466 mode = (enum machine_mode) ((int) mode + 1))
9467 for (n = 0; n < MAX_386_STACK_LOCALS; n++)
9468 ggc_mark_rtx (machine->stack_locals[(int) mode][n]);
/* Release P's machine_function record (counterpart of the xcalloc in
   ix86_init_machine_status).  NOTE(review): the body of this function
   was dropped by the extraction; only the signature line remains.  */
9472 ix86_free_machine_status (p)
9479 /* Return a MEM corresponding to a stack slot with mode MODE.
9480 Allocate a new slot if necessary.
9482 The RTL for a function can have several slots available: N is
9483 which slot to use. */
9486 assign_386_stack_local (mode, n)
9487 enum machine_mode mode;
/* Out-of-range slot index is a compiler bug (an abort presumably
   followed on the line lost in extraction).  */
9490 if (n < 0 || n >= MAX_386_STACK_LOCALS)
/* Lazily allocate: each (mode, n) slot is created once and cached.  */
9493 if (ix86_stack_locals[(int) mode][n] == NULL_RTX)
9494 ix86_stack_locals[(int) mode][n]
9495 = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
9497 return ix86_stack_locals[(int) mode][n];
9500 /* Calculate the length of the memory address in the instruction
9501 encoding. Does not include the one-byte modrm, opcode, or prefix. */
9504 memory_address_length (addr)
9507 struct ix86_address parts;
9508 rtx base, index, disp;
/* Autoincrement forms encode no extra address bytes.  */
9511 if (GET_CODE (addr) == PRE_DEC
9512 || GET_CODE (addr) == POST_INC
9513 || GET_CODE (addr) == PRE_MODIFY
9514 || GET_CODE (addr) == POST_MODIFY)
/* Anything we cannot decompose is a compiler bug.  */
9517 if (! ix86_decompose_address (addr, &parts))
9521 index = parts.index;
9525 /* Register Indirect. */
9526 if (base && !index && !disp)
9528 /* Special cases: ebp and esp need the two-byte modrm form. */
9529 if (addr == stack_pointer_rtx
9530 || addr == arg_pointer_rtx
9531 || addr == frame_pointer_rtx
9532 || addr == hard_frame_pointer_rtx)
9536 /* Direct Addressing. */
9537 else if (disp && !base && !index)
9542 /* Find the length of the displacement constant. */
/* 'K' constraint: displacement fits in a signed 8-bit immediate.  */
9545 if (GET_CODE (disp) == CONST_INT
9546 && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K'))
9552 /* An index requires the two-byte modrm form. */
9560 /* Compute default value for "length_immediate" attribute. When SHORTFORM is set
9561 expect that insn have 8bit immediate alternative. */
9563 ix86_attr_length_immediate_default (insn, shortform)
9569 extract_insn_cached (insn);
/* Scan operands backward looking for the (single) constant operand.  */
9570 for (i = recog_data.n_operands - 1; i >= 0; --i)
9571 if (CONSTANT_P (recog_data.operand[i]))
/* With SHORTFORM, an 8-bit-representable CONST_INT ('K') needs only
   one immediate byte.  */
9576 && GET_CODE (recog_data.operand[i]) == CONST_INT
9577 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
/* Otherwise the immediate size follows the insn's mode attribute.  */
9581 switch (get_attr_mode (insn))
9592 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
9597 fatal_insn ("unknown insn mode", insn);
9603 /* Compute default value for "length_address" attribute. */
/* Return the encoded-address length of INSN's first MEM operand
   (scanning backward), via memory_address_length.  */
9605 ix86_attr_length_address_default (insn)
9609 extract_insn_cached (insn);
9610 for (i = recog_data.n_operands - 1; i >= 0; --i)
9611 if (GET_CODE (recog_data.operand[i]) == MEM)
9613 return memory_address_length (XEXP (recog_data.operand[i], 0));
9619 /* Return the maximum number of instructions a cpu can issue. */
/* NOTE(review): the function signature, switch head, return values and
   default case were dropped by the extraction; only the case labels of
   the per-processor dispatch remain.  */
9626 case PROCESSOR_PENTIUM:
9630 case PROCESSOR_PENTIUMPRO:
9631 case PROCESSOR_PENTIUM4:
9632 case PROCESSOR_ATHLON:
9640 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
9641 by DEP_INSN and nothing set by DEP_INSN. */
9644 ix86_flags_dependant (insn, dep_insn, insn_type)
9646 enum attr_type insn_type;
9650 /* Simplify the test for uninteresting insns. */
9651 if (insn_type != TYPE_SETCC
9652 && insn_type != TYPE_ICMOV
9653 && insn_type != TYPE_FCMOV
9654 && insn_type != TYPE_IBR)
9657 if ((set = single_set (dep_insn)) != 0)
9659 set = SET_DEST (set);
9662 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
9663 && XVECLEN (PATTERN (dep_insn), 0) == 2
9664 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
9665 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
9667 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
9668 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
9673 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
9676 /* This test is true if the dependent insn reads the flags but
9677 not any other potentially set register. */
9678 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
9681 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
9687 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
9688 address with operands set by DEP_INSN. */
9691 ix86_agi_dependant (insn, dep_insn, insn_type)
9693 enum attr_type insn_type;
/* LEA computes an address in its SET_SRC, so treat the source itself
   as the address expression.  */
9697 if (insn_type == TYPE_LEA
9700 addr = PATTERN (insn);
9701 if (GET_CODE (addr) == SET)
9703 else if (GET_CODE (addr) == PARALLEL
9704 && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
9705 addr = XVECEXP (addr, 0, 0);
9708 addr = SET_SRC (addr);
/* Otherwise find the insn's first MEM operand (scanning backward) and
   take its address.  */
9713 extract_insn_cached (insn);
9714 for (i = recog_data.n_operands - 1; i >= 0; --i)
9715 if (GET_CODE (recog_data.operand[i]) == MEM)
9717 addr = XEXP (recog_data.operand[i], 0);
/* AGI stall iff DEP_INSN writes something the address reads.  */
9724 return modified_in_p (addr, dep_insn);
/* Scheduler hook: adjust COST of the dependency LINK between INSN and
   DEP_INSN for the target CPU.  NOTE(review): lossy extraction -- the
   return type line, the switch head on the processor, the individual
   cost adjustments/returns inside each branch and the closing default
   case are missing from view.  */
9728 ix86_adjust_cost (insn, link, dep_insn, cost)
9729 rtx insn, link, dep_insn;
9732 enum attr_type insn_type, dep_insn_type;
9733 enum attr_memory memory, dep_memory;
9735 int dep_insn_code_number;
9737 /* Anti and output depenancies have zero cost on all CPUs. */
9738 if (REG_NOTE_KIND (link) != 0)
9741 dep_insn_code_number = recog_memoized (dep_insn);
9743 /* If we can't recognize the insns, we can't really do anything. */
9744 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
9747 insn_type = get_attr_type (insn);
9748 dep_insn_type = get_attr_type (dep_insn);
9752 case PROCESSOR_PENTIUM:
9753 /* Address Generation Interlock adds a cycle of latency. */
9754 if (ix86_agi_dependant (insn, dep_insn, insn_type))
9757 /* ??? Compares pair with jump/setcc. */
9758 if (ix86_flags_dependant (insn, dep_insn, insn_type))
9761 /* Floating point stores require value to be ready one cycle ealier. */
9762 if (insn_type == TYPE_FMOV
9763 && get_attr_memory (insn) == MEMORY_STORE
9764 && !ix86_agi_dependant (insn, dep_insn, insn_type))
9768 case PROCESSOR_PENTIUMPRO:
9769 memory = get_attr_memory (insn);
9770 dep_memory = get_attr_memory (dep_insn);
9772 /* Since we can't represent delayed latencies of load+operation,
9773 increase the cost here for non-imov insns. */
9774 if (dep_insn_type != TYPE_IMOV
9775 && dep_insn_type != TYPE_FMOV
9776 && (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH))
9779 /* INT->FP conversion is expensive. */
9780 if (get_attr_fp_int_src (dep_insn))
9783 /* There is one cycle extra latency between an FP op and a store. */
9784 if (insn_type == TYPE_FMOV
9785 && (set = single_set (dep_insn)) != NULL_RTX
9786 && (set2 = single_set (insn)) != NULL_RTX
9787 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
9788 && GET_CODE (SET_DEST (set2)) == MEM)
9791 /* Show ability of reorder buffer to hide latency of load by executing
9792 in parallel with previous instruction in case
9793 previous instruction is not needed to compute the address. */
9794 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
9795 && !ix86_agi_dependant (insn, dep_insn, insn_type))
9797 /* Claim moves to take one cycle, as core can issue one load
9798 at time and the next load can start cycle later. */
9799 if (dep_insn_type == TYPE_IMOV
9800 || dep_insn_type == TYPE_FMOV)
/* NOTE(review): the case label for this next branch (presumably the K6)
   was dropped by the extraction.  */
9808 memory = get_attr_memory (insn);
9809 dep_memory = get_attr_memory (dep_insn);
9810 /* The esp dependency is resolved before the instruction is really
9812 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
9813 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
9816 /* Since we can't represent delayed latencies of load+operation,
9817 increase the cost here for non-imov insns. */
9818 if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
9819 cost += (dep_insn_type != TYPE_IMOV) ? 2 : 1;
9821 /* INT->FP conversion is expensive. */
9822 if (get_attr_fp_int_src (dep_insn))
9825 /* Show ability of reorder buffer to hide latency of load by executing
9826 in parallel with previous instruction in case
9827 previous instruction is not needed to compute the address. */
9828 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
9829 && !ix86_agi_dependant (insn, dep_insn, insn_type))
9831 /* Claim moves to take one cycle, as core can issue one load
9832 at time and the next load can start cycle later. */
9833 if (dep_insn_type == TYPE_IMOV
9834 || dep_insn_type == TYPE_FMOV)
9843 case PROCESSOR_ATHLON:
9844 memory = get_attr_memory (insn);
9845 dep_memory = get_attr_memory (dep_insn);
9847 if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
9849 if (dep_insn_type == TYPE_IMOV || dep_insn_type == TYPE_FMOV)
9854 /* Show ability of reorder buffer to hide latency of load by executing
9855 in parallel with previous instruction in case
9856 previous instruction is not needed to compute the address. */
9857 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
9858 && !ix86_agi_dependant (insn, dep_insn, insn_type))
9860 /* Claim moves to take one cycle, as core can issue one load
9861 at time and the next load can start cycle later. */
9862 if (dep_insn_type == TYPE_IMOV
9863 || dep_insn_type == TYPE_FMOV)
/* Per-cycle PPro scheduling state.  NOTE(review): the extraction kept
   only one member; the decode[] slot array referenced by the functions
   below is missing from view.  */
9880 struct ppro_sched_data
9883 int issued_this_cycle;
/* Return INSN's "length" attribute, or a safe fallback (line lost in
   extraction) when the insn is not recognizable.  */
9888 ix86_safe_length (insn)
9891 if (recog_memoized (insn) >= 0)
9892 return get_attr_length(insn);
/* Like ix86_safe_length, but intended for the prefix length.
   NOTE(review): the body returns get_attr_length, not a prefix-length
   attribute, despite the name -- verify against the length_prefix
   attribute in i386.md; callers add this to 7 when checking the
   Pentium pairing size limit.  */
9898 ix86_safe_length_prefix (insn)
9901 if (recog_memoized (insn) >= 0)
9902 return get_attr_length(insn);
/* Return INSN's "memory" attribute, or MEMORY_UNKNOWN when the insn is
   not recognizable.  */
9907 static enum attr_memory
9908 ix86_safe_memory (insn)
9911 if (recog_memoized (insn) >= 0)
9912 return get_attr_memory(insn);
9914 return MEMORY_UNKNOWN;
/* Return INSN's Pentium pairability attribute, or PENT_PAIR_NP
   (not pairable) when the insn is not recognizable.  */
9917 static enum attr_pent_pair
9918 ix86_safe_pent_pair (insn)
9921 if (recog_memoized (insn) >= 0)
9922 return get_attr_pent_pair(insn);
9924 return PENT_PAIR_NP;
/* Return INSN's PPro uop-count class, or the conservative
   PPRO_UOPS_MANY when the insn is not recognizable.  */
9927 static enum attr_ppro_uops
9928 ix86_safe_ppro_uops (insn)
9931 if (recog_memoized (insn) >= 0)
9932 return get_attr_ppro_uops (insn);
9934 return PPRO_UOPS_MANY;
/* Write the UIDs of the insns currently occupying the three PPro
   decode slots to the scheduler DUMP file (slot 0 gates the print).  */
9938 ix86_dump_ppro_packet (dump)
9941 if (ix86_sched_data.ppro.decode[0])
9943 fprintf (dump, "PPRO packet: %d",
9944 INSN_UID (ix86_sched_data.ppro.decode[0]));
9945 if (ix86_sched_data.ppro.decode[1])
9946 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[1]));
9947 if (ix86_sched_data.ppro.decode[2])
9948 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[2]));
9953 /* We're beginning a new block. Initialize data structures as necessary. */
/* Scheduler hook: zero the whole per-block scheduling state.  */
9956 ix86_sched_init (dump, sched_verbose, veclen)
9957 FILE *dump ATTRIBUTE_UNUSED;
9958 int sched_verbose ATTRIBUTE_UNUSED;
9959 int veclen ATTRIBUTE_UNUSED;
9961 memset (&ix86_sched_data, 0, sizeof (ix86_sched_data));
9964 /* Shift INSN to SLOT, and shift everything else down. */
/* NOTE(review): lossy extraction -- the saved-insn temporary, the loop
   head and the final store into SLOT are missing; only the shift step
   of the rotate remains visible.  */
9967 ix86_reorder_insn (insnp, slot)
9974 insnp[0] = insnp[1];
9975 while (++insnp != slot);
9980 /* Find an instruction with given pairability and minimal amount of cycles
9981 lost by the fact that the CPU waits for both pipelines to finish before
9982 reading next instructions. Also take care that both instructions together
9983 can not exceed 7 bytes. */
9986 ix86_pent_find_pair (e_ready, ready, type, first)
9989 enum attr_pent_pair type;
9992 int mincycles, cycles;
9993 enum attr_pent_pair tmp;
9994 enum attr_memory memory;
9995 rtx *insnp, *bestinsnp = NULL;
/* FIRST itself must fit the 7-byte pairing limit or no pair exists.  */
9997 if (ix86_safe_length (first) > 7 + ix86_safe_length_prefix (first))
10000 memory = ix86_safe_memory (first);
10001 cycles = result_ready_cost (first);
10002 mincycles = INT_MAX;
/* Scan the ready list for a candidate of the requested pairability;
   stop early once a zero-cost pairing (mincycles == 0) is found.  */
10004 for (insnp = e_ready; insnp >= ready && mincycles; --insnp)
10005 if ((tmp = ix86_safe_pent_pair (*insnp)) == type
10006 && ix86_safe_length (*insnp) <= 7 + ix86_safe_length_prefix (*insnp))
10008 enum attr_memory second_memory;
10009 int secondcycles, currentcycles;
10011 second_memory = ix86_safe_memory (*insnp);
10012 secondcycles = result_ready_cost (*insnp);
/* Cost of a pairing = latency mismatch between the two pipes.  */
10013 currentcycles = abs (cycles - secondcycles);
10015 if (secondcycles >= 1 && cycles >= 1)
10017 /* Two read/modify/write instructions together takes two
10019 if (memory == MEMORY_BOTH && second_memory == MEMORY_BOTH)
10020 currentcycles += 2;
10022 /* Read modify/write instruction followed by read/modify
10023 takes one cycle longer. */
10024 if (memory == MEMORY_BOTH && second_memory == MEMORY_LOAD
10025 && tmp != PENT_PAIR_UV
10026 && ix86_safe_pent_pair (first) != PENT_PAIR_UV)
10027 currentcycles += 1;
/* Keep the cheapest candidate found so far.  */
10029 if (currentcycles < mincycles)
10030 bestinsnp = insnp, mincycles = currentcycles;
10036 /* Subroutines of ix86_sched_reorder. */
/* Pentium U/V-pipe pairing: try to place a compatible partner insn
   next to the insn at the head of the ready list (*e_ready), so the
   two issue together.  Tries PU, then PV, then UV partners.  */
10039 ix86_sched_reorder_pentium (ready, e_ready)
10043 enum attr_pent_pair pair1, pair2;
10046 /* This wouldn't be necessary if Haifa knew that static insn ordering
10047 is important to which pipe an insn is issued to. So we have to make
10048 some minor rearrangements. */
10050 pair1 = ix86_safe_pent_pair (*e_ready);
10052 /* If the first insn is non-pairable, let it be. */
10053 if (pair1 == PENT_PAIR_NP)
10056 pair2 = PENT_PAIR_NP;
10059 /* If the first insn is UV or PV pairable, search for a PU
10060 insn to go with. */
10061 if (pair1 == PENT_PAIR_UV || pair1 == PENT_PAIR_PV)
10063 insnp = ix86_pent_find_pair (e_ready-1, ready,
10064 PENT_PAIR_PU, *e_ready);
10066 pair2 = PENT_PAIR_PU;
10069 /* If the first insn is PU or UV pairable, search for a PV
10070 insn to go with. */
10071 if (pair2 == PENT_PAIR_NP
10072 && (pair1 == PENT_PAIR_PU || pair1 == PENT_PAIR_UV))
10074 insnp = ix86_pent_find_pair (e_ready-1, ready,
10075 PENT_PAIR_PV, *e_ready);
10077 pair2 = PENT_PAIR_PV;
10080 /* If the first insn is pairable, search for a UV
10081 insn to go with. */
10082 if (pair2 == PENT_PAIR_NP)
10084 insnp = ix86_pent_find_pair (e_ready-1, ready,
10085 PENT_PAIR_UV, *e_ready);
10087 pair2 = PENT_PAIR_UV;
/* No partner found: leave the ready list untouched.  */
10090 if (pair2 == PENT_PAIR_NP)
10093 /* Found something! Decide if we need to swap the order. */
10094 if (pair1 == PENT_PAIR_PV || pair2 == PENT_PAIR_PU
10095 || (pair1 == PENT_PAIR_UV && pair2 == PENT_PAIR_UV
10096 && ix86_safe_memory (*e_ready) == MEMORY_BOTH
10097 && ix86_safe_memory (*insnp) == MEMORY_LOAD))
10098 ix86_reorder_insn (insnp, e_ready);
10100 ix86_reorder_insn (insnp, e_ready - 1);
/* PPro 4-1-1 decoder modeling: pick one "few"/"many"-uop insn for
   decoder 0, then fill decoders 1 and 2 with single-uop insns, moving
   chosen insns to the head of the ready list.  Records the number of
   insns issued this cycle in ix86_sched_data.  */
10104 ix86_sched_reorder_ppro (ready, e_ready)
10109 enum attr_ppro_uops cur_uops;
10110 int issued_this_cycle;
10114 /* At this point .ppro.decode contains the state of the three
10115 decoders from last "cycle". That is, those insns that were
10116 actually independent. But here we're scheduling for the
10117 decoder, and we may find things that are decodable in the
10120 memcpy (decode, ix86_sched_data.ppro.decode, sizeof (decode));
10121 issued_this_cycle = 0;
10124 cur_uops = ix86_safe_ppro_uops (*insnp);
10126 /* If the decoders are empty, and we've a complex insn at the
10127 head of the priority queue, let it issue without complaint. */
10128 if (decode[0] == NULL)
10130 if (cur_uops == PPRO_UOPS_MANY)
10132 decode[0] = *insnp;
10136 /* Otherwise, search for a 2-4 uop unsn to issue. */
10137 while (cur_uops != PPRO_UOPS_FEW)
10139 if (insnp == ready)
10141 cur_uops = ix86_safe_ppro_uops (*--insnp);
10144 /* If so, move it to the head of the line. */
10145 if (cur_uops == PPRO_UOPS_FEW)
10146 ix86_reorder_insn (insnp, e_ready);
10148 /* Issue the head of the queue. */
10149 issued_this_cycle = 1;
10150 decode[0] = *e_ready--;
10153 /* Look for simple insns to fill in the other two slots. */
10154 for (i = 1; i < 3; ++i)
10155 if (decode[i] == NULL)
10157 if (ready >= e_ready)
10161 cur_uops = ix86_safe_ppro_uops (*insnp);
10162 while (cur_uops != PPRO_UOPS_ONE)
10164 if (insnp == ready)
10166 cur_uops = ix86_safe_ppro_uops (*--insnp);
10169 /* Found one. Move it to the head of the queue and issue it. */
10170 if (cur_uops == PPRO_UOPS_ONE)
10172 ix86_reorder_insn (insnp, e_ready);
10173 decode[i] = *e_ready--;
10174 issued_this_cycle++;
10178 /* ??? Didn't find one. Ideally, here we would do a lazy split
10179 of 2-uop insns, issue one and queue the other. */
/* Always report at least one issue so the caller's countdown in
   ix86_variable_issue terminates.  */
10183 if (issued_this_cycle == 0)
10184 issued_this_cycle = 1;
10185 ix86_sched_data.ppro.issued_this_cycle = issued_this_cycle;
10188 /* We are about to begin issuing insns for this clock cycle.
10189 Override the default sort algorithm to better slot instructions. */
/* Scheduler hook: dispatch to the per-processor reorder routine, then
   report the issue rate.  */
10191 ix86_sched_reorder (dump, sched_verbose, ready, n_readyp, clock_var)
10192 FILE *dump ATTRIBUTE_UNUSED;
10193 int sched_verbose ATTRIBUTE_UNUSED;
10196 int clock_var ATTRIBUTE_UNUSED;
10198 int n_ready = *n_readyp;
10199 rtx *e_ready = ready + n_ready - 1;
10209 case PROCESSOR_PENTIUM:
10210 ix86_sched_reorder_pentium (ready, e_ready);
10213 case PROCESSOR_PENTIUMPRO:
10214 ix86_sched_reorder_ppro (ready, e_ready);
10219 return ix86_issue_rate ();
10222 /* We are about to issue INSN. Return the number of insns left on the
10223 ready queue that can be issued this cycle. */
10226 ix86_variable_issue (dump, sched_verbose, insn, can_issue_more)
10230 int can_issue_more;
/* Default (non-PPro) processors: simple countdown.  */
10236 return can_issue_more - 1;
10238 case PROCESSOR_PENTIUMPRO:
10240 enum attr_ppro_uops uops = ix86_safe_ppro_uops (insn);
/* A many-uop insn occupies the whole decode group by itself.  */
10242 if (uops == PPRO_UOPS_MANY)
10245 ix86_dump_ppro_packet (dump);
10246 ix86_sched_data.ppro.decode[0] = insn;
10247 ix86_sched_data.ppro.decode[1] = NULL;
10248 ix86_sched_data.ppro.decode[2] = NULL;
10250 ix86_dump_ppro_packet (dump);
10251 ix86_sched_data.ppro.decode[0] = NULL;
/* A few-uop insn must start a fresh group in decoder 0.  */
10253 else if (uops == PPRO_UOPS_FEW)
10256 ix86_dump_ppro_packet (dump);
10257 ix86_sched_data.ppro.decode[0] = insn;
10258 ix86_sched_data.ppro.decode[1] = NULL;
10259 ix86_sched_data.ppro.decode[2] = NULL;
/* Single-uop insn: drop it into the first free decoder slot; when
   all three are full, flush the packet.  */
10263 for (i = 0; i < 3; ++i)
10264 if (ix86_sched_data.ppro.decode[i] == NULL)
10266 ix86_sched_data.ppro.decode[i] = insn;
10274 ix86_dump_ppro_packet (dump);
10275 ix86_sched_data.ppro.decode[0] = NULL;
10276 ix86_sched_data.ppro.decode[1] = NULL;
10277 ix86_sched_data.ppro.decode[2] = NULL;
10281 return --ix86_sched_data.ppro.issued_this_cycle;
10285 /* Walk through INSNS and look for MEM references whose address is DSTREG or
10286 SRCREG and set the memory attribute to those of DSTREF and SRCREF, as
/* Driver: apply ix86_set_move_mem_attrs_1 to each insn's pattern in
   the INSNS chain.  */
10290 ix86_set_move_mem_attrs (insns, dstref, srcref, dstreg, srcreg)
10292 rtx dstref, srcref, dstreg, srcreg;
10296 for (insn = insns; insn != 0 ; insn = NEXT_INSN (insn))
10298 ix86_set_move_mem_attrs_1 (PATTERN (insn), dstref, srcref,
10302 /* Subroutine of above to actually do the updating by recursively walking
/* Recursively walk rtx X; any MEM whose address is exactly DSTREG or
   SRCREG (pointer identity) inherits the memory attributes of DSTREF
   or SRCREF respectively.  */
10306 ix86_set_move_mem_attrs_1 (x, dstref, srcref, dstreg, srcreg)
10308 rtx dstref, srcref, dstreg, srcreg;
10310 enum rtx_code code = GET_CODE (x);
10311 const char *format_ptr = GET_RTX_FORMAT (code);
10314 if (code == MEM && XEXP (x, 0) == dstreg)
10315 MEM_COPY_ATTRIBUTES (x, dstref);
10316 else if (code == MEM && XEXP (x, 0) == srcreg)
10317 MEM_COPY_ATTRIBUTES (x, srcref);
/* Recurse into sub-expressions ('e') and vectors of them ('E').  */
10319 for (i = 0; i < GET_RTX_LENGTH (code); i++, format_ptr++)
10321 if (*format_ptr == 'e')
10322 ix86_set_move_mem_attrs_1 (XEXP (x, i), dstref, srcref,
10324 else if (*format_ptr == 'E')
10325 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
10326 ix86_set_move_mem_attrs_1 (XVECEXP (x, i, j), dstref, srcref,
10331 /* Compute the alignment given to a constant that is being placed in memory.
10332 EXP is the constant and ALIGN is the alignment that the object would
10334 The value of this function is used instead of that alignment to align
/* Raise alignment for DFmode real constants (to 64) and 128-bit-mode
   reals (to 128); long string constants also get bumped.  */
10338 ix86_constant_alignment (exp, align)
10342 if (TREE_CODE (exp) == REAL_CST)
10344 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
10346 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
10349 else if (TREE_CODE (exp) == STRING_CST && TREE_STRING_LENGTH (exp) >= 31
10356 /* Compute the alignment for a static variable.
10357 TYPE is the data type, and ALIGN is the alignment that
10358 the object would ordinarily have. The value of this function is used
10359 instead of that alignment to align the object. */
10362 ix86_data_alignment (type, align)
/* Large aggregates (>= 256 bits, or with a nonzero high size word)
   get at least 256-bit alignment.  */
10366 if (AGGREGATE_TYPE_P (type)
10367 && TYPE_SIZE (type)
10368 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
10369 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
10370 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
10373 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
10374 to 16byte boundary. */
10377 if (AGGREGATE_TYPE_P (type)
10378 && TYPE_SIZE (type)
10379 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
10380 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
10381 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
/* Per-kind bumps: element/field/type mode DFmode -> 64, 128-bit
   modes -> 128.  */
10385 if (TREE_CODE (type) == ARRAY_TYPE)
10387 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
10389 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
10392 else if (TREE_CODE (type) == COMPLEX_TYPE)
10395 if (TYPE_MODE (type) == DCmode && align < 64)
10397 if (TYPE_MODE (type) == XCmode && align < 128)
10400 else if ((TREE_CODE (type) == RECORD_TYPE
10401 || TREE_CODE (type) == UNION_TYPE
10402 || TREE_CODE (type) == QUAL_UNION_TYPE)
10403 && TYPE_FIELDS (type))
10405 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
10407 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
10410 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
10411 || TREE_CODE (type) == INTEGER_TYPE)
10413 if (TYPE_MODE (type) == DFmode && align < 64)
10415 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
10422 /* Compute the alignment for a local variable.
10423 TYPE is the data type, and ALIGN is the alignment that
10424 the object would ordinarily have. The value of this macro is used
10425 instead of that alignment to align the object. */
/* Same structure as ix86_data_alignment, but with the lower 16-byte
   aggregate threshold used for stack objects.  */
10428 ix86_local_alignment (type, align)
10432 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
10433 to 16byte boundary. */
10436 if (AGGREGATE_TYPE_P (type)
10437 && TYPE_SIZE (type)
10438 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
10439 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
10440 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
10443 if (TREE_CODE (type) == ARRAY_TYPE)
10445 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
10447 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
10450 else if (TREE_CODE (type) == COMPLEX_TYPE)
10452 if (TYPE_MODE (type) == DCmode && align < 64)
10454 if (TYPE_MODE (type) == XCmode && align < 128)
10457 else if ((TREE_CODE (type) == RECORD_TYPE
10458 || TREE_CODE (type) == UNION_TYPE
10459 || TREE_CODE (type) == QUAL_UNION_TYPE)
10460 && TYPE_FIELDS (type))
10462 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
10464 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
10467 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
10468 || TREE_CODE (type) == INTEGER_TYPE)
10471 if (TYPE_MODE (type) == DFmode && align < 64)
10473 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
10479 /* Emit RTL insns to initialize the variable parts of a trampoline.
10480 FNADDR is an RTX for the address of the function's pure code.
10481 CXT is an RTX for the static chain value for the function. */
10483 x86_initialize_trampoline (tramp, fnaddr, cxt)
10484 rtx tramp, fnaddr, cxt;
/* 32-bit trampoline: "movl $cxt, %ecx" (0xb9 imm32) followed by
   "jmp rel32" (0xe9 disp32), disp computed relative to tramp+10.
   NOTE(review): the if/else selecting 32-bit vs 64-bit layout was
   dropped by the extraction.  */
10488 /* Compute offset from the end of the jmp to the target function. */
10489 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
10490 plus_constant (tramp, 10),
10491 NULL_RTX, 1, OPTAB_DIRECT);
10492 emit_move_insn (gen_rtx_MEM (QImode, tramp),
10493 GEN_INT (trunc_int_for_mode (0xb9, QImode)));
10494 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
10495 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
10496 GEN_INT (trunc_int_for_mode (0xe9, QImode)));
10497 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
/* 64-bit trampoline: load FNADDR into r11 (movl when it zero-extends,
   otherwise movabs), load CXT into r10 with movabs, then jmp *%r11.  */
10502 /* Try to load address using shorter movl instead of movabs.
10503 We may want to support movq for kernel mode, but kernel does not use
10504 trampolines at the moment. */
10505 if (x86_64_zero_extended_value (fnaddr))
10507 fnaddr = copy_to_mode_reg (DImode, fnaddr);
10508 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
10509 GEN_INT (trunc_int_for_mode (0xbb41, HImode)));
10510 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
10511 gen_lowpart (SImode, fnaddr));
10516 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
10517 GEN_INT (trunc_int_for_mode (0xbb49, HImode)));
10518 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
10522 /* Load static chain using movabs to r10. */
10523 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
10524 GEN_INT (trunc_int_for_mode (0xba49, HImode)));
10525 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
10528 /* Jump to the r11 */
10529 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
10530 GEN_INT (trunc_int_for_mode (0xff49, HImode)));
10531 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
10532 GEN_INT (trunc_int_for_mode (0xe3, QImode)));
/* Sanity check: emitted bytes must fit the declared trampoline size.  */
10534 if (offset > TRAMPOLINE_SIZE)
/* Register the md builtin NAME with prototype TYPE and code CODE, but
   only when its ISA MASK is enabled in target_flags.  */
10539 #define def_builtin(MASK, NAME, TYPE, CODE) \
10541 if ((MASK) & target_flags) \
10542 builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, NULL); \
/* One table entry describing an ix86 builtin: the ISA mask gating it,
   the insn pattern implementing it, its user-visible name, its builtin
   enum code, the rtx comparison used (for compare builtins), and an
   extra per-entry flag.  */
10545 struct builtin_description
10547 const unsigned int mask;
10548 const enum insn_code icode;
10549 const char *const name;
10550 const enum ix86_builtins code;
10551 const enum rtx_code comparison;
10552 const unsigned int flag;
/* Builtins expanding to SSE comis/ucomis scalar compares.  Entries with
   flag == 1 (comigt/comige etc.) reuse the LT/LE comparison --
   presumably with swapped operands at expansion time; verify in the
   comi expander.  */
10555 static const struct builtin_description bdesc_comi[] =
10557 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, EQ, 0 },
10558 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, LT, 0 },
10559 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, LE, 0 },
10560 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, LT, 1 },
10561 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, LE, 1 },
10562 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, NE, 0 },
10563 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, EQ, 0 },
10564 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, LT, 0 },
10565 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, LE, 0 },
10566 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, LT, 1 },
10567 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, LE, 1 },
10568 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, NE, 0 }
10571 static const struct builtin_description bdesc_2arg[] =
10574 { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
10575 { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
10576 { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
10577 { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
10578 { MASK_SSE, CODE_FOR_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
10579 { MASK_SSE, CODE_FOR_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
10580 { MASK_SSE, CODE_FOR_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
10581 { MASK_SSE, CODE_FOR_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
10583 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
10584 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
10585 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
10586 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 },
10587 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 },
10588 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
10589 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 },
10590 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 },
10591 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 },
10592 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 },
10593 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 },
10594 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 },
10595 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
10596 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
10597 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
10598 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpgtss", IX86_BUILTIN_CMPGTSS, LT, 1 },
10599 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpgess", IX86_BUILTIN_CMPGESS, LE, 1 },
10600 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
10601 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 },
10602 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 },
10603 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 },
10604 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, LT, 1 },
10605 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, LE, 1 },
10606 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
10608 { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
10609 { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
10610 { MASK_SSE, CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
10611 { MASK_SSE, CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
10613 { MASK_SSE, CODE_FOR_sse_andti3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
10614 { MASK_SSE, CODE_FOR_sse_nandti3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
10615 { MASK_SSE, CODE_FOR_sse_iorti3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
10616 { MASK_SSE, CODE_FOR_sse_xorti3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
10618 { MASK_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
10619 { MASK_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
10620 { MASK_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
10621 { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
10622 { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
10625 { MASK_MMX, CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
10626 { MASK_MMX, CODE_FOR_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
10627 { MASK_MMX, CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
10628 { MASK_MMX, CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
10629 { MASK_MMX, CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
10630 { MASK_MMX, CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
10632 { MASK_MMX, CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
10633 { MASK_MMX, CODE_FOR_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
10634 { MASK_MMX, CODE_FOR_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
10635 { MASK_MMX, CODE_FOR_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
10636 { MASK_MMX, CODE_FOR_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
10637 { MASK_MMX, CODE_FOR_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
10638 { MASK_MMX, CODE_FOR_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
10639 { MASK_MMX, CODE_FOR_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
10641 { MASK_MMX, CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
10642 { MASK_MMX, CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
10643 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
10645 { MASK_MMX, CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
10646 { MASK_MMX, CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
10647 { MASK_MMX, CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
10648 { MASK_MMX, CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
10650 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
10651 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
10653 { MASK_MMX, CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
10654 { MASK_MMX, CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
10655 { MASK_MMX, CODE_FOR_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
10656 { MASK_MMX, CODE_FOR_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
10657 { MASK_MMX, CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
10658 { MASK_MMX, CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
10660 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
10661 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
10662 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
10663 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
10665 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
10666 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
10667 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
10668 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
10669 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
10670 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
10673 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
10674 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
10675 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
10677 { MASK_SSE, CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
10678 { MASK_SSE, CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
10680 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
10681 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
10682 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
10683 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
10684 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
10685 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
10687 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
10688 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
10689 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
10690 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
10691 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
10692 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
10694 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
10695 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
10696 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
10697 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
10699 { MASK_SSE, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
10700 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 }
/* Table of one-argument MMX/SSE builtins.  Each entry gives the target
   mask that must be enabled, the named insn pattern to expand to, the
   builtin name (0 here — these are registered by hand in
   ix86_init_mmx_sse_builtins), the IX86_BUILTIN_* enumerator, and two
   trailing fields (comparison code / flag) that are 0 for unary ops.
   NOTE(review): the array's braces live on lines elided from this
   listing.  */
10704 static const struct builtin_description bdesc_1arg[] =
10706 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
10707 { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
10709 { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
10710 { MASK_SSE, CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
10711 { MASK_SSE, CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
10713 { MASK_SSE, CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
10714 { MASK_SSE, CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
10715 { MASK_SSE, CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
10716 { MASK_SSE, CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 }
/* Entry point for registering all ix86 target-specific builtins.
   Currently just delegates to the MMX/SSE initializer; the guard
   (presumably TARGET_MMX — see the comment on the callee) sits on a
   line elided from this listing, so confirm against the full source.  */
10721 ix86_init_builtins ()
10724 ix86_init_mmx_sse_builtins ();
10727 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
10728 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
   builtins.  */
10731 ix86_init_mmx_sse_builtins ()
10733 const struct builtin_description * d;
10735 tree endlink = void_list_node;
/* Pointer types used by the load/store builtin signatures below.  */
10737 tree pchar_type_node = build_pointer_type (char_type_node);
10738 tree pfloat_type_node = build_pointer_type (float_type_node);
10739 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
10740 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
/* Function-type nodes, one per distinct builtin signature.  Each is a
   tree_cons chain of argument types terminated (on elided lines) by
   endlink.  */
10743 tree int_ftype_v4sf_v4sf
10744 = build_function_type (integer_type_node,
10745 tree_cons (NULL_TREE, V4SF_type_node,
10746 tree_cons (NULL_TREE,
10749 tree v4si_ftype_v4sf_v4sf
10750 = build_function_type (V4SI_type_node,
10751 tree_cons (NULL_TREE, V4SF_type_node,
10752 tree_cons (NULL_TREE,
10755 /* MMX/SSE/integer conversions. */
10756 tree int_ftype_v4sf
10757 = build_function_type (integer_type_node,
10758 tree_cons (NULL_TREE, V4SF_type_node,
10760 tree int_ftype_v8qi
10761 = build_function_type (integer_type_node,
10762 tree_cons (NULL_TREE, V8QI_type_node,
10764 tree int_ftype_v2si
10765 = build_function_type (integer_type_node,
10766 tree_cons (NULL_TREE, V2SI_type_node,
10768 tree v2si_ftype_int
10769 = build_function_type (V2SI_type_node,
10770 tree_cons (NULL_TREE, integer_type_node,
10772 tree v4sf_ftype_v4sf_int
10773 = build_function_type (V4SF_type_node,
10774 tree_cons (NULL_TREE, V4SF_type_node,
10775 tree_cons (NULL_TREE, integer_type_node,
10777 tree v4sf_ftype_v4sf_v2si
10778 = build_function_type (V4SF_type_node,
10779 tree_cons (NULL_TREE, V4SF_type_node,
10780 tree_cons (NULL_TREE, V2SI_type_node,
10782 tree int_ftype_v4hi_int
10783 = build_function_type (integer_type_node,
10784 tree_cons (NULL_TREE, V4HI_type_node,
10785 tree_cons (NULL_TREE, integer_type_node,
10787 tree v4hi_ftype_v4hi_int_int
10788 = build_function_type (V4HI_type_node,
10789 tree_cons (NULL_TREE, V4HI_type_node,
10790 tree_cons (NULL_TREE, integer_type_node,
10791 tree_cons (NULL_TREE,
10794 /* Miscellaneous. */
10795 tree v8qi_ftype_v4hi_v4hi
10796 = build_function_type (V8QI_type_node,
10797 tree_cons (NULL_TREE, V4HI_type_node,
10798 tree_cons (NULL_TREE, V4HI_type_node,
10800 tree v4hi_ftype_v2si_v2si
10801 = build_function_type (V4HI_type_node,
10802 tree_cons (NULL_TREE, V2SI_type_node,
10803 tree_cons (NULL_TREE, V2SI_type_node,
10805 tree v4sf_ftype_v4sf_v4sf_int
10806 = build_function_type (V4SF_type_node,
10807 tree_cons (NULL_TREE, V4SF_type_node,
10808 tree_cons (NULL_TREE, V4SF_type_node,
10809 tree_cons (NULL_TREE,
10812 tree v4hi_ftype_v8qi_v8qi
10813 = build_function_type (V4HI_type_node,
10814 tree_cons (NULL_TREE, V8QI_type_node,
10815 tree_cons (NULL_TREE, V8QI_type_node,
10817 tree v2si_ftype_v4hi_v4hi
10818 = build_function_type (V2SI_type_node,
10819 tree_cons (NULL_TREE, V4HI_type_node,
10820 tree_cons (NULL_TREE, V4HI_type_node,
10822 tree v4hi_ftype_v4hi_int
10823 = build_function_type (V4HI_type_node,
10824 tree_cons (NULL_TREE, V4HI_type_node,
10825 tree_cons (NULL_TREE, integer_type_node,
10827 tree v4hi_ftype_v4hi_di
10828 = build_function_type (V4HI_type_node,
10829 tree_cons (NULL_TREE, V4HI_type_node,
10830 tree_cons (NULL_TREE,
10831 long_long_integer_type_node,
10833 tree v2si_ftype_v2si_di
10834 = build_function_type (V2SI_type_node,
10835 tree_cons (NULL_TREE, V2SI_type_node,
10836 tree_cons (NULL_TREE,
10837 long_long_integer_type_node,
10839 tree void_ftype_void
10840 = build_function_type (void_type_node, endlink);
10841 tree void_ftype_pchar_int
10842 = build_function_type (void_type_node,
10843 tree_cons (NULL_TREE, pchar_type_node,
10844 tree_cons (NULL_TREE, integer_type_node,
10846 tree void_ftype_unsigned
10847 = build_function_type (void_type_node,
10848 tree_cons (NULL_TREE, unsigned_type_node,
10850 tree unsigned_ftype_void
10851 = build_function_type (unsigned_type_node, endlink);
/* NOTE(review): the declarator lines for the next two types
   (presumably di_ftype_void and ti_ftype_void, used below) are elided
   from this listing — only their initializers survive.  */
10853 = build_function_type (long_long_unsigned_type_node, endlink);
10855 = build_function_type (intTI_type_node, endlink);
10856 tree v2si_ftype_v4sf
10857 = build_function_type (V2SI_type_node,
10858 tree_cons (NULL_TREE, V4SF_type_node,
10860 /* Loads/stores. */
10861 tree maskmovq_args = tree_cons (NULL_TREE, V8QI_type_node,
10862 tree_cons (NULL_TREE, V8QI_type_node,
10863 tree_cons (NULL_TREE,
10866 tree void_ftype_v8qi_v8qi_pchar
10867 = build_function_type (void_type_node, maskmovq_args);
10868 tree v4sf_ftype_pfloat
10869 = build_function_type (V4SF_type_node,
10870 tree_cons (NULL_TREE, pfloat_type_node,
10872 tree v4sf_ftype_float
10873 = build_function_type (V4SF_type_node,
10874 tree_cons (NULL_TREE, float_type_node,
10876 tree v4sf_ftype_float_float_float_float
10877 = build_function_type (V4SF_type_node,
10878 tree_cons (NULL_TREE, float_type_node,
10879 tree_cons (NULL_TREE, float_type_node,
10880 tree_cons (NULL_TREE,
10882 tree_cons (NULL_TREE,
10885 /* @@@ the type is bogus */
10886 tree v4sf_ftype_v4sf_pv2si
10887 = build_function_type (V4SF_type_node,
10888 tree_cons (NULL_TREE, V4SF_type_node,
10889 tree_cons (NULL_TREE, pv2si_type_node,
10891 tree void_ftype_pv2si_v4sf
10892 = build_function_type (void_type_node,
10893 tree_cons (NULL_TREE, pv2si_type_node,
10894 tree_cons (NULL_TREE, V4SF_type_node,
10896 tree void_ftype_pfloat_v4sf
10897 = build_function_type (void_type_node,
10898 tree_cons (NULL_TREE, pfloat_type_node,
10899 tree_cons (NULL_TREE, V4SF_type_node,
10901 tree void_ftype_pdi_di
10902 = build_function_type (void_type_node,
10903 tree_cons (NULL_TREE, pdi_type_node,
10904 tree_cons (NULL_TREE,
10905 long_long_unsigned_type_node,
10907 /* Normal vector unops. */
10908 tree v4sf_ftype_v4sf
10909 = build_function_type (V4SF_type_node,
10910 tree_cons (NULL_TREE, V4SF_type_node,
10913 /* Normal vector binops. */
10914 tree v4sf_ftype_v4sf_v4sf
10915 = build_function_type (V4SF_type_node,
10916 tree_cons (NULL_TREE, V4SF_type_node,
10917 tree_cons (NULL_TREE, V4SF_type_node,
10919 tree v8qi_ftype_v8qi_v8qi
10920 = build_function_type (V8QI_type_node,
10921 tree_cons (NULL_TREE, V8QI_type_node,
10922 tree_cons (NULL_TREE, V8QI_type_node,
10924 tree v4hi_ftype_v4hi_v4hi
10925 = build_function_type (V4HI_type_node,
10926 tree_cons (NULL_TREE, V4HI_type_node,
10927 tree_cons (NULL_TREE, V4HI_type_node,
10929 tree v2si_ftype_v2si_v2si
10930 = build_function_type (V2SI_type_node,
10931 tree_cons (NULL_TREE, V2SI_type_node,
10932 tree_cons (NULL_TREE, V2SI_type_node,
10934 tree ti_ftype_ti_ti
10935 = build_function_type (intTI_type_node,
10936 tree_cons (NULL_TREE, intTI_type_node,
10937 tree_cons (NULL_TREE, intTI_type_node,
10939 tree di_ftype_di_di
10940 = build_function_type (long_long_unsigned_type_node,
10941 tree_cons (NULL_TREE, long_long_unsigned_type_node,
10942 tree_cons (NULL_TREE,
10943 long_long_unsigned_type_node,
/* 3DNow! two-element-float (V2SF) signatures.  */
10946 tree v2si_ftype_v2sf
10947 = build_function_type (V2SI_type_node,
10948 tree_cons (NULL_TREE, V2SF_type_node,
10950 tree v2sf_ftype_v2si
10951 = build_function_type (V2SF_type_node,
10952 tree_cons (NULL_TREE, V2SI_type_node,
10954 tree v2si_ftype_v2si
10955 = build_function_type (V2SI_type_node,
10956 tree_cons (NULL_TREE, V2SI_type_node,
10958 tree v2sf_ftype_v2sf
10959 = build_function_type (V2SF_type_node,
10960 tree_cons (NULL_TREE, V2SF_type_node,
10962 tree v2sf_ftype_v2sf_v2sf
10963 = build_function_type (V2SF_type_node,
10964 tree_cons (NULL_TREE, V2SF_type_node,
10965 tree_cons (NULL_TREE,
10968 tree v2si_ftype_v2sf_v2sf
10969 = build_function_type (V2SI_type_node,
10970 tree_cons (NULL_TREE, V2SF_type_node,
10971 tree_cons (NULL_TREE,
10975 tree void_ftype_pchar
10976 = build_function_type (void_type_node,
10977 tree_cons (NULL_TREE, pchar_type_node,
10980 /* Add all builtins that are more or less simple operations on two
   operands.  */
10982 for (i = 0, d = bdesc_2arg; i < sizeof (bdesc_2arg) / sizeof *d; i++, d++)
10984 /* Use one of the operands; the target can have a different mode for
10985 mask-generating compares. */
10986 enum machine_mode mode;
10991 mode = insn_data[d->icode].operand[1].mode;
/* Pick the function type from the insn's operand mode; the switch
   skeleton (case labels, braces) sits on lines elided from this
   listing.  */
10996 type = v4sf_ftype_v4sf_v4sf;
10999 type = v8qi_ftype_v8qi_v8qi;
11002 type = v4hi_ftype_v4hi_v4hi;
11005 type = v2si_ftype_v2si_v2si;
11008 type = ti_ftype_ti_ti;
11011 type = di_ftype_di_di;
11018 /* Override for comparisons. */
11019 if (d->icode == CODE_FOR_maskcmpv4sf3
11020 || d->icode == CODE_FOR_maskncmpv4sf3
11021 || d->icode == CODE_FOR_vmmaskcmpv4sf3
11022 || d->icode == CODE_FOR_vmmaskncmpv4sf3)
11023 type = v4si_ftype_v4sf_v4sf;
11025 def_builtin (d->mask, d->name, type, d->code);
11028 /* Add the remaining MMX insns with somewhat more complicated types. */
11029 def_builtin (MASK_MMX, "__builtin_ia32_m_from_int", v2si_ftype_int, IX86_BUILTIN_M_FROM_INT);
11030 def_builtin (MASK_MMX, "__builtin_ia32_m_to_int", int_ftype_v2si, IX86_BUILTIN_M_TO_INT);
11031 def_builtin (MASK_MMX, "__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO);
11032 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
/* NOTE(review): ldmxcsr/stmxcsr access the SSE control/status
   register but are registered under MASK_MMX here — looks wrong;
   later GCC gates them on SSE.  Confirm before relying on this.  */
11033 def_builtin (MASK_MMX, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
11034 def_builtin (MASK_MMX, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
11035 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
11036 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
11037 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
11039 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
11040 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
11041 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
11043 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
11044 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
11046 def_builtin (MASK_MMX, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
11047 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
11049 /* comi/ucomi insns. */
11050 for (i = 0, d = bdesc_comi; i < sizeof (bdesc_comi) / sizeof *d; i++, d++)
11051 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
11053 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
11054 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
11055 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
11057 def_builtin (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
11058 def_builtin (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
11059 def_builtin (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
11060 def_builtin (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
11061 def_builtin (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
11062 def_builtin (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
11064 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
11065 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);
11067 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
11069 def_builtin (MASK_SSE, "__builtin_ia32_loadaps", v4sf_ftype_pfloat, IX86_BUILTIN_LOADAPS);
11070 def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pfloat, IX86_BUILTIN_LOADUPS);
11071 def_builtin (MASK_SSE, "__builtin_ia32_loadss", v4sf_ftype_pfloat, IX86_BUILTIN_LOADSS);
11072 def_builtin (MASK_SSE, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS);
11073 def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
11074 def_builtin (MASK_SSE, "__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS);
11076 def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
11077 def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
11078 def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
11079 def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
11081 def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
11082 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
11083 def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
11084 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
11086 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
11087 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_prefetch", void_ftype_pchar_int, IX86_BUILTIN_PREFETCH);
11089 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", v4hi_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
11091 def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
11092 def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
11093 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
11094 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
11095 def_builtin (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
11096 def_builtin (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
11098 def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
11100 /* Original 3DNow! */
11101 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
11102 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
11103 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
11104 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
11105 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
11106 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
11107 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
11108 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
11109 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
11110 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
11111 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
11112 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
11113 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
11114 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
11115 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
11116 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
11117 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
11118 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
11119 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
11120 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
11121 def_builtin (MASK_3DNOW, "__builtin_ia32_prefetch_3dnow", void_ftype_pchar, IX86_BUILTIN_PREFETCH_3DNOW);
11122 def_builtin (MASK_3DNOW, "__builtin_ia32_prefetchw", void_ftype_pchar, IX86_BUILTIN_PREFETCHW);
11124 /* 3DNow! extension as used in the Athlon CPU. */
11125 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
11126 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
11127 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
11128 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
11129 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
11130 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
11132 /* Composite intrinsics. */
11133 def_builtin (MASK_SSE, "__builtin_ia32_setps1", v4sf_ftype_float, IX86_BUILTIN_SETPS1);
11134 def_builtin (MASK_SSE, "__builtin_ia32_setps", v4sf_ftype_float_float_float_float, IX86_BUILTIN_SETPS);
11135 def_builtin (MASK_SSE, "__builtin_ia32_setzerops", ti_ftype_void, IX86_BUILTIN_CLRPS);
11136 def_builtin (MASK_SSE, "__builtin_ia32_loadps1", v4sf_ftype_pfloat, IX86_BUILTIN_LOADPS1);
11137 def_builtin (MASK_SSE, "__builtin_ia32_loadrps", v4sf_ftype_pfloat, IX86_BUILTIN_LOADRPS);
11138 def_builtin (MASK_SSE, "__builtin_ia32_storeps1", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREPS1);
11139 def_builtin (MASK_SSE, "__builtin_ia32_storerps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORERPS);
11142 /* Errors in the source file can cause expand_expr to return const0_rtx
11143 where we expect a vector. To avoid crashing, use one of the vector
11144 clear instructions. */
/* X: rtx expected to be a vector value; MODE: its expected vector mode.
   If X is not const0_rtx it is returned unchanged; otherwise a fresh
   MODE register is zeroed and returned (the early return / final
   `return x;` sit on lines elided from this listing — confirm).  */
11146 safe_vector_operand (x, mode)
11148 enum machine_mode mode;
11150 if (x != const0_rtx)
11152 x = gen_reg_rtx (mode);
/* MMX (and 3DNow!) modes are cleared through a DImode view of the
   register; everything else via a TImode view (SSE clear insn).  */
11154 if (VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode))
11155 emit_insn (gen_mmx_clrdi (mode == DImode ? x
11156 : gen_rtx_SUBREG (DImode, x, 0)));
11158 emit_insn (gen_sse_clrti (mode == TImode ? x
11159 : gen_rtx_SUBREG (TImode, x, 0)));
11163 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
/* ICODE: named pattern to emit; ARGLIST: the call's two argument trees;
   TARGET: suggested result rtx (may be replaced).  Expands both
   arguments, coerces them into the modes the insn pattern demands,
   emits the insn, and returns the target (return on elided lines).  */
11166 ix86_expand_binop_builtin (icode, arglist, target)
11167 enum insn_code icode;
11172 tree arg0 = TREE_VALUE (arglist);
11173 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
11174 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11175 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
11176 enum machine_mode tmode = insn_data[icode].operand[0].mode;
11177 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
11178 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
/* Guard against const0_rtx from erroneous source (see
   safe_vector_operand).  */
11180 if (VECTOR_MODE_P (mode0))
11181 op0 = safe_vector_operand (op0, mode0);
11182 if (VECTOR_MODE_P (mode1))
11183 op1 = safe_vector_operand (op1, mode1);
/* Reuse TARGET only if it has the right mode and satisfies the
   output operand's predicate; otherwise grab a fresh register.  */
11186 || GET_MODE (target) != tmode
11187 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11188 target = gen_reg_rtx (tmode);
11190 /* In case the insn wants input operands in modes different from
11191 the result, abort. */
11192 if (GET_MODE (op0) != mode0 || GET_MODE (op1) != mode1)
11195 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
11196 op0 = copy_to_mode_reg (mode0, op0);
11197 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
11198 op1 = copy_to_mode_reg (mode1, op1);
11200 pat = GEN_FCN (icode) (target, op0, op1);
11207 /* Subroutine of ix86_expand_builtin to take care of stores. */
/* ICODE: store pattern; ARGLIST: (address, value) argument trees;
   SHUFFLE: if >= 0, the value is first permuted with sse_shufps using
   this immediate (used by the composite store intrinsics); -1
   presumably means no shuffle — confirm against callers.  */
11210 ix86_expand_store_builtin (icode, arglist, shuffle)
11211 enum insn_code icode;
11216 tree arg0 = TREE_VALUE (arglist);
11217 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
11218 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11219 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
11220 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
11221 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
11223 if (VECTOR_MODE_P (mode1))
11224 op1 = safe_vector_operand (op1, mode1);
/* Destination is a MEM at the pointer argument.  */
11226 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
/* A shuffle mutates op1 in place, so force it into a register even
   when the predicate would have accepted it.  */
11227 if (shuffle >= 0 || ! (*insn_data[icode].operand[1].predicate) (op1, mode1))
11228 op1 = copy_to_mode_reg (mode1, op1);
11230 emit_insn (gen_sse_shufps (op1, op1, op1, GEN_INT (shuffle)));
11231 pat = GEN_FCN (icode) (op0, op1);
11237 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
/* ICODE: unary pattern; ARGLIST: single argument tree; TARGET:
   suggested result rtx; DO_LOAD: nonzero means the argument is a
   pointer and the operand is a MEM at that address (load builtins)
   rather than a value.  */
11240 ix86_expand_unop_builtin (icode, arglist, target, do_load)
11241 enum insn_code icode;
11247 tree arg0 = TREE_VALUE (arglist);
11248 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11249 enum machine_mode tmode = insn_data[icode].operand[0].mode;
11250 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
11253 || GET_MODE (target) != tmode
11254 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11255 target = gen_reg_rtx (tmode);
/* do_load path: dereference the pointer argument.  */
11257 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
11260 if (VECTOR_MODE_P (mode0))
11261 op0 = safe_vector_operand (op0, mode0);
11263 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
11264 op0 = copy_to_mode_reg (mode0, op0);
11267 pat = GEN_FCN (icode) (target, op0);
11274 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
11275 sqrtss, rsqrtss, rcpss. */
/* These scalar insns take the same operand twice: the operation is
   applied to the low element and the upper elements are merged from
   the (identical) second operand.  */
11278 ix86_expand_unop1_builtin (icode, arglist, target)
11279 enum insn_code icode;
11284 tree arg0 = TREE_VALUE (arglist);
11285 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11286 enum machine_mode tmode = insn_data[icode].operand[0].mode;
11287 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
11290 || GET_MODE (target) != tmode
11291 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11292 target = gen_reg_rtx (tmode);
11294 if (VECTOR_MODE_P (mode0))
11295 op0 = safe_vector_operand (op0, mode0);
11297 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
11298 op0 = copy_to_mode_reg (mode0, op0);
/* Note op0 is passed for both input operands — see header comment.  */
11300 pat = GEN_FCN (icode) (target, op0, op0);
11307 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
/* D: the builtin_description entry (supplies icode and the rtx
   comparison code); ARGLIST: the two vector arguments; TARGET:
   suggested result rtx.  Emits the mask-generating compare pattern
   with the comparison rtx as its third input operand.  */
11310 ix86_expand_sse_compare (d, arglist, target)
11311 const struct builtin_description *d;
11316 tree arg0 = TREE_VALUE (arglist);
11317 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
11318 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11319 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
11321 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
11322 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
11323 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
11324 enum rtx_code comparison = d->comparison;
11326 if (VECTOR_MODE_P (mode0))
11327 op0 = safe_vector_operand (op0, mode0);
11328 if (VECTOR_MODE_P (mode1))
11329 op1 = safe_vector_operand (op1, mode1);
11331 /* Swap operands if we have a comparison that isn't available in
/* (the guard testing d->flag and the actual swap are on elided
   lines — GT/GE are implemented as swapped LT/LE; confirm.)  */
11335 rtx tmp = gen_reg_rtx (mode1);
11336 emit_move_insn (tmp, op1);
11342 || GET_MODE (target) != tmode
11343 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
11344 target = gen_reg_rtx (tmode);
11346 if (! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
11347 op0 = copy_to_mode_reg (mode0, op0);
11348 if (! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
11349 op1 = copy_to_mode_reg (mode1, op1);
/* The comparison itself is passed as an extra rtx operand.  */
11351 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
11352 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
11359 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
/* D: builtin_description entry; ARGLIST: the two scalar-float vector
   args; TARGET: ignored on entry — the result is always built fresh as
   a zeroed SImode register whose low QImode part receives the flag
   condition after the comi/ucomi sets EFLAGS.  */
11362 ix86_expand_sse_comi (d, arglist, target)
11363 const struct builtin_description *d;
11368 tree arg0 = TREE_VALUE (arglist);
11369 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
11370 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11371 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
11373 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
11374 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
11375 enum rtx_code comparison = d->comparison;
11377 if (VECTOR_MODE_P (mode0))
11378 op0 = safe_vector_operand (op0, mode0);
11379 if (VECTOR_MODE_P (mode1))
11380 op1 = safe_vector_operand (op1, mode1);
11382 /* Swap operands if we have a comparison that isn't available in
/* (swap logic on elided lines — see ix86_expand_sse_compare.)  */
/* Zero the whole SImode result, then write only its low byte via
   STRICT_LOW_PART below, so the upper bits stay zero.  */
11391 target = gen_reg_rtx (SImode);
11392 emit_move_insn (target, const0_rtx);
11393 target = gen_rtx_SUBREG (QImode, target, 0);
11395 if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
11396 op0 = copy_to_mode_reg (mode0, op0);
11397 if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
11398 op1 = copy_to_mode_reg (mode1, op1);
11400 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
11401 pat = GEN_FCN (d->icode) (op0, op1, op2);
/* Materialize the EFLAGS condition into the low byte of target.  */
11405 emit_insn (gen_rtx_SET (VOIDmode,
11406 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
11407 gen_rtx_fmt_ee (comparison, QImode,
11408 gen_rtx_REG (CCmode, FLAGS_REG),
11414 /* Expand an expression EXP that calls a built-in function,
11415 with result going to TARGET if that's convenient
11416 (and in mode MODE if that's convenient).
11417 SUBTARGET may be used as the target for computing one of EXP's operands.
11418 IGNORE is nonzero if the value is to be ignored. */
/* Main dispatcher for all IA-32 MMX/SSE/3DNow! builtins: special-cases
   the irregular builtins in a switch on FCODE, then falls through to
   table-driven expansion (bdesc_2arg / bdesc_1arg / bdesc_comi).
   Interior lines of this function are elided in this chunk.  */
11421 ix86_expand_builtin (exp, target, subtarget, mode, ignore)
11424 rtx subtarget ATTRIBUTE_UNUSED;
11425 enum machine_mode mode ATTRIBUTE_UNUSED;
11426 int ignore ATTRIBUTE_UNUSED;
11428 const struct builtin_description *d;
11430 enum insn_code icode;
/* EXP is a CALL_EXPR; operand 0 is an ADDR_EXPR of the FUNCTION_DECL.  */
11431 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
11432 tree arglist = TREE_OPERAND (exp, 1);
11433 tree arg0, arg1, arg2, arg3;
11434 rtx op0, op1, op2, pat;
11435 enum machine_mode tmode, mode0, mode1, mode2;
11436 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
/* No-operand state-machine builtins.  */
11440 case IX86_BUILTIN_EMMS:
11441 emit_insn (gen_emms ());
11444 case IX86_BUILTIN_SFENCE:
11445 emit_insn (gen_sfence ());
/* Reinterpret a 32-bit int as the low half of an MMX DImode value and
   back; done with SUBREG moves rather than conversions.  */
11448 case IX86_BUILTIN_M_FROM_INT:
11449 target = gen_reg_rtx (DImode);
11450 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
11451 emit_move_insn (gen_rtx_SUBREG (SImode, target, 0), op0);
11454 case IX86_BUILTIN_M_TO_INT:
11455 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
11456 op0 = copy_to_mode_reg (DImode, op0);
11457 target = gen_reg_rtx (SImode);
11458 emit_move_insn (target, gen_rtx_SUBREG (SImode, op0, 0));
/* pextrw: second operand (the selector) must be an immediate.  */
11461 case IX86_BUILTIN_PEXTRW:
11462 icode = CODE_FOR_mmx_pextrw;
11463 arg0 = TREE_VALUE (arglist);
11464 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
11465 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11466 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
11467 tmode = insn_data[icode].operand[0].mode;
11468 mode0 = insn_data[icode].operand[1].mode;
11469 mode1 = insn_data[icode].operand[2].mode;
11471 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
11472 op0 = copy_to_mode_reg (mode0, op0);
11473 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
11475 /* @@@ better error message */
11476 error ("selector must be an immediate");
11480 || GET_MODE (target) != tmode
11481 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11482 target = gen_reg_rtx (tmode);
11483 pat = GEN_FCN (icode) (target, op0, op1);
/* pinsrw: three operands, last (the selector) must be an immediate.  */
11489 case IX86_BUILTIN_PINSRW:
11490 icode = CODE_FOR_mmx_pinsrw;
11491 arg0 = TREE_VALUE (arglist);
11492 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
11493 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
11494 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11495 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
11496 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
11497 tmode = insn_data[icode].operand[0].mode;
11498 mode0 = insn_data[icode].operand[1].mode;
11499 mode1 = insn_data[icode].operand[2].mode;
11500 mode2 = insn_data[icode].operand[3].mode;
11502 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
11503 op0 = copy_to_mode_reg (mode0, op0);
11504 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
11505 op1 = copy_to_mode_reg (mode1, op1);
11506 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
11508 /* @@@ better error message */
11509 error ("selector must be an immediate");
11513 || GET_MODE (target) != tmode
11514 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11515 target = gen_reg_rtx (tmode);
11516 pat = GEN_FCN (icode) (target, op0, op1, op2);
11522 case IX86_BUILTIN_MASKMOVQ:
11523 icode = CODE_FOR_mmx_maskmovq;
11524 /* Note the arg order is different from the operand order. */
11525 arg1 = TREE_VALUE (arglist);
11526 arg2 = TREE_VALUE (TREE_CHAIN (arglist));
11527 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
11528 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11529 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
11530 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
11531 mode0 = insn_data[icode].operand[0].mode;
11532 mode1 = insn_data[icode].operand[1].mode;
11533 mode2 = insn_data[icode].operand[2].mode;
/* NOTE(review): op0 is checked against operand[1]'s predicate but
   mode0 comes from operand[0] — this looks like it should read
   operand[0].predicate; confirm against the mmx_maskmovq pattern.  */
11535 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
11536 op0 = copy_to_mode_reg (mode0, op0);
11537 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
11538 op1 = copy_to_mode_reg (mode1, op1);
11539 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
11540 op2 = copy_to_mode_reg (mode2, op2);
11541 pat = GEN_FCN (icode) (op0, op1, op2);
/* Single-operand SSE arithmetic handled by the unop1 helper.  */
11547 case IX86_BUILTIN_SQRTSS:
11548 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2, arglist, target);
11549 case IX86_BUILTIN_RSQRTSS:
11550 return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2, arglist, target);
11551 case IX86_BUILTIN_RCPSS:
11552 return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target);
/* SSE loads/stores; the final int argument of the helpers selects how
   the memory operand is treated (see the helpers' definitions).  */
11554 case IX86_BUILTIN_LOADAPS:
11555 return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);
11557 case IX86_BUILTIN_LOADUPS:
11558 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
11560 case IX86_BUILTIN_STOREAPS:
11561 return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist, -1);
11562 case IX86_BUILTIN_STOREUPS:
11563 return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist, -1);
11565 case IX86_BUILTIN_LOADSS:
11566 return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1);
11568 case IX86_BUILTIN_STORESS:
11569 return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist, -1);
/* movhps/movlps as a load: second argument is a pointer, wrapped in a
   MEM of the mode the pattern's operand 2 expects.  */
11571 case IX86_BUILTIN_LOADHPS:
11572 case IX86_BUILTIN_LOADLPS:
11573 icode = (fcode == IX86_BUILTIN_LOADHPS
11574 ? CODE_FOR_sse_movhps : CODE_FOR_sse_movlps);
11575 arg0 = TREE_VALUE (arglist);
11576 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
11577 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11578 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
11579 tmode = insn_data[icode].operand[0].mode;
11580 mode0 = insn_data[icode].operand[1].mode;
11581 mode1 = insn_data[icode].operand[2].mode;
11583 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
11584 op0 = copy_to_mode_reg (mode0, op0);
11585 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
11587 || GET_MODE (target) != tmode
11588 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11589 target = gen_reg_rtx (tmode);
11590 pat = GEN_FCN (icode) (target, op0, op1);
/* movhps/movlps as a store: destination MEM doubles as pattern
   operands 0 and 1 (the insn merges into the stored half).  */
11596 case IX86_BUILTIN_STOREHPS:
11597 case IX86_BUILTIN_STORELPS:
11598 icode = (fcode == IX86_BUILTIN_STOREHPS
11599 ? CODE_FOR_sse_movhps : CODE_FOR_sse_movlps);
11600 arg0 = TREE_VALUE (arglist);
11601 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
11602 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11603 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
11604 mode0 = insn_data[icode].operand[1].mode;
11605 mode1 = insn_data[icode].operand[2].mode;
11607 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
11608 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
11609 op1 = copy_to_mode_reg (mode1, op1);
11611 pat = GEN_FCN (icode) (op0, op0, op1);
/* Non-temporal stores.  */
11617 case IX86_BUILTIN_MOVNTPS:
11618 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist, -1);
11619 case IX86_BUILTIN_MOVNTQ:
11620 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist, -1);
/* MXCSR access goes through a stack slot since the insns take a MEM.  */
11622 case IX86_BUILTIN_LDMXCSR:
11623 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
11624 target = assign_386_stack_local (SImode, 0);
11625 emit_move_insn (target, op0);
11626 emit_insn (gen_ldmxcsr (target));
11629 case IX86_BUILTIN_STMXCSR:
11630 target = assign_386_stack_local (SImode, 0);
11631 emit_insn (gen_stmxcsr (target));
11632 return copy_to_mode_reg (SImode, target);
/* SSE prefetch: hint operand must be an immediate; address forced
   into a Pmode register.  */
11634 case IX86_BUILTIN_PREFETCH:
11635 icode = CODE_FOR_prefetch_sse;
11636 arg0 = TREE_VALUE (arglist);
11637 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
11638 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11639 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
11640 mode0 = insn_data[icode].operand[0].mode;
11641 mode1 = insn_data[icode].operand[1].mode;
11643 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
11645 /* @@@ better error message */
11646 error ("selector must be an immediate");
11650 op0 = copy_to_mode_reg (Pmode, op0);
11651 pat = GEN_FCN (icode) (op0, op1);
/* shufps: third operand (shuffle mask) must be an immediate.  */
11657 case IX86_BUILTIN_SHUFPS:
11658 icode = CODE_FOR_sse_shufps;
11659 arg0 = TREE_VALUE (arglist);
11660 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
11661 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
11662 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11663 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
11664 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
11665 tmode = insn_data[icode].operand[0].mode;
11666 mode0 = insn_data[icode].operand[1].mode;
11667 mode1 = insn_data[icode].operand[2].mode;
11668 mode2 = insn_data[icode].operand[3].mode;
11670 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
11671 op0 = copy_to_mode_reg (mode0, op0);
11672 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
11673 op1 = copy_to_mode_reg (mode1, op1);
11674 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
11676 /* @@@ better error message */
11677 error ("mask must be an immediate");
11681 || GET_MODE (target) != tmode
11682 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11683 target = gen_reg_rtx (tmode);
11684 pat = GEN_FCN (icode) (target, op0, op1, op2);
/* pshufw: like shufps but one source; note mode1/mode2 here name
   operands 1 and 2 of the pattern (shifted from the usual scheme).  */
11690 case IX86_BUILTIN_PSHUFW:
11691 icode = CODE_FOR_mmx_pshufw;
11692 arg0 = TREE_VALUE (arglist);
11693 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
11694 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11695 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
11696 tmode = insn_data[icode].operand[0].mode;
11697 mode1 = insn_data[icode].operand[1].mode;
11698 mode2 = insn_data[icode].operand[2].mode;
11700 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
11701 op0 = copy_to_mode_reg (mode1, op0);
11702 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
11704 /* @@@ better error message */
11705 error ("mask must be an immediate");
11709 || GET_MODE (target) != tmode
11710 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11711 target = gen_reg_rtx (tmode);
11712 pat = GEN_FCN (icode) (target, op0, op1);
/* 3DNow! builtins, mostly table-style one- and two-operand patterns.  */
11718 case IX86_BUILTIN_FEMMS:
11719 emit_insn (gen_femms ());
11722 case IX86_BUILTIN_PAVGUSB:
11723 return ix86_expand_binop_builtin (CODE_FOR_pavgusb, arglist, target);
11725 case IX86_BUILTIN_PF2ID:
11726 return ix86_expand_unop_builtin (CODE_FOR_pf2id, arglist, target, 0);
11728 case IX86_BUILTIN_PFACC:
11729 return ix86_expand_binop_builtin (CODE_FOR_pfacc, arglist, target);
11731 case IX86_BUILTIN_PFADD:
11732 return ix86_expand_binop_builtin (CODE_FOR_addv2sf3, arglist, target);
11734 case IX86_BUILTIN_PFCMPEQ:
11735 return ix86_expand_binop_builtin (CODE_FOR_eqv2sf3, arglist, target);
11737 case IX86_BUILTIN_PFCMPGE:
11738 return ix86_expand_binop_builtin (CODE_FOR_gev2sf3, arglist, target);
11740 case IX86_BUILTIN_PFCMPGT:
11741 return ix86_expand_binop_builtin (CODE_FOR_gtv2sf3, arglist, target);
11743 case IX86_BUILTIN_PFMAX:
11744 return ix86_expand_binop_builtin (CODE_FOR_pfmaxv2sf3, arglist, target);
11746 case IX86_BUILTIN_PFMIN:
11747 return ix86_expand_binop_builtin (CODE_FOR_pfminv2sf3, arglist, target);
11749 case IX86_BUILTIN_PFMUL:
11750 return ix86_expand_binop_builtin (CODE_FOR_mulv2sf3, arglist, target);
11752 case IX86_BUILTIN_PFRCP:
11753 return ix86_expand_unop_builtin (CODE_FOR_pfrcpv2sf2, arglist, target, 0);
11755 case IX86_BUILTIN_PFRCPIT1:
11756 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit1v2sf3, arglist, target);
11758 case IX86_BUILTIN_PFRCPIT2:
11759 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit2v2sf3, arglist, target);
11761 case IX86_BUILTIN_PFRSQIT1:
11762 return ix86_expand_binop_builtin (CODE_FOR_pfrsqit1v2sf3, arglist, target);
11764 case IX86_BUILTIN_PFRSQRT:
11765 return ix86_expand_unop_builtin (CODE_FOR_pfrsqrtv2sf2, arglist, target, 0);
11767 case IX86_BUILTIN_PFSUB:
11768 return ix86_expand_binop_builtin (CODE_FOR_subv2sf3, arglist, target);
11770 case IX86_BUILTIN_PFSUBR:
11771 return ix86_expand_binop_builtin (CODE_FOR_subrv2sf3, arglist, target);
11773 case IX86_BUILTIN_PI2FD:
11774 return ix86_expand_unop_builtin (CODE_FOR_floatv2si2, arglist, target, 0);
11776 case IX86_BUILTIN_PMULHRW:
11777 return ix86_expand_binop_builtin (CODE_FOR_pmulhrwv4hi3, arglist, target);
/* 3DNow! prefetches: address-only patterns, address forced to Pmode.
   NOTE(review): mode0 is computed but not visibly used here — the
   operand predicate does not appear to be checked; confirm upstream.  */
11779 case IX86_BUILTIN_PREFETCH_3DNOW:
11780 icode = CODE_FOR_prefetch_3dnow;
11781 arg0 = TREE_VALUE (arglist);
11782 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11783 mode0 = insn_data[icode].operand[0].mode;
11784 pat = GEN_FCN (icode) (copy_to_mode_reg (Pmode, op0));
11790 case IX86_BUILTIN_PREFETCHW:
11791 icode = CODE_FOR_prefetchw;
11792 arg0 = TREE_VALUE (arglist);
11793 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11794 mode0 = insn_data[icode].operand[0].mode;
11795 pat = GEN_FCN (icode) (copy_to_mode_reg (Pmode, op0));
/* 3DNow! Athlon extensions.  */
11801 case IX86_BUILTIN_PF2IW:
11802 return ix86_expand_unop_builtin (CODE_FOR_pf2iw, arglist, target, 0);
11804 case IX86_BUILTIN_PFNACC:
11805 return ix86_expand_binop_builtin (CODE_FOR_pfnacc, arglist, target);
11807 case IX86_BUILTIN_PFPNACC:
11808 return ix86_expand_binop_builtin (CODE_FOR_pfpnacc, arglist, target);
11810 case IX86_BUILTIN_PI2FW:
11811 return ix86_expand_unop_builtin (CODE_FOR_pi2fw, arglist, target, 0);
11813 case IX86_BUILTIN_PSWAPDSI:
11814 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2si2, arglist, target, 0);
11816 case IX86_BUILTIN_PSWAPDSF:
11817 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2, arglist, target, 0);
11819 /* Composite intrinsics. */
/* _mm_set1_ps: spill the scalar to a stack slot, loadss it, then
   broadcast lane 0 with shufps mask 0.  */
11820 case IX86_BUILTIN_SETPS1:
11821 target = assign_386_stack_local (SFmode, 0);
11822 arg0 = TREE_VALUE (arglist);
11823 emit_move_insn (adjust_address (target, SFmode, 0),
11824 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
11825 op0 = gen_reg_rtx (V4SFmode);
11826 emit_insn (gen_sse_loadss (op0, adjust_address (target, V4SFmode, 0)));
11827 emit_insn (gen_sse_shufps (op0, op0, op0, GEN_INT (0)));
/* _mm_set_ps: store the four scalars into a V4SF stack slot, then
   load the whole vector with movaps.  */
11830 case IX86_BUILTIN_SETPS:
11831 target = assign_386_stack_local (V4SFmode, 0);
11832 arg0 = TREE_VALUE (arglist);
11833 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
11834 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
11835 arg3 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist))));
11836 emit_move_insn (adjust_address (target, SFmode, 0),
11837 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
11838 emit_move_insn (adjust_address (target, SFmode, 4),
11839 expand_expr (arg1, NULL_RTX, VOIDmode, 0));
11840 emit_move_insn (adjust_address (target, SFmode, 8),
11841 expand_expr (arg2, NULL_RTX, VOIDmode, 0));
11842 emit_move_insn (adjust_address (target, SFmode, 12),
11843 expand_expr (arg3, NULL_RTX, VOIDmode, 0));
11844 op0 = gen_reg_rtx (V4SFmode);
11845 emit_insn (gen_sse_movaps (op0, target));
11848 case IX86_BUILTIN_CLRPS:
11849 target = gen_reg_rtx (TImode);
11850 emit_insn (gen_sse_clrti (target));
/* _mm_loadr_ps: plain load, then reverse the lanes (mask 0x1b).  */
11853 case IX86_BUILTIN_LOADRPS:
11854 target = ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist,
11855 gen_reg_rtx (V4SFmode), 1);
11856 emit_insn (gen_sse_shufps (target, target, target, GEN_INT (0x1b)));
/* _mm_load1_ps: loadss then broadcast lane 0.  */
11859 case IX86_BUILTIN_LOADPS1:
11860 target = ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist,
11861 gen_reg_rtx (V4SFmode), 1);
11862 emit_insn (gen_sse_shufps (target, target, target, const0_rtx));
11865 case IX86_BUILTIN_STOREPS1:
11866 return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist, 0);
11867 case IX86_BUILTIN_STORERPS:
11868 return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist, 0x1B);
11870 case IX86_BUILTIN_MMX_ZERO:
11871 target = gen_reg_rtx (DImode);
11872 emit_insn (gen_mmx_clrdi (target));
/* Fallback: scan the builtin description tables for regular two-op,
   one-op and comi builtins.  */
11879 for (i = 0, d = bdesc_2arg; i < sizeof (bdesc_2arg) / sizeof *d; i++, d++)
11880 if (d->code == fcode)
11882 /* Compares are treated specially. */
11883 if (d->icode == CODE_FOR_maskcmpv4sf3
11884 || d->icode == CODE_FOR_vmmaskcmpv4sf3
11885 || d->icode == CODE_FOR_maskncmpv4sf3
11886 || d->icode == CODE_FOR_vmmaskncmpv4sf3)
11887 return ix86_expand_sse_compare (d, arglist, target);
11889 return ix86_expand_binop_builtin (d->icode, arglist, target);
11892 for (i = 0, d = bdesc_1arg; i < sizeof (bdesc_1arg) / sizeof *d; i++, d++)
11893 if (d->code == fcode)
11894 return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
11896 for (i = 0, d = bdesc_comi; i < sizeof (bdesc_comi) / sizeof *d; i++, d++)
11897 if (d->code == fcode)
11898 return ix86_expand_sse_comi (d, arglist, target);
11900 /* @@@ Should really do something sensible here. */
11904 /* Store OPERAND to the memory after reload is completed. This means
11905 that we can't easily use assign_stack_local. */
/* Returns a MEM of MODE holding OPERAND.  On 64-bit targets with a red
   zone the slot is placed below the stack pointer without adjusting it;
   otherwise the value is pushed (PRE_DEC), possibly as two SImode words
   for DImode on 32-bit.  Pair with ix86_free_from_memory.  Interior
   lines (switch labels, abort paths) are elided in this chunk.  */
11907 ix86_force_to_memory (mode, operand)
11908 enum machine_mode mode;
/* Only valid after reload: we manipulate the hard stack pointer.  */
11912 if (!reload_completed)
11914 if (TARGET_64BIT && TARGET_RED_ZONE)
/* Red zone: address the slot at sp - RED_ZONE_SIZE, no sp change.  */
11916 result = gen_rtx_MEM (mode,
11917 gen_rtx_PLUS (Pmode,
11919 GEN_INT (-RED_ZONE_SIZE)));
11920 emit_move_insn (result, operand);
11922 else if (TARGET_64BIT && !TARGET_RED_ZONE)
/* No red zone on 64-bit: push the value as a DImode word.  */
11928 operand = gen_lowpart (DImode, operand);
11932 gen_rtx_SET (VOIDmode,
11933 gen_rtx_MEM (DImode,
11934 gen_rtx_PRE_DEC (DImode,
11935 stack_pointer_rtx)),
11941 result = gen_rtx_MEM (mode, stack_pointer_rtx);
/* 32-bit DImode: push the two SImode halves (high word first, since
   PRE_DEC grows the stack downward).  */
11950 split_di (&operand, 1, operands, operands + 1);
11952 gen_rtx_SET (VOIDmode,
11953 gen_rtx_MEM (SImode,
11954 gen_rtx_PRE_DEC (Pmode,
11955 stack_pointer_rtx)),
11958 gen_rtx_SET (VOIDmode,
11959 gen_rtx_MEM (SImode,
11960 gen_rtx_PRE_DEC (Pmode,
11961 stack_pointer_rtx)),
11966 /* It is better to store HImodes as SImodes. */
11967 if (!TARGET_PARTIAL_REG_STALL)
11968 operand = gen_lowpart (SImode, operand);
11972 gen_rtx_SET (VOIDmode,
11973 gen_rtx_MEM (GET_MODE (operand),
11974 gen_rtx_PRE_DEC (SImode,
11975 stack_pointer_rtx)),
11981 result = gen_rtx_MEM (mode, stack_pointer_rtx);
11986 /* Free operand from the memory. */
/* Undo ix86_force_to_memory: when the value was actually pushed (i.e.
   no red zone was used), pop the stack space back.  Size computation
   lines are elided in this chunk.  */
11988 ix86_free_from_memory (mode)
11989 enum machine_mode mode;
/* Red-zone slots need no deallocation — sp was never moved.  */
11991 if (!TARGET_64BIT || !TARGET_RED_ZONE)
/* Slot size mirrors the push logic in ix86_force_to_memory.  */
11995 if (mode == DImode || TARGET_64BIT)
11997 else if (mode == HImode && TARGET_PARTIAL_REG_STALL)
12001 /* Use LEA to deallocate stack space. In peephole2 it will be converted
12002 to pop or add instruction if registers are available. */
12003 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
12004 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
12009 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
12010 QImode must go into class Q_REGS.
12011 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
12012 movdf to do mem-to-mem moves through integer regs. */
/* Implements PREFERRED_RELOAD_CLASS for x86: returns the class to use
   when reloading X into CLASS.  Several return statements are elided
   in this chunk.  */
12014 ix86_preferred_reload_class (x, class)
12016 enum reg_class class;
/* Floating-point constants (not integer CONST_DOUBLEs, whose mode is
   VOIDmode) get special handling.  */
12018 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
12020 /* SSE can't load any constant directly yet. */
12021 if (SSE_CLASS_P (class))
12023 /* Floats can load 0 and 1. */
12024 if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x))
12026 /* Limit class to non-SSE. Use GENERAL_REGS if possible. */
12027 if (MAYBE_SSE_CLASS_P (class))
12028 return (reg_class_subset_p (class, GENERAL_REGS)
12029 ? GENERAL_REGS : FLOAT_REGS);
12033 /* General regs can load everything. */
12034 if (reg_class_subset_p (class, GENERAL_REGS))
12035 return GENERAL_REGS;
12036 /* In case we haven't resolved FLOAT or SSE yet, give up. */
12037 if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
/* MMX registers can't hold constants; QImode wants byte-addressable
   Q_REGS on 32-bit.  */
12040 if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
12042 if (GET_MODE (x) == QImode && ! reg_class_subset_p (class, Q_REGS))
12047 /* If we are copying between general and FP registers, we need a memory
12048 location. The same is true for SSE and MMX registers.
12050 The macro can't work reliably when one of the CLASSES is class containing
12051 registers from multiple units (SSE, MMX, integer). We avoid this by never
12052 combining those units in single alternative in the machine description.
12053 Ensure that this constraint holds to avoid unexpected surprises.
12055 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
12056 enforce these sanity checks. */
/* Returns nonzero when a CLASS1<->CLASS2 copy in MODE must bounce
   through memory.  The consequence of a failed sanity check (between
   the condition below and the return) is elided in this chunk.  */
12058 ix86_secondary_memory_needed (class1, class2, mode, strict)
12059 enum reg_class class1, class2;
12060 enum machine_mode mode;
/* Sanity check: neither class may mix units (SSE/MMX/FP/integer).  */
12063 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
12064 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
12065 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
12066 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
12067 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
12068 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
/* FP<->non-FP always needs memory; SSE/MMX<->other needs it except
   for SImode, which can move directly (movd).  */
12075 return (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)
12076 || (SSE_CLASS_P (class1) != SSE_CLASS_P (class2)
12077 && (mode) != SImode)
12078 || (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
12079 && (mode) != SImode));
12081 /* Return the cost of moving data from a register in class CLASS1 to
12082 one in class CLASS2.
12084 It is not required that the cost always equal 2 when FROM is the same as TO;
12085 on some machines it is expensive to move between registers if they are not
12086 general registers. */
/* Costs come from the active ix86_cost table; secondary-memory copies
   are priced as store + load (plus a stall penalty set between these
   visible lines, which are partly elided in this chunk).  */
12088 ix86_register_move_cost (mode, class1, class2)
12089 enum machine_mode mode;
12090 enum reg_class class1, class2;
12092 /* In case we require secondary memory, compute cost of the store followed
12093 by load. In case of copying from general_purpose_register we may emit
12094 multiple stores followed by single load causing memory size mismatch
12095 stall. Count this as arbitarily high cost of 20. */
12096 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
/* Multi-register stores followed by one load trigger the mismatch
   stall described above.  */
12099 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
12101 return (MEMORY_MOVE_COST (mode, class1, 0)
12102 + MEMORY_MOVE_COST (mode, class2, 1) + add_cost);
12104 /* Moves between SSE/MMX and integer unit are expensive. */
12105 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
12106 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
12107 return ix86_cost->mmxsse_to_integer;
/* Within a single unit, use that unit's reg-reg move cost.  */
12108 if (MAYBE_FLOAT_CLASS_P (class1))
12109 return ix86_cost->fp_move;
12110 if (MAYBE_SSE_CLASS_P (class1))
12111 return ix86_cost->sse_move;
12112 if (MAYBE_MMX_CLASS_P (class1))
12113 return ix86_cost->mmx_move;
12117 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
/* Implements HARD_REGNO_MODE_OK.  Some return statements between the
   visible conditions are elided in this chunk.  */
12119 ix86_hard_regno_mode_ok (regno, mode)
12121 enum machine_mode mode;
12123 /* Flags and only flags can only hold CCmode values. */
12124 if (CC_REGNO_P (regno))
12125 return GET_MODE_CLASS (mode) == MODE_CC;
/* Conversely, CC/random/partial-int modes go nowhere else.  */
12126 if (GET_MODE_CLASS (mode) == MODE_CC
12127 || GET_MODE_CLASS (mode) == MODE_RANDOM
12128 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
/* Each register bank validates with its own mode predicate.  */
12130 if (FP_REGNO_P (regno))
12131 return VALID_FP_MODE_P (mode);
12132 if (SSE_REGNO_P (regno))
12133 return VALID_SSE_REG_MODE (mode);
12134 if (MMX_REGNO_P (regno))
12135 return VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode);
12136 /* We handle both integer and floats in the general purpose registers.
12137 In future we should be able to handle vector modes as well. */
12138 if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode))
12140 /* Take care for QImode values - they can be in non-QI regs, but then
12141 they do cause partial register stalls. */
12142 if (regno < 4 || mode != QImode || TARGET_64BIT)
/* Allow QImode in %esi/%edi etc. only when stalls don't matter or
   during/after reload, where we cannot be choosy.  */
12144 return reload_in_progress || reload_completed || !TARGET_PARTIAL_REG_STALL;
12147 /* Return the cost of moving data of mode M between a
12148 register and memory. A value of 2 is the default; this cost is
12149 relative to those in `REGISTER_MOVE_COST'.
12151 If moving between registers and memory is more expensive than
12152 between two registers, you should define this macro to express the
12155 Model also increased moving costs of QImode registers in non
/* IN nonzero means a load (memory -> register), zero a store.  The
   switch bodies computing `index' from GET_MODE_SIZE are elided in
   this chunk.  */
12159 ix86_memory_move_cost (mode, class, in)
12160 enum machine_mode mode;
12161 enum reg_class class;
/* x87 classes: index the fp load/store cost tables by size.  */
12164 if (FLOAT_CLASS_P (class))
12182 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
12184 if (SSE_CLASS_P (class))
12187 switch (GET_MODE_SIZE (mode))
12201 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
12203 if (MMX_CLASS_P (class))
12206 switch (GET_MODE_SIZE (mode))
12217 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
/* General-purpose registers, keyed by size.  */
12219 switch (GET_MODE_SIZE (mode))
/* Byte loads from non-Q classes go through movzbl; byte stores from
   non-Q classes carry an extra penalty of 4.  */
12223 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
12224 : ix86_cost->movzbl_load);
12226 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
12227 : ix86_cost->int_store[0] + 4);
12230 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
12232 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
12233 if (mode == TFmode)
12235 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
12236 * (int) GET_MODE_SIZE (mode) / 4);
12240 #ifdef DO_GLOBAL_CTORS_BODY
12242 ix86_svr3_asm_out_constructor (symbol, priority)
12244 int priority ATTRIBUTE_UNUSED;
12247 fputs ("\tpushl $", asm_out_file);
12248 assemble_name (asm_out_file, XSTR (symbol, 0));
12249 fputc ('\n', asm_out_file);