/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
   2002, 2003 Free Software Foundation, Inc.

This file is part of GNU CC.

GNU CC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GNU CC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GNU CC; see the file COPYING.  If not, write to
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.  */
24 #include "coretypes.h"
30 #include "hard-reg-set.h"
32 #include "insn-config.h"
33 #include "conditions.h"
35 #include "insn-attr.h"
43 #include "basic-block.h"
46 #include "target-def.h"
47 #include "langhooks.h"
/* Fallback definition when the target configuration does not provide
   a stack-probe limit of its own.  */
#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif
/* Return index of given mode in mult and division cost tables.
   QI/HI/SI/DImode map to slots 0-3; every other mode (the fifth
   table entry) maps to slot 4.  */
#define MODE_INDEX(mode) \
  ((mode) == QImode ? 0 \
   : (mode) == HImode ? 1 \
   : (mode) == SImode ? 2 \
   : (mode) == DImode ? 3 \
   : 4)
61 /* Processor costs (relative to an add) */
63 struct processor_costs size_cost = { /* costs for tunning for size */
64 2, /* cost of an add instruction */
65 3, /* cost of a lea instruction */
66 2, /* variable shift costs */
67 3, /* constant shift costs */
68 {3, 3, 3, 3, 5}, /* cost of starting a multiply */
69 0, /* cost of multiply per each bit set */
70 {3, 3, 3, 3, 5}, /* cost of a divide/mod */
71 3, /* cost of movsx */
72 3, /* cost of movzx */
75 2, /* cost for loading QImode using movzbl */
76 {2, 2, 2}, /* cost of loading integer registers
77 in QImode, HImode and SImode.
78 Relative to reg-reg move (2). */
79 {2, 2, 2}, /* cost of storing integer registers */
80 2, /* cost of reg,reg fld/fst */
81 {2, 2, 2}, /* cost of loading fp registers
82 in SFmode, DFmode and XFmode */
83 {2, 2, 2}, /* cost of loading integer registers */
84 3, /* cost of moving MMX register */
85 {3, 3}, /* cost of loading MMX registers
86 in SImode and DImode */
87 {3, 3}, /* cost of storing MMX registers
88 in SImode and DImode */
89 3, /* cost of moving SSE register */
90 {3, 3, 3}, /* cost of loading SSE registers
91 in SImode, DImode and TImode */
92 {3, 3, 3}, /* cost of storing SSE registers
93 in SImode, DImode and TImode */
94 3, /* MMX or SSE register to integer */
95 0, /* size of prefetch block */
96 0, /* number of parallel prefetches */
98 2, /* cost of FADD and FSUB insns. */
99 2, /* cost of FMUL instruction. */
100 2, /* cost of FDIV instruction. */
101 2, /* cost of FABS instruction. */
102 2, /* cost of FCHS instruction. */
103 2, /* cost of FSQRT instruction. */
106 /* Processor costs (relative to an add) */
108 struct processor_costs i386_cost = { /* 386 specific costs */
109 1, /* cost of an add instruction */
110 1, /* cost of a lea instruction */
111 3, /* variable shift costs */
112 2, /* constant shift costs */
113 {6, 6, 6, 6, 6}, /* cost of starting a multiply */
114 1, /* cost of multiply per each bit set */
115 {23, 23, 23, 23, 23}, /* cost of a divide/mod */
116 3, /* cost of movsx */
117 2, /* cost of movzx */
118 15, /* "large" insn */
120 4, /* cost for loading QImode using movzbl */
121 {2, 4, 2}, /* cost of loading integer registers
122 in QImode, HImode and SImode.
123 Relative to reg-reg move (2). */
124 {2, 4, 2}, /* cost of storing integer registers */
125 2, /* cost of reg,reg fld/fst */
126 {8, 8, 8}, /* cost of loading fp registers
127 in SFmode, DFmode and XFmode */
128 {8, 8, 8}, /* cost of loading integer registers */
129 2, /* cost of moving MMX register */
130 {4, 8}, /* cost of loading MMX registers
131 in SImode and DImode */
132 {4, 8}, /* cost of storing MMX registers
133 in SImode and DImode */
134 2, /* cost of moving SSE register */
135 {4, 8, 16}, /* cost of loading SSE registers
136 in SImode, DImode and TImode */
137 {4, 8, 16}, /* cost of storing SSE registers
138 in SImode, DImode and TImode */
139 3, /* MMX or SSE register to integer */
140 0, /* size of prefetch block */
141 0, /* number of parallel prefetches */
143 23, /* cost of FADD and FSUB insns. */
144 27, /* cost of FMUL instruction. */
145 88, /* cost of FDIV instruction. */
146 22, /* cost of FABS instruction. */
147 24, /* cost of FCHS instruction. */
148 122, /* cost of FSQRT instruction. */
152 struct processor_costs i486_cost = { /* 486 specific costs */
153 1, /* cost of an add instruction */
154 1, /* cost of a lea instruction */
155 3, /* variable shift costs */
156 2, /* constant shift costs */
157 {12, 12, 12, 12, 12}, /* cost of starting a multiply */
158 1, /* cost of multiply per each bit set */
159 {40, 40, 40, 40, 40}, /* cost of a divide/mod */
160 3, /* cost of movsx */
161 2, /* cost of movzx */
162 15, /* "large" insn */
164 4, /* cost for loading QImode using movzbl */
165 {2, 4, 2}, /* cost of loading integer registers
166 in QImode, HImode and SImode.
167 Relative to reg-reg move (2). */
168 {2, 4, 2}, /* cost of storing integer registers */
169 2, /* cost of reg,reg fld/fst */
170 {8, 8, 8}, /* cost of loading fp registers
171 in SFmode, DFmode and XFmode */
172 {8, 8, 8}, /* cost of loading integer registers */
173 2, /* cost of moving MMX register */
174 {4, 8}, /* cost of loading MMX registers
175 in SImode and DImode */
176 {4, 8}, /* cost of storing MMX registers
177 in SImode and DImode */
178 2, /* cost of moving SSE register */
179 {4, 8, 16}, /* cost of loading SSE registers
180 in SImode, DImode and TImode */
181 {4, 8, 16}, /* cost of storing SSE registers
182 in SImode, DImode and TImode */
183 3, /* MMX or SSE register to integer */
184 0, /* size of prefetch block */
185 0, /* number of parallel prefetches */
187 8, /* cost of FADD and FSUB insns. */
188 16, /* cost of FMUL instruction. */
189 73, /* cost of FDIV instruction. */
190 3, /* cost of FABS instruction. */
191 3, /* cost of FCHS instruction. */
192 83, /* cost of FSQRT instruction. */
196 struct processor_costs pentium_cost = {
197 1, /* cost of an add instruction */
198 1, /* cost of a lea instruction */
199 4, /* variable shift costs */
200 1, /* constant shift costs */
201 {11, 11, 11, 11, 11}, /* cost of starting a multiply */
202 0, /* cost of multiply per each bit set */
203 {25, 25, 25, 25, 25}, /* cost of a divide/mod */
204 3, /* cost of movsx */
205 2, /* cost of movzx */
206 8, /* "large" insn */
208 6, /* cost for loading QImode using movzbl */
209 {2, 4, 2}, /* cost of loading integer registers
210 in QImode, HImode and SImode.
211 Relative to reg-reg move (2). */
212 {2, 4, 2}, /* cost of storing integer registers */
213 2, /* cost of reg,reg fld/fst */
214 {2, 2, 6}, /* cost of loading fp registers
215 in SFmode, DFmode and XFmode */
216 {4, 4, 6}, /* cost of loading integer registers */
217 8, /* cost of moving MMX register */
218 {8, 8}, /* cost of loading MMX registers
219 in SImode and DImode */
220 {8, 8}, /* cost of storing MMX registers
221 in SImode and DImode */
222 2, /* cost of moving SSE register */
223 {4, 8, 16}, /* cost of loading SSE registers
224 in SImode, DImode and TImode */
225 {4, 8, 16}, /* cost of storing SSE registers
226 in SImode, DImode and TImode */
227 3, /* MMX or SSE register to integer */
228 0, /* size of prefetch block */
229 0, /* number of parallel prefetches */
231 3, /* cost of FADD and FSUB insns. */
232 3, /* cost of FMUL instruction. */
233 39, /* cost of FDIV instruction. */
234 1, /* cost of FABS instruction. */
235 1, /* cost of FCHS instruction. */
236 70, /* cost of FSQRT instruction. */
240 struct processor_costs pentiumpro_cost = {
241 1, /* cost of an add instruction */
242 1, /* cost of a lea instruction */
243 1, /* variable shift costs */
244 1, /* constant shift costs */
245 {4, 4, 4, 4, 4}, /* cost of starting a multiply */
246 0, /* cost of multiply per each bit set */
247 {17, 17, 17, 17, 17}, /* cost of a divide/mod */
248 1, /* cost of movsx */
249 1, /* cost of movzx */
250 8, /* "large" insn */
252 2, /* cost for loading QImode using movzbl */
253 {4, 4, 4}, /* cost of loading integer registers
254 in QImode, HImode and SImode.
255 Relative to reg-reg move (2). */
256 {2, 2, 2}, /* cost of storing integer registers */
257 2, /* cost of reg,reg fld/fst */
258 {2, 2, 6}, /* cost of loading fp registers
259 in SFmode, DFmode and XFmode */
260 {4, 4, 6}, /* cost of loading integer registers */
261 2, /* cost of moving MMX register */
262 {2, 2}, /* cost of loading MMX registers
263 in SImode and DImode */
264 {2, 2}, /* cost of storing MMX registers
265 in SImode and DImode */
266 2, /* cost of moving SSE register */
267 {2, 2, 8}, /* cost of loading SSE registers
268 in SImode, DImode and TImode */
269 {2, 2, 8}, /* cost of storing SSE registers
270 in SImode, DImode and TImode */
271 3, /* MMX or SSE register to integer */
272 32, /* size of prefetch block */
273 6, /* number of parallel prefetches */
275 3, /* cost of FADD and FSUB insns. */
276 5, /* cost of FMUL instruction. */
277 56, /* cost of FDIV instruction. */
278 2, /* cost of FABS instruction. */
279 2, /* cost of FCHS instruction. */
280 56, /* cost of FSQRT instruction. */
284 struct processor_costs k6_cost = {
285 1, /* cost of an add instruction */
286 2, /* cost of a lea instruction */
287 1, /* variable shift costs */
288 1, /* constant shift costs */
289 {3, 3, 3, 3, 3}, /* cost of starting a multiply */
290 0, /* cost of multiply per each bit set */
291 {18, 18, 18, 18, 18}, /* cost of a divide/mod */
292 2, /* cost of movsx */
293 2, /* cost of movzx */
294 8, /* "large" insn */
296 3, /* cost for loading QImode using movzbl */
297 {4, 5, 4}, /* cost of loading integer registers
298 in QImode, HImode and SImode.
299 Relative to reg-reg move (2). */
300 {2, 3, 2}, /* cost of storing integer registers */
301 4, /* cost of reg,reg fld/fst */
302 {6, 6, 6}, /* cost of loading fp registers
303 in SFmode, DFmode and XFmode */
304 {4, 4, 4}, /* cost of loading integer registers */
305 2, /* cost of moving MMX register */
306 {2, 2}, /* cost of loading MMX registers
307 in SImode and DImode */
308 {2, 2}, /* cost of storing MMX registers
309 in SImode and DImode */
310 2, /* cost of moving SSE register */
311 {2, 2, 8}, /* cost of loading SSE registers
312 in SImode, DImode and TImode */
313 {2, 2, 8}, /* cost of storing SSE registers
314 in SImode, DImode and TImode */
315 6, /* MMX or SSE register to integer */
316 32, /* size of prefetch block */
317 1, /* number of parallel prefetches */
319 2, /* cost of FADD and FSUB insns. */
320 2, /* cost of FMUL instruction. */
321 56, /* cost of FDIV instruction. */
322 2, /* cost of FABS instruction. */
323 2, /* cost of FCHS instruction. */
324 56, /* cost of FSQRT instruction. */
328 struct processor_costs athlon_cost = {
329 1, /* cost of an add instruction */
330 2, /* cost of a lea instruction */
331 1, /* variable shift costs */
332 1, /* constant shift costs */
333 {5, 5, 5, 5, 5}, /* cost of starting a multiply */
334 0, /* cost of multiply per each bit set */
335 {18, 26, 42, 74, 74}, /* cost of a divide/mod */
336 1, /* cost of movsx */
337 1, /* cost of movzx */
338 8, /* "large" insn */
340 4, /* cost for loading QImode using movzbl */
341 {3, 4, 3}, /* cost of loading integer registers
342 in QImode, HImode and SImode.
343 Relative to reg-reg move (2). */
344 {3, 4, 3}, /* cost of storing integer registers */
345 4, /* cost of reg,reg fld/fst */
346 {4, 4, 12}, /* cost of loading fp registers
347 in SFmode, DFmode and XFmode */
348 {6, 6, 8}, /* cost of loading integer registers */
349 2, /* cost of moving MMX register */
350 {4, 4}, /* cost of loading MMX registers
351 in SImode and DImode */
352 {4, 4}, /* cost of storing MMX registers
353 in SImode and DImode */
354 2, /* cost of moving SSE register */
355 {4, 4, 6}, /* cost of loading SSE registers
356 in SImode, DImode and TImode */
357 {4, 4, 5}, /* cost of storing SSE registers
358 in SImode, DImode and TImode */
359 5, /* MMX or SSE register to integer */
360 64, /* size of prefetch block */
361 6, /* number of parallel prefetches */
363 4, /* cost of FADD and FSUB insns. */
364 4, /* cost of FMUL instruction. */
365 24, /* cost of FDIV instruction. */
366 2, /* cost of FABS instruction. */
367 2, /* cost of FCHS instruction. */
368 35, /* cost of FSQRT instruction. */
372 struct processor_costs k8_cost = {
373 1, /* cost of an add instruction */
374 2, /* cost of a lea instruction */
375 1, /* variable shift costs */
376 1, /* constant shift costs */
377 {3, 4, 3, 4, 5}, /* cost of starting a multiply */
378 0, /* cost of multiply per each bit set */
379 {18, 26, 42, 74, 74}, /* cost of a divide/mod */
380 1, /* cost of movsx */
381 1, /* cost of movzx */
382 8, /* "large" insn */
384 4, /* cost for loading QImode using movzbl */
385 {3, 4, 3}, /* cost of loading integer registers
386 in QImode, HImode and SImode.
387 Relative to reg-reg move (2). */
388 {3, 4, 3}, /* cost of storing integer registers */
389 4, /* cost of reg,reg fld/fst */
390 {4, 4, 12}, /* cost of loading fp registers
391 in SFmode, DFmode and XFmode */
392 {6, 6, 8}, /* cost of loading integer registers */
393 2, /* cost of moving MMX register */
394 {3, 3}, /* cost of loading MMX registers
395 in SImode and DImode */
396 {4, 4}, /* cost of storing MMX registers
397 in SImode and DImode */
398 2, /* cost of moving SSE register */
399 {4, 3, 6}, /* cost of loading SSE registers
400 in SImode, DImode and TImode */
401 {4, 4, 5}, /* cost of storing SSE registers
402 in SImode, DImode and TImode */
403 5, /* MMX or SSE register to integer */
404 64, /* size of prefetch block */
405 6, /* number of parallel prefetches */
407 4, /* cost of FADD and FSUB insns. */
408 4, /* cost of FMUL instruction. */
409 19, /* cost of FDIV instruction. */
410 2, /* cost of FABS instruction. */
411 2, /* cost of FCHS instruction. */
412 35, /* cost of FSQRT instruction. */
416 struct processor_costs pentium4_cost = {
417 1, /* cost of an add instruction */
418 1, /* cost of a lea instruction */
419 4, /* variable shift costs */
420 4, /* constant shift costs */
421 {15, 15, 15, 15, 15}, /* cost of starting a multiply */
422 0, /* cost of multiply per each bit set */
423 {56, 56, 56, 56, 56}, /* cost of a divide/mod */
424 1, /* cost of movsx */
425 1, /* cost of movzx */
426 16, /* "large" insn */
428 2, /* cost for loading QImode using movzbl */
429 {4, 5, 4}, /* cost of loading integer registers
430 in QImode, HImode and SImode.
431 Relative to reg-reg move (2). */
432 {2, 3, 2}, /* cost of storing integer registers */
433 2, /* cost of reg,reg fld/fst */
434 {2, 2, 6}, /* cost of loading fp registers
435 in SFmode, DFmode and XFmode */
436 {4, 4, 6}, /* cost of loading integer registers */
437 2, /* cost of moving MMX register */
438 {2, 2}, /* cost of loading MMX registers
439 in SImode and DImode */
440 {2, 2}, /* cost of storing MMX registers
441 in SImode and DImode */
442 12, /* cost of moving SSE register */
443 {12, 12, 12}, /* cost of loading SSE registers
444 in SImode, DImode and TImode */
445 {2, 2, 8}, /* cost of storing SSE registers
446 in SImode, DImode and TImode */
447 10, /* MMX or SSE register to integer */
448 64, /* size of prefetch block */
449 6, /* number of parallel prefetches */
451 5, /* cost of FADD and FSUB insns. */
452 7, /* cost of FMUL instruction. */
453 43, /* cost of FDIV instruction. */
454 2, /* cost of FABS instruction. */
455 2, /* cost of FCHS instruction. */
456 43, /* cost of FSQRT instruction. */
459 const struct processor_costs *ix86_cost = &pentium_cost;
461 /* Processor feature/optimization bitmasks. */
462 #define m_386 (1<<PROCESSOR_I386)
463 #define m_486 (1<<PROCESSOR_I486)
464 #define m_PENT (1<<PROCESSOR_PENTIUM)
465 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
466 #define m_K6 (1<<PROCESSOR_K6)
467 #define m_ATHLON (1<<PROCESSOR_ATHLON)
468 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
469 #define m_K8 (1<<PROCESSOR_K8)
470 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
472 const int x86_use_leave = m_386 | m_K6 | m_ATHLON_K8;
473 const int x86_push_memory = m_386 | m_K6 | m_ATHLON_K8 | m_PENT4;
474 const int x86_zero_extend_with_and = m_486 | m_PENT;
475 const int x86_movx = m_ATHLON_K8 | m_PPRO | m_PENT4 /* m_386 | m_K6 */;
476 const int x86_double_with_add = ~m_386;
477 const int x86_use_bit_test = m_386;
478 const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8 | m_K6;
479 const int x86_cmove = m_PPRO | m_ATHLON_K8 | m_PENT4;
480 const int x86_3dnow_a = m_ATHLON_K8;
481 const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON_K8 | m_PENT4;
482 const int x86_branch_hints = m_PENT4;
483 const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4;
484 const int x86_partial_reg_stall = m_PPRO;
485 const int x86_use_loop = m_K6;
486 const int x86_use_fiop = ~(m_PPRO | m_ATHLON_K8 | m_PENT);
487 const int x86_use_mov0 = m_K6;
488 const int x86_use_cltd = ~(m_PENT | m_K6);
489 const int x86_read_modify_write = ~m_PENT;
490 const int x86_read_modify = ~(m_PENT | m_PPRO);
491 const int x86_split_long_moves = m_PPRO;
492 const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON_K8;
493 const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
494 const int x86_single_stringop = m_386 | m_PENT4;
495 const int x86_qimode_math = ~(0);
496 const int x86_promote_qi_regs = 0;
497 const int x86_himode_math = ~(m_PPRO);
498 const int x86_promote_hi_regs = m_PPRO;
499 const int x86_sub_esp_4 = m_ATHLON_K8 | m_PPRO | m_PENT4;
500 const int x86_sub_esp_8 = m_ATHLON_K8 | m_PPRO | m_386 | m_486 | m_PENT4;
501 const int x86_add_esp_4 = m_ATHLON_K8 | m_K6 | m_PENT4;
502 const int x86_add_esp_8 = m_ATHLON_K8 | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4;
503 const int x86_integer_DFmode_moves = ~(m_ATHLON_K8 | m_PENT4 | m_PPRO);
504 const int x86_partial_reg_dependency = m_ATHLON_K8 | m_PENT4;
505 const int x86_memory_mismatch_stall = m_ATHLON_K8 | m_PENT4;
506 const int x86_accumulate_outgoing_args = m_ATHLON_K8 | m_PENT4 | m_PPRO;
507 const int x86_prologue_using_move = m_ATHLON_K8 | m_PENT4 | m_PPRO;
508 const int x86_epilogue_using_move = m_ATHLON_K8 | m_PENT4 | m_PPRO;
509 const int x86_decompose_lea = m_PENT4;
510 const int x86_shift1 = ~m_486;
511 const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON_K8 | m_PENT4;
512 const int x86_sse_partial_reg_dependency = m_PENT4 | m_PPRO;
513 /* Set for machines where the type and dependencies are resolved on SSE register
514 parts instead of whole registers, so we may maintain just lower part of
515 scalar values in proper format leaving the upper part undefined. */
516 const int x86_sse_partial_regs = m_ATHLON_K8;
517 /* Athlon optimizes partial-register FPS special case, thus avoiding the
518 need for extra instructions beforehand */
519 const int x86_sse_partial_regs_for_cvtsd2ss = 0;
520 const int x86_sse_typeless_stores = m_ATHLON_K8;
521 const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4;
522 const int x86_use_ffreep = m_ATHLON_K8;
523 const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_K6;
524 const int x86_inter_unit_moves = ~(m_ATHLON_K8);
525 const int x86_ext_80387_constants = m_K6 | m_ATHLON | m_PENT4 | m_PPRO;
/* In case the average insn count for single function invocation is
   lower than this constant, emit fast (but longer) prologue and
   epilogue code.  */
#define FAST_PROLOGUE_INSN_COUNT 20

/* Set by prologue expander and used by epilogue expander to determine
   the style used.  */
static int use_fast_prologue_epilogue;
536 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
537 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
538 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
539 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
541 /* Array of the smallest class containing reg number REGNO, indexed by
542 REGNO. Used by REGNO_REG_CLASS in i386.h. */
544 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
547 AREG, DREG, CREG, BREG,
549 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
551 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
552 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
555 /* flags, fpsr, dirflag, frame */
556 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
557 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
559 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
561 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
562 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
563 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
567 /* The "default" register map used in 32bit mode. */
569 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
571 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
572 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
573 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
574 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
575 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
576 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
577 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
580 static int const x86_64_int_parameter_registers[6] =
582 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
583 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
586 static int const x86_64_int_return_registers[4] =
588 0 /*RAX*/, 1 /*RDI*/, 5 /*RDI*/, 4 /*RSI*/
591 /* The "default" register map used in 64bit mode. */
592 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
594 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
595 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
596 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
597 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
598 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
599 8,9,10,11,12,13,14,15, /* extended integer registers */
600 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
603 /* Define the register numbers to be used in Dwarf debugging information.
604 The SVR4 reference port C compiler uses the following register numbers
605 in its Dwarf output code:
606 0 for %eax (gcc regno = 0)
607 1 for %ecx (gcc regno = 2)
608 2 for %edx (gcc regno = 1)
609 3 for %ebx (gcc regno = 3)
610 4 for %esp (gcc regno = 7)
611 5 for %ebp (gcc regno = 6)
612 6 for %esi (gcc regno = 4)
613 7 for %edi (gcc regno = 5)
614 The following three DWARF register numbers are never generated by
615 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
616 believes these numbers have these meanings.
617 8 for %eip (no gcc equivalent)
618 9 for %eflags (gcc regno = 17)
619 10 for %trapno (no gcc equivalent)
620 It is not at all clear how we should number the FP stack registers
621 for the x86 architecture. If the version of SDB on x86/svr4 were
622 a bit less brain dead with respect to floating-point then we would
623 have a precedent to follow with respect to DWARF register numbers
624 for x86 FP registers, but the SDB on x86/svr4 is so completely
625 broken with respect to FP registers that it is hardly worth thinking
626 of it as something to strive for compatibility with.
627 The version of x86/svr4 SDB I have at the moment does (partially)
628 seem to believe that DWARF register number 11 is associated with
629 the x86 register %st(0), but that's about all. Higher DWARF
630 register numbers don't seem to be associated with anything in
631 particular, and even for DWARF regno 11, SDB only seems to under-
632 stand that it should say that a variable lives in %st(0) (when
633 asked via an `=' command) if we said it was in DWARF regno 11,
634 but SDB still prints garbage when asked for the value of the
635 variable in question (via a `/' command).
636 (Also note that the labels SDB prints for various FP stack regs
637 when doing an `x' command are all wrong.)
638 Note that these problems generally don't affect the native SVR4
639 C compiler because it doesn't allow the use of -O with -g and
640 because when it is *not* optimizing, it allocates a memory
641 location for each floating-point variable, and the memory
642 location is what gets described in the DWARF AT_location
643 attribute for the variable in question.
644 Regardless of the severe mental illness of the x86/svr4 SDB, we
645 do something sensible here and we use the following DWARF
646 register numbers. Note that these are all stack-top-relative
648 11 for %st(0) (gcc regno = 8)
649 12 for %st(1) (gcc regno = 9)
650 13 for %st(2) (gcc regno = 10)
651 14 for %st(3) (gcc regno = 11)
652 15 for %st(4) (gcc regno = 12)
653 16 for %st(5) (gcc regno = 13)
654 17 for %st(6) (gcc regno = 14)
655 18 for %st(7) (gcc regno = 15)
657 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
659 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
660 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
661 -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
662 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
663 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
664 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
665 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
668 /* Test and compare insns in i386.md store the information needed to
669 generate branch and scc insns here. */
671 rtx ix86_compare_op0 = NULL_RTX;
672 rtx ix86_compare_op1 = NULL_RTX;
674 /* The encoding characters for the four TLS models present in ELF. */
676 static char const tls_model_chars[] = " GLil";
678 #define MAX_386_STACK_LOCALS 3
679 /* Size of the register save area. */
680 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
682 /* Define the structure for the machine field in struct function. */
684 struct stack_local_entry GTY(())
689 struct stack_local_entry *next;
692 /* Structure describing stack frame layout.
693 Stack grows downward:
699 saved frame pointer if frame_pointer_needed
700 <- HARD_FRAME_POINTER
706 > to_allocate <- FRAME_POINTER
718 int outgoing_arguments_size;
721 HOST_WIDE_INT to_allocate;
722 /* The offsets relative to ARG_POINTER. */
723 HOST_WIDE_INT frame_pointer_offset;
724 HOST_WIDE_INT hard_frame_pointer_offset;
725 HOST_WIDE_INT stack_pointer_offset;
728 /* Used to enable/disable debugging features. */
729 const char *ix86_debug_arg_string, *ix86_debug_addr_string;
730 /* Code model option as passed by user. */
731 const char *ix86_cmodel_string;
733 enum cmodel ix86_cmodel;
735 const char *ix86_asm_string;
736 enum asm_dialect ix86_asm_dialect = ASM_ATT;
738 const char *ix86_tls_dialect_string;
739 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
741 /* Which unit we are generating floating point math for. */
742 enum fpmath_unit ix86_fpmath;
744 /* Which cpu are we scheduling for. */
745 enum processor_type ix86_tune;
746 /* Which instruction set architecture to use. */
747 enum processor_type ix86_arch;
749 /* Strings to hold which cpu and instruction set architecture to use. */
750 const char *ix86_tune_string; /* for -mtune=<xxx> */
751 const char *ix86_arch_string; /* for -march=<xxx> */
752 const char *ix86_fpmath_string; /* for -mfpmath=<xxx> */
754 /* # of registers to use to pass arguments. */
755 const char *ix86_regparm_string;
757 /* true if sse prefetch instruction is not NOOP. */
758 int x86_prefetch_sse;
760 /* ix86_regparm_string as a number */
763 /* Alignment to use for loops and jumps: */
765 /* Power of two alignment for loops. */
766 const char *ix86_align_loops_string;
768 /* Power of two alignment for non-loop jumps. */
769 const char *ix86_align_jumps_string;
771 /* Power of two alignment for stack boundary in bytes. */
772 const char *ix86_preferred_stack_boundary_string;
774 /* Preferred alignment for stack boundary in bits. */
775 int ix86_preferred_stack_boundary;
777 /* Values 1-5: see jump.c */
778 int ix86_branch_cost;
779 const char *ix86_branch_cost_string;
781 /* Power of two alignment for functions. */
782 const char *ix86_align_funcs_string;
784 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
785 static char internal_label_prefix[16];
786 static int internal_label_prefix_len;
788 static int local_symbolic_operand PARAMS ((rtx, enum machine_mode));
789 static int tls_symbolic_operand_1 PARAMS ((rtx, enum tls_model));
790 static void output_pic_addr_const PARAMS ((FILE *, rtx, int));
791 static void put_condition_code PARAMS ((enum rtx_code, enum machine_mode,
793 static const char *get_some_local_dynamic_name PARAMS ((void));
794 static int get_some_local_dynamic_name_1 PARAMS ((rtx *, void *));
795 static rtx maybe_get_pool_constant PARAMS ((rtx));
796 static rtx ix86_expand_int_compare PARAMS ((enum rtx_code, rtx, rtx));
797 static enum rtx_code ix86_prepare_fp_compare_args PARAMS ((enum rtx_code,
799 static rtx get_thread_pointer PARAMS ((void));
800 static void get_pc_thunk_name PARAMS ((char [32], unsigned int));
801 static rtx gen_push PARAMS ((rtx));
802 static int memory_address_length PARAMS ((rtx addr));
803 static int ix86_flags_dependant PARAMS ((rtx, rtx, enum attr_type));
804 static int ix86_agi_dependant PARAMS ((rtx, rtx, enum attr_type));
805 static enum attr_ppro_uops ix86_safe_ppro_uops PARAMS ((rtx));
806 static void ix86_dump_ppro_packet PARAMS ((FILE *));
807 static void ix86_reorder_insn PARAMS ((rtx *, rtx *));
808 static struct machine_function * ix86_init_machine_status PARAMS ((void));
809 static int ix86_split_to_parts PARAMS ((rtx, rtx *, enum machine_mode));
810 static int ix86_nsaved_regs PARAMS ((void));
811 static void ix86_emit_save_regs PARAMS ((void));
812 static void ix86_emit_save_regs_using_mov PARAMS ((rtx, HOST_WIDE_INT));
813 static void ix86_emit_restore_regs_using_mov PARAMS ((rtx, int, int));
814 static void ix86_output_function_epilogue PARAMS ((FILE *, HOST_WIDE_INT));
815 static void ix86_set_move_mem_attrs_1 PARAMS ((rtx, rtx, rtx, rtx, rtx));
816 static void ix86_sched_reorder_ppro PARAMS ((rtx *, rtx *));
817 static HOST_WIDE_INT ix86_GOT_alias_set PARAMS ((void));
/* Forward declarations for the static helpers defined later in this file.
   These use the old-style PARAMS(()) macro (K&R/ISO prototype compatibility
   shim used by GCC before the C90-only cleanup).
   NOTE(review): this chunk is a lossy extraction -- the embedded original
   line numbers are non-contiguous, so some declarations (and the struct
   ix86_address / enum x86_64_reg_class bodies below) are visibly
   incomplete.  Code left byte-identical; comments only.  */
818 static void ix86_adjust_counter PARAMS ((rtx, HOST_WIDE_INT));
819 static rtx ix86_expand_aligntest PARAMS ((rtx, int));
820 static void ix86_expand_strlensi_unroll_1 PARAMS ((rtx, rtx));
821 static int ix86_issue_rate PARAMS ((void));
822 static int ix86_adjust_cost PARAMS ((rtx, rtx, rtx, int));
823 static void ix86_sched_init PARAMS ((FILE *, int, int));
824 static int ix86_sched_reorder PARAMS ((FILE *, int, rtx *, int *, int));
825 static int ix86_variable_issue PARAMS ((FILE *, int, rtx, int));
826 static int ia32_use_dfa_pipeline_interface PARAMS ((void));
827 static int ia32_multipass_dfa_lookahead PARAMS ((void));
828 static void ix86_init_mmx_sse_builtins PARAMS ((void));
829 static rtx x86_this_parameter PARAMS ((tree));
830 static void x86_output_mi_thunk PARAMS ((FILE *, tree, HOST_WIDE_INT,
831 HOST_WIDE_INT, tree));
832 static bool x86_can_output_mi_thunk PARAMS ((tree, HOST_WIDE_INT,
833 HOST_WIDE_INT, tree));
834 bool ix86_expand_carry_flag_compare PARAMS ((enum rtx_code, rtx, rtx, rtx*));
/* NOTE(review): the next line is a field list whose enclosing
   struct ix86_address declaration was dropped by the extraction.  */
838 rtx base, index, disp;
842 static int ix86_decompose_address PARAMS ((rtx, struct ix86_address *));
843 static int ix86_address_cost PARAMS ((rtx));
844 static bool ix86_cannot_force_const_mem PARAMS ((rtx));
845 static rtx ix86_delegitimize_address PARAMS ((rtx));
847 static void ix86_encode_section_info PARAMS ((tree, int)) ATTRIBUTE_UNUSED;
848 static const char *ix86_strip_name_encoding PARAMS ((const char *))
851 struct builtin_description;
852 static rtx ix86_expand_sse_comi PARAMS ((const struct builtin_description *,
854 static rtx ix86_expand_sse_compare PARAMS ((const struct builtin_description *,
856 static rtx ix86_expand_unop1_builtin PARAMS ((enum insn_code, tree, rtx));
857 static rtx ix86_expand_unop_builtin PARAMS ((enum insn_code, tree, rtx, int));
858 static rtx ix86_expand_binop_builtin PARAMS ((enum insn_code, tree, rtx));
859 static rtx ix86_expand_store_builtin PARAMS ((enum insn_code, tree));
860 static rtx safe_vector_operand PARAMS ((rtx, enum machine_mode));
861 static enum rtx_code ix86_fp_compare_code_to_integer PARAMS ((enum rtx_code));
862 static void ix86_fp_comparison_codes PARAMS ((enum rtx_code code,
866 static rtx ix86_expand_fp_compare PARAMS ((enum rtx_code, rtx, rtx, rtx,
868 static int ix86_fp_comparison_arithmetics_cost PARAMS ((enum rtx_code code));
869 static int ix86_fp_comparison_fcomi_cost PARAMS ((enum rtx_code code));
870 static int ix86_fp_comparison_sahf_cost PARAMS ((enum rtx_code code));
871 static int ix86_fp_comparison_cost PARAMS ((enum rtx_code code));
872 static unsigned int ix86_select_alt_pic_regnum PARAMS ((void));
873 static int ix86_save_reg PARAMS ((unsigned int, int));
874 static void ix86_compute_frame_layout PARAMS ((struct ix86_frame *));
875 static int ix86_comp_type_attributes PARAMS ((tree, tree));
876 static int ix86_fntype_regparm PARAMS ((tree));
877 const struct attribute_spec ix86_attribute_table[];
878 static bool ix86_function_ok_for_sibcall PARAMS ((tree, tree));
879 static tree ix86_handle_cdecl_attribute PARAMS ((tree *, tree, tree, int, bool *));
880 static tree ix86_handle_regparm_attribute PARAMS ((tree *, tree, tree, int, bool *));
881 static int ix86_value_regno PARAMS ((enum machine_mode));
882 static bool contains_128bit_aligned_vector_p PARAMS ((tree));
883 static bool ix86_ms_bitfield_layout_p PARAMS ((tree));
884 static tree ix86_handle_struct_attribute PARAMS ((tree *, tree, tree, int, bool *));
885 static int extended_reg_mentioned_1 PARAMS ((rtx *, void *));
886 static bool ix86_rtx_costs PARAMS ((rtx, int, int, int *));
/* NOTE(review): matching #endif for this #if was dropped by the extraction.  */
888 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
889 static void ix86_svr3_asm_out_constructor PARAMS ((rtx, int));
892 /* Register class used for passing given 64bit part of the argument.
893 These represent classes as documented by the PS ABI, with the exception
894 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
895 use SF or DFmode move instead of DImode to avoid reformatting penalties.
897 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
898 whenever possible (upper half does contain padding).
/* NOTE(review): the enum body below is incomplete in this extraction --
   x86_64_reg_class_name lists ten classes but only two enumerators
   survived.  */
900 enum x86_64_reg_class
903 X86_64_INTEGER_CLASS,
904 X86_64_INTEGERSI_CLASS,
913 static const char * const x86_64_reg_class_name[] =
914 {"no", "integer", "integerSI", "sse", "sseSF", "sseDF", "sseup", "x87", "x87up", "no"};
916 #define MAX_CLASSES 4
917 static int classify_argument PARAMS ((enum machine_mode, tree,
918 enum x86_64_reg_class [MAX_CLASSES],
920 static int examine_argument PARAMS ((enum machine_mode, tree, int, int *,
922 static rtx construct_container PARAMS ((enum machine_mode, tree, int, int, int,
924 static enum x86_64_reg_class merge_classes PARAMS ((enum x86_64_reg_class,
925 enum x86_64_reg_class));
927 /* Table of constants used by fldpi, fldln2, etc... */
928 static REAL_VALUE_TYPE ext_80387_constants_table [5];
929 static bool ext_80387_constants_init = 0;
930 static void init_ext_80387_constants PARAMS ((void));
/* Target-hook vector setup: each #undef/#define pair overrides one slot of
   TARGET_INITIALIZER before the `targetm' definition at the bottom picks
   them all up.  NOTE(review): extraction dropped interior lines (embedded
   line numbers are non-contiguous), e.g. the #endif matching the
   TARGET_DLLIMPORT_DECL_ATTRIBUTES #ifdef.  Code left byte-identical.  */
932 /* Initialize the GCC target structure. */
933 #undef TARGET_ATTRIBUTE_TABLE
934 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
935 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
936 # undef TARGET_MERGE_DECL_ATTRIBUTES
937 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
940 #undef TARGET_COMP_TYPE_ATTRIBUTES
941 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
943 #undef TARGET_INIT_BUILTINS
944 #define TARGET_INIT_BUILTINS ix86_init_builtins
946 #undef TARGET_EXPAND_BUILTIN
947 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
949 #undef TARGET_ASM_FUNCTION_EPILOGUE
950 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
/* Empty parens: x86 assembly dialects use no grouping parentheses.  */
952 #undef TARGET_ASM_OPEN_PAREN
953 #define TARGET_ASM_OPEN_PAREN ""
954 #undef TARGET_ASM_CLOSE_PAREN
955 #define TARGET_ASM_CLOSE_PAREN ""
/* x86 has no alignment restrictions on data access, so the unaligned
   emitters simply reuse the aligned directives below.  */
957 #undef TARGET_ASM_ALIGNED_HI_OP
958 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
959 #undef TARGET_ASM_ALIGNED_SI_OP
960 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
962 #undef TARGET_ASM_ALIGNED_DI_OP
963 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
966 #undef TARGET_ASM_UNALIGNED_HI_OP
967 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
968 #undef TARGET_ASM_UNALIGNED_SI_OP
969 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
970 #undef TARGET_ASM_UNALIGNED_DI_OP
971 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
/* Instruction-scheduler hooks.  */
973 #undef TARGET_SCHED_ADJUST_COST
974 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
975 #undef TARGET_SCHED_ISSUE_RATE
976 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
977 #undef TARGET_SCHED_VARIABLE_ISSUE
978 #define TARGET_SCHED_VARIABLE_ISSUE ix86_variable_issue
979 #undef TARGET_SCHED_INIT
980 #define TARGET_SCHED_INIT ix86_sched_init
981 #undef TARGET_SCHED_REORDER
982 #define TARGET_SCHED_REORDER ix86_sched_reorder
983 #undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
984 #define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE \
985 ia32_use_dfa_pipeline_interface
986 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
987 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
988 ia32_multipass_dfa_lookahead
990 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
991 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
994 #undef TARGET_HAVE_TLS
995 #define TARGET_HAVE_TLS true
997 #undef TARGET_CANNOT_FORCE_CONST_MEM
998 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
1000 #undef TARGET_DELEGITIMIZE_ADDRESS
1001 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
1003 #undef TARGET_MS_BITFIELD_LAYOUT_P
1004 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
1006 #undef TARGET_ASM_OUTPUT_MI_THUNK
1007 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
1008 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1009 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
1011 #undef TARGET_RTX_COSTS
1012 #define TARGET_RTX_COSTS ix86_rtx_costs
1013 #undef TARGET_ADDRESS_COST
1014 #define TARGET_ADDRESS_COST ix86_address_cost
/* The single global target-hook vector read by the middle end.  */
1016 struct gcc_target targetm = TARGET_INITIALIZER;
/* Per-processor tuning tables consulted by override_options.
   NOTE(review): extraction dropped interior lines -- the `struct ptt'
   declaration heading the field list below, the opening braces of the
   table initializers, and most pta_flags enumerators are missing.
   Code left byte-identical; comments only.  */
1018 /* The svr4 ABI for the i386 says that records and unions are returned
1020 #ifndef DEFAULT_PCC_STRUCT_RETURN
1021 #define DEFAULT_PCC_STRUCT_RETURN 1
1024 /* Sometimes certain combinations of command options do not make
1025 sense on a particular target machine. You can define a macro
1026 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1027 defined, is executed once just after all the command options have
1030 Don't use this macro to turn on various extra optimizations for
1031 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1037 /* Comes from final.c -- no real reason to change it. */
1038 #define MAX_CODE_ALIGN 16
/* Fields of the per-processor tuning record (struct header lost above).  */
1042 const struct processor_costs *cost; /* Processor costs */
1043 const int target_enable; /* Target flags to enable. */
1044 const int target_disable; /* Target flags to disable. */
1045 const int align_loop; /* Default alignments. */
1046 const int align_loop_max_skip;
1047 const int align_jump;
1048 const int align_jump_max_skip;
1049 const int align_func;
/* One entry per PROCESSOR_* value; order must match enum processor_type.  */
1051 const processor_target_table[PROCESSOR_max] =
1053 {&i386_cost, 0, 0, 4, 3, 4, 3, 4},
1054 {&i486_cost, 0, 0, 16, 15, 16, 15, 16},
1055 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
1056 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
1057 {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
1058 {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
1059 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
1060 {&k8_cost, 0, 0, 16, 7, 16, 7, 16}
1063 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
/* Alias-table record: maps a -march=/-mtune= name to a processor and the
   instruction-set-extension flags that name implies.  */
1066 const char *const name; /* processor name or nickname. */
1067 const enum processor_type processor;
1068 const enum pta_flags
1073 PTA_PREFETCH_SSE = 8,
1079 const processor_alias_table[] =
1081 {"i386", PROCESSOR_I386, 0},
1082 {"i486", PROCESSOR_I486, 0},
1083 {"i586", PROCESSOR_PENTIUM, 0},
1084 {"pentium", PROCESSOR_PENTIUM, 0},
1085 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
1086 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
1087 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1088 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1089 {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_PREFETCH_SSE | PTA_SSE},
1090 {"i686", PROCESSOR_PENTIUMPRO, 0},
1091 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
1092 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
1093 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1094 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2 |
1095 PTA_MMX | PTA_PREFETCH_SSE},
1096 {"k6", PROCESSOR_K6, PTA_MMX},
1097 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1098 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1099 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1101 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
1102 | PTA_3DNOW | PTA_3DNOW_A},
1103 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1104 | PTA_3DNOW_A | PTA_SSE},
1105 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1106 | PTA_3DNOW_A | PTA_SSE},
1107 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1108 | PTA_3DNOW_A | PTA_SSE},
1109 {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1110 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1113 int const pta_size = ARRAY_SIZE (processor_alias_table);
1115 /* By default our XFmode is the 80-bit extended format. If we have
1116 use TFmode instead, it's also the 80-bit format, but with padding. */
1117 real_format_for_mode[XFmode - QFmode] = &ieee_extended_intel_96_format;
1118 real_format_for_mode[TFmode - QFmode] = &ieee_extended_intel_128_format;
1120 /* Set the default values for switches whose default depends on TARGET_64BIT
1121 in case they weren't overwritten by command line options. */
1124 if (flag_omit_frame_pointer == 2)
1125 flag_omit_frame_pointer = 1;
1126 if (flag_asynchronous_unwind_tables == 2)
1127 flag_asynchronous_unwind_tables = 1;
1128 if (flag_pcc_struct_return == 2)
1129 flag_pcc_struct_return = 0;
1133 if (flag_omit_frame_pointer == 2)
1134 flag_omit_frame_pointer = 0;
1135 if (flag_asynchronous_unwind_tables == 2)
1136 flag_asynchronous_unwind_tables = 0;
1137 if (flag_pcc_struct_return == 2)
1138 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
1141 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1142 SUBTARGET_OVERRIDE_OPTIONS;
1145 if (!ix86_tune_string && ix86_arch_string)
1146 ix86_tune_string = ix86_arch_string;
1147 if (!ix86_tune_string)
1148 ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT];
1149 if (!ix86_arch_string)
1150 ix86_arch_string = TARGET_64BIT ? "k8" : "i386";
1152 if (ix86_cmodel_string != 0)
1154 if (!strcmp (ix86_cmodel_string, "small"))
1155 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1157 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
1158 else if (!strcmp (ix86_cmodel_string, "32"))
1159 ix86_cmodel = CM_32;
1160 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
1161 ix86_cmodel = CM_KERNEL;
1162 else if (!strcmp (ix86_cmodel_string, "medium") && !flag_pic)
1163 ix86_cmodel = CM_MEDIUM;
1164 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
1165 ix86_cmodel = CM_LARGE;
1167 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
1171 ix86_cmodel = CM_32;
1173 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1175 if (ix86_asm_string != 0)
1177 if (!strcmp (ix86_asm_string, "intel"))
1178 ix86_asm_dialect = ASM_INTEL;
1179 else if (!strcmp (ix86_asm_string, "att"))
1180 ix86_asm_dialect = ASM_ATT;
1182 error ("bad value (%s) for -masm= switch", ix86_asm_string);
1184 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
1185 error ("code model `%s' not supported in the %s bit mode",
1186 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
1187 if (ix86_cmodel == CM_LARGE)
1188 sorry ("code model `large' not supported yet");
1189 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
1190 sorry ("%i-bit mode not compiled in",
1191 (target_flags & MASK_64BIT) ? 64 : 32);
1193 for (i = 0; i < pta_size; i++)
1194 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
1196 ix86_arch = processor_alias_table[i].processor;
1197 /* Default cpu tuning to the architecture. */
1198 ix86_tune = ix86_arch;
1199 if (processor_alias_table[i].flags & PTA_MMX
1200 && !(target_flags_explicit & MASK_MMX))
1201 target_flags |= MASK_MMX;
1202 if (processor_alias_table[i].flags & PTA_3DNOW
1203 && !(target_flags_explicit & MASK_3DNOW))
1204 target_flags |= MASK_3DNOW;
1205 if (processor_alias_table[i].flags & PTA_3DNOW_A
1206 && !(target_flags_explicit & MASK_3DNOW_A))
1207 target_flags |= MASK_3DNOW_A;
1208 if (processor_alias_table[i].flags & PTA_SSE
1209 && !(target_flags_explicit & MASK_SSE))
1210 target_flags |= MASK_SSE;
1211 if (processor_alias_table[i].flags & PTA_SSE2
1212 && !(target_flags_explicit & MASK_SSE2))
1213 target_flags |= MASK_SSE2;
1214 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1215 x86_prefetch_sse = true;
1216 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1217 error ("CPU you selected does not support x86-64 instruction set");
1222 error ("bad value (%s) for -march= switch", ix86_arch_string);
1224 for (i = 0; i < pta_size; i++)
1225 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
1227 ix86_tune = processor_alias_table[i].processor;
1228 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1229 error ("CPU you selected does not support x86-64 instruction set");
1232 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1233 x86_prefetch_sse = true;
1235 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
1238 ix86_cost = &size_cost;
1240 ix86_cost = processor_target_table[ix86_tune].cost;
1241 target_flags |= processor_target_table[ix86_tune].target_enable;
1242 target_flags &= ~processor_target_table[ix86_tune].target_disable;
1244 /* Arrange to set up i386_stack_locals for all functions. */
1245 init_machine_status = ix86_init_machine_status;
1247 /* Validate -mregparm= value. */
1248 if (ix86_regparm_string)
1250 i = atoi (ix86_regparm_string);
1251 if (i < 0 || i > REGPARM_MAX)
1252 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
1258 ix86_regparm = REGPARM_MAX;
1260 /* If the user has provided any of the -malign-* options,
1261 warn and use that value only if -falign-* is not set.
1262 Remove this code in GCC 3.2 or later. */
1263 if (ix86_align_loops_string)
1265 warning ("-malign-loops is obsolete, use -falign-loops");
1266 if (align_loops == 0)
1268 i = atoi (ix86_align_loops_string);
1269 if (i < 0 || i > MAX_CODE_ALIGN)
1270 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1272 align_loops = 1 << i;
1276 if (ix86_align_jumps_string)
1278 warning ("-malign-jumps is obsolete, use -falign-jumps");
1279 if (align_jumps == 0)
1281 i = atoi (ix86_align_jumps_string);
1282 if (i < 0 || i > MAX_CODE_ALIGN)
1283 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1285 align_jumps = 1 << i;
1289 if (ix86_align_funcs_string)
1291 warning ("-malign-functions is obsolete, use -falign-functions");
1292 if (align_functions == 0)
1294 i = atoi (ix86_align_funcs_string);
1295 if (i < 0 || i > MAX_CODE_ALIGN)
1296 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1298 align_functions = 1 << i;
1302 /* Default align_* from the processor table. */
1303 if (align_loops == 0)
1305 align_loops = processor_target_table[ix86_tune].align_loop;
1306 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
1308 if (align_jumps == 0)
1310 align_jumps = processor_target_table[ix86_tune].align_jump;
1311 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
1313 if (align_functions == 0)
1315 align_functions = processor_target_table[ix86_tune].align_func;
1318 /* Validate -mpreferred-stack-boundary= value, or provide default.
1319 The default of 128 bits is for Pentium III's SSE __m128, but we
1320 don't want additional code to keep the stack aligned when
1321 optimizing for code size. */
1322 ix86_preferred_stack_boundary = (optimize_size
1323 ? TARGET_64BIT ? 128 : 32
1325 if (ix86_preferred_stack_boundary_string)
1327 i = atoi (ix86_preferred_stack_boundary_string);
1328 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
1329 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
1330 TARGET_64BIT ? 4 : 2);
1332 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
1335 /* Validate -mbranch-cost= value, or provide default. */
1336 ix86_branch_cost = processor_target_table[ix86_tune].cost->branch_cost;
1337 if (ix86_branch_cost_string)
1339 i = atoi (ix86_branch_cost_string);
1341 error ("-mbranch-cost=%d is not between 0 and 5", i);
1343 ix86_branch_cost = i;
1346 if (ix86_tls_dialect_string)
1348 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
1349 ix86_tls_dialect = TLS_DIALECT_GNU;
1350 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
1351 ix86_tls_dialect = TLS_DIALECT_SUN;
1353 error ("bad value (%s) for -mtls-dialect= switch",
1354 ix86_tls_dialect_string);
1357 /* Keep nonleaf frame pointers. */
1358 if (TARGET_OMIT_LEAF_FRAME_POINTER)
1359 flag_omit_frame_pointer = 1;
1361 /* If we're doing fast math, we don't care about comparison order
1362 wrt NaNs. This lets us use a shorter comparison sequence. */
1363 if (flag_unsafe_math_optimizations)
1364 target_flags &= ~MASK_IEEE_FP;
1366 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
1367 since the insns won't need emulation. */
1368 if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
1369 target_flags &= ~MASK_NO_FANCY_MATH_387;
1373 if (TARGET_ALIGN_DOUBLE)
1374 error ("-malign-double makes no sense in the 64bit mode");
1376 error ("-mrtd calling convention not supported in the 64bit mode");
1377 /* Enable by default the SSE and MMX builtins. */
1378 target_flags |= (MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE);
1379 ix86_fpmath = FPMATH_SSE;
1382 ix86_fpmath = FPMATH_387;
1384 if (ix86_fpmath_string != 0)
1386 if (! strcmp (ix86_fpmath_string, "387"))
1387 ix86_fpmath = FPMATH_387;
1388 else if (! strcmp (ix86_fpmath_string, "sse"))
1392 warning ("SSE instruction set disabled, using 387 arithmetics");
1393 ix86_fpmath = FPMATH_387;
1396 ix86_fpmath = FPMATH_SSE;
1398 else if (! strcmp (ix86_fpmath_string, "387,sse")
1399 || ! strcmp (ix86_fpmath_string, "sse,387"))
1403 warning ("SSE instruction set disabled, using 387 arithmetics");
1404 ix86_fpmath = FPMATH_387;
1406 else if (!TARGET_80387)
1408 warning ("387 instruction set disabled, using SSE arithmetics");
1409 ix86_fpmath = FPMATH_SSE;
1412 ix86_fpmath = FPMATH_SSE | FPMATH_387;
1415 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
1418 /* It makes no sense to ask for just SSE builtins, so MMX is also turned
1422 target_flags |= MASK_MMX;
1423 x86_prefetch_sse = true;
1426 /* If it has 3DNow! it also has MMX so MMX is also turned on by -m3dnow */
1429 target_flags |= MASK_MMX;
1430 /* If we are targeting the Athlon architecture, enable the 3Dnow/MMX
1431 extensions it adds. */
1432 if (x86_3dnow_a & (1 << ix86_arch))
1433 target_flags |= MASK_3DNOW_A;
1435 if ((x86_accumulate_outgoing_args & TUNEMASK)
1436 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
1438 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
1440 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
1443 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
1444 p = strchr (internal_label_prefix, 'X');
1445 internal_label_prefix_len = p - internal_label_prefix;
/* Per -O level option tweaking (return type and LEVEL parameter line lost
   in extraction).  Sets three flags to the sentinel value 2, meaning
   "default not yet decided": override_options resolves them once
   TARGET_64BIT is known.  */
1451 optimization_options (level, size)
1453 int size ATTRIBUTE_UNUSED;
1455 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
1456 make the problem with not enough registers even worse. */
1457 #ifdef INSN_SCHEDULING
1459 flag_schedule_insns = 0;
1462 /* The default values of these switches depend on the TARGET_64BIT
1463 that is not known at this moment. Mark these values with 2 and
1464 let the user override these. In case there is no command line option
1465 specifying them, we will set the defaults in override_options. */
1467 flag_omit_frame_pointer = 2;
1468 flag_pcc_struct_return = 2;
1469 flag_asynchronous_unwind_tables = 2;
/* Machine-attribute dispatch table; terminated by the all-NULL sentinel.
   Column meanings per the spec comment below.  NOTE(review): the opening
   brace of the initializer and the #endif for the dllimport section were
   dropped by the extraction.  */
1472 /* Table of valid machine attributes. */
1473 const struct attribute_spec ix86_attribute_table[] =
1475 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
1476 /* Stdcall attribute says callee is responsible for popping arguments
1477 if they are not variable. */
1478 { "stdcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1479 /* Fastcall attribute says callee is responsible for popping arguments
1480 if they are not variable. */
1481 { "fastcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1482 /* Cdecl attribute says the callee is a normal C declaration */
1483 { "cdecl", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1484 /* Regparm attribute specifies how many integer arguments are to be
1485 passed in registers. */
1486 { "regparm", 1, 1, false, true, true, ix86_handle_regparm_attribute },
1487 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
1488 { "dllimport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1489 { "dllexport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1490 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
1492 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
1493 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
1494 { NULL, 0, 0, false, false, false, NULL }
/* TARGET_FUNCTION_OK_FOR_SIBCALL hook (return type, parameter
   declarations, braces and return statements were dropped by the
   extraction; code left byte-identical).  Rejects sibcalls that would
   need a live PIC register, mismatch the 80387 return-value stack, or
   leave no call-clobbered register free for an indirect target.  */
1497 /* Decide whether we can make a sibling call to a function. DECL is the
1498 declaration of the function being targeted by the call and EXP is the
1499 CALL_EXPR representing the call. */
1502 ix86_function_ok_for_sibcall (decl, exp)
1506 /* If we are generating position-independent code, we cannot sibcall
1507 optimize any indirect call, or a direct call to a global function,
1508 as the PLT requires %ebx be live. */
1509 if (!TARGET_64BIT && flag_pic && (!decl || TREE_PUBLIC (decl)))
1512 /* If we are returning floats on the 80387 register stack, we cannot
1513 make a sibcall from a function that doesn't return a float to a
1514 function that does or, conversely, from a function that does return
1515 a float to a function that doesn't; the necessary stack adjustment
1516 would not be executed. */
1517 if (STACK_REG_P (ix86_function_value (TREE_TYPE (exp)))
1518 != STACK_REG_P (ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)))))
1521 /* If this call is indirect, we'll need to be able to use a call-clobbered
1522 register for the address of the target function. Make sure that all
1523 such registers are not used for passing parameters. */
1524 if (!decl && !TARGET_64BIT)
1526 int regparm = ix86_regparm;
1529 /* We're looking at the CALL_EXPR, we need the type of the function. */
1530 type = TREE_OPERAND (exp, 0); /* pointer expression */
1531 type = TREE_TYPE (type); /* pointer type */
1532 type = TREE_TYPE (type); /* function type */
1534 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
1536 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1540 /* ??? Need to count the actual number of registers to be used,
1541 not the possible number of registers. Fix later. */
1546 /* Otherwise okay. That also includes certain types of indirect calls. */
/* Attribute handler shared by "cdecl", "stdcall" and "fastcall" (see
   ix86_attribute_table).  Warns and sets *no_add_attrs when the attribute
   is applied to a non-function, and rejects the mutually-exclusive
   fastcall/stdcall and fastcall/regparm combinations.
   NOTE(review): return type, NODE/NAME/NO_ADD_ATTRS declarations, braces
   and the TARGET_64BIT branch were dropped by the extraction.  */
1550 /* Handle a "cdecl", "stdcall", or "fastcall" attribute;
1551 arguments as in struct attribute_spec.handler. */
1553 ix86_handle_cdecl_attribute (node, name, args, flags, no_add_attrs)
1556 tree args ATTRIBUTE_UNUSED;
1557 int flags ATTRIBUTE_UNUSED;
1560 if (TREE_CODE (*node) != FUNCTION_TYPE
1561 && TREE_CODE (*node) != METHOD_TYPE
1562 && TREE_CODE (*node) != FIELD_DECL
1563 && TREE_CODE (*node) != TYPE_DECL)
1565 warning ("`%s' attribute only applies to functions",
1566 IDENTIFIER_POINTER (name));
1567 *no_add_attrs = true;
1571 if (is_attribute_p ("fastcall", name))
1573 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
1575 error ("fastcall and stdcall attributes are not compatible");
1577 else if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
1579 error ("fastcall and regparm attributes are not compatible");
1582 else if (is_attribute_p ("stdcall", name))
1584 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1586 error ("fastcall and stdcall attributes are not compatible");
1593 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
1594 *no_add_attrs = true;
/* Attribute handler for "regparm (N)": requires an integer-constant
   argument no larger than REGPARM_MAX, and rejects combination with
   fastcall.  NOTE(review): return type, parameter declarations, braces
   and the `cst' declaration were dropped by the extraction.  */
1600 /* Handle a "regparm" attribute;
1601 arguments as in struct attribute_spec.handler. */
1603 ix86_handle_regparm_attribute (node, name, args, flags, no_add_attrs)
1607 int flags ATTRIBUTE_UNUSED;
1610 if (TREE_CODE (*node) != FUNCTION_TYPE
1611 && TREE_CODE (*node) != METHOD_TYPE
1612 && TREE_CODE (*node) != FIELD_DECL
1613 && TREE_CODE (*node) != TYPE_DECL)
1615 warning ("`%s' attribute only applies to functions",
1616 IDENTIFIER_POINTER (name));
1617 *no_add_attrs = true;
1623 cst = TREE_VALUE (args);
1624 if (TREE_CODE (cst) != INTEGER_CST)
1626 warning ("`%s' attribute requires an integer constant argument",
1627 IDENTIFIER_POINTER (name));
1628 *no_add_attrs = true;
1630 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
1632 warning ("argument to `%s' attribute larger than %d",
1633 IDENTIFIER_POINTER (name), REGPARM_MAX);
1634 *no_add_attrs = true;
1637 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1639 error ("fastcall and regparm attributes are not compatible");
/* TARGET_COMP_TYPE_ATTRIBUTES hook: compares the calling-convention
   attributes of two function types.  The `!lookup != !lookup' pattern
   tests presence/absence of an attribute on each side.
   NOTE(review): return type, parameter declarations and the return
   statements were dropped by the extraction.  */
1646 /* Return 0 if the attributes for two types are incompatible, 1 if they
1647 are compatible, and 2 if they are nearly compatible (which causes a
1648 warning to be generated). */
1651 ix86_comp_type_attributes (type1, type2)
1655 /* Check for mismatch of non-default calling convention. */
1656 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
1658 if (TREE_CODE (type1) != FUNCTION_TYPE)
1661 /* Check for mismatched fastcall types */
1662 if (!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
1663 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
1666 /* Check for mismatched return types (cdecl vs stdcall). */
1667 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
1668 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
/* Returns the explicit "regparm" attribute value of TYPE, or the global
   ix86_regparm default when the type carries none.  NOTE(review):
   return type, braces and the `if (attr)' guard were dropped by the
   extraction.  */
1673 /* Return the regparm value for a function with the indicated TYPE. */
1676 ix86_fntype_regparm (type)
1681 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
1683 return TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1685 return ix86_regparm;
/* RETURN_POPS_ARGS implementation: how many bytes of stack arguments the
   callee pops on return (stdcall/fastcall/-mrtd conventions), plus the
   hidden aggregate-return pointer when it is passed on the stack.
   NOTE(review): return type, parameter declarations, several returns and
   closing braces were dropped by the extraction.  */
1688 /* Value is the number of bytes of arguments automatically
1689 popped when returning from a subroutine call.
1690 FUNDECL is the declaration node of the function (as a tree),
1691 FUNTYPE is the data type of the function (as a tree),
1692 or for a library call it is an identifier node for the subroutine name.
1693 SIZE is the number of bytes of arguments passed on the stack.
1695 On the 80386, the RTD insn may be used to pop them if the number
1696 of args is fixed, but if the number is variable then the caller
1697 must pop them all. RTD can't be used for library calls now
1698 because the library is compiled with the Unix compiler.
1699 Use of RTD is a selectable option, since it is incompatible with
1700 standard Unix calling sequences. If the option is not selected,
1701 the caller must always pop the args.
1703 The attribute stdcall is equivalent to RTD on a per module basis. */
1706 ix86_return_pops_args (fundecl, funtype, size)
1711 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
1713 /* Cdecl functions override -mrtd, and never pop the stack. */
1714 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
1716 /* Stdcall and fastcall functions will pop the stack if not variable args. */
1717 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
1718 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
1722 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
1723 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
1724 == void_type_node)))
1728 /* Lose any fake structure return argument if it is passed on the stack. */
1729 if (aggregate_value_p (TREE_TYPE (funtype))
1732 int nregs = ix86_fntype_regparm (funtype);
1735 return GET_MODE_SIZE (Pmode);
/* Predicate: may hard register REGNO be used to pass a function argument?
   The first branch (regno < REGPARM_MAX, or an SSE register) appears to
   be the 64-bit case; the loop checks the x86-64 integer parameter
   register list.  NOTE(review): return type, parameter declaration,
   several returns and the TARGET_64BIT guard were dropped by the
   extraction.  */
1741 /* Argument support functions. */
1743 /* Return true when register may be used to pass function parameters. */
1745 ix86_function_arg_regno_p (regno)
1750 return (regno < REGPARM_MAX
1751 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
1752 if (SSE_REGNO_P (regno) && TARGET_SSE)
1754 /* RAX is used as hidden argument to va_arg functions. */
1757 for (i = 0; i < REGPARM_MAX; i++)
1758 if (regno == x86_64_int_parameter_registers[i])
/* INIT_CUMULATIVE_ARGS implementation: zero-fills *CUM (via zero_cum),
   applies the "regparm"/"fastcall" attributes of FNTYPE, and records
   whether the callee may take variable arguments.  NOTE(review): return
   type, braces and the `*cum = zero_cum;' line were dropped by the
   extraction.  */
1763 /* Initialize a variable CUM of type CUMULATIVE_ARGS
1764 for a call to a function whose data type is FNTYPE.
1765 For a library call, FNTYPE is 0. */
1768 init_cumulative_args (cum, fntype, libname)
1769 CUMULATIVE_ARGS *cum; /* Argument info to initialize */
1770 tree fntype; /* tree ptr for function decl */
1771 rtx libname; /* SYMBOL_REF of library name or 0 */
1773 static CUMULATIVE_ARGS zero_cum;
1774 tree param, next_param;
1776 if (TARGET_DEBUG_ARG)
1778 fprintf (stderr, "\ninit_cumulative_args (");
1780 fprintf (stderr, "fntype code = %s, ret code = %s",
1781 tree_code_name[(int) TREE_CODE (fntype)],
1782 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
1784 fprintf (stderr, "no fntype");
1787 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
1792 /* Set up the number of registers to use for passing arguments. */
1793 cum->nregs = ix86_regparm;
1794 cum->sse_nregs = SSE_REGPARM_MAX;
1795 if (fntype && !TARGET_64BIT)
1797 tree attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (fntype));
1800 cum->nregs = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1802 cum->maybe_vaarg = false;
1804 /* Use ecx and edx registers if function has fastcall attribute */
1805 if (fntype && !TARGET_64BIT)
1807 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
1815 /* Determine if this function has variable arguments. This is
1816 indicated by the last argument being 'void_type_node' if there
1817 are no variable arguments. If there are variable arguments, then
1818 we won't pass anything in registers */
1822 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
1823 param != 0; param = next_param)
1825 next_param = TREE_CHAIN (param);
1826 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
1833 cum->maybe_vaarg = true;
1837 if ((!fntype && !libname)
1838 || (fntype && !TYPE_ARG_TYPES (fntype)))
1839 cum->maybe_vaarg = 1;
1841 if (TARGET_DEBUG_ARG)
1842 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
/* Merge two x86-64 psABI eightbyte classes per the ABI's six merge rules
   (each rule numbered in the comments below).  NOTE(review): the opening
   brace and the `return' statements of rules 1 and 2 were dropped by the
   extraction; only the rule comments and conditions survive.  */
1847 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
1848 of this code is to classify each 8bytes of incoming argument by the register
1849 class and assign registers accordingly. */
1851 /* Return the union class of CLASS1 and CLASS2.
1852 See the x86-64 PS ABI for details. */
1854 static enum x86_64_reg_class
1855 merge_classes (class1, class2)
1856 enum x86_64_reg_class class1, class2;
1858 /* Rule #1: If both classes are equal, this is the resulting class. */
1859 if (class1 == class2)
1862 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
1864 if (class1 == X86_64_NO_CLASS)
1866 if (class2 == X86_64_NO_CLASS)
1869 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
1870 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
1871 return X86_64_MEMORY_CLASS;
1873 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
1874 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
1875 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
1876 return X86_64_INTEGERSI_CLASS;
1877 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
1878 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
1879 return X86_64_INTEGER_CLASS;
1881 /* Rule #5: If one of the classes is X87 or X87UP class, MEMORY is used. */
1882 if (class1 == X86_64_X87_CLASS || class1 == X86_64_X87UP_CLASS
1883 || class2 == X86_64_X87_CLASS || class2 == X86_64_X87UP_CLASS)
1884 return X86_64_MEMORY_CLASS;
1886 /* Rule #6: Otherwise class SSE is used. */
1887 return X86_64_SSE_CLASS;
1890 /* Classify the argument of type TYPE and mode MODE.
1891 CLASSES will be filled by the register class used to pass each word
1892 of the operand. The number of words is returned. In case the parameter
1893 should be passed in memory, 0 is returned. As a special case for zero
1894 sized containers, classes[0] will be NO_CLASS and 1 is returned.
1896 BIT_OFFSET is used internally for handling records and specifies offset
1897 of the offset in bits modulo 256 to avoid overflow cases.
1899 See the x86-64 PS ABI for details.
/* Classify MODE/TYPE into per-eightbyte register classes (see the block
   comment above).  Recurses over record bases, fields, arrays and unions,
   merging sub-classifications with merge_classes, then applies the psABI
   post-merger cleanup.  BIT_OFFSET is the bit offset of this sub-object
   modulo 256.
   NOTE(review): many structural lines (braces, returns, the switch head
   of the atomic-type classification) are elided in this listing — the
   annotations below describe only what the visible lines establish.  */
1903 classify_argument (mode, type, classes, bit_offset)
1904 enum machine_mode mode;
1906 enum x86_64_reg_class classes[MAX_CLASSES];
/* Size in bytes: BLKmode sizes come from the tree type, others from the
   machine mode.  */
1910 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
/* Number of eightbytes (words) the value occupies, counting the leading
   misalignment within its first word.  */
1911 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1913 /* Variable sized entities are always passed/returned in memory. */
1917 if (type && AGGREGATE_TYPE_P (type))
1921 enum x86_64_reg_class subclasses[MAX_CLASSES];
1923 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
/* Start from NO_CLASS so field merging can only strengthen classes.  */
1927 for (i = 0; i < words; i++)
1928 classes[i] = X86_64_NO_CLASS;
1930 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
1931 signalize memory class, so handle it as special case. */
1934 classes[0] = X86_64_NO_CLASS;
1938 /* Classify each field of record and merge classes. */
1939 if (TREE_CODE (type) == RECORD_TYPE)
1941 /* For classes first merge in the field of the subclasses. */
1942 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
1944 tree bases = TYPE_BINFO_BASETYPES (type);
1945 int n_bases = TREE_VEC_LENGTH (bases);
1948 for (i = 0; i < n_bases; ++i)
1950 tree binfo = TREE_VEC_ELT (bases, i);
/* Base-class offset converted from bytes to bits.  */
1952 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
1953 tree type = BINFO_TYPE (binfo);
1955 num = classify_argument (TYPE_MODE (type),
1957 (offset + bit_offset) % 256);
1960 for (i = 0; i < num; i++)
/* Eightbyte index of the base within the enclosing object.  */
1962 int pos = (offset + (bit_offset % 64)) / 8 / 8;
1964 merge_classes (subclasses[i], classes[i + pos]);
1968 /* And now merge the fields of structure. */
1969 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
1971 if (TREE_CODE (field) == FIELD_DECL)
1975 /* Bitfields are always classified as integer. Handle them
1976 early, since later code would consider them to be
1977 misaligned integers. */
1978 if (DECL_BIT_FIELD (field))
/* Walk every eightbyte the bit-field touches and force INTEGER.  */
1980 for (i = int_bit_position (field) / 8 / 8;
1981 i < (int_bit_position (field)
1982 + tree_low_cst (DECL_SIZE (field), 0)
1985 merge_classes (X86_64_INTEGER_CLASS,
/* Non-bit-field members: classify recursively at their bit position.  */
1990 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
1991 TREE_TYPE (field), subclasses,
1992 (int_bit_position (field)
1993 + bit_offset) % 256);
1996 for (i = 0; i < num; i++)
1999 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
2001 merge_classes (subclasses[i], classes[i + pos]);
2007 /* Arrays are handled as small records. */
2008 else if (TREE_CODE (type) == ARRAY_TYPE)
/* Classify one element, then replicate its classes across WORDS.  */
2011 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
2012 TREE_TYPE (type), subclasses, bit_offset);
2016 /* The partial classes are now full classes. */
2017 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
2018 subclasses[0] = X86_64_SSE_CLASS;
2019 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
2020 subclasses[0] = X86_64_INTEGER_CLASS;
2022 for (i = 0; i < words; i++)
2023 classes[i] = subclasses[i % num];
2025 /* Unions are similar to RECORD_TYPE but offset is always 0. */
2026 else if (TREE_CODE (type) == UNION_TYPE
2027 || TREE_CODE (type) == QUAL_UNION_TYPE)
2029 /* For classes first merge in the field of the subclasses. */
2030 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
2032 tree bases = TYPE_BINFO_BASETYPES (type);
2033 int n_bases = TREE_VEC_LENGTH (bases);
2036 for (i = 0; i < n_bases; ++i)
2038 tree binfo = TREE_VEC_ELT (bases, i);
2040 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
2041 tree type = BINFO_TYPE (binfo);
2043 num = classify_argument (TYPE_MODE (type),
2045 (offset + (bit_offset % 64)) % 256);
2048 for (i = 0; i < num; i++)
2050 int pos = (offset + (bit_offset % 64)) / 8 / 8;
2052 merge_classes (subclasses[i], classes[i + pos]);
/* Union members all start at offset 0, so merge each field's classes
   directly onto classes[0..num).  */
2056 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2058 if (TREE_CODE (field) == FIELD_DECL)
2061 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2062 TREE_TYPE (field), subclasses,
2066 for (i = 0; i < num; i++)
2067 classes[i] = merge_classes (subclasses[i], classes[i])
2074 /* Final merger cleanup. */
2075 for (i = 0; i < words; i++)
2077 /* If one class is MEMORY, everything should be passed in
2079 if (classes[i] == X86_64_MEMORY_CLASS)
2082 /* The X86_64_SSEUP_CLASS should be always preceded by
2083 X86_64_SSE_CLASS. */
2084 if (classes[i] == X86_64_SSEUP_CLASS
2085 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
2086 classes[i] = X86_64_SSE_CLASS;
2088 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
2089 if (classes[i] == X86_64_X87UP_CLASS
2090 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
2091 classes[i] = X86_64_SSE_CLASS;
2096 /* Compute alignment needed. We align all types to natural boundaries with
2097 exception of XFmode that is aligned to 64bits. */
2098 if (mode != VOIDmode && mode != BLKmode)
2100 int mode_alignment = GET_MODE_BITSIZE (mode);
2103 mode_alignment = 128;
2104 else if (mode == XCmode)
2105 mode_alignment = 256;
2106 /* Misaligned fields are always returned in memory. */
2107 if (bit_offset % mode_alignment)
2111 /* Classification of atomic types. */
/* NOTE(review): in the full source the assignments below are the arms of
   a switch over MODE (integer, SF/DF/XF/TF float, complex and vector
   modes); the case labels are elided in this listing.  */
2121 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
2122 classes[0] = X86_64_INTEGERSI_CLASS;
2124 classes[0] = X86_64_INTEGER_CLASS;
2128 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2131 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2132 classes[2] = classes[3] = X86_64_INTEGER_CLASS;
2135 if (!(bit_offset % 64))
2136 classes[0] = X86_64_SSESF_CLASS;
2138 classes[0] = X86_64_SSE_CLASS;
2141 classes[0] = X86_64_SSEDF_CLASS;
2144 classes[0] = X86_64_X87_CLASS;
2145 classes[1] = X86_64_X87UP_CLASS;
2148 classes[0] = X86_64_X87_CLASS;
2149 classes[1] = X86_64_X87UP_CLASS;
2150 classes[2] = X86_64_X87_CLASS;
2151 classes[3] = X86_64_X87UP_CLASS;
2154 classes[0] = X86_64_SSEDF_CLASS;
2155 classes[1] = X86_64_SSEDF_CLASS;
2158 classes[0] = X86_64_SSE_CLASS;
2166 classes[0] = X86_64_SSE_CLASS;
2167 classes[1] = X86_64_SSEUP_CLASS;
2182 /* Examine the argument and return set number of register required in each
2183 class. Return 0 iff parameter should be passed in memory. */
/* Count how many integer and SSE registers MODE/TYPE needs, storing the
   counts through INT_NREGS and SSE_NREGS.  Returns 0 iff the parameter
   must be passed in memory (see block comment above).
   NOTE(review): the switch head and the per-case increments/returns are
   elided in this listing; presumably the integer classes bump *int_nregs,
   the SSE classes bump *sse_nregs, and the X87/MEMORY classes force a
   memory return — confirm against the full source.  */
2185 examine_argument (mode, type, in_return, int_nregs, sse_nregs)
2186 enum machine_mode mode;
2188 int *int_nregs, *sse_nregs;
2191 enum x86_64_reg_class class[MAX_CLASSES];
/* Zero eightbytes from classify_argument already means "in memory".  */
2192 int n = classify_argument (mode, type, class, 0);
/* Walk the eightbyte classes from last to first, tallying registers.  */
2198 for (n--; n >= 0; n--)
2201 case X86_64_INTEGER_CLASS:
2202 case X86_64_INTEGERSI_CLASS:
2205 case X86_64_SSE_CLASS:
2206 case X86_64_SSESF_CLASS:
2207 case X86_64_SSEDF_CLASS:
2210 case X86_64_NO_CLASS:
2211 case X86_64_SSEUP_CLASS:
2213 case X86_64_X87_CLASS:
2214 case X86_64_X87UP_CLASS:
2218 case X86_64_MEMORY_CLASS:
2223 /* Construct container for the argument used by GCC interface. See
2224 FUNCTION_ARG for the detailed description. */
/* Build the rtx (single REG or PARALLEL of EXPR_LISTs) describing where an
   argument or return value of MODE/TYPE lives, given NINTREGS/NSSEREGS
   registers still available, the array INTREG of remaining integer register
   numbers, and SSE_REGNO as the next SSE register index.  Returns NULL when
   the value goes in memory (or, for zero-sized aggregates, NO_CLASS).
   NOTE(review): several declarations, returns and case labels are elided
   in this listing; comments below cover only the visible lines.  */
2226 construct_container (mode, type, in_return, nintregs, nsseregs, intreg, sse_regno)
2227 enum machine_mode mode;
2230 int nintregs, nsseregs;
2234 enum machine_mode tmpmode;
2236 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2237 enum x86_64_reg_class class[MAX_CLASSES];
2241 int needed_sseregs, needed_intregs;
2242 rtx exp[MAX_CLASSES];
2245 n = classify_argument (mode, type, class, 0);
2246 if (TARGET_DEBUG_ARG)
2249 fprintf (stderr, "Memory class\n");
2252 fprintf (stderr, "Classes:");
2253 for (i = 0; i < n; i++)
2255 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
2257 fprintf (stderr, "\n");
/* Bail out (memory) when classification says memory or when the argument
   does not fit in the registers that remain.  */
2262 if (!examine_argument (mode, type, in_return, &needed_intregs, &needed_sseregs))
2264 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
2267 /* First construct simple cases. Avoid SCmode, since we want to use
2268 single register to pass this type. */
2269 if (n == 1 && mode != SCmode)
2272 case X86_64_INTEGER_CLASS:
2273 case X86_64_INTEGERSI_CLASS:
2274 return gen_rtx_REG (mode, intreg[0]);
2275 case X86_64_SSE_CLASS:
2276 case X86_64_SSESF_CLASS:
2277 case X86_64_SSEDF_CLASS:
2278 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2279 case X86_64_X87_CLASS:
2280 return gen_rtx_REG (mode, FIRST_STACK_REG);
2281 case X86_64_NO_CLASS:
2282 /* Zero sized array, struct or class. */
/* Two-eightbyte fast paths: a full 16-byte SSE value, an x87 long
   double, and an aligned integer pair in consecutive registers.  */
2287 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS)
2288 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2290 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
2291 return gen_rtx_REG (TFmode, FIRST_STACK_REG);
2292 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
2293 && class[1] == X86_64_INTEGER_CLASS
2294 && (mode == CDImode || mode == TImode)
2295 && intreg[0] + 1 == intreg[1])
2296 return gen_rtx_REG (mode, intreg[0]);
2298 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS
2299 && class[2] == X86_64_X87_CLASS && class[3] == X86_64_X87UP_CLASS)
2300 return gen_rtx_REG (TCmode, FIRST_STACK_REG);
2302 /* Otherwise figure out the entries of the PARALLEL. */
2303 for (i = 0; i < n; i++)
2307 case X86_64_NO_CLASS:
2309 case X86_64_INTEGER_CLASS:
2310 case X86_64_INTEGERSI_CLASS:
2311 /* Merge TImodes on aligned occasions here too. */
2312 if (i * 8 + 8 > bytes)
/* Trailing partial eightbyte: pick the smallest integer mode that
   covers the remaining bytes.  */
2313 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
2314 else if (class[i] == X86_64_INTEGERSI_CLASS)
2318 /* We've requested 24 bytes we don't have mode for. Use DImode. */
2319 if (tmpmode == BLKmode)
2321 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2322 gen_rtx_REG (tmpmode, *intreg),
2326 case X86_64_SSESF_CLASS:
2327 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2328 gen_rtx_REG (SFmode,
2329 SSE_REGNO (sse_regno)),
2333 case X86_64_SSEDF_CLASS:
2334 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2335 gen_rtx_REG (DFmode,
2336 SSE_REGNO (sse_regno)),
2340 case X86_64_SSE_CLASS:
/* SSE followed by SSEUP means one 16-byte (TImode) register.  */
2341 if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
2345 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2346 gen_rtx_REG (tmpmode,
2347 SSE_REGNO (sse_regno)),
2349 if (tmpmode == TImode)
/* Assemble the collected EXPR_LISTs into the final PARALLEL.  */
2357 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
2358 for (i = 0; i < nexps; i++)
2359 XVECEXP (ret, 0, i) = exp [i];
2363 /* Update the data in CUM to advance over an argument
2364 of mode MODE and data type TYPE.
2365 (TYPE is null for libcalls where that information may not be available.) */
/* Advance CUM past one argument of MODE/TYPE (see block comment above).
   NOTE(review): the TARGET_64BIT / 32-bit branch structure and the
   register-exhaustion resets are partly elided in this listing.  */
2368 function_arg_advance (cum, mode, type, named)
2369 CUMULATIVE_ARGS *cum; /* current arg information */
2370 enum machine_mode mode; /* current arg mode */
2371 tree type; /* type of the argument or 0 if lib support */
2372 int named; /* whether or not the argument was named */
/* Size of this argument in bytes and in whole words.  */
2375 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2376 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2378 if (TARGET_DEBUG_ARG)
2380 "function_adv (sz=%d, wds=%2d, nregs=%d, mode=%s, named=%d)\n\n",
2381 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
/* x86-64 path: either the whole argument fits in the remaining integer
   and SSE registers, or it is pushed on the stack (words only).  */
2384 int int_nregs, sse_nregs;
2385 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
2386 cum->words += words;
2387 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
2389 cum->nregs -= int_nregs;
2390 cum->sse_nregs -= sse_nregs;
2391 cum->regno += int_nregs;
2392 cum->sse_regno += sse_nregs;
2395 cum->words += words;
/* 32-bit path: TImode SSE values consume one SSE register...  */
2399 if (TARGET_SSE && mode == TImode)
2401 cum->sse_words += words;
2402 cum->sse_nregs -= 1;
2403 cum->sse_regno += 1;
2404 if (cum->sse_nregs <= 0)
/* ...all other register-passed arguments consume integer registers,
   one per word.  */
2412 cum->words += words;
2413 cum->nregs -= words;
2414 cum->regno += words;
2416 if (cum->nregs <= 0)
2426 /* Define where to put the arguments to a function.
2427 Value is zero to push the argument on the stack,
2428 or a hard register in which to store the argument.
2430 MODE is the argument's machine mode.
2431 TYPE is the data type of the argument (as a tree).
2432 This is null for libcalls where that information may
2434 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2435 the preceding args and about the function being called.
2436 NAMED is nonzero if this argument is a named parameter
2437 (otherwise it is an extra parameter matching an ellipsis). */
/* Decide where the next argument goes: a hard register rtx, or zero to
   push it on the stack (see block comment above).
   NOTE(review): the TARGET_64BIT branches and the mode switch around the
   32-bit register assignment are partly elided in this listing.  */
2440 function_arg (cum, mode, type, named)
2441 CUMULATIVE_ARGS *cum; /* current arg information */
2442 enum machine_mode mode; /* current arg mode */
2443 tree type; /* type of the argument or 0 if lib support */
2444 int named; /* != 0 for normal args, == 0 for ... args */
2448 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2449 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2451 /* Handle a hidden AL argument containing number of registers for varargs
2452 x86-64 functions. For i386 ABI just return constm1_rtx to avoid
2454 if (mode == VOIDmode)
2457 return GEN_INT (cum->maybe_vaarg
2458 ? (cum->sse_nregs < 0
/* x86-64: let construct_container build the REG/PARALLEL from the
   registers still available in CUM.  */
2466 ret = construct_container (mode, type, 0, cum->nregs, cum->sse_nregs,
2467 &x86_64_int_parameter_registers [cum->regno],
2472 /* For now, pass fp/complex values on the stack. */
/* 32-bit integer path: use a register only if the whole argument fits
   in the registers that remain.  */
2484 if (words <= cum->nregs)
2486 int regno = cum->regno;
2488 /* Fastcall allocates the first two DWORD (SImode) or
2489 smaller arguments to ECX and EDX. */
2492 if (mode == BLKmode || mode == DImode)
2495 /* ECX not EAX is the first allocated register. */
2499 ret = gen_rtx_REG (mode, regno);
2504 ret = gen_rtx_REG (mode, cum->sse_regno);
2508 if (TARGET_DEBUG_ARG)
2511 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
2512 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2515 print_simple_rtl (stderr, ret);
2517 fprintf (stderr, ", stack");
2519 fprintf (stderr, " )\n");
2525 /* A C expression that indicates when an argument must be passed by
2526 reference. If nonzero for an argument, a copy of that argument is
2527 made in memory and a pointer to the argument is passed instead of
2528 the argument itself. The pointer is passed in whatever way is
2529 appropriate for passing a pointer to that type. */
/* Nonzero when an argument of TYPE must be passed by reference (a copy is
   made and a pointer passed instead) — see block comment above.
   NOTE(review): the return statements and the `tree type;' parameter
   declaration are elided in this listing; presumably only variable-sized
   types (int_size_in_bytes == -1) are passed by reference here.  */
2532 function_arg_pass_by_reference (cum, mode, type, named)
2533 CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED;
2534 enum machine_mode mode ATTRIBUTE_UNUSED;
2536 int named ATTRIBUTE_UNUSED;
/* int_size_in_bytes returns -1 for variable-sized types.  */
2541 if (type && int_size_in_bytes (type) == -1)
2543 if (TARGET_DEBUG_ARG)
2544 fprintf (stderr, "function_arg_pass_by_reference\n");
2551 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
/* Return true when TYPE is, or recursively contains, a 128-bit aligned
   SSE vector, for 32-bit argument-alignment purposes (see comment above).
   NOTE(review): the early-return lines and some braces are elided in this
   listing.  */
2554 contains_128bit_aligned_vector_p (type)
2557 enum machine_mode mode = TYPE_MODE (type);
/* An SSE-mode type counts unless the user explicitly lowered its
   alignment below 128 bits.  */
2558 if (SSE_REG_MODE_P (mode)
2559 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
/* Types aligned under 128 bits cannot contain a 128-bit aligned
   vector.  */
2561 if (TYPE_ALIGN (type) < 128)
2564 if (AGGREGATE_TYPE_P (type))
2566 /* Walk the agregates recursivly. */
2567 if (TREE_CODE (type) == RECORD_TYPE
2568 || TREE_CODE (type) == UNION_TYPE
2569 || TREE_CODE (type) == QUAL_UNION_TYPE)
/* First check C++ base classes...  */
2573 if (TYPE_BINFO (type) != NULL
2574 && TYPE_BINFO_BASETYPES (type) != NULL)
2576 tree bases = TYPE_BINFO_BASETYPES (type);
2577 int n_bases = TREE_VEC_LENGTH (bases);
2580 for (i = 0; i < n_bases; ++i)
2582 tree binfo = TREE_VEC_ELT (bases, i);
2583 tree type = BINFO_TYPE (binfo);
2585 if (contains_128bit_aligned_vector_p (type))
2589 /* And now merge the fields of structure. */
2590 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2592 if (TREE_CODE (field) == FIELD_DECL
2593 && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
2597 /* Just for use if some languages passes arrays by value. */
2598 else if (TREE_CODE (type) == ARRAY_TYPE)
2600 if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
2609 /* Gives the alignment boundary, in bits, of an argument with the specified mode
/* Return the alignment boundary, in bits, for an argument of MODE/TYPE
   (see comment above).  Starts from the type or mode alignment, clamps
   up to PARM_BOUNDARY, then on 32-bit targets caps non-SSE arguments
   back down to PARM_BOUNDARY.
   NOTE(review): the TARGET_64BIT early return and final return are
   elided in this listing.  */
2613 ix86_function_arg_boundary (mode, type)
2614 enum machine_mode mode;
/* Prefer the tree type's alignment when TYPE is available.  */
2619 align = TYPE_ALIGN (type);
2621 align = GET_MODE_ALIGNMENT (mode);
2622 if (align < PARM_BOUNDARY)
2623 align = PARM_BOUNDARY;
2626 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
2627 make an exception for SSE modes since these require 128bit
2630 The handling here differs from field_alignment. ICC aligns MMX
2631 arguments to 4 byte boundaries, while structure fields are aligned
2632 to 8 byte boundaries. */
2635 if (!SSE_REG_MODE_P (mode))
2636 align = PARM_BOUNDARY;
/* Aggregates get extra alignment only when they actually contain a
   128-bit aligned vector.  */
2640 if (!contains_128bit_aligned_vector_p (type))
2641 align = PARM_BOUNDARY;
/* Without SSE enabled there is no reason (or ability) to over-align.  */
2643 if (align != PARM_BOUNDARY && !TARGET_SSE)
2651 /* Return true if N is a possible register number of function value. */
/* Return true if REGNO may hold a function return value (see comment
   above).  The two return expressions presumably correspond to the
   TARGET_64BIT and 32-bit cases respectively — the `if' line is elided
   in this listing.  */
2653 ix86_function_value_regno_p (regno)
/* 64-bit: RAX, or st(0)/xmm0 when the respective feature is enabled.  */
2658 return ((regno) == 0
2659 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
2660 || ((regno) == FIRST_SSE_REG && TARGET_SSE));
/* 32-bit: EAX, st(0), or xmm0 when SSE is enabled.  */
2662 return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
2663 || ((regno) == FIRST_SSE_REG && TARGET_SSE)
2664 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
2667 /* Define how to find the value returned by a function.
2668 VALTYPE is the data type of the value (as a tree).
2669 If the precise function being called is known, FUNC is its FUNCTION_DECL;
2670 otherwise, FUNC is 0. */
/* Return the rtx for a value of type VALTYPE returned by a function (see
   comment above).  On x86-64, classification via construct_container
   decides the register set; otherwise ix86_value_regno picks the single
   return register.  */
2672 ix86_function_value (valtype)
2677 rtx ret = construct_container (TYPE_MODE (valtype), valtype, 1,
2678 REGPARM_MAX, SSE_REGPARM_MAX,
2679 x86_64_int_return_registers, 0);
2680 /* For zero sized structures, construct_container return NULL, but we need
2681 to keep rest of compiler happy by returning meaningful value. */
2683 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
/* Non-64-bit case: single register chosen by mode.  */
2687 return gen_rtx_REG (TYPE_MODE (valtype),
2688 ix86_value_regno (TYPE_MODE (valtype)));
2691 /* Return false iff type is returned in memory. */
/* Nonzero when a value of TYPE must be returned in memory rather than in
   registers.  (The comment above says "Return false iff..."; the visible
   expressions return truthy values for memory returns.)
   NOTE(review): the TARGET_64BIT branch line and the literal return
   values are elided in this listing.  */
2693 ix86_return_in_memory (type)
2696 int needed_intregs, needed_sseregs;
/* x86-64: in memory exactly when examine_argument says the value does
   not fit in registers.  */
2699 return !examine_argument (TYPE_MODE (type), type, 1,
2700 &needed_intregs, &needed_sseregs);
/* 32-bit heuristics follow.  */
2704 if (TYPE_MODE (type) == BLKmode)
/* MS ABI returns small aggregates (<= 8 bytes) in registers.  */
2706 else if (MS_AGGREGATE_RETURN
2707 && AGGREGATE_TYPE_P (type)
2708 && int_size_in_bytes(type) <= 8)
2710 else if ((VECTOR_MODE_P (TYPE_MODE (type))
2711 && int_size_in_bytes (type) == 8)
2712 || (int_size_in_bytes (type) > 12
2713 && TYPE_MODE (type) != TImode
2714 && TYPE_MODE (type) != TFmode
2715 && !VECTOR_MODE_P (TYPE_MODE (type))))
2721 /* Define how to find the value returned by a library function
2722 assuming the value has mode MODE. */
/* Return the rtx for a library-call return value of mode MODE (see
   comment above).  NOTE(review): the mode switch/conditions selecting
   between the three 64-bit returns are elided in this listing; the final
   return handles the non-64-bit case via ix86_value_regno.  */
2724 ix86_libcall_value (mode)
2725 enum machine_mode mode;
2735 return gen_rtx_REG (mode, FIRST_SSE_REG);
2738 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
2740 return gen_rtx_REG (mode, 0);
2744 return gen_rtx_REG (mode, ix86_value_regno (mode));
2747 /* Given a mode, return the register to use for a return value. */
/* Given MODE, pick the register number used to return a value of that
   mode on 32-bit targets: st(0) for x87 floats, xmm0 for TImode/vectors,
   otherwise (elided in this listing) presumably eax (register 0).  */
2750 ix86_value_regno (mode)
2751 enum machine_mode mode;
2753 if (GET_MODE_CLASS (mode) == MODE_FLOAT && TARGET_FLOAT_RETURNS_IN_80387)
2754 return FIRST_FLOAT_REG;
2755 if (mode == TImode || VECTOR_MODE_P (mode))
2756 return FIRST_SSE_REG;
2760 /* Create the va_list data type. */
/* Build the tree type for va_list.  32-bit i386 uses a plain char
   pointer; x86-64 builds the psABI __va_list_tag record
   { unsigned gp_offset, fp_offset; void *overflow_arg_area,
     *reg_save_area; } wrapped in a one-element array type.  */
2763 ix86_build_va_list ()
2765 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
2767 /* For i386 we use plain pointer to argument area. */
2769 return build_pointer_type (char_type_node);
2771 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
2772 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
2774 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
2775 unsigned_type_node);
2776 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
2777 unsigned_type_node);
2778 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
2780 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
/* Attach each field to the record and chain them in declaration order.  */
2783 DECL_FIELD_CONTEXT (f_gpr) = record;
2784 DECL_FIELD_CONTEXT (f_fpr) = record;
2785 DECL_FIELD_CONTEXT (f_ovf) = record;
2786 DECL_FIELD_CONTEXT (f_sav) = record;
2788 TREE_CHAIN (record) = type_decl;
2789 TYPE_NAME (record) = type_decl;
2790 TYPE_FIELDS (record) = f_gpr;
2791 TREE_CHAIN (f_gpr) = f_fpr;
2792 TREE_CHAIN (f_fpr) = f_ovf;
2793 TREE_CHAIN (f_ovf) = f_sav;
2795 layout_type (record);
2797 /* The correct type is an array type of one element. */
2798 return build_array_type (record, build_index_type (size_zero_node));
2801 /* Perform any needed actions needed for a function that is receiving a
2802 variable number of arguments.
2806 MODE and TYPE are the mode and type of the current parameter.
2808 PRETEND_SIZE is a variable that should be set to the amount of stack
2809 that must be pushed by the prolog to pretend that our caller pushed
2812 Normally, this macro will push all remaining incoming registers on the
2813 stack and set PRETEND_SIZE to the length of the registers pushed. */
/* Set up the register save area for a varargs/stdarg function (see block
   comment above): spill the unused integer parameter registers, then
   emit the computed-jump SSE prologue save for the SSE registers.
   NOTE(review): the TARGET_64BIT guard, some declarations, and a few
   statements are elided in this listing.  */
2816 ix86_setup_incoming_varargs (cum, mode, type, pretend_size, no_rtl)
2817 CUMULATIVE_ARGS *cum;
2818 enum machine_mode mode;
2820 int *pretend_size ATTRIBUTE_UNUSED;
2824 CUMULATIVE_ARGS next_cum;
2825 rtx save_area = NULL_RTX, mem;
2838 /* Indicate to allocate space on the stack for varargs save area. */
2839 ix86_save_varrargs_registers = 1;
2841 fntype = TREE_TYPE (current_function_decl);
/* stdarg_p: true when the last named arg is not void (i.e. a real
   prototype with `...'), as opposed to old-style varargs.  */
2842 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
2843 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
2844 != void_type_node));
2846 /* For varargs, we do not want to skip the dummy va_dcl argument.
2847 For stdargs, we do want to skip the last named argument. */
2850 function_arg_advance (&next_cum, mode, type, 1);
2853 save_area = frame_pointer_rtx;
2855 set = get_varargs_alias_set ();
/* Spill each still-unused integer parameter register into the save
   area, one word per register.  */
2857 for (i = next_cum.regno; i < ix86_regparm; i++)
2859 mem = gen_rtx_MEM (Pmode,
2860 plus_constant (save_area, i * UNITS_PER_WORD));
2861 set_mem_alias_set (mem, set);
2862 emit_move_insn (mem, gen_rtx_REG (Pmode,
2863 x86_64_int_parameter_registers[i]));
2866 if (next_cum.sse_nregs)
2868 /* Now emit code to save SSE registers. The AX parameter contains number
2869 of SSE parameter registers used to call this function. We use
2870 sse_prologue_save insn template that produces computed jump across
2871 SSE saves. We need some preparation work to get this working. */
2873 label = gen_label_rtx ();
2874 label_ref = gen_rtx_LABEL_REF (Pmode, label);
2876 /* Compute address to jump to :
2877 label - 5*eax + nnamed_sse_arguments*5 */
2878 tmp_reg = gen_reg_rtx (Pmode);
2879 nsse_reg = gen_reg_rtx (Pmode);
/* AL holds the number of SSE registers actually used by the caller.  */
2880 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
2881 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
2882 gen_rtx_MULT (Pmode, nsse_reg,
2884 if (next_cum.sse_regno)
2887 gen_rtx_CONST (DImode,
2888 gen_rtx_PLUS (DImode,
2890 GEN_INT (next_cum.sse_regno * 4))));
2892 emit_move_insn (nsse_reg, label_ref);
2893 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
2895 /* Compute address of memory block we save into. We always use pointer
2896 pointing 127 bytes after first byte to store - this is needed to keep
2897 instruction size limited by 4 bytes. */
2898 tmp_reg = gen_reg_rtx (Pmode);
2899 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
2900 plus_constant (save_area,
2901 8 * REGPARM_MAX + 127)));
2902 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
2903 set_mem_alias_set (mem, set);
2904 set_mem_align (mem, BITS_PER_WORD);
2906 /* And finally do the dirty job! */
2907 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
2908 GEN_INT (next_cum.sse_regno), label));
2913 /* Implement va_start. */
/* Expand __builtin_va_start for VALIST.  On 32-bit targets this defers
   to the generic expander; on x86-64 it initializes the four
   __va_list_tag fields (gp_offset, fp_offset, overflow_arg_area,
   reg_save_area) from the current function's argument-scan state.  */
2916 ix86_va_start (valist, nextarg)
2920 HOST_WIDE_INT words, n_gpr, n_fpr;
2921 tree f_gpr, f_fpr, f_ovf, f_sav;
2922 tree gpr, fpr, ovf, sav, t;
2924 /* Only 64bit target needs something special. */
2927 std_expand_builtin_va_start (valist, nextarg);
/* Fields of __va_list_tag, in the order ix86_build_va_list chained
   them.  */
2931 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
2932 f_fpr = TREE_CHAIN (f_gpr);
2933 f_ovf = TREE_CHAIN (f_fpr);
2934 f_sav = TREE_CHAIN (f_ovf);
2936 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
2937 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
2938 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
2939 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
2940 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
2942 /* Count number of gp and fp argument registers used. */
2943 words = current_function_args_info.words;
2944 n_gpr = current_function_args_info.regno;
2945 n_fpr = current_function_args_info.sse_regno;
2947 if (TARGET_DEBUG_ARG)
2948 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
2949 (int) words, (int) n_gpr, (int) n_fpr);
/* gp_offset = n_gpr * 8: byte offset of the first unused integer
   register slot in the save area.  */
2951 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
2952 build_int_2 (n_gpr * 8, 0));
2953 TREE_SIDE_EFFECTS (t) = 1;
2954 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
/* fp_offset = n_fpr * 16 + 8*REGPARM_MAX: SSE slots are 16 bytes each
   and follow the integer slots.  */
2956 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
2957 build_int_2 (n_fpr * 16 + 8*REGPARM_MAX, 0));
2958 TREE_SIDE_EFFECTS (t) = 1;
2959 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2961 /* Find the overflow area. */
2962 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
2964 t = build (PLUS_EXPR, TREE_TYPE (ovf), t,
2965 build_int_2 (words * UNITS_PER_WORD, 0));
2966 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
2967 TREE_SIDE_EFFECTS (t) = 1;
2968 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2970 /* Find the register save area.
2971 Prologue of the function save it right above stack frame. */
2972 t = make_tree (TREE_TYPE (sav), frame_pointer_rtx);
2973 t = build (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
2974 TREE_SIDE_EFFECTS (t) = 1;
2975 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2978 /* Implement va_arg. */
2980 ix86_va_arg (valist, type)
2983 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
2984 tree f_gpr, f_fpr, f_ovf, f_sav;
2985 tree gpr, fpr, ovf, sav, t;
2987 rtx lab_false, lab_over = NULL_RTX;
2992 /* Only 64bit target needs something special. */
2995 return std_expand_builtin_va_arg (valist, type);
2998 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
2999 f_fpr = TREE_CHAIN (f_gpr);
3000 f_ovf = TREE_CHAIN (f_fpr);
3001 f_sav = TREE_CHAIN (f_ovf);
3003 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
3004 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
3005 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
3006 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
3007 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
3009 size = int_size_in_bytes (type);
3012 /* Passed by reference. */
3014 type = build_pointer_type (type);
3015 size = int_size_in_bytes (type);
3017 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3019 container = construct_container (TYPE_MODE (type), type, 0,
3020 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
3022 * Pull the value out of the saved registers ...
3025 addr_rtx = gen_reg_rtx (Pmode);
3029 rtx int_addr_rtx, sse_addr_rtx;
3030 int needed_intregs, needed_sseregs;
3033 lab_over = gen_label_rtx ();
3034 lab_false = gen_label_rtx ();
3036 examine_argument (TYPE_MODE (type), type, 0,
3037 &needed_intregs, &needed_sseregs);
3040 need_temp = ((needed_intregs && TYPE_ALIGN (type) > 64)
3041 || TYPE_ALIGN (type) > 128);
3043 /* In case we are passing structure, verify that it is consecutive block
3044 on the register save area. If not we need to do moves. */
3045 if (!need_temp && !REG_P (container))
3047 /* Verify that all registers are strictly consecutive */
3048 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
3052 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
3054 rtx slot = XVECEXP (container, 0, i);
3055 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
3056 || INTVAL (XEXP (slot, 1)) != i * 16)
3064 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
3066 rtx slot = XVECEXP (container, 0, i);
3067 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
3068 || INTVAL (XEXP (slot, 1)) != i * 8)
3075 int_addr_rtx = addr_rtx;
3076 sse_addr_rtx = addr_rtx;
3080 int_addr_rtx = gen_reg_rtx (Pmode);
3081 sse_addr_rtx = gen_reg_rtx (Pmode);
3083 /* First ensure that we fit completely in registers. */
3086 emit_cmp_and_jump_insns (expand_expr
3087 (gpr, NULL_RTX, SImode, EXPAND_NORMAL),
3088 GEN_INT ((REGPARM_MAX - needed_intregs +
3089 1) * 8), GE, const1_rtx, SImode,
3094 emit_cmp_and_jump_insns (expand_expr
3095 (fpr, NULL_RTX, SImode, EXPAND_NORMAL),
3096 GEN_INT ((SSE_REGPARM_MAX -
3097 needed_sseregs + 1) * 16 +
3098 REGPARM_MAX * 8), GE, const1_rtx,
3099 SImode, 1, lab_false);
3102 /* Compute index to start of area used for integer regs. */
3105 t = build (PLUS_EXPR, ptr_type_node, sav, gpr);
3106 r = expand_expr (t, int_addr_rtx, Pmode, EXPAND_NORMAL);
3107 if (r != int_addr_rtx)
3108 emit_move_insn (int_addr_rtx, r);
3112 t = build (PLUS_EXPR, ptr_type_node, sav, fpr);
3113 r = expand_expr (t, sse_addr_rtx, Pmode, EXPAND_NORMAL);
3114 if (r != sse_addr_rtx)
3115 emit_move_insn (sse_addr_rtx, r);
3122 /* Never use the memory itself, as it has the alias set. */
3123 addr_rtx = XEXP (assign_temp (type, 0, 1, 0), 0);
3124 mem = gen_rtx_MEM (BLKmode, addr_rtx);
3125 set_mem_alias_set (mem, get_varargs_alias_set ());
3126 set_mem_align (mem, BITS_PER_UNIT);
3128 for (i = 0; i < XVECLEN (container, 0); i++)
3130 rtx slot = XVECEXP (container, 0, i);
3131 rtx reg = XEXP (slot, 0);
3132 enum machine_mode mode = GET_MODE (reg);
3138 if (SSE_REGNO_P (REGNO (reg)))
3140 src_addr = sse_addr_rtx;
3141 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
3145 src_addr = int_addr_rtx;
3146 src_offset = REGNO (reg) * 8;
3148 src_mem = gen_rtx_MEM (mode, src_addr);
3149 set_mem_alias_set (src_mem, get_varargs_alias_set ());
3150 src_mem = adjust_address (src_mem, mode, src_offset);
3151 dest_mem = adjust_address (mem, mode, INTVAL (XEXP (slot, 1)));
3152 emit_move_insn (dest_mem, src_mem);