1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003 Free Software Foundation, Inc.
5 This file is part of GNU CC.
7 GNU CC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
12 GNU CC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GNU CC; see the file COPYING. If not, write to
19 the Free Software Foundation, 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
24 #include "coretypes.h"
30 #include "hard-reg-set.h"
32 #include "insn-config.h"
33 #include "conditions.h"
35 #include "insn-attr.h"
43 #include "basic-block.h"
46 #include "target-def.h"
47 #include "langhooks.h"
/* Stack-probe threshold; -1 means "no limit checking" unless the
   target configuration overrides it.  */
#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif
/* Return index of given mode in mult and division cost tables.  Index 4
   is the catch-all for any wider (TImode) mode.  */
#define MODE_INDEX(mode)				\
  ((mode) == QImode ? 0					\
   : (mode) == HImode ? 1				\
   : (mode) == SImode ? 2				\
   : (mode) == DImode ? 3				\
   : 4)
61 /* Processor costs (relative to an add) */
63 struct processor_costs size_cost = { /* costs for tunning for size */
64 2, /* cost of an add instruction */
65 3, /* cost of a lea instruction */
66 2, /* variable shift costs */
67 3, /* constant shift costs */
68 {3, 3, 3, 3, 5}, /* cost of starting a multiply */
69 0, /* cost of multiply per each bit set */
70 {3, 3, 3, 3, 5}, /* cost of a divide/mod */
71 3, /* cost of movsx */
72 3, /* cost of movzx */
75 2, /* cost for loading QImode using movzbl */
76 {2, 2, 2}, /* cost of loading integer registers
77 in QImode, HImode and SImode.
78 Relative to reg-reg move (2). */
79 {2, 2, 2}, /* cost of storing integer registers */
80 2, /* cost of reg,reg fld/fst */
81 {2, 2, 2}, /* cost of loading fp registers
82 in SFmode, DFmode and XFmode */
83 {2, 2, 2}, /* cost of loading integer registers */
84 3, /* cost of moving MMX register */
85 {3, 3}, /* cost of loading MMX registers
86 in SImode and DImode */
87 {3, 3}, /* cost of storing MMX registers
88 in SImode and DImode */
89 3, /* cost of moving SSE register */
90 {3, 3, 3}, /* cost of loading SSE registers
91 in SImode, DImode and TImode */
92 {3, 3, 3}, /* cost of storing SSE registers
93 in SImode, DImode and TImode */
94 3, /* MMX or SSE register to integer */
95 0, /* size of prefetch block */
96 0, /* number of parallel prefetches */
98 2, /* cost of FADD and FSUB insns. */
99 2, /* cost of FMUL instruction. */
100 2, /* cost of FDIV instruction. */
101 2, /* cost of FABS instruction. */
102 2, /* cost of FCHS instruction. */
103 2, /* cost of FSQRT instruction. */
106 /* Processor costs (relative to an add) */
108 struct processor_costs i386_cost = { /* 386 specific costs */
109 1, /* cost of an add instruction */
110 1, /* cost of a lea instruction */
111 3, /* variable shift costs */
112 2, /* constant shift costs */
113 {6, 6, 6, 6, 6}, /* cost of starting a multiply */
114 1, /* cost of multiply per each bit set */
115 {23, 23, 23, 23, 23}, /* cost of a divide/mod */
116 3, /* cost of movsx */
117 2, /* cost of movzx */
118 15, /* "large" insn */
120 4, /* cost for loading QImode using movzbl */
121 {2, 4, 2}, /* cost of loading integer registers
122 in QImode, HImode and SImode.
123 Relative to reg-reg move (2). */
124 {2, 4, 2}, /* cost of storing integer registers */
125 2, /* cost of reg,reg fld/fst */
126 {8, 8, 8}, /* cost of loading fp registers
127 in SFmode, DFmode and XFmode */
128 {8, 8, 8}, /* cost of loading integer registers */
129 2, /* cost of moving MMX register */
130 {4, 8}, /* cost of loading MMX registers
131 in SImode and DImode */
132 {4, 8}, /* cost of storing MMX registers
133 in SImode and DImode */
134 2, /* cost of moving SSE register */
135 {4, 8, 16}, /* cost of loading SSE registers
136 in SImode, DImode and TImode */
137 {4, 8, 16}, /* cost of storing SSE registers
138 in SImode, DImode and TImode */
139 3, /* MMX or SSE register to integer */
140 0, /* size of prefetch block */
141 0, /* number of parallel prefetches */
143 23, /* cost of FADD and FSUB insns. */
144 27, /* cost of FMUL instruction. */
145 88, /* cost of FDIV instruction. */
146 22, /* cost of FABS instruction. */
147 24, /* cost of FCHS instruction. */
148 122, /* cost of FSQRT instruction. */
152 struct processor_costs i486_cost = { /* 486 specific costs */
153 1, /* cost of an add instruction */
154 1, /* cost of a lea instruction */
155 3, /* variable shift costs */
156 2, /* constant shift costs */
157 {12, 12, 12, 12, 12}, /* cost of starting a multiply */
158 1, /* cost of multiply per each bit set */
159 {40, 40, 40, 40, 40}, /* cost of a divide/mod */
160 3, /* cost of movsx */
161 2, /* cost of movzx */
162 15, /* "large" insn */
164 4, /* cost for loading QImode using movzbl */
165 {2, 4, 2}, /* cost of loading integer registers
166 in QImode, HImode and SImode.
167 Relative to reg-reg move (2). */
168 {2, 4, 2}, /* cost of storing integer registers */
169 2, /* cost of reg,reg fld/fst */
170 {8, 8, 8}, /* cost of loading fp registers
171 in SFmode, DFmode and XFmode */
172 {8, 8, 8}, /* cost of loading integer registers */
173 2, /* cost of moving MMX register */
174 {4, 8}, /* cost of loading MMX registers
175 in SImode and DImode */
176 {4, 8}, /* cost of storing MMX registers
177 in SImode and DImode */
178 2, /* cost of moving SSE register */
179 {4, 8, 16}, /* cost of loading SSE registers
180 in SImode, DImode and TImode */
181 {4, 8, 16}, /* cost of storing SSE registers
182 in SImode, DImode and TImode */
183 3, /* MMX or SSE register to integer */
184 0, /* size of prefetch block */
185 0, /* number of parallel prefetches */
187 8, /* cost of FADD and FSUB insns. */
188 16, /* cost of FMUL instruction. */
189 73, /* cost of FDIV instruction. */
190 3, /* cost of FABS instruction. */
191 3, /* cost of FCHS instruction. */
192 83, /* cost of FSQRT instruction. */
196 struct processor_costs pentium_cost = {
197 1, /* cost of an add instruction */
198 1, /* cost of a lea instruction */
199 4, /* variable shift costs */
200 1, /* constant shift costs */
201 {11, 11, 11, 11, 11}, /* cost of starting a multiply */
202 0, /* cost of multiply per each bit set */
203 {25, 25, 25, 25, 25}, /* cost of a divide/mod */
204 3, /* cost of movsx */
205 2, /* cost of movzx */
206 8, /* "large" insn */
208 6, /* cost for loading QImode using movzbl */
209 {2, 4, 2}, /* cost of loading integer registers
210 in QImode, HImode and SImode.
211 Relative to reg-reg move (2). */
212 {2, 4, 2}, /* cost of storing integer registers */
213 2, /* cost of reg,reg fld/fst */
214 {2, 2, 6}, /* cost of loading fp registers
215 in SFmode, DFmode and XFmode */
216 {4, 4, 6}, /* cost of loading integer registers */
217 8, /* cost of moving MMX register */
218 {8, 8}, /* cost of loading MMX registers
219 in SImode and DImode */
220 {8, 8}, /* cost of storing MMX registers
221 in SImode and DImode */
222 2, /* cost of moving SSE register */
223 {4, 8, 16}, /* cost of loading SSE registers
224 in SImode, DImode and TImode */
225 {4, 8, 16}, /* cost of storing SSE registers
226 in SImode, DImode and TImode */
227 3, /* MMX or SSE register to integer */
228 0, /* size of prefetch block */
229 0, /* number of parallel prefetches */
231 3, /* cost of FADD and FSUB insns. */
232 3, /* cost of FMUL instruction. */
233 39, /* cost of FDIV instruction. */
234 1, /* cost of FABS instruction. */
235 1, /* cost of FCHS instruction. */
236 70, /* cost of FSQRT instruction. */
240 struct processor_costs pentiumpro_cost = {
241 1, /* cost of an add instruction */
242 1, /* cost of a lea instruction */
243 1, /* variable shift costs */
244 1, /* constant shift costs */
245 {4, 4, 4, 4, 4}, /* cost of starting a multiply */
246 0, /* cost of multiply per each bit set */
247 {17, 17, 17, 17, 17}, /* cost of a divide/mod */
248 1, /* cost of movsx */
249 1, /* cost of movzx */
250 8, /* "large" insn */
252 2, /* cost for loading QImode using movzbl */
253 {4, 4, 4}, /* cost of loading integer registers
254 in QImode, HImode and SImode.
255 Relative to reg-reg move (2). */
256 {2, 2, 2}, /* cost of storing integer registers */
257 2, /* cost of reg,reg fld/fst */
258 {2, 2, 6}, /* cost of loading fp registers
259 in SFmode, DFmode and XFmode */
260 {4, 4, 6}, /* cost of loading integer registers */
261 2, /* cost of moving MMX register */
262 {2, 2}, /* cost of loading MMX registers
263 in SImode and DImode */
264 {2, 2}, /* cost of storing MMX registers
265 in SImode and DImode */
266 2, /* cost of moving SSE register */
267 {2, 2, 8}, /* cost of loading SSE registers
268 in SImode, DImode and TImode */
269 {2, 2, 8}, /* cost of storing SSE registers
270 in SImode, DImode and TImode */
271 3, /* MMX or SSE register to integer */
272 32, /* size of prefetch block */
273 6, /* number of parallel prefetches */
275 3, /* cost of FADD and FSUB insns. */
276 5, /* cost of FMUL instruction. */
277 56, /* cost of FDIV instruction. */
278 2, /* cost of FABS instruction. */
279 2, /* cost of FCHS instruction. */
280 56, /* cost of FSQRT instruction. */
284 struct processor_costs k6_cost = {
285 1, /* cost of an add instruction */
286 2, /* cost of a lea instruction */
287 1, /* variable shift costs */
288 1, /* constant shift costs */
289 {3, 3, 3, 3, 3}, /* cost of starting a multiply */
290 0, /* cost of multiply per each bit set */
291 {18, 18, 18, 18, 18}, /* cost of a divide/mod */
292 2, /* cost of movsx */
293 2, /* cost of movzx */
294 8, /* "large" insn */
296 3, /* cost for loading QImode using movzbl */
297 {4, 5, 4}, /* cost of loading integer registers
298 in QImode, HImode and SImode.
299 Relative to reg-reg move (2). */
300 {2, 3, 2}, /* cost of storing integer registers */
301 4, /* cost of reg,reg fld/fst */
302 {6, 6, 6}, /* cost of loading fp registers
303 in SFmode, DFmode and XFmode */
304 {4, 4, 4}, /* cost of loading integer registers */
305 2, /* cost of moving MMX register */
306 {2, 2}, /* cost of loading MMX registers
307 in SImode and DImode */
308 {2, 2}, /* cost of storing MMX registers
309 in SImode and DImode */
310 2, /* cost of moving SSE register */
311 {2, 2, 8}, /* cost of loading SSE registers
312 in SImode, DImode and TImode */
313 {2, 2, 8}, /* cost of storing SSE registers
314 in SImode, DImode and TImode */
315 6, /* MMX or SSE register to integer */
316 32, /* size of prefetch block */
317 1, /* number of parallel prefetches */
319 2, /* cost of FADD and FSUB insns. */
320 2, /* cost of FMUL instruction. */
321 56, /* cost of FDIV instruction. */
322 2, /* cost of FABS instruction. */
323 2, /* cost of FCHS instruction. */
324 56, /* cost of FSQRT instruction. */
328 struct processor_costs athlon_cost = {
329 1, /* cost of an add instruction */
330 2, /* cost of a lea instruction */
331 1, /* variable shift costs */
332 1, /* constant shift costs */
333 {5, 5, 5, 5, 5}, /* cost of starting a multiply */
334 0, /* cost of multiply per each bit set */
335 {18, 26, 42, 74, 74}, /* cost of a divide/mod */
336 1, /* cost of movsx */
337 1, /* cost of movzx */
338 8, /* "large" insn */
340 4, /* cost for loading QImode using movzbl */
341 {3, 4, 3}, /* cost of loading integer registers
342 in QImode, HImode and SImode.
343 Relative to reg-reg move (2). */
344 {3, 4, 3}, /* cost of storing integer registers */
345 4, /* cost of reg,reg fld/fst */
346 {4, 4, 12}, /* cost of loading fp registers
347 in SFmode, DFmode and XFmode */
348 {6, 6, 8}, /* cost of loading integer registers */
349 2, /* cost of moving MMX register */
350 {4, 4}, /* cost of loading MMX registers
351 in SImode and DImode */
352 {4, 4}, /* cost of storing MMX registers
353 in SImode and DImode */
354 2, /* cost of moving SSE register */
355 {4, 4, 6}, /* cost of loading SSE registers
356 in SImode, DImode and TImode */
357 {4, 4, 5}, /* cost of storing SSE registers
358 in SImode, DImode and TImode */
359 5, /* MMX or SSE register to integer */
360 64, /* size of prefetch block */
361 6, /* number of parallel prefetches */
363 4, /* cost of FADD and FSUB insns. */
364 4, /* cost of FMUL instruction. */
365 24, /* cost of FDIV instruction. */
366 2, /* cost of FABS instruction. */
367 2, /* cost of FCHS instruction. */
368 35, /* cost of FSQRT instruction. */
372 struct processor_costs k8_cost = {
373 1, /* cost of an add instruction */
374 2, /* cost of a lea instruction */
375 1, /* variable shift costs */
376 1, /* constant shift costs */
377 {3, 4, 3, 4, 5}, /* cost of starting a multiply */
378 0, /* cost of multiply per each bit set */
379 {18, 26, 42, 74, 74}, /* cost of a divide/mod */
380 1, /* cost of movsx */
381 1, /* cost of movzx */
382 8, /* "large" insn */
384 4, /* cost for loading QImode using movzbl */
385 {3, 4, 3}, /* cost of loading integer registers
386 in QImode, HImode and SImode.
387 Relative to reg-reg move (2). */
388 {3, 4, 3}, /* cost of storing integer registers */
389 4, /* cost of reg,reg fld/fst */
390 {4, 4, 12}, /* cost of loading fp registers
391 in SFmode, DFmode and XFmode */
392 {6, 6, 8}, /* cost of loading integer registers */
393 2, /* cost of moving MMX register */
394 {3, 3}, /* cost of loading MMX registers
395 in SImode and DImode */
396 {4, 4}, /* cost of storing MMX registers
397 in SImode and DImode */
398 2, /* cost of moving SSE register */
399 {4, 3, 6}, /* cost of loading SSE registers
400 in SImode, DImode and TImode */
401 {4, 4, 5}, /* cost of storing SSE registers
402 in SImode, DImode and TImode */
403 5, /* MMX or SSE register to integer */
404 64, /* size of prefetch block */
405 6, /* number of parallel prefetches */
407 4, /* cost of FADD and FSUB insns. */
408 4, /* cost of FMUL instruction. */
409 19, /* cost of FDIV instruction. */
410 2, /* cost of FABS instruction. */
411 2, /* cost of FCHS instruction. */
412 35, /* cost of FSQRT instruction. */
416 struct processor_costs pentium4_cost = {
417 1, /* cost of an add instruction */
418 1, /* cost of a lea instruction */
419 4, /* variable shift costs */
420 4, /* constant shift costs */
421 {15, 15, 15, 15, 15}, /* cost of starting a multiply */
422 0, /* cost of multiply per each bit set */
423 {56, 56, 56, 56, 56}, /* cost of a divide/mod */
424 1, /* cost of movsx */
425 1, /* cost of movzx */
426 16, /* "large" insn */
428 2, /* cost for loading QImode using movzbl */
429 {4, 5, 4}, /* cost of loading integer registers
430 in QImode, HImode and SImode.
431 Relative to reg-reg move (2). */
432 {2, 3, 2}, /* cost of storing integer registers */
433 2, /* cost of reg,reg fld/fst */
434 {2, 2, 6}, /* cost of loading fp registers
435 in SFmode, DFmode and XFmode */
436 {4, 4, 6}, /* cost of loading integer registers */
437 2, /* cost of moving MMX register */
438 {2, 2}, /* cost of loading MMX registers
439 in SImode and DImode */
440 {2, 2}, /* cost of storing MMX registers
441 in SImode and DImode */
442 12, /* cost of moving SSE register */
443 {12, 12, 12}, /* cost of loading SSE registers
444 in SImode, DImode and TImode */
445 {2, 2, 8}, /* cost of storing SSE registers
446 in SImode, DImode and TImode */
447 10, /* MMX or SSE register to integer */
448 64, /* size of prefetch block */
449 6, /* number of parallel prefetches */
451 5, /* cost of FADD and FSUB insns. */
452 7, /* cost of FMUL instruction. */
453 43, /* cost of FDIV instruction. */
454 2, /* cost of FABS instruction. */
455 2, /* cost of FCHS instruction. */
456 43, /* cost of FSQRT instruction. */
459 const struct processor_costs *ix86_cost = &pentium_cost;
461 /* Processor feature/optimization bitmasks. */
462 #define m_386 (1<<PROCESSOR_I386)
463 #define m_486 (1<<PROCESSOR_I486)
464 #define m_PENT (1<<PROCESSOR_PENTIUM)
465 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
466 #define m_K6 (1<<PROCESSOR_K6)
467 #define m_ATHLON (1<<PROCESSOR_ATHLON)
468 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
469 #define m_K8 (1<<PROCESSOR_K8)
470 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
472 const int x86_use_leave = m_386 | m_K6 | m_ATHLON_K8;
473 const int x86_push_memory = m_386 | m_K6 | m_ATHLON_K8 | m_PENT4;
474 const int x86_zero_extend_with_and = m_486 | m_PENT;
475 const int x86_movx = m_ATHLON_K8 | m_PPRO | m_PENT4 /* m_386 | m_K6 */;
476 const int x86_double_with_add = ~m_386;
477 const int x86_use_bit_test = m_386;
478 const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8 | m_K6;
479 const int x86_cmove = m_PPRO | m_ATHLON_K8 | m_PENT4;
480 const int x86_3dnow_a = m_ATHLON_K8;
481 const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON_K8 | m_PENT4;
482 const int x86_branch_hints = m_PENT4;
483 const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4;
484 const int x86_partial_reg_stall = m_PPRO;
485 const int x86_use_loop = m_K6;
486 const int x86_use_fiop = ~(m_PPRO | m_ATHLON_K8 | m_PENT);
487 const int x86_use_mov0 = m_K6;
488 const int x86_use_cltd = ~(m_PENT | m_K6);
489 const int x86_read_modify_write = ~m_PENT;
490 const int x86_read_modify = ~(m_PENT | m_PPRO);
491 const int x86_split_long_moves = m_PPRO;
492 const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON_K8;
493 const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
494 const int x86_single_stringop = m_386 | m_PENT4;
495 const int x86_qimode_math = ~(0);
496 const int x86_promote_qi_regs = 0;
497 const int x86_himode_math = ~(m_PPRO);
498 const int x86_promote_hi_regs = m_PPRO;
499 const int x86_sub_esp_4 = m_ATHLON_K8 | m_PPRO | m_PENT4;
500 const int x86_sub_esp_8 = m_ATHLON_K8 | m_PPRO | m_386 | m_486 | m_PENT4;
501 const int x86_add_esp_4 = m_ATHLON_K8 | m_K6 | m_PENT4;
502 const int x86_add_esp_8 = m_ATHLON_K8 | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4;
503 const int x86_integer_DFmode_moves = ~(m_ATHLON_K8 | m_PENT4 | m_PPRO);
504 const int x86_partial_reg_dependency = m_ATHLON_K8 | m_PENT4;
505 const int x86_memory_mismatch_stall = m_ATHLON_K8 | m_PENT4;
506 const int x86_accumulate_outgoing_args = m_ATHLON_K8 | m_PENT4 | m_PPRO;
507 const int x86_prologue_using_move = m_ATHLON_K8 | m_PENT4 | m_PPRO;
508 const int x86_epilogue_using_move = m_ATHLON_K8 | m_PENT4 | m_PPRO;
509 const int x86_decompose_lea = m_PENT4;
510 const int x86_shift1 = ~m_486;
511 const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON_K8 | m_PENT4;
512 const int x86_sse_partial_reg_dependency = m_PENT4 | m_PPRO;
513 /* Set for machines where the type and dependencies are resolved on SSE register
514 parts instead of whole registers, so we may maintain just lower part of
515 scalar values in proper format leaving the upper part undefined. */
516 const int x86_sse_partial_regs = m_ATHLON_K8;
517 /* Athlon optimizes partial-register FPS special case, thus avoiding the
518 need for extra instructions beforehand */
519 const int x86_sse_partial_regs_for_cvtsd2ss = 0;
520 const int x86_sse_typeless_stores = m_ATHLON_K8;
521 const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4;
522 const int x86_use_ffreep = m_ATHLON_K8;
523 const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_K6;
524 const int x86_inter_unit_moves = ~(m_ATHLON_K8);
525 const int x86_ext_80387_constants = m_K6 | m_ATHLON | m_PENT4 | m_PPRO;
/* In case the average insn count for single function invocation is
   lower than this constant, emit fast (but longer) prologue and
   epilogue code.  */
#define FAST_PROLOGUE_INSN_COUNT 20

/* Set by prologue expander and used by epilogue expander to determine
   the style used.  */
static int use_fast_prologue_epilogue;
536 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
537 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
538 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
539 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
541 /* Array of the smallest class containing reg number REGNO, indexed by
542 REGNO. Used by REGNO_REG_CLASS in i386.h. */
544 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
547 AREG, DREG, CREG, BREG,
549 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
551 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
552 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
555 /* flags, fpsr, dirflag, frame */
556 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
557 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
559 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
561 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
562 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
563 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
567 /* The "default" register map used in 32bit mode. */
569 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
571 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
572 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
573 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
574 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
575 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
576 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
577 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
580 static int const x86_64_int_parameter_registers[6] =
582 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
583 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
/* x86-64 ABI integer value-return registers.  Note gcc regno 1 is %edx
   (see the parameter-register table above), so the second entry is RDX.  */
static int const x86_64_int_return_registers[4] =
{
  0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
};
591 /* The "default" register map used in 64bit mode. */
592 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
594 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
595 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
596 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
597 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
598 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
599 8,9,10,11,12,13,14,15, /* extended integer registers */
600 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
603 /* Define the register numbers to be used in Dwarf debugging information.
604 The SVR4 reference port C compiler uses the following register numbers
605 in its Dwarf output code:
606 0 for %eax (gcc regno = 0)
607 1 for %ecx (gcc regno = 2)
608 2 for %edx (gcc regno = 1)
609 3 for %ebx (gcc regno = 3)
610 4 for %esp (gcc regno = 7)
611 5 for %ebp (gcc regno = 6)
612 6 for %esi (gcc regno = 4)
613 7 for %edi (gcc regno = 5)
614 The following three DWARF register numbers are never generated by
615 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
616 believes these numbers have these meanings.
617 8 for %eip (no gcc equivalent)
618 9 for %eflags (gcc regno = 17)
619 10 for %trapno (no gcc equivalent)
620 It is not at all clear how we should number the FP stack registers
621 for the x86 architecture. If the version of SDB on x86/svr4 were
622 a bit less brain dead with respect to floating-point then we would
623 have a precedent to follow with respect to DWARF register numbers
624 for x86 FP registers, but the SDB on x86/svr4 is so completely
625 broken with respect to FP registers that it is hardly worth thinking
626 of it as something to strive for compatibility with.
627 The version of x86/svr4 SDB I have at the moment does (partially)
628 seem to believe that DWARF register number 11 is associated with
629 the x86 register %st(0), but that's about all. Higher DWARF
630 register numbers don't seem to be associated with anything in
631 particular, and even for DWARF regno 11, SDB only seems to under-
632 stand that it should say that a variable lives in %st(0) (when
633 asked via an `=' command) if we said it was in DWARF regno 11,
634 but SDB still prints garbage when asked for the value of the
635 variable in question (via a `/' command).
636 (Also note that the labels SDB prints for various FP stack regs
637 when doing an `x' command are all wrong.)
638 Note that these problems generally don't affect the native SVR4
639 C compiler because it doesn't allow the use of -O with -g and
640 because when it is *not* optimizing, it allocates a memory
641 location for each floating-point variable, and the memory
642 location is what gets described in the DWARF AT_location
643 attribute for the variable in question.
644 Regardless of the severe mental illness of the x86/svr4 SDB, we
645 do something sensible here and we use the following DWARF
646 register numbers. Note that these are all stack-top-relative
648 11 for %st(0) (gcc regno = 8)
649 12 for %st(1) (gcc regno = 9)
650 13 for %st(2) (gcc regno = 10)
651 14 for %st(3) (gcc regno = 11)
652 15 for %st(4) (gcc regno = 12)
653 16 for %st(5) (gcc regno = 13)
654 17 for %st(6) (gcc regno = 14)
655 18 for %st(7) (gcc regno = 15)
657 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
659 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
660 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
661 -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
662 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
663 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
664 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
665 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
668 /* Test and compare insns in i386.md store the information needed to
669 generate branch and scc insns here. */
671 rtx ix86_compare_op0 = NULL_RTX;
672 rtx ix86_compare_op1 = NULL_RTX;
674 /* The encoding characters for the four TLS models present in ELF. */
676 static char const tls_model_chars[] = " GLil";
/* Number of per-function scratch stack slots tracked — TODO confirm
   against the machine_function definition that uses it.  */
#define MAX_386_STACK_LOCALS 3
/* Size of the register save area.  */
#define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
682 /* Define the structure for the machine field in struct function. */
684 struct stack_local_entry GTY(())
689 struct stack_local_entry *next;
692 /* Structure describing stack frame layout.
693 Stack grows downward:
699 saved frame pointer if frame_pointer_needed
700 <- HARD_FRAME_POINTER
706 > to_allocate <- FRAME_POINTER
718 int outgoing_arguments_size;
721 HOST_WIDE_INT to_allocate;
722 /* The offsets relative to ARG_POINTER. */
723 HOST_WIDE_INT frame_pointer_offset;
724 HOST_WIDE_INT hard_frame_pointer_offset;
725 HOST_WIDE_INT stack_pointer_offset;
728 /* Used to enable/disable debugging features. */
729 const char *ix86_debug_arg_string, *ix86_debug_addr_string;
730 /* Code model option as passed by user. */
731 const char *ix86_cmodel_string;
733 enum cmodel ix86_cmodel;
735 const char *ix86_asm_string;
736 enum asm_dialect ix86_asm_dialect = ASM_ATT;
738 const char *ix86_tls_dialect_string;
739 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
741 /* Which unit we are generating floating point math for. */
742 enum fpmath_unit ix86_fpmath;
744 /* Which cpu are we scheduling for. */
745 enum processor_type ix86_tune;
746 /* Which instruction set architecture to use. */
747 enum processor_type ix86_arch;
749 /* Strings to hold which cpu and instruction set architecture to use. */
750 const char *ix86_tune_string; /* for -mtune=<xxx> */
751 const char *ix86_arch_string; /* for -march=<xxx> */
752 const char *ix86_fpmath_string; /* for -mfpmath=<xxx> */
754 /* # of registers to use to pass arguments. */
755 const char *ix86_regparm_string;
757 /* true if sse prefetch instruction is not NOOP. */
758 int x86_prefetch_sse;
760 /* ix86_regparm_string as a number */
763 /* Alignment to use for loops and jumps: */
765 /* Power of two alignment for loops. */
766 const char *ix86_align_loops_string;
768 /* Power of two alignment for non-loop jumps. */
769 const char *ix86_align_jumps_string;
771 /* Power of two alignment for stack boundary in bytes. */
772 const char *ix86_preferred_stack_boundary_string;
774 /* Preferred alignment for stack boundary in bits. */
775 int ix86_preferred_stack_boundary;
777 /* Values 1-5: see jump.c */
778 int ix86_branch_cost;
779 const char *ix86_branch_cost_string;
781 /* Power of two alignment for functions. */
782 const char *ix86_align_funcs_string;
784 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
785 static char internal_label_prefix[16];
786 static int internal_label_prefix_len;
788 static int local_symbolic_operand PARAMS ((rtx, enum machine_mode));
789 static int tls_symbolic_operand_1 PARAMS ((rtx, enum tls_model));
790 static void output_pic_addr_const PARAMS ((FILE *, rtx, int));
791 static void put_condition_code PARAMS ((enum rtx_code, enum machine_mode,
793 static const char *get_some_local_dynamic_name PARAMS ((void));
794 static int get_some_local_dynamic_name_1 PARAMS ((rtx *, void *));
795 static rtx maybe_get_pool_constant PARAMS ((rtx));
796 static rtx ix86_expand_int_compare PARAMS ((enum rtx_code, rtx, rtx));
797 static enum rtx_code ix86_prepare_fp_compare_args PARAMS ((enum rtx_code,
799 static rtx get_thread_pointer PARAMS ((void));
800 static void get_pc_thunk_name PARAMS ((char [32], unsigned int));
801 static rtx gen_push PARAMS ((rtx));
802 static int memory_address_length PARAMS ((rtx addr));
803 static int ix86_flags_dependant PARAMS ((rtx, rtx, enum attr_type));
804 static int ix86_agi_dependant PARAMS ((rtx, rtx, enum attr_type));
805 static enum attr_ppro_uops ix86_safe_ppro_uops PARAMS ((rtx));
806 static void ix86_dump_ppro_packet PARAMS ((FILE *));
807 static void ix86_reorder_insn PARAMS ((rtx *, rtx *));
808 static struct machine_function * ix86_init_machine_status PARAMS ((void));
809 static int ix86_split_to_parts PARAMS ((rtx, rtx *, enum machine_mode));
810 static int ix86_nsaved_regs PARAMS ((void));
811 static void ix86_emit_save_regs PARAMS ((void));
812 static void ix86_emit_save_regs_using_mov PARAMS ((rtx, HOST_WIDE_INT));
813 static void ix86_emit_restore_regs_using_mov PARAMS ((rtx, int, int));
814 static void ix86_output_function_epilogue PARAMS ((FILE *, HOST_WIDE_INT));
815 static void ix86_set_move_mem_attrs_1 PARAMS ((rtx, rtx, rtx, rtx, rtx));
816 static void ix86_sched_reorder_ppro PARAMS ((rtx *, rtx *));
817 static HOST_WIDE_INT ix86_GOT_alias_set PARAMS ((void));
818 static void ix86_adjust_counter PARAMS ((rtx, HOST_WIDE_INT));
819 static rtx ix86_expand_aligntest PARAMS ((rtx, int));
820 static void ix86_expand_strlensi_unroll_1 PARAMS ((rtx, rtx));
821 static int ix86_issue_rate PARAMS ((void));
822 static int ix86_adjust_cost PARAMS ((rtx, rtx, rtx, int));
823 static void ix86_sched_init PARAMS ((FILE *, int, int));
824 static int ix86_sched_reorder PARAMS ((FILE *, int, rtx *, int *, int));
825 static int ix86_variable_issue PARAMS ((FILE *, int, rtx, int));
826 static int ia32_use_dfa_pipeline_interface PARAMS ((void));
827 static int ia32_multipass_dfa_lookahead PARAMS ((void));
828 static void ix86_init_mmx_sse_builtins PARAMS ((void));
829 static rtx x86_this_parameter PARAMS ((tree));
830 static void x86_output_mi_thunk PARAMS ((FILE *, tree, HOST_WIDE_INT,
831 HOST_WIDE_INT, tree));
832 static bool x86_can_output_mi_thunk PARAMS ((tree, HOST_WIDE_INT,
833 HOST_WIDE_INT, tree));
834 bool ix86_expand_carry_flag_compare PARAMS ((enum rtx_code, rtx, rtx, rtx*));
838 rtx base, index, disp;
842 static int ix86_decompose_address PARAMS ((rtx, struct ix86_address *));
843 static int ix86_address_cost PARAMS ((rtx));
844 static bool ix86_cannot_force_const_mem PARAMS ((rtx));
845 static rtx ix86_delegitimize_address PARAMS ((rtx));
847 static void ix86_encode_section_info PARAMS ((tree, int)) ATTRIBUTE_UNUSED;
848 static const char *ix86_strip_name_encoding PARAMS ((const char *))
851 struct builtin_description;
852 static rtx ix86_expand_sse_comi PARAMS ((const struct builtin_description *,
854 static rtx ix86_expand_sse_compare PARAMS ((const struct builtin_description *,
856 static rtx ix86_expand_unop1_builtin PARAMS ((enum insn_code, tree, rtx));
857 static rtx ix86_expand_unop_builtin PARAMS ((enum insn_code, tree, rtx, int));
858 static rtx ix86_expand_binop_builtin PARAMS ((enum insn_code, tree, rtx));
859 static rtx ix86_expand_store_builtin PARAMS ((enum insn_code, tree));
860 static rtx safe_vector_operand PARAMS ((rtx, enum machine_mode));
861 static enum rtx_code ix86_fp_compare_code_to_integer PARAMS ((enum rtx_code));
862 static void ix86_fp_comparison_codes PARAMS ((enum rtx_code code,
866 static rtx ix86_expand_fp_compare PARAMS ((enum rtx_code, rtx, rtx, rtx,
868 static int ix86_fp_comparison_arithmetics_cost PARAMS ((enum rtx_code code));
869 static int ix86_fp_comparison_fcomi_cost PARAMS ((enum rtx_code code));
870 static int ix86_fp_comparison_sahf_cost PARAMS ((enum rtx_code code));
871 static int ix86_fp_comparison_cost PARAMS ((enum rtx_code code));
872 static unsigned int ix86_select_alt_pic_regnum PARAMS ((void));
873 static int ix86_save_reg PARAMS ((unsigned int, int));
874 static void ix86_compute_frame_layout PARAMS ((struct ix86_frame *));
875 static int ix86_comp_type_attributes PARAMS ((tree, tree));
876 static int ix86_fntype_regparm PARAMS ((tree));
877 const struct attribute_spec ix86_attribute_table[];
878 static bool ix86_function_ok_for_sibcall PARAMS ((tree, tree));
879 static tree ix86_handle_cdecl_attribute PARAMS ((tree *, tree, tree, int, bool *));
880 static tree ix86_handle_regparm_attribute PARAMS ((tree *, tree, tree, int, bool *));
881 static int ix86_value_regno PARAMS ((enum machine_mode));
882 static bool contains_128bit_aligned_vector_p PARAMS ((tree));
883 static bool ix86_ms_bitfield_layout_p PARAMS ((tree));
884 static tree ix86_handle_struct_attribute PARAMS ((tree *, tree, tree, int, bool *));
885 static int extended_reg_mentioned_1 PARAMS ((rtx *, void *));
886 static bool ix86_rtx_costs PARAMS ((rtx, int, int, int *));
888 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
889 static void ix86_svr3_asm_out_constructor PARAMS ((rtx, int));
892 /* Register class used for passing given 64bit part of the argument.
893 These represent classes as documented by the PS ABI, with the exception
894 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
895 use SF or DFmode move instead of DImode to avoid reformatting penalties.
897 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
898 whenever possible (upper half does contain padding).
900 enum x86_64_reg_class
903 X86_64_INTEGER_CLASS,
904 X86_64_INTEGERSI_CLASS,
913 static const char * const x86_64_reg_class_name[] =
914 {"no", "integer", "integerSI", "sse", "sseSF", "sseDF", "sseup", "x87", "x87up", "no"};
916 #define MAX_CLASSES 4
917 static int classify_argument PARAMS ((enum machine_mode, tree,
918 enum x86_64_reg_class [MAX_CLASSES],
920 static int examine_argument PARAMS ((enum machine_mode, tree, int, int *,
922 static rtx construct_container PARAMS ((enum machine_mode, tree, int, int, int,
924 static enum x86_64_reg_class merge_classes PARAMS ((enum x86_64_reg_class,
925 enum x86_64_reg_class));
927 /* Table of constants used by fldpi, fldln2, etc... */
928 static REAL_VALUE_TYPE ext_80387_constants_table [5];
929 static bool ext_80387_constants_init = 0;
930 static void init_ext_80387_constants PARAMS ((void));
932 /* Initialize the GCC target structure. */
933 #undef TARGET_ATTRIBUTE_TABLE
934 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
935 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
936 # undef TARGET_MERGE_DECL_ATTRIBUTES
937 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
940 #undef TARGET_COMP_TYPE_ATTRIBUTES
941 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
943 #undef TARGET_INIT_BUILTINS
944 #define TARGET_INIT_BUILTINS ix86_init_builtins
946 #undef TARGET_EXPAND_BUILTIN
947 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
949 #undef TARGET_ASM_FUNCTION_EPILOGUE
950 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
952 #undef TARGET_ASM_OPEN_PAREN
953 #define TARGET_ASM_OPEN_PAREN ""
954 #undef TARGET_ASM_CLOSE_PAREN
955 #define TARGET_ASM_CLOSE_PAREN ""
957 #undef TARGET_ASM_ALIGNED_HI_OP
958 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
959 #undef TARGET_ASM_ALIGNED_SI_OP
960 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
962 #undef TARGET_ASM_ALIGNED_DI_OP
963 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
966 #undef TARGET_ASM_UNALIGNED_HI_OP
967 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
968 #undef TARGET_ASM_UNALIGNED_SI_OP
969 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
970 #undef TARGET_ASM_UNALIGNED_DI_OP
971 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
973 #undef TARGET_SCHED_ADJUST_COST
974 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
975 #undef TARGET_SCHED_ISSUE_RATE
976 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
977 #undef TARGET_SCHED_VARIABLE_ISSUE
978 #define TARGET_SCHED_VARIABLE_ISSUE ix86_variable_issue
979 #undef TARGET_SCHED_INIT
980 #define TARGET_SCHED_INIT ix86_sched_init
981 #undef TARGET_SCHED_REORDER
982 #define TARGET_SCHED_REORDER ix86_sched_reorder
983 #undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
984 #define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE \
985 ia32_use_dfa_pipeline_interface
986 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
987 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
988 ia32_multipass_dfa_lookahead
990 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
991 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
994 #undef TARGET_HAVE_TLS
995 #define TARGET_HAVE_TLS true
997 #undef TARGET_CANNOT_FORCE_CONST_MEM
998 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
1000 #undef TARGET_DELEGITIMIZE_ADDRESS
1001 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
1003 #undef TARGET_MS_BITFIELD_LAYOUT_P
1004 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
1006 #undef TARGET_ASM_OUTPUT_MI_THUNK
1007 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
1008 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1009 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
1011 #undef TARGET_RTX_COSTS
1012 #define TARGET_RTX_COSTS ix86_rtx_costs
1013 #undef TARGET_ADDRESS_COST
1014 #define TARGET_ADDRESS_COST ix86_address_cost
/* The hook vector GCC's middle end uses to call into this backend;
   populated from the TARGET_* macros defined above via TARGET_INITIALIZER.  */
1016 struct gcc_target targetm = TARGET_INITIALIZER;
1018 /* Sometimes certain combinations of command options do not make
1019 sense on a particular target machine. You can define a macro
1020 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1021 defined, is executed once just after all the command options have
1024 Don't use this macro to turn on various extra optimizations for
1025 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1031 /* Comes from final.c -- no real reason to change it. */
1032 #define MAX_CODE_ALIGN 16
/* Per-processor tuning data: cost table, target flags to force on/off,
   and default loop/jump/function alignments plus their max-skip values.
   NOTE(review): the `static const struct ptt' header lines are missing
   from this extraction.  */
1036 const struct processor_costs *cost; /* Processor costs */
1037 const int target_enable; /* Target flags to enable. */
1038 const int target_disable; /* Target flags to disable. */
1039 const int align_loop; /* Default alignments. */
1040 const int align_loop_max_skip;
1041 const int align_jump;
1042 const int align_jump_max_skip;
1043 const int align_func;
/* Indexed by ix86_tune (a processor_type value); presumably one row per
   PROCESSOR_* enumerator, in enum order -- TODO confirm against the enum.  */
1045 const processor_target_table[PROCESSOR_max] =
1047 {&i386_cost, 0, 0, 4, 3, 4, 3, 4},
1048 {&i486_cost, 0, 0, 16, 15, 16, 15, 16},
1049 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
1050 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
1051 {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
1052 {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
1053 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
1054 {&k8_cost, 0, 0, 16, 7, 16, 7, 16}
/* Printable names corresponding to the TARGET_CPU_DEFAULT_* values.  */
1057 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
/* Maps each -march=/-mtune= CPU name to its processor_type and the set of
   PTA_* ISA-extension flags that CPU implies (consumed by the lookup loops
   in override_options below).  */
1060 const char *const name; /* processor name or nickname. */
1061 const enum processor_type processor;
1062 const enum pta_flags
1067 PTA_PREFETCH_SSE = 8,
1073 const processor_alias_table[] =
1075 {"i386", PROCESSOR_I386, 0},
1076 {"i486", PROCESSOR_I486, 0},
1077 {"i586", PROCESSOR_PENTIUM, 0},
1078 {"pentium", PROCESSOR_PENTIUM, 0},
1079 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
1080 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
1081 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1082 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1083 {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_PREFETCH_SSE | PTA_SSE},
1084 {"i686", PROCESSOR_PENTIUMPRO, 0},
1085 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
1086 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
1087 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1088 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2 |
1089 PTA_MMX | PTA_PREFETCH_SSE},
1090 {"k6", PROCESSOR_K6, PTA_MMX},
1091 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1092 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1093 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1095 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
1096 | PTA_3DNOW | PTA_3DNOW_A},
1097 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1098 | PTA_3DNOW_A | PTA_SSE},
1099 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1100 | PTA_3DNOW_A | PTA_SSE},
1101 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1102 | PTA_3DNOW_A | PTA_SSE},
1103 {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1104 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
/* Entry count of the alias table, used as the bound of the lookup loops.  */
1107 int const pta_size = ARRAY_SIZE (processor_alias_table);
/* NOTE(review): from here to the end of the option-processing code we are
   inside override_options (); its header and a number of original lines are
   missing from this extraction, so the control flow below is incomplete.  */
1109 /* By default our XFmode is the 80-bit extended format. If we use
1110 TFmode instead, it's also the 80-bit format, but with padding. */
1111 real_format_for_mode[XFmode - QFmode] = &ieee_extended_intel_96_format;
1112 real_format_for_mode[TFmode - QFmode] = &ieee_extended_intel_128_format;
1114 /* Set the default values for switches whose default depends on TARGET_64BIT
1115 in case they weren't overwritten by command line options. */
/* The sentinel value 2 means "not set by the user"; it is planted by
   optimization_options below.  This first group is the 64-bit default set,
   the second group the 32-bit one (the if/else around them is missing).  */
1118 if (flag_omit_frame_pointer == 2)
1119 flag_omit_frame_pointer = 1;
1120 if (flag_asynchronous_unwind_tables == 2)
1121 flag_asynchronous_unwind_tables = 1;
1122 if (flag_pcc_struct_return == 2)
1123 flag_pcc_struct_return = 0;
1127 if (flag_omit_frame_pointer == 2)
1128 flag_omit_frame_pointer = 0;
1129 if (flag_asynchronous_unwind_tables == 2)
1130 flag_asynchronous_unwind_tables = 0;
1131 if (flag_pcc_struct_return == 2)
1132 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
1135 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1136 SUBTARGET_OVERRIDE_OPTIONS;
/* -mtune defaults to -march if only the latter was given, otherwise to the
   configured default CPU; -march defaults by bitness.  */
1139 if (!ix86_tune_string && ix86_arch_string)
1140 ix86_tune_string = ix86_arch_string;
1141 if (!ix86_tune_string)
1142 ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT];
1143 if (!ix86_arch_string)
1144 ix86_arch_string = TARGET_64BIT ? "k8" : "i386";
/* Parse -mcmodel=; kernel/medium/large are rejected under -fPIC.  */
1146 if (ix86_cmodel_string != 0)
1148 if (!strcmp (ix86_cmodel_string, "small"))
1149 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1151 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
1152 else if (!strcmp (ix86_cmodel_string, "32"))
1153 ix86_cmodel = CM_32;
1154 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
1155 ix86_cmodel = CM_KERNEL;
1156 else if (!strcmp (ix86_cmodel_string, "medium") && !flag_pic)
1157 ix86_cmodel = CM_MEDIUM;
1158 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
1159 ix86_cmodel = CM_LARGE;
1161 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
1165 ix86_cmodel = CM_32;
1167 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
/* Parse -masm= (output dialect only; does not affect input).  */
1169 if (ix86_asm_string != 0)
1171 if (!strcmp (ix86_asm_string, "intel"))
1172 ix86_asm_dialect = ASM_INTEL;
1173 else if (!strcmp (ix86_asm_string, "att"))
1174 ix86_asm_dialect = ASM_ATT;
1176 error ("bad value (%s) for -masm= switch", ix86_asm_string);
/* Cross-check code model against bitness and build configuration.  */
1178 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
1179 error ("code model `%s' not supported in the %s bit mode",
1180 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
1181 if (ix86_cmodel == CM_LARGE)
1182 sorry ("code model `large' not supported yet");
1183 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
1184 sorry ("%i-bit mode not compiled in",
1185 (target_flags & MASK_64BIT) ? 64 : 32);
/* Resolve -march=: set ix86_arch and turn on the implied ISA extensions,
   but only where the user did not give an explicit -m[no-]* flag.  */
1187 for (i = 0; i < pta_size; i++)
1188 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
1190 ix86_arch = processor_alias_table[i].processor;
1191 /* Default cpu tuning to the architecture. */
1192 ix86_tune = ix86_arch;
1193 if (processor_alias_table[i].flags & PTA_MMX
1194 && !(target_flags_explicit & MASK_MMX))
1195 target_flags |= MASK_MMX;
1196 if (processor_alias_table[i].flags & PTA_3DNOW
1197 && !(target_flags_explicit & MASK_3DNOW))
1198 target_flags |= MASK_3DNOW;
1199 if (processor_alias_table[i].flags & PTA_3DNOW_A
1200 && !(target_flags_explicit & MASK_3DNOW_A))
1201 target_flags |= MASK_3DNOW_A;
1202 if (processor_alias_table[i].flags & PTA_SSE
1203 && !(target_flags_explicit & MASK_SSE))
1204 target_flags |= MASK_SSE;
1205 if (processor_alias_table[i].flags & PTA_SSE2
1206 && !(target_flags_explicit & MASK_SSE2))
1207 target_flags |= MASK_SSE2;
1208 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1209 x86_prefetch_sse = true;
1210 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1211 error ("CPU you selected does not support x86-64 instruction set")
1216 error ("bad value (%s) for -march= switch", ix86_arch_string);
/* Resolve -mtune= the same way (no flag propagation except prefetch).  */
1218 for (i = 0; i < pta_size; i++)
1219 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
1221 ix86_tune = processor_alias_table[i].processor;
1222 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1223 error ("CPU you selected does not support x86-64 instruction set");
1226 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1227 x86_prefetch_sse = true;
1229 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
/* -Os uses the size cost table; otherwise costs and forced flags come from
   the tuning entry selected above.  */
1232 ix86_cost = &size_cost;
1234 ix86_cost = processor_target_table[ix86_tune].cost;
1235 target_flags |= processor_target_table[ix86_tune].target_enable;
1236 target_flags &= ~processor_target_table[ix86_tune].target_disable;
1238 /* Arrange to set up i386_stack_locals for all functions. */
1239 init_machine_status = ix86_init_machine_status;
1241 /* Validate -mregparm= value. */
1242 if (ix86_regparm_string)
1244 i = atoi (ix86_regparm_string);
1245 if (i < 0 || i > REGPARM_MAX)
1246 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
1252 ix86_regparm = REGPARM_MAX;
1254 /* If the user has provided any of the -malign-* options,
1255 warn and use that value only if -falign-* is not set.
1256 Remove this code in GCC 3.2 or later. */
1257 if (ix86_align_loops_string)
1259 warning ("-malign-loops is obsolete, use -falign-loops");
1260 if (align_loops == 0)
1262 i = atoi (ix86_align_loops_string);
1263 if (i < 0 || i > MAX_CODE_ALIGN)
1264 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1266 align_loops = 1 << i;
/* Deprecated -malign-jumps= / -malign-functions= handling: warn, then honor
   the value only when the corresponding -falign-* option was not given.
   Bug fix: the two error() calls below wrongly named "-malign-loops" in
   their messages (copy-paste from the loops case above); they now report
   the switch actually being validated.  */
1270 if (ix86_align_jumps_string)
1272 warning ("-malign-jumps is obsolete, use -falign-jumps");
1273 if (align_jumps == 0)
1275 i = atoi (ix86_align_jumps_string);
1276 if (i < 0 || i > MAX_CODE_ALIGN)
1277 error ("-malign-jumps=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
/* The option value is an exponent: alignment is 2**i bytes.  */
1279 align_jumps = 1 << i;
1283 if (ix86_align_funcs_string)
1285 warning ("-malign-functions is obsolete, use -falign-functions");
1286 if (align_functions == 0)
1288 i = atoi (ix86_align_funcs_string);
1289 if (i < 0 || i > MAX_CODE_ALIGN)
1290 error ("-malign-functions=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1292 align_functions = 1 << i;
1296 /* Default align_* from the processor table. */
1297 if (align_loops == 0)
1299 align_loops = processor_target_table[ix86_tune].align_loop;
1300 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
1302 if (align_jumps == 0)
1304 align_jumps = processor_target_table[ix86_tune].align_jump;
1305 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
1307 if (align_functions == 0)
1309 align_functions = processor_target_table[ix86_tune].align_func;
1312 /* Validate -mpreferred-stack-boundary= value, or provide default.
1313 The default of 128 bits is for Pentium III's SSE __m128, but we
1314 don't want additional code to keep the stack aligned when
1315 optimizing for code size. */
1316 ix86_preferred_stack_boundary = (optimize_size
1317 ? TARGET_64BIT ? 128 : 32
/* User value is an exponent of a byte alignment: boundary = 2**i bytes.  */
1319 if (ix86_preferred_stack_boundary_string)
1321 i = atoi (ix86_preferred_stack_boundary_string);
1322 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
1323 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
1324 TARGET_64BIT ? 4 : 2);
1326 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
1329 /* Validate -mbranch-cost= value, or provide default. */
1330 ix86_branch_cost = processor_target_table[ix86_tune].cost->branch_cost;
1331 if (ix86_branch_cost_string)
1333 i = atoi (ix86_branch_cost_string);
1335 error ("-mbranch-cost=%d is not between 0 and 5", i);
1337 ix86_branch_cost = i;
/* Parse -mtls-dialect= (TLS code-sequence flavor).  */
1340 if (ix86_tls_dialect_string)
1342 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
1343 ix86_tls_dialect = TLS_DIALECT_GNU;
1344 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
1345 ix86_tls_dialect = TLS_DIALECT_SUN;
1347 error ("bad value (%s) for -mtls-dialect= switch",
1348 ix86_tls_dialect_string);
1351 /* Keep nonleaf frame pointers. */
1352 if (TARGET_OMIT_LEAF_FRAME_POINTER)
1353 flag_omit_frame_pointer = 1;
1355 /* If we're doing fast math, we don't care about comparison order
1356 wrt NaNs. This lets us use a shorter comparison sequence. */
1357 if (flag_unsafe_math_optimizations)
1358 target_flags &= ~MASK_IEEE_FP;
1360 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
1361 since the insns won't need emulation. */
1362 if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
1363 target_flags &= ~MASK_NO_FANCY_MATH_387;
/* 64-bit-only sanity checks and defaults (the enclosing `if (TARGET_64BIT)'
   line is missing from this extraction).  */
1367 if (TARGET_ALIGN_DOUBLE)
1368 error ("-malign-double makes no sense in the 64bit mode");
1370 error ("-mrtd calling convention not supported in the 64bit mode");
1371 /* Enable by default the SSE and MMX builtins. */
1372 target_flags |= (MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE);
1373 ix86_fpmath = FPMATH_SSE;
1376 ix86_fpmath = FPMATH_387;
/* Parse -mfpmath=: which FP unit(s) to generate arithmetic for.  Requests
   for a disabled unit fall back to the other one with a warning.  */
1378 if (ix86_fpmath_string != 0)
1380 if (! strcmp (ix86_fpmath_string, "387"))
1381 ix86_fpmath = FPMATH_387;
1382 else if (! strcmp (ix86_fpmath_string, "sse"))
1386 warning ("SSE instruction set disabled, using 387 arithmetics");
1387 ix86_fpmath = FPMATH_387;
1390 ix86_fpmath = FPMATH_SSE;
1392 else if (! strcmp (ix86_fpmath_string, "387,sse")
1393 || ! strcmp (ix86_fpmath_string, "sse,387"))
1397 warning ("SSE instruction set disabled, using 387 arithmetics");
1398 ix86_fpmath = FPMATH_387;
1400 else if (!TARGET_80387)
1402 warning ("387 instruction set disabled, using SSE arithmetics");
1403 ix86_fpmath = FPMATH_SSE;
1406 ix86_fpmath = FPMATH_SSE | FPMATH_387;
1409 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
1412 /* It makes no sense to ask for just SSE builtins, so MMX is also turned
1416 target_flags |= MASK_MMX;
1417 x86_prefetch_sse = true;
1420 /* If it has 3DNow! it also has MMX so MMX is also turned on by -m3dnow */
1423 target_flags |= MASK_MMX;
1424 /* If we are targeting the Athlon architecture, enable the 3Dnow/MMX
1425 extensions it adds. */
1426 if (x86_3dnow_a & (1 << ix86_arch))
1427 target_flags |= MASK_3DNOW_A;
1429 if ((x86_accumulate_outgoing_args & TUNEMASK)
1430 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
1432 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
1434 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
/* Generate a sample label and measure up to the 'X' placeholder to learn
   the assembler's local-label prefix and its length.  */
1437 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
1438 p = strchr (internal_label_prefix, 'X');
1439 internal_label_prefix_len = p - internal_label_prefix;
/* Per-optimization-level option hook (OPTIMIZATION_OPTIONS).  LEVEL is the
   -O level; SIZE is nonzero for -Os.  Runs before the target is fully
   known, so bitness-dependent flags are only marked here.  */
1445 optimization_options (level, size)
1447 int size ATTRIBUTE_UNUSED;
1449 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
1450 make the problem with not enough registers even worse. */
1451 #ifdef INSN_SCHEDULING
1453 flag_schedule_insns = 0;
1456 /* The default values of these switches depend on the TARGET_64BIT
1457 that is not known at this moment. Mark these values with 2 and
1458 let the user override them. In case there is no command line option
1459 specifying them, we will set the defaults in override_options. */
1461 flag_omit_frame_pointer = 2;
1462 flag_pcc_struct_return = 2;
1463 flag_asynchronous_unwind_tables = 2;
1466 /* Table of valid machine attributes. */
1467 const struct attribute_spec ix86_attribute_table[] =
1469 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
1470 /* Stdcall attribute says callee is responsible for popping arguments
1471 if they are not variable. */
1472 { "stdcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1473 /* Fastcall attribute says callee is responsible for popping arguments
1474 if they are not variable. */
1475 { "fastcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1476 /* Cdecl attribute says the callee is a normal C declaration */
1477 { "cdecl", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1478 /* Regparm attribute specifies how many integer arguments are to be
1479 passed in registers. */
1480 { "regparm", 1, 1, false, true, true, ix86_handle_regparm_attribute },
1481 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
1482 { "dllimport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1483 { "dllexport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1484 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
/* ms_struct/gcc_struct select the struct-layout convention per type.  */
1486 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
1487 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
/* Sentinel terminating the table.  */
1488 { NULL, 0, 0, false, false, false, NULL }
1491 /* Decide whether we can make a sibling call to a function. DECL is the
1492 declaration of the function being targeted by the call and EXP is the
1493 CALL_EXPR representing the call. */
1496 ix86_function_ok_for_sibcall (decl, exp)
1500 /* If we are generating position-independent code, we cannot sibcall
1501 optimize any indirect call, or a direct call to a global function,
1502 as the PLT requires %ebx be live. */
1503 if (!TARGET_64BIT && flag_pic && (!decl || TREE_PUBLIC (decl)))
1506 /* If we are returning floats on the 80387 register stack, we cannot
1507 make a sibcall from a function that doesn't return a float to a
1508 function that does or, conversely, from a function that does return
1509 a float to a function that doesn't; the necessary stack adjustment
1510 would not be executed. */
1511 if (STACK_REG_P (ix86_function_value (TREE_TYPE (exp)))
1512 != STACK_REG_P (ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)))))
1515 /* If this call is indirect, we'll need to be able to use a call-clobbered
1516 register for the address of the target function. Make sure that all
1517 such registers are not used for passing parameters. */
1518 if (!decl && !TARGET_64BIT)
1520 int regparm = ix86_regparm;
/* An explicit regparm attribute on the callee type overrides -mregparm.  */
1523 /* We're looking at the CALL_EXPR, we need the type of the function. */
1524 type = TREE_OPERAND (exp, 0); /* pointer expression */
1525 type = TREE_TYPE (type); /* pointer type */
1526 type = TREE_TYPE (type); /* function type */
1528 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
1530 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1534 /* ??? Need to count the actual number of registers to be used,
1535 not the possible number of registers. Fix later. */
1540 /* Otherwise okay. That also includes certain types of indirect calls. */
1544 /* Handle a "cdecl", "stdcall", or "fastcall" attribute;
1545 arguments as in struct attribute_spec.handler. */
1547 ix86_handle_cdecl_attribute (node, name, args, flags, no_add_attrs)
1550 tree args ATTRIBUTE_UNUSED;
1551 int flags ATTRIBUTE_UNUSED;
/* Reject the attribute on anything that is not function-typed; setting
   *no_add_attrs tells the caller to drop it.  */
1554 if (TREE_CODE (*node) != FUNCTION_TYPE
1555 && TREE_CODE (*node) != METHOD_TYPE
1556 && TREE_CODE (*node) != FIELD_DECL
1557 && TREE_CODE (*node) != TYPE_DECL)
1559 warning ("`%s' attribute only applies to functions",
1560 IDENTIFIER_POINTER (name));
1561 *no_add_attrs = true;
/* fastcall conflicts with both stdcall and regparm: all three dictate how
   arguments reach the callee.  */
1565 if (is_attribute_p ("fastcall", name))
1567 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
1569 error ("fastcall and stdcall attributes are not compatible");
1571 else if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
1573 error ("fastcall and regparm attributes are not compatible");
1576 else if (is_attribute_p ("stdcall", name))
1578 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1580 error ("fastcall and stdcall attributes are not compatible");
/* NOTE(review): lines handling the remaining cases are missing here.  */
1587 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
1588 *no_add_attrs = true;
1594 /* Handle a "regparm" attribute;
1595 arguments as in struct attribute_spec.handler. */
1597 ix86_handle_regparm_attribute (node, name, args, flags, no_add_attrs)
1601 int flags ATTRIBUTE_UNUSED;
/* Only function-typed entities may carry regparm.  */
1604 if (TREE_CODE (*node) != FUNCTION_TYPE
1605 && TREE_CODE (*node) != METHOD_TYPE
1606 && TREE_CODE (*node) != FIELD_DECL
1607 && TREE_CODE (*node) != TYPE_DECL)
1609 warning ("`%s' attribute only applies to functions",
1610 IDENTIFIER_POINTER (name));
1611 *no_add_attrs = true;
/* Validate the single argument: an integer constant no larger than
   REGPARM_MAX.  */
1617 cst = TREE_VALUE (args);
1618 if (TREE_CODE (cst) != INTEGER_CST)
1620 warning ("`%s' attribute requires an integer constant argument",
1621 IDENTIFIER_POINTER (name));
1622 *no_add_attrs = true;
1624 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
1626 warning ("argument to `%s' attribute larger than %d",
1627 IDENTIFIER_POINTER (name), REGPARM_MAX);
1628 *no_add_attrs = true;
/* regparm cannot be combined with fastcall (both assign argument regs).  */
1631 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1633 error ("fastcall and regparm attributes are not compatible");
1640 /* Return 0 if the attributes for two types are incompatible, 1 if they
1641 are compatible, and 2 if they are nearly compatible (which causes a
1642 warning to be generated). */
1645 ix86_comp_type_attributes (type1, type2)
1649 /* Check for mismatch of non-default calling convention. */
/* Under -mrtd the default convention is stdcall, so the marker attribute
   for "non-default" flips to cdecl.  */
1650 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
1652 if (TREE_CODE (type1) != FUNCTION_TYPE)
1655 /* Check for mismatched fastcall types */
/* The `!attr != !attr' idiom compares presence/absence of the attribute.  */
1656 if (!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
1657 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
1660 /* Check for mismatched return types (cdecl vs stdcall). */
1661 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
1662 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
1667 /* Return the regparm value for a function with the indicated TYPE:
1668 the value of its "regparm" attribute if present, else the global
1669 -mregparm setting. */
1670 ix86_fntype_regparm (type)
1675 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
1677 return TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1679 return ix86_regparm;
1682 /* Value is the number of bytes of arguments automatically
1683 popped when returning from a subroutine call.
1684 FUNDECL is the declaration node of the function (as a tree),
1685 FUNTYPE is the data type of the function (as a tree),
1686 or for a library call it is an identifier node for the subroutine name.
1687 SIZE is the number of bytes of arguments passed on the stack.
1689 On the 80386, the RTD insn may be used to pop them if the number
1690 of args is fixed, but if the number is variable then the caller
1691 must pop them all. RTD can't be used for library calls now
1692 because the library is compiled with the Unix compiler.
1693 Use of RTD is a selectable option, since it is incompatible with
1694 standard Unix calling sequences. If the option is not selected,
1695 the caller must always pop the args.
1697 The attribute stdcall is equivalent to RTD on a per module basis. */
1700 ix86_return_pops_args (fundecl, funtype, size)
/* -mrtd applies to real function decls, not library-call identifiers.  */
1705 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
1707 /* Cdecl functions override -mrtd, and never pop the stack. */
1708 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
1710 /* Stdcall and fastcall functions will pop the stack if not variable args. */
1711 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
1712 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
/* A fixed argument list ends in void_type_node; NULL means unprototyped.  */
1716 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
1717 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
1718 == void_type_node)))
1722 /* Lose any fake structure return argument if it is passed on the stack. */
1723 if (aggregate_value_p (TREE_TYPE (funtype))
1726 int nregs = ix86_fntype_regparm (funtype);
/* The callee pops only the hidden return-slot pointer (one word).  */
1729 return GET_MODE_SIZE (Pmode);
1735 /* Argument support functions. */
1737 /* Return true when register may be used to pass function parameters. */
1739 ix86_function_arg_regno_p (regno)
/* 32-bit case: the first REGPARM_MAX integer regs, plus SSE regs when
   SSE is enabled and the reg is not fixed.  */
1744 return (regno < REGPARM_MAX
1745 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
/* 64-bit case below: SSE regs, RAX, and the ABI integer argument regs.  */
1746 if (SSE_REGNO_P (regno) && TARGET_SSE)
1748 /* RAX is used as hidden argument to va_arg functions. */
1751 for (i = 0; i < REGPARM_MAX; i++)
1752 if (regno == x86_64_int_parameter_registers[i])
1757 /* Initialize a variable CUM of type CUMULATIVE_ARGS
1758 for a call to a function whose data type is FNTYPE.
1759 For a library call, FNTYPE is 0. */
1762 init_cumulative_args (cum, fntype, libname)
1763 CUMULATIVE_ARGS *cum; /* Argument info to initialize */
1764 tree fntype; /* tree ptr for function decl */
1765 rtx libname; /* SYMBOL_REF of library name or 0 */
1767 static CUMULATIVE_ARGS zero_cum;
1768 tree param, next_param;
/* Optional -mdebug-arg tracing of what we were asked to initialize.  */
1770 if (TARGET_DEBUG_ARG)
1772 fprintf (stderr, "\ninit_cumulative_args (");
1774 fprintf (stderr, "fntype code = %s, ret code = %s",
1775 tree_code_name[(int) TREE_CODE (fntype)],
1776 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
1778 fprintf (stderr, "no fntype");
1781 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
1786 /* Set up the number of registers to use for passing arguments. */
1787 cum->nregs = ix86_regparm;
1788 cum->sse_nregs = SSE_REGPARM_MAX;
/* In 32-bit mode an explicit regparm attribute overrides -mregparm.  */
1789 if (fntype && !TARGET_64BIT)
1791 tree attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (fntype));
1794 cum->nregs = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1796 cum->maybe_vaarg = false;
1798 /* Use ecx and edx registers if function has fastcall attribute */
1799 if (fntype && !TARGET_64BIT)
1801 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
1809 /* Determine if this function has variable arguments. This is
1810 indicated by the last argument being 'void_type_mode' if there
1811 are no variable arguments. If there are variable arguments, then
1812 we won't pass anything in registers */
1816 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
1817 param != 0; param = next_param)
1819 next_param = TREE_CHAIN (param);
1820 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
1827 cum->maybe_vaarg = true;
/* No prototype at all (or a libcall without one) may still be variadic.  */
1831 if ((!fntype && !libname)
1832 || (fntype && !TYPE_ARG_TYPES (fntype)))
1833 cum->maybe_vaarg = 1;
1835 if (TARGET_DEBUG_ARG)
1836 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
1841 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
1842 of this code is to classify each 8bytes of incoming argument by the register
1843 class and assign registers accordingly. */
1845 /* Return the union class of CLASS1 and CLASS2.
1846 See the x86-64 PS ABI for details.
1847 NOTE(review): several `return' lines of this function are missing from
1848 this extraction; the rule comments below describe the visible logic. */
1848 static enum x86_64_reg_class
1849 merge_classes (class1, class2)
1850 enum x86_64_reg_class class1, class2;
1852 /* Rule #1: If both classes are equal, this is the resulting class. */
1853 if (class1 == class2)
1856 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
1858 if (class1 == X86_64_NO_CLASS)
1860 if (class2 == X86_64_NO_CLASS)
1863 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
1864 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
1865 return X86_64_MEMORY_CLASS;
1867 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
/* INTEGERSI + SSESF stays INTEGERSI: both halves fit in 32 bits, so the
   cheaper SImode move remains usable.  */
1868 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
1869 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
1870 return X86_64_INTEGERSI_CLASS;
1871 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
1872 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
1873 return X86_64_INTEGER_CLASS;
1875 /* Rule #5: If one of the classes is X87 or X87UP class, MEMORY is used. */
1876 if (class1 == X86_64_X87_CLASS || class1 == X86_64_X87UP_CLASS
1877 || class2 == X86_64_X87_CLASS || class2 == X86_64_X87UP_CLASS)
1878 return X86_64_MEMORY_CLASS;
1880 /* Rule #6: Otherwise class SSE is used. */
1881 return X86_64_SSE_CLASS;
1884 /* Classify the argument of type TYPE and mode MODE.
1885 CLASSES will be filled by the register class used to pass each word
1886 of the operand. The number of words is returned. In case the parameter
1887 should be passed in memory, 0 is returned. As a special case for zero
1888 sized containers, classes[0] will be NO_CLASS and 1 is returned.
1890 BIT_OFFSET is used internally for handling records and specifies offset
1891 of the offset in bits modulo 256 to avoid overflow cases.
1893 See the x86-64 PS ABI for details.
/* Classify each eightbyte of an argument of mode MODE and tree type TYPE
   into CLASSES[], per the x86-64 psABI.  Returns the number of eightbytes,
   or 0 when the argument must go in memory (see the function comment
   above).  BIT_OFFSET is the offset in bits modulo 256 used while
   recursing into aggregate fields.
   NOTE(review): this listing elides many lines (braces, several early
   returns, and the head of the atomic-type switch) -- verify against the
   full source before editing.  */
1897 classify_argument (mode, type, classes, bit_offset)
1898 enum machine_mode mode;
1900 enum x86_64_reg_class classes[MAX_CLASSES];
1904 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1905 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1907 /* Variable sized entities are always passed/returned in memory. */
1911 if (type && AGGREGATE_TYPE_P (type))
1915 enum x86_64_reg_class subclasses[MAX_CLASSES];
1917 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
      /* Start every eightbyte as NO_CLASS; fields merge into it below.  */
1921 for (i = 0; i < words; i++)
1922 classes[i] = X86_64_NO_CLASS;
1924 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
1925 signal memory class, so handle it as a special case. */
1928 classes[0] = X86_64_NO_CLASS;
1932 /* Classify each field of record and merge classes. */
1933 if (TREE_CODE (type) == RECORD_TYPE)
1935 /* For classes first merge in the field of the subclasses. */
1936 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
1938 tree bases = TYPE_BINFO_BASETYPES (type);
1939 int n_bases = TREE_VEC_LENGTH (bases);
1942 for (i = 0; i < n_bases; ++i)
1944 tree binfo = TREE_VEC_ELT (bases, i);
     /* Base offset converted from bytes to bits for the recursion.  */
1946 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
1947 tree type = BINFO_TYPE (binfo);
1949 num = classify_argument (TYPE_MODE (type),
1951 (offset + bit_offset) % 256);
1954 for (i = 0; i < num; i++)
     /* Eightbyte index of the base within the enclosing record.  */
1956 int pos = (offset + (bit_offset % 64)) / 8 / 8;
1958 merge_classes (subclasses[i], classes[i + pos]);
1962 /* And now merge the fields of structure. */
1963 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
1965 if (TREE_CODE (field) == FIELD_DECL)
1969 /* Bitfields are always classified as integer. Handle them
1970 early, since later code would consider them to be
1971 misaligned integers. */
1972 if (DECL_BIT_FIELD (field))
     /* Mark every eightbyte the bitfield touches as INTEGER.  */
1974 for (i = int_bit_position (field) / 8 / 8;
1975 i < (int_bit_position (field)
1976 + tree_low_cst (DECL_SIZE (field), 0)
1979 merge_classes (X86_64_INTEGER_CLASS,
1984 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
1985 TREE_TYPE (field), subclasses,
1986 (int_bit_position (field)
1987 + bit_offset) % 256);
1990 for (i = 0; i < num; i++)
1993 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
1995 merge_classes (subclasses[i], classes[i + pos]);
2001 /* Arrays are handled as small records. */
2002 else if (TREE_CODE (type) == ARRAY_TYPE)
2005 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
2006 TREE_TYPE (type), subclasses, bit_offset);
2010 /* The partial classes are now full classes: an element that only
     filled 4 bytes widens when the whole array is larger.  */
2011 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
2012 subclasses[0] = X86_64_SSE_CLASS;
2013 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
2014 subclasses[0] = X86_64_INTEGER_CLASS;
     /* Replicate the element classification across all eightbytes.  */
2016 for (i = 0; i < words; i++)
2017 classes[i] = subclasses[i % num];
2019 /* Unions are similar to RECORD_TYPE but offset is always 0. */
2020 else if (TREE_CODE (type) == UNION_TYPE
2021 || TREE_CODE (type) == QUAL_UNION_TYPE)
2023 /* For classes first merge in the field of the subclasses. */
2024 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
2026 tree bases = TYPE_BINFO_BASETYPES (type);
2027 int n_bases = TREE_VEC_LENGTH (bases);
2030 for (i = 0; i < n_bases; ++i)
2032 tree binfo = TREE_VEC_ELT (bases, i);
2034 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
2035 tree type = BINFO_TYPE (binfo);
2037 num = classify_argument (TYPE_MODE (type),
2039 (offset + (bit_offset % 64)) % 256);
2042 for (i = 0; i < num; i++)
2044 int pos = (offset + (bit_offset % 64)) / 8 / 8;
2046 merge_classes (subclasses[i], classes[i + pos]);
     /* All union members start at offset 0, so merge in place.  */
2050 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2052 if (TREE_CODE (field) == FIELD_DECL)
2055 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2056 TREE_TYPE (field), subclasses,
2060 for (i = 0; i < num; i++)
2061 classes[i] = merge_classes (subclasses[i], classes[i]);
2068 /* Final merger cleanup. */
2069 for (i = 0; i < words; i++)
2071 /* If one class is MEMORY, everything should be passed in
2073 if (classes[i] == X86_64_MEMORY_CLASS)
2076 /* The X86_64_SSEUP_CLASS should be always preceded by
2077 X86_64_SSE_CLASS. */
2078 if (classes[i] == X86_64_SSEUP_CLASS
2079 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
2080 classes[i] = X86_64_SSE_CLASS;
2082 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
2083 if (classes[i] == X86_64_X87UP_CLASS
2084 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
2085 classes[i] = X86_64_SSE_CLASS;
2090 /* Compute alignment needed. We align all types to natural boundaries with
2091 exception of XFmode that is aligned to 64bits. */
2092 if (mode != VOIDmode && mode != BLKmode)
2094 int mode_alignment = GET_MODE_BITSIZE (mode);
2097 mode_alignment = 128;
2098 else if (mode == XCmode)
2099 mode_alignment = 256;
2100 /* Misaligned fields are always returned in memory. */
2101 if (bit_offset % mode_alignment)
2105 /* Classification of atomic types.
     NOTE(review): the switch statement head and several case labels are
     elided in this listing; the assignments below belong to per-mode
     cases (integer modes, SFmode, DFmode, XF/TF, XC/TC, SC/DC, vector
     modes) -- confirm against the full source.  */
2115 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
2116 classes[0] = X86_64_INTEGERSI_CLASS;
2118 classes[0] = X86_64_INTEGER_CLASS;
2122 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2125 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2126 classes[2] = classes[3] = X86_64_INTEGER_CLASS;
2129 if (!(bit_offset % 64))
2130 classes[0] = X86_64_SSESF_CLASS;
2132 classes[0] = X86_64_SSE_CLASS;
2135 classes[0] = X86_64_SSEDF_CLASS;
2138 classes[0] = X86_64_X87_CLASS;
2139 classes[1] = X86_64_X87UP_CLASS;
2142 classes[0] = X86_64_X87_CLASS;
2143 classes[1] = X86_64_X87UP_CLASS;
2144 classes[2] = X86_64_X87_CLASS;
2145 classes[3] = X86_64_X87UP_CLASS;
2148 classes[0] = X86_64_SSEDF_CLASS;
2149 classes[1] = X86_64_SSEDF_CLASS;
2152 classes[0] = X86_64_SSE_CLASS;
2160 classes[0] = X86_64_SSE_CLASS;
2161 classes[1] = X86_64_SSEUP_CLASS;
2176 /* Examine the argument and return set number of register required in each
2177 class. Return 0 iff parameter should be passed in memory. */
/* Count how many integer (INT_NREGS) and SSE (SSE_NREGS) registers the
   argument of MODE/TYPE needs, by walking the classification from
   classify_argument.  Returns 0 iff the argument must be passed in
   memory (see the comment above).
   NOTE(review): the switch head, the register-count increments and the
   return statements are elided in this listing.  */
2179 examine_argument (mode, type, in_return, int_nregs, sse_nregs)
2180 enum machine_mode mode;
2182 int *int_nregs, *sse_nregs;
2185 enum x86_64_reg_class class[MAX_CLASSES];
2186 int n = classify_argument (mode, type, class, 0);
2192 for (n--; n >= 0; n--)
2195 case X86_64_INTEGER_CLASS:
2196 case X86_64_INTEGERSI_CLASS:
2199 case X86_64_SSE_CLASS:
2200 case X86_64_SSESF_CLASS:
2201 case X86_64_SSEDF_CLASS:
2204 case X86_64_NO_CLASS:
2205 case X86_64_SSEUP_CLASS:
2207 case X86_64_X87_CLASS:
2208 case X86_64_X87UP_CLASS:
2212 case X86_64_MEMORY_CLASS:
2217 /* Construct container for the argument used by GCC interface. See
2218 FUNCTION_ARG for the detailed description. */
/* Build the RTL container describing how an argument of MODE/TYPE is
   passed: a single REG for the simple cases, or a PARALLEL of
   (reg, offset) EXPR_LISTs when the value is split across registers.
   INTREG points at the array of available integer register numbers and
   SSE_REGNO is the index of the first free SSE register.  Presumably
   returns NULL_RTX when the argument goes in memory -- the early-return
   lines are elided in this listing; confirm against the full source.  */
2220 construct_container (mode, type, in_return, nintregs, nsseregs, intreg, sse_regno)
2221 enum machine_mode mode;
2224 int nintregs, nsseregs;
2228 enum machine_mode tmpmode;
2230 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2231 enum x86_64_reg_class class[MAX_CLASSES];
2235 int needed_sseregs, needed_intregs;
2236 rtx exp[MAX_CLASSES];
2239 n = classify_argument (mode, type, class, 0);
2240 if (TARGET_DEBUG_ARG)
2243 fprintf (stderr, "Memory class\n");
2246 fprintf (stderr, "Classes:");
2247 for (i = 0; i < n; i++)
2249 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
2251 fprintf (stderr, "\n");
     /* Bail out when the argument is a memory class or the free
        registers cannot cover the need.  */
2256 if (!examine_argument (mode, type, in_return, &needed_intregs, &needed_sseregs))
2258 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
2261 /* First construct simple cases. Avoid SCmode, since we want to use
2262 single register to pass this type. */
2263 if (n == 1 && mode != SCmode)
2266 case X86_64_INTEGER_CLASS:
2267 case X86_64_INTEGERSI_CLASS:
2268 return gen_rtx_REG (mode, intreg[0]);
2269 case X86_64_SSE_CLASS:
2270 case X86_64_SSESF_CLASS:
2271 case X86_64_SSEDF_CLASS:
2272 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2273 case X86_64_X87_CLASS:
2274 return gen_rtx_REG (mode, FIRST_STACK_REG);
2275 case X86_64_NO_CLASS:
2276 /* Zero sized array, struct or class. */
     /* Two-eightbyte special cases that still fit a single hard reg.  */
2281 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS)
2282 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2284 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
2285 return gen_rtx_REG (TFmode, FIRST_STACK_REG);
2286 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
2287 && class[1] == X86_64_INTEGER_CLASS
2288 && (mode == CDImode || mode == TImode)
2289 && intreg[0] + 1 == intreg[1])
2290 return gen_rtx_REG (mode, intreg[0]);
2292 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS
2293 && class[2] == X86_64_X87_CLASS && class[3] == X86_64_X87UP_CLASS)
2294 return gen_rtx_REG (TCmode, FIRST_STACK_REG);
2296 /* Otherwise figure out the entries of the PARALLEL. */
2297 for (i = 0; i < n; i++)
2301 case X86_64_NO_CLASS:
2303 case X86_64_INTEGER_CLASS:
2304 case X86_64_INTEGERSI_CLASS:
2305 /* Merge TImodes on aligned occasions here too. */
2306 if (i * 8 + 8 > bytes)
2307 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
2308 else if (class[i] == X86_64_INTEGERSI_CLASS)
2312 /* We've requested 24 bytes we don't have mode for. Use DImode. */
2313 if (tmpmode == BLKmode)
2315 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2316 gen_rtx_REG (tmpmode, *intreg),
2320 case X86_64_SSESF_CLASS:
2321 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2322 gen_rtx_REG (SFmode,
2323 SSE_REGNO (sse_regno)),
2327 case X86_64_SSEDF_CLASS:
2328 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2329 gen_rtx_REG (DFmode,
2330 SSE_REGNO (sse_regno)),
2334 case X86_64_SSE_CLASS:
     /* A following SSEUP eightbyte means one 16-byte (TImode) chunk.  */
2335 if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
2339 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2340 gen_rtx_REG (tmpmode,
2341 SSE_REGNO (sse_regno)),
2343 if (tmpmode == TImode)
     /* Collect all pieces into the PARALLEL that callers consume.  */
2351 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
2352 for (i = 0; i < nexps; i++)
2353 XVECEXP (ret, 0, i) = exp [i];
2357 /* Update the data in CUM to advance over an argument
2358 of mode MODE and data type TYPE.
2359 (TYPE is null for libcalls where that information may not be available.) */
/* Advance CUM past an argument of MODE/TYPE.  On x86-64 this consumes
   the integer/SSE registers examine_argument says the argument needs
   (or only stack words when it is a memory argument); on 32-bit it
   consumes plain word-sized registers, with TImode SSE arguments
   handled separately.  NOTE(review): several closing branches (the
   register-exhaustion resets after the <= 0 tests) are elided in this
   listing.  */
2362 function_arg_advance (cum, mode, type, named)
2363 CUMULATIVE_ARGS *cum; /* current arg information */
2364 enum machine_mode mode; /* current arg mode */
2365 tree type; /* type of the argument or 0 if lib support */
2366 int named; /* whether or not the argument was named */
2369 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2370 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2372 if (TARGET_DEBUG_ARG)
2374 "function_adv (sz=%d, wds=%2d, nregs=%d, mode=%s, named=%d)\n\n",
2375 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2378 int int_nregs, sse_nregs;
     /* Memory argument: it only consumes stack words.  */
2379 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
2380 cum->words += words;
2381 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
2383 cum->nregs -= int_nregs;
2384 cum->sse_nregs -= sse_nregs;
2385 cum->regno += int_nregs;
2386 cum->sse_regno += sse_nregs;
2389 cum->words += words;
     /* 32-bit path below: TImode vectors go in SSE regs when available.  */
2393 if (TARGET_SSE && mode == TImode)
2395 cum->sse_words += words;
2396 cum->sse_nregs -= 1;
2397 cum->sse_regno += 1;
2398 if (cum->sse_nregs <= 0)
2406 cum->words += words;
2407 cum->nregs -= words;
2408 cum->regno += words;
2410 if (cum->nregs <= 0)
2420 /* Define where to put the arguments to a function.
2421 Value is zero to push the argument on the stack,
2422 or a hard register in which to store the argument.
2424 MODE is the argument's machine mode.
2425 TYPE is the data type of the argument (as a tree).
2426 This is null for libcalls where that information may
2428 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2429 the preceding args and about the function being called.
2430 NAMED is nonzero if this argument is a named parameter
2431 (otherwise it is an extra parameter matching an ellipsis). */
/* Return the register (or PARALLEL) in which to pass an argument of
   MODE/TYPE given the state in CUM, or zero to pass it on the stack
   (see the FUNCTION_ARG comment above).  A VOIDmode "argument" is the
   hidden AL register holding the number of SSE registers used by a
   varargs call on x86-64.
   NOTE(review): this listing elides the 32-bit switch head, the
   fastcall register adjustment, and the final return.  */
2434 function_arg (cum, mode, type, named)
2435 CUMULATIVE_ARGS *cum; /* current arg information */
2436 enum machine_mode mode; /* current arg mode */
2437 tree type; /* type of the argument or 0 if lib support */
2438 int named; /* != 0 for normal args, == 0 for ... args */
2442 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2443 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2445 /* Handle a hidden AL argument containing number of registers for varargs
2446 x86-64 functions. For i386 ABI just return constm1_rtx to avoid
2448 if (mode == VOIDmode)
2451 return GEN_INT (cum->maybe_vaarg
2452 ? (cum->sse_nregs < 0
     /* x86-64: delegate the register assignment to construct_container.  */
2460 ret = construct_container (mode, type, 0, cum->nregs, cum->sse_nregs,
2461 &x86_64_int_parameter_registers [cum->regno],
2466 /* For now, pass fp/complex values on the stack. */
2478 if (words <= cum->nregs)
2480 int regno = cum->regno;
2482 /* Fastcall allocates the first two DWORD (SImode) or
2483 smaller arguments to ECX and EDX. */
2486 if (mode == BLKmode || mode == DImode)
2489 /* ECX not EAX is the first allocated register. */
2493 ret = gen_rtx_REG (mode, regno);
2498 ret = gen_rtx_REG (mode, cum->sse_regno);
2502 if (TARGET_DEBUG_ARG)
2505 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
2506 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2509 print_simple_rtl (stderr, ret);
2511 fprintf (stderr, ", stack");
2513 fprintf (stderr, " )\n");
2519 /* A C expression that indicates when an argument must be passed by
2520 reference. If nonzero for an argument, a copy of that argument is
2521 made in memory and a pointer to the argument is passed instead of
2522 the argument itself. The pointer is passed in whatever way is
2523 appropriate for passing a pointer to that type. */
/* Nonzero when the argument must be passed by reference rather than by
   value.  Only variable-sized types (int_size_in_bytes == -1) qualify
   here; the return statements themselves are elided in this listing.  */
2526 function_arg_pass_by_reference (cum, mode, type, named)
2527 CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED;
2528 enum machine_mode mode ATTRIBUTE_UNUSED;
2530 int named ATTRIBUTE_UNUSED;
2535 if (type && int_size_in_bytes (type) == -1)
2537 if (TARGET_DEBUG_ARG)
2538 fprintf (stderr, "function_arg_pass_by_reference\n");
2545 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
/* Recursively test whether TYPE is, or contains, a vector requiring
   128-bit alignment (used by ix86_function_arg_boundary below).
   NOTE(review): the returns at each match point are elided in this
   listing.  */
2548 contains_128bit_aligned_vector_p (type)
2551 enum machine_mode mode = TYPE_MODE (type);
     /* An SSE-mode type itself, unless the user explicitly lowered
        its alignment below 128 bits.  */
2552 if (SSE_REG_MODE_P (mode)
2553 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
     /* A type aligned below 128 bits cannot contain such a vector.  */
2555 if (TYPE_ALIGN (type) < 128)
2558 if (AGGREGATE_TYPE_P (type))
2560 /* Walk the aggregates recursively. */
2561 if (TREE_CODE (type) == RECORD_TYPE
2562 || TREE_CODE (type) == UNION_TYPE
2563 || TREE_CODE (type) == QUAL_UNION_TYPE)
2567 if (TYPE_BINFO (type) != NULL
2568 && TYPE_BINFO_BASETYPES (type) != NULL)
2570 tree bases = TYPE_BINFO_BASETYPES (type);
2571 int n_bases = TREE_VEC_LENGTH (bases);
2574 for (i = 0; i < n_bases; ++i)
2576 tree binfo = TREE_VEC_ELT (bases, i);
2577 tree type = BINFO_TYPE (binfo);
2579 if (contains_128bit_aligned_vector_p (type))
2583 /* And now merge the fields of structure. */
2584 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2586 if (TREE_CODE (field) == FIELD_DECL
2587 && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
2591 /* Just for use if some languages pass arrays by value. */
2592 else if (TREE_CODE (type) == ARRAY_TYPE)
2594 if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
2603 /* Gives the alignment boundary, in bits, of an argument with the specified mode
/* Return the alignment boundary in bits for an argument of MODE/TYPE.
   Starts from the natural (type or mode) alignment, clamped up to
   PARM_BOUNDARY, then on 32-bit demotes everything that is not an
   SSE-aligned vector back to PARM_BOUNDARY.
   NOTE(review): the 64-bit early path and final return are elided in
   this listing.  */
2607 ix86_function_arg_boundary (mode, type)
2608 enum machine_mode mode;
2613 align = TYPE_ALIGN (type);
2615 align = GET_MODE_ALIGNMENT (mode);
2616 if (align < PARM_BOUNDARY)
2617 align = PARM_BOUNDARY;
2620 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
2621 make an exception for SSE modes since these require 128bit
2624 The handling here differs from field_alignment. ICC aligns MMX
2625 arguments to 4 byte boundaries, while structure fields are aligned
2626 to 8 byte boundaries. */
2629 if (!SSE_REG_MODE_P (mode))
2630 align = PARM_BOUNDARY;
2634 if (!contains_128bit_aligned_vector_p (type))
2635 align = PARM_BOUNDARY;
     /* Without SSE the extra alignment would be pointless.  */
2637 if (align != PARM_BOUNDARY && !TARGET_SSE)
2645 /* Return true if N is a possible register number of function value. */
/* Nonzero if REGNO can hold a function return value: EAX/RAX (0)
   always, ST(0) and XMM0 depending on target flags.  The two return
   expressions below presumably belong to the 64-bit and 32-bit arms of
   an elided TARGET_64BIT test -- confirm against the full source.  */
2647 ix86_function_value_regno_p (regno)
2652 return ((regno) == 0
2653 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
2654 || ((regno) == FIRST_SSE_REG && TARGET_SSE));
2656 return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
2657 || ((regno) == FIRST_SSE_REG && TARGET_SSE)
2658 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
2661 /* Define how to find the value returned by a function.
2662 VALTYPE is the data type of the value (as a tree).
2663 If the precise function being called is known, FUNC is its FUNCTION_DECL;
2664 otherwise, FUNC is 0. */
/* Return the RTL for the location in which a function returns a value
   of type VALTYPE: construct_container on x86-64, ix86_value_regno
   otherwise.  */
2666 ix86_function_value (valtype)
2671 rtx ret = construct_container (TYPE_MODE (valtype), valtype, 1,
2672 REGPARM_MAX, SSE_REGPARM_MAX,
2673 x86_64_int_return_registers, 0);
2674 /* For zero sized structures, construct_container returns NULL, but we need
2675 to keep the rest of the compiler happy by returning a meaningful value. */
2677 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
2681 return gen_rtx_REG (TYPE_MODE (valtype),
2682 ix86_value_regno (TYPE_MODE (valtype)));
2685 /* Return true iff type is returned in memory. */
/* Nonzero when a value of TYPE must be returned in memory.  On x86-64
   this is exactly when examine_argument reports a memory class; the
   32-bit rules (BLKmode, small MS aggregates, 8-byte vectors, large
   non-vector types) follow.  NOTE(review): the return values of the
   32-bit branches are elided in this listing.  */
2687 ix86_return_in_memory (type)
2690 int needed_intregs, needed_sseregs;
2693 return !examine_argument (TYPE_MODE (type), type, 1,
2694 &needed_intregs, &needed_sseregs);
2698 if (TYPE_MODE (type) == BLKmode)
2700 else if (MS_AGGREGATE_RETURN
2701 && AGGREGATE_TYPE_P (type)
2702 && int_size_in_bytes(type) <= 8)
2704 else if ((VECTOR_MODE_P (TYPE_MODE (type))
2705 && int_size_in_bytes (type) == 8)
2706 || (int_size_in_bytes (type) > 12
2707 && TYPE_MODE (type) != TImode
2708 && TYPE_MODE (type) != TFmode
2709 && !VECTOR_MODE_P (TYPE_MODE (type))))
2715 /* Define how to find the value returned by a library function
2716 assuming the value has mode MODE. */
/* Return the register holding a libcall result of MODE.  The bare
   returns below presumably belong to elided mode cases of a 64-bit
   switch (SSE modes -> XMM0, x87 modes -> ST0, default -> RAX), with
   the 32-bit path delegating to ix86_value_regno -- confirm against
   the full source.  */
2718 ix86_libcall_value (mode)
2719 enum machine_mode mode;
2729 return gen_rtx_REG (mode, FIRST_SSE_REG);
2732 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
2734 return gen_rtx_REG (mode, 0);
2738 return gen_rtx_REG (mode, ix86_value_regno (mode));
2741 /* Given a mode, return the register to use for a return value. */
/* 32-bit return-register choice for MODE: ST(0) for floats when the
   ABI returns them in the 80387, XMM0 for TImode/vectors; the default
   (EAX) return is elided in this listing.  */
2744 ix86_value_regno (mode)
2745 enum machine_mode mode;
2747 if (GET_MODE_CLASS (mode) == MODE_FLOAT && TARGET_FLOAT_RETURNS_IN_80387)
2748 return FIRST_FLOAT_REG;
2749 if (mode == TImode || VECTOR_MODE_P (mode))
2750 return FIRST_SSE_REG;
2754 /* Create the va_list data type. */
/* Build the va_list type: a plain char* on 32-bit; on x86-64 the
   one-element array of the psABI __va_list_tag record
   { gp_offset, fp_offset, overflow_arg_area, reg_save_area }.  */
2757 ix86_build_va_list ()
2759 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
2761 /* For i386 we use plain pointer to argument area. */
2763 return build_pointer_type (char_type_node);
2765 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
2766 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
2768 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
2769 unsigned_type_node);
2770 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
2771 unsigned_type_node);
2772 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
2774 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
     /* Attach the fields to the record and chain them in ABI order.  */
2777 DECL_FIELD_CONTEXT (f_gpr) = record;
2778 DECL_FIELD_CONTEXT (f_fpr) = record;
2779 DECL_FIELD_CONTEXT (f_ovf) = record;
2780 DECL_FIELD_CONTEXT (f_sav) = record;
2782 TREE_CHAIN (record) = type_decl;
2783 TYPE_NAME (record) = type_decl;
2784 TYPE_FIELDS (record) = f_gpr;
2785 TREE_CHAIN (f_gpr) = f_fpr;
2786 TREE_CHAIN (f_fpr) = f_ovf;
2787 TREE_CHAIN (f_ovf) = f_sav;
2789 layout_type (record);
2791 /* The correct type is an array type of one element. */
2792 return build_array_type (record, build_index_type (size_zero_node));
2795 /* Perform any needed actions needed for a function that is receiving a
2796 variable number of arguments.
2800 MODE and TYPE are the mode and type of the current parameter.
2802 PRETEND_SIZE is a variable that should be set to the amount of stack
2803 that must be pushed by the prolog to pretend that our caller pushed
2806 Normally, this macro will push all remaining incoming registers on the
2807 stack and set PRETEND_SIZE to the length of the registers pushed. */
/* Emit prologue code saving the unnamed-argument registers of a varargs
   x86-64 function into the register save area: a plain store per integer
   register, then a computed-jump sse_prologue_save sequence that skips
   saving SSE registers the caller did not use (count arrives in AL).
   NOTE(review): this listing elides the 64-bit guard, the no_rtl early
   exit and several intermediate statements.  */
2810 ix86_setup_incoming_varargs (cum, mode, type, pretend_size, no_rtl)
2811 CUMULATIVE_ARGS *cum;
2812 enum machine_mode mode;
2814 int *pretend_size ATTRIBUTE_UNUSED;
2818 CUMULATIVE_ARGS next_cum;
2819 rtx save_area = NULL_RTX, mem;
2832 /* Indicate to allocate space on the stack for varargs save area. */
2833 ix86_save_varrargs_registers = 1;
2835 fntype = TREE_TYPE (current_function_decl);
2836 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
2837 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
2838 != void_type_node));
2840 /* For varargs, we do not want to skip the dummy va_dcl argument.
2841 For stdargs, we do want to skip the last named argument. */
2844 function_arg_advance (&next_cum, mode, type, 1);
2847 save_area = frame_pointer_rtx;
2849 set = get_varargs_alias_set ();
     /* Save each unused integer parameter register into the save area.  */
2851 for (i = next_cum.regno; i < ix86_regparm; i++)
2853 mem = gen_rtx_MEM (Pmode,
2854 plus_constant (save_area, i * UNITS_PER_WORD));
2855 set_mem_alias_set (mem, set);
2856 emit_move_insn (mem, gen_rtx_REG (Pmode,
2857 x86_64_int_parameter_registers[i]));
2860 if (next_cum.sse_nregs)
2862 /* Now emit code to save SSE registers. The AX parameter contains number
2863 of SSE parameter registers used to call this function. We use
2864 sse_prologue_save insn template that produces computed jump across
2865 SSE saves. We need some preparation work to get this working. */
2867 label = gen_label_rtx ();
2868 label_ref = gen_rtx_LABEL_REF (Pmode, label);
2870 /* Compute address to jump to :
2871 label - 5*eax + nnamed_sse_arguments*5 */
2872 tmp_reg = gen_reg_rtx (Pmode);
2873 nsse_reg = gen_reg_rtx (Pmode);
     /* AL carries the SSE register count; widen it to a full register.  */
2874 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
2875 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
2876 gen_rtx_MULT (Pmode, nsse_reg,
2878 if (next_cum.sse_regno)
2881 gen_rtx_CONST (DImode,
2882 gen_rtx_PLUS (DImode,
2884 GEN_INT (next_cum.sse_regno * 4))));
2886 emit_move_insn (nsse_reg, label_ref);
2887 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
2889 /* Compute address of memory block we save into. We always use pointer
2890 pointing 127 bytes after first byte to store - this is needed to keep
2891 instruction size limited by 4 bytes. */
2892 tmp_reg = gen_reg_rtx (Pmode);
2893 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
2894 plus_constant (save_area,
2895 8 * REGPARM_MAX + 127)));
2896 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
2897 set_mem_alias_set (mem, set);
2898 set_mem_align (mem, BITS_PER_WORD);
2900 /* And finally do the dirty job! */
2901 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
2902 GEN_INT (next_cum.sse_regno), label));
2907 /* Implement va_start. */
/* Expand va_start: on 32-bit delegate to the standard expander; on
   x86-64 initialize the four __va_list_tag fields from the incoming
   argument bookkeeping (gp_offset = regs used * 8, fp_offset past the
   integer save area, overflow area past the named stack words, save
   area at the frame pointer).  */
2910 ix86_va_start (valist, nextarg)
2914 HOST_WIDE_INT words, n_gpr, n_fpr;
2915 tree f_gpr, f_fpr, f_ovf, f_sav;
2916 tree gpr, fpr, ovf, sav, t;
2918 /* Only 64bit target needs something special. */
2921 std_expand_builtin_va_start (valist, nextarg);
     /* Pick the __va_list_tag fields apart (order fixed by
        ix86_build_va_list).  */
2925 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
2926 f_fpr = TREE_CHAIN (f_gpr);
2927 f_ovf = TREE_CHAIN (f_fpr);
2928 f_sav = TREE_CHAIN (f_ovf);
2930 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
2931 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
2932 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
2933 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
2934 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
2936 /* Count number of gp and fp argument registers used. */
2937 words = current_function_args_info.words;
2938 n_gpr = current_function_args_info.regno;
2939 n_fpr = current_function_args_info.sse_regno;
2941 if (TARGET_DEBUG_ARG)
2942 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
2943 (int) words, (int) n_gpr, (int) n_fpr);
     /* gp_offset: bytes of integer save area already consumed.  */
2945 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
2946 build_int_2 (n_gpr * 8, 0));
2947 TREE_SIDE_EFFECTS (t) = 1;
2948 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
     /* fp_offset: starts after the REGPARM_MAX integer slots.  */
2950 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
2951 build_int_2 (n_fpr * 16 + 8*REGPARM_MAX, 0));
2952 TREE_SIDE_EFFECTS (t) = 1;
2953 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2955 /* Find the overflow area. */
2956 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx)
2958 t = build (PLUS_EXPR, TREE_TYPE (ovf), t,
2959 build_int_2 (words * UNITS_PER_WORD, 0));
2960 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
2961 TREE_SIDE_EFFECTS (t) = 1;
2962 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2964 /* Find the register save area.
2965 Prologue of the function saves it right above the stack frame. */
2966 t = make_tree (TREE_TYPE (sav), frame_pointer_rtx);
2967 t = build (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
2968 TREE_SIDE_EFFECTS (t) = 1;
2969 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2972 /* Implement va_arg. */
2974 ix86_va_arg (valist, type)
2977 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
2978 tree f_gpr, f_fpr, f_ovf, f_sav;
2979 tree gpr, fpr, ovf, sav, t;
2981 rtx lab_false, lab_over = NULL_RTX;
2986 /* Only 64bit target needs something special. */
2989 return std_expand_builtin_va_arg (valist, type);
2992 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
2993 f_fpr = TREE_CHAIN (f_gpr);
2994 f_ovf = TREE_CHAIN (f_fpr);
2995 f_sav = TREE_CHAIN (f_ovf);
2997 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
2998 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
2999 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
3000 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
3001 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
3003 size = int_size_in_bytes (type);
3006 /* Passed by reference. */
3008 type = build_pointer_type (type);
3009 size = int_size_in_bytes (type);
3011 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3013 container = construct_container (TYPE_MODE (type), type, 0,
3014 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
3016 * Pull the value out of the saved registers ...
3019 addr_rtx = gen_reg_rtx (Pmode);
3023 rtx int_addr_rtx, sse_addr_rtx;
3024 int needed_intregs, needed_sseregs;
3027 lab_over = gen_label_rtx ();
3028 lab_false = gen_label_rtx ();
3030 examine_argument (TYPE_MODE (type), type, 0,
3031 &needed_intregs, &needed_sseregs);
3034 need_temp = ((needed_intregs && TYPE_ALIGN (type) > 64)
3035 || TYPE_ALIGN (type) > 128);
3037 /* In case we are passing structure, verify that it is consecutive block
3038 on the register save area. If not we need to do moves. */
3039 if (!need_temp && !REG_P (container))
3041 /* Verify that all registers are strictly consecutive */
3042 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
3046 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
3048 rtx slot = XVECEXP (container, 0, i);
3049 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
3050 || INTVAL (XEXP (slot, 1)) != i * 16)
3058 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
3060 rtx slot = XVECEXP (container, 0, i);
3061 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
3062 || INTVAL (XEXP (slot, 1)) != i * 8)
3069 int_addr_rtx = addr_rtx;
3070 sse_addr_rtx = addr_rtx;
3074 int_addr_rtx = gen_reg_rtx (Pmode);
3075 sse_addr_rtx = gen_reg_rtx (Pmode);
3077 /* First ensure that we fit completely in registers. */
3080 emit_cmp_and_jump_insns (expand_expr
3081 (gpr, NULL_RTX, SImode, EXPAND_NORMAL),
3082 GEN_INT ((REGPARM_MAX - needed_intregs +
3083 1) * 8), GE, const1_rtx, SImode,
3088 emit_cmp_and_jump_insns (expand_expr
3089 (fpr, NULL_RTX, SImode, EXPAND_NORMAL),
3090 GEN_INT ((SSE_REGPARM_MAX -
3091 needed_sseregs + 1) * 16 +
3092 REGPARM_MAX * 8), GE, const1_rtx,
3093 SImode, 1, lab_false);
3096 /* Compute index to start of area used for integer regs. */
3099 t = build (PLUS_EXPR, ptr_type_node, sav, gpr);
3100 r = expand_expr (t, int_addr_rtx, Pmode, EXPAND_NORMAL);
3101 if (r != int_addr_rtx)
3102 emit_move_insn (int_addr_rtx, r);
3106 t = build (PLUS_EXPR, ptr_type_node, sav, fpr);
3107 r = expand_expr (t, sse_addr_rtx, Pmode, EXPAND_NORMAL);
3108 if (r != sse_addr_rtx)
3109 emit_move_insn (sse_addr_rtx, r);
3116 /* Never use the memory itself, as it has the alias set. */
3117 addr_rtx = XEXP (assign_temp (type, 0, 1, 0), 0);
3118 mem = gen_rtx_MEM (BLKmode, addr_rtx);
3119 set_mem_alias_set (mem, get_varargs_alias_set ());
3120 set_mem_align (mem, BITS_PER_UNIT);
3122 for (i = 0; i < XVECLEN (container, 0); i++)
3124 rtx slot = XVECEXP (container, 0, i);
3125 rtx reg = XEXP (slot, 0);
3126 enum machine_mode mode = GET_MODE (reg);
3132 if (SSE_REGNO_P (REGNO (reg)))
3134 src_addr = sse_addr_rtx;
3135 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
3139 src_addr = int_addr_rtx;
3140 src_offset = REGNO (reg) * 8;
3142 src_mem = gen_rtx_MEM (mode, src_addr);
3143 set_mem_alias_set (src_mem, get_varargs_alias_set ());
3144 src_mem = adjust_address (src_mem, mode, src_offset);
3145 dest_mem = adjust_address (mem, mode, INTVAL (XEXP (slot, 1)));
3146 emit_move_insn (dest_mem, src_mem);