1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to
19 the Free Software Foundation, 51 Franklin Street, Fifth Floor,
20 Boston, MA 02110-1301, USA. */
24 #include "coretypes.h"
30 #include "hard-reg-set.h"
32 #include "insn-config.h"
33 #include "conditions.h"
35 #include "insn-codes.h"
36 #include "insn-attr.h"
44 #include "basic-block.h"
47 #include "target-def.h"
48 #include "langhooks.h"
50 #include "tree-gimple.h"
53 #ifndef CHECK_STACK_LIMIT
/* Default stack-limit sentinel: -1 disables the explicit stack-limit
   check.  NOTE(review): target headers may predefine this — confirm.  */
54 #define CHECK_STACK_LIMIT (-1)
57 /* Return index of given mode in mult and division cost tables:
   QImode=0, HImode=1, SImode=2, DImode=3.  The cost arrays below have a
   fifth entry (index 4), presumably the default/TImode arm of this
   conditional — final arm not visible here, TODO confirm.  */
58 #define MODE_INDEX(mode) \
59 ((mode) == QImode ? 0 \
60 : (mode) == HImode ? 1 \
61 : (mode) == SImode ? 2 \
62 : (mode) == DImode ? 3 \
65 /* Processor costs (relative to an add) */
67 struct processor_costs size_cost = { /* costs for tuning for size */
68 2, /* cost of an add instruction */
69 3, /* cost of a lea instruction */
70 2, /* variable shift costs */
71 3, /* constant shift costs */
72 {3, 3, 3, 3, 5}, /* cost of starting a multiply */
73 0, /* cost of multiply per each bit set */
74 {3, 3, 3, 3, 5}, /* cost of a divide/mod */
75 3, /* cost of movsx */
76 3, /* cost of movzx */
79 2, /* cost for loading QImode using movzbl */
80 {2, 2, 2}, /* cost of loading integer registers
81 in QImode, HImode and SImode.
82 Relative to reg-reg move (2). */
83 {2, 2, 2}, /* cost of storing integer registers */
84 2, /* cost of reg,reg fld/fst */
85 {2, 2, 2}, /* cost of loading fp registers
86 in SFmode, DFmode and XFmode */
87 {2, 2, 2}, /* cost of storing fp registers
   in SFmode, DFmode and XFmode */
88 3, /* cost of moving MMX register */
89 {3, 3}, /* cost of loading MMX registers
90 in SImode and DImode */
91 {3, 3}, /* cost of storing MMX registers
92 in SImode and DImode */
93 3, /* cost of moving SSE register */
94 {3, 3, 3}, /* cost of loading SSE registers
95 in SImode, DImode and TImode */
96 {3, 3, 3}, /* cost of storing SSE registers
97 in SImode, DImode and TImode */
98 3, /* MMX or SSE register to integer */
99 0, /* size of prefetch block */
100 0, /* number of parallel prefetches */
102 2, /* cost of FADD and FSUB insns. */
103 2, /* cost of FMUL instruction. */
104 2, /* cost of FDIV instruction. */
105 2, /* cost of FABS instruction. */
106 2, /* cost of FCHS instruction. */
107 2, /* cost of FSQRT instruction. */
110 /* Processor costs (relative to an add) */
112 struct processor_costs i386_cost = { /* 386 specific costs */
113 1, /* cost of an add instruction */
114 1, /* cost of a lea instruction */
115 3, /* variable shift costs */
116 2, /* constant shift costs */
117 {6, 6, 6, 6, 6}, /* cost of starting a multiply */
118 1, /* cost of multiply per each bit set */
119 {23, 23, 23, 23, 23}, /* cost of a divide/mod */
120 3, /* cost of movsx */
121 2, /* cost of movzx */
122 15, /* "large" insn */
124 4, /* cost for loading QImode using movzbl */
125 {2, 4, 2}, /* cost of loading integer registers
126 in QImode, HImode and SImode.
127 Relative to reg-reg move (2). */
128 {2, 4, 2}, /* cost of storing integer registers */
129 2, /* cost of reg,reg fld/fst */
130 {8, 8, 8}, /* cost of loading fp registers
131 in SFmode, DFmode and XFmode */
132 {8, 8, 8}, /* cost of storing fp registers
   in SFmode, DFmode and XFmode */
133 2, /* cost of moving MMX register */
134 {4, 8}, /* cost of loading MMX registers
135 in SImode and DImode */
136 {4, 8}, /* cost of storing MMX registers
137 in SImode and DImode */
138 2, /* cost of moving SSE register */
139 {4, 8, 16}, /* cost of loading SSE registers
140 in SImode, DImode and TImode */
141 {4, 8, 16}, /* cost of storing SSE registers
142 in SImode, DImode and TImode */
143 3, /* MMX or SSE register to integer */
144 0, /* size of prefetch block */
145 0, /* number of parallel prefetches */
147 23, /* cost of FADD and FSUB insns. */
148 27, /* cost of FMUL instruction. */
149 88, /* cost of FDIV instruction. */
150 22, /* cost of FABS instruction. */
151 24, /* cost of FCHS instruction. */
152 122, /* cost of FSQRT instruction. */
156 struct processor_costs i486_cost = { /* 486 specific costs */
157 1, /* cost of an add instruction */
158 1, /* cost of a lea instruction */
159 3, /* variable shift costs */
160 2, /* constant shift costs */
161 {12, 12, 12, 12, 12}, /* cost of starting a multiply */
162 1, /* cost of multiply per each bit set */
163 {40, 40, 40, 40, 40}, /* cost of a divide/mod */
164 3, /* cost of movsx */
165 2, /* cost of movzx */
166 15, /* "large" insn */
168 4, /* cost for loading QImode using movzbl */
169 {2, 4, 2}, /* cost of loading integer registers
170 in QImode, HImode and SImode.
171 Relative to reg-reg move (2). */
172 {2, 4, 2}, /* cost of storing integer registers */
173 2, /* cost of reg,reg fld/fst */
174 {8, 8, 8}, /* cost of loading fp registers
175 in SFmode, DFmode and XFmode */
176 {8, 8, 8}, /* cost of storing fp registers
   in SFmode, DFmode and XFmode */
177 2, /* cost of moving MMX register */
178 {4, 8}, /* cost of loading MMX registers
179 in SImode and DImode */
180 {4, 8}, /* cost of storing MMX registers
181 in SImode and DImode */
182 2, /* cost of moving SSE register */
183 {4, 8, 16}, /* cost of loading SSE registers
184 in SImode, DImode and TImode */
185 {4, 8, 16}, /* cost of storing SSE registers
186 in SImode, DImode and TImode */
187 3, /* MMX or SSE register to integer */
188 0, /* size of prefetch block */
189 0, /* number of parallel prefetches */
191 8, /* cost of FADD and FSUB insns. */
192 16, /* cost of FMUL instruction. */
193 73, /* cost of FDIV instruction. */
194 3, /* cost of FABS instruction. */
195 3, /* cost of FCHS instruction. */
196 83, /* cost of FSQRT instruction. */
200 struct processor_costs pentium_cost = {
201 1, /* cost of an add instruction */
202 1, /* cost of a lea instruction */
203 4, /* variable shift costs */
204 1, /* constant shift costs */
205 {11, 11, 11, 11, 11}, /* cost of starting a multiply */
206 0, /* cost of multiply per each bit set */
207 {25, 25, 25, 25, 25}, /* cost of a divide/mod */
208 3, /* cost of movsx */
209 2, /* cost of movzx */
210 8, /* "large" insn */
212 6, /* cost for loading QImode using movzbl */
213 {2, 4, 2}, /* cost of loading integer registers
214 in QImode, HImode and SImode.
215 Relative to reg-reg move (2). */
216 {2, 4, 2}, /* cost of storing integer registers */
217 2, /* cost of reg,reg fld/fst */
218 {2, 2, 6}, /* cost of loading fp registers
219 in SFmode, DFmode and XFmode */
220 {4, 4, 6}, /* cost of storing fp registers
   in SFmode, DFmode and XFmode */
221 8, /* cost of moving MMX register */
222 {8, 8}, /* cost of loading MMX registers
223 in SImode and DImode */
224 {8, 8}, /* cost of storing MMX registers
225 in SImode and DImode */
226 2, /* cost of moving SSE register */
227 {4, 8, 16}, /* cost of loading SSE registers
228 in SImode, DImode and TImode */
229 {4, 8, 16}, /* cost of storing SSE registers
230 in SImode, DImode and TImode */
231 3, /* MMX or SSE register to integer */
232 0, /* size of prefetch block */
233 0, /* number of parallel prefetches */
235 3, /* cost of FADD and FSUB insns. */
236 3, /* cost of FMUL instruction. */
237 39, /* cost of FDIV instruction. */
238 1, /* cost of FABS instruction. */
239 1, /* cost of FCHS instruction. */
240 70, /* cost of FSQRT instruction. */
244 struct processor_costs pentiumpro_cost = {
245 1, /* cost of an add instruction */
246 1, /* cost of a lea instruction */
247 1, /* variable shift costs */
248 1, /* constant shift costs */
249 {4, 4, 4, 4, 4}, /* cost of starting a multiply */
250 0, /* cost of multiply per each bit set */
251 {17, 17, 17, 17, 17}, /* cost of a divide/mod */
252 1, /* cost of movsx */
253 1, /* cost of movzx */
254 8, /* "large" insn */
256 2, /* cost for loading QImode using movzbl */
257 {4, 4, 4}, /* cost of loading integer registers
258 in QImode, HImode and SImode.
259 Relative to reg-reg move (2). */
260 {2, 2, 2}, /* cost of storing integer registers */
261 2, /* cost of reg,reg fld/fst */
262 {2, 2, 6}, /* cost of loading fp registers
263 in SFmode, DFmode and XFmode */
264 {4, 4, 6}, /* cost of storing fp registers
   in SFmode, DFmode and XFmode */
265 2, /* cost of moving MMX register */
266 {2, 2}, /* cost of loading MMX registers
267 in SImode and DImode */
268 {2, 2}, /* cost of storing MMX registers
269 in SImode and DImode */
270 2, /* cost of moving SSE register */
271 {2, 2, 8}, /* cost of loading SSE registers
272 in SImode, DImode and TImode */
273 {2, 2, 8}, /* cost of storing SSE registers
274 in SImode, DImode and TImode */
275 3, /* MMX or SSE register to integer */
276 32, /* size of prefetch block */
277 6, /* number of parallel prefetches */
279 3, /* cost of FADD and FSUB insns. */
280 5, /* cost of FMUL instruction. */
281 56, /* cost of FDIV instruction. */
282 2, /* cost of FABS instruction. */
283 2, /* cost of FCHS instruction. */
284 56, /* cost of FSQRT instruction. */
288 struct processor_costs k6_cost = {
289 1, /* cost of an add instruction */
290 2, /* cost of a lea instruction */
291 1, /* variable shift costs */
292 1, /* constant shift costs */
293 {3, 3, 3, 3, 3}, /* cost of starting a multiply */
294 0, /* cost of multiply per each bit set */
295 {18, 18, 18, 18, 18}, /* cost of a divide/mod */
296 2, /* cost of movsx */
297 2, /* cost of movzx */
298 8, /* "large" insn */
300 3, /* cost for loading QImode using movzbl */
301 {4, 5, 4}, /* cost of loading integer registers
302 in QImode, HImode and SImode.
303 Relative to reg-reg move (2). */
304 {2, 3, 2}, /* cost of storing integer registers */
305 4, /* cost of reg,reg fld/fst */
306 {6, 6, 6}, /* cost of loading fp registers
307 in SFmode, DFmode and XFmode */
308 {4, 4, 4}, /* cost of storing fp registers
   in SFmode, DFmode and XFmode */
309 2, /* cost of moving MMX register */
310 {2, 2}, /* cost of loading MMX registers
311 in SImode and DImode */
312 {2, 2}, /* cost of storing MMX registers
313 in SImode and DImode */
314 2, /* cost of moving SSE register */
315 {2, 2, 8}, /* cost of loading SSE registers
316 in SImode, DImode and TImode */
317 {2, 2, 8}, /* cost of storing SSE registers
318 in SImode, DImode and TImode */
319 6, /* MMX or SSE register to integer */
320 32, /* size of prefetch block */
321 1, /* number of parallel prefetches */
323 2, /* cost of FADD and FSUB insns. */
324 2, /* cost of FMUL instruction. */
325 56, /* cost of FDIV instruction. */
326 2, /* cost of FABS instruction. */
327 2, /* cost of FCHS instruction. */
328 56, /* cost of FSQRT instruction. */
332 struct processor_costs athlon_cost = {
333 1, /* cost of an add instruction */
334 2, /* cost of a lea instruction */
335 1, /* variable shift costs */
336 1, /* constant shift costs */
337 {5, 5, 5, 5, 5}, /* cost of starting a multiply */
338 0, /* cost of multiply per each bit set */
339 {18, 26, 42, 74, 74}, /* cost of a divide/mod */
340 1, /* cost of movsx */
341 1, /* cost of movzx */
342 8, /* "large" insn */
344 4, /* cost for loading QImode using movzbl */
345 {3, 4, 3}, /* cost of loading integer registers
346 in QImode, HImode and SImode.
347 Relative to reg-reg move (2). */
348 {3, 4, 3}, /* cost of storing integer registers */
349 4, /* cost of reg,reg fld/fst */
350 {4, 4, 12}, /* cost of loading fp registers
351 in SFmode, DFmode and XFmode */
352 {6, 6, 8}, /* cost of storing fp registers
   in SFmode, DFmode and XFmode */
353 2, /* cost of moving MMX register */
354 {4, 4}, /* cost of loading MMX registers
355 in SImode and DImode */
356 {4, 4}, /* cost of storing MMX registers
357 in SImode and DImode */
358 2, /* cost of moving SSE register */
359 {4, 4, 6}, /* cost of loading SSE registers
360 in SImode, DImode and TImode */
361 {4, 4, 5}, /* cost of storing SSE registers
362 in SImode, DImode and TImode */
363 5, /* MMX or SSE register to integer */
364 64, /* size of prefetch block */
365 6, /* number of parallel prefetches */
367 4, /* cost of FADD and FSUB insns. */
368 4, /* cost of FMUL instruction. */
369 24, /* cost of FDIV instruction. */
370 2, /* cost of FABS instruction. */
371 2, /* cost of FCHS instruction. */
372 35, /* cost of FSQRT instruction. */
376 struct processor_costs k8_cost = {
377 1, /* cost of an add instruction */
378 2, /* cost of a lea instruction */
379 1, /* variable shift costs */
380 1, /* constant shift costs */
381 {3, 4, 3, 4, 5}, /* cost of starting a multiply */
382 0, /* cost of multiply per each bit set */
383 {18, 26, 42, 74, 74}, /* cost of a divide/mod */
384 1, /* cost of movsx */
385 1, /* cost of movzx */
386 8, /* "large" insn */
388 4, /* cost for loading QImode using movzbl */
389 {3, 4, 3}, /* cost of loading integer registers
390 in QImode, HImode and SImode.
391 Relative to reg-reg move (2). */
392 {3, 4, 3}, /* cost of storing integer registers */
393 4, /* cost of reg,reg fld/fst */
394 {4, 4, 12}, /* cost of loading fp registers
395 in SFmode, DFmode and XFmode */
396 {6, 6, 8}, /* cost of storing fp registers
   in SFmode, DFmode and XFmode */
397 2, /* cost of moving MMX register */
398 {3, 3}, /* cost of loading MMX registers
399 in SImode and DImode */
400 {4, 4}, /* cost of storing MMX registers
401 in SImode and DImode */
402 2, /* cost of moving SSE register */
403 {4, 3, 6}, /* cost of loading SSE registers
404 in SImode, DImode and TImode */
405 {4, 4, 5}, /* cost of storing SSE registers
406 in SImode, DImode and TImode */
407 5, /* MMX or SSE register to integer */
408 64, /* size of prefetch block */
409 6, /* number of parallel prefetches */
411 4, /* cost of FADD and FSUB insns. */
412 4, /* cost of FMUL instruction. */
413 19, /* cost of FDIV instruction. */
414 2, /* cost of FABS instruction. */
415 2, /* cost of FCHS instruction. */
416 35, /* cost of FSQRT instruction. */
420 struct processor_costs pentium4_cost = {
421 1, /* cost of an add instruction */
422 3, /* cost of a lea instruction */
423 4, /* variable shift costs */
424 4, /* constant shift costs */
425 {15, 15, 15, 15, 15}, /* cost of starting a multiply */
426 0, /* cost of multiply per each bit set */
427 {56, 56, 56, 56, 56}, /* cost of a divide/mod */
428 1, /* cost of movsx */
429 1, /* cost of movzx */
430 16, /* "large" insn */
432 2, /* cost for loading QImode using movzbl */
433 {4, 5, 4}, /* cost of loading integer registers
434 in QImode, HImode and SImode.
435 Relative to reg-reg move (2). */
436 {2, 3, 2}, /* cost of storing integer registers */
437 2, /* cost of reg,reg fld/fst */
438 {2, 2, 6}, /* cost of loading fp registers
439 in SFmode, DFmode and XFmode */
440 {4, 4, 6}, /* cost of storing fp registers
   in SFmode, DFmode and XFmode */
441 2, /* cost of moving MMX register */
442 {2, 2}, /* cost of loading MMX registers
443 in SImode and DImode */
444 {2, 2}, /* cost of storing MMX registers
445 in SImode and DImode */
446 12, /* cost of moving SSE register */
447 {12, 12, 12}, /* cost of loading SSE registers
448 in SImode, DImode and TImode */
449 {2, 2, 8}, /* cost of storing SSE registers
450 in SImode, DImode and TImode */
451 10, /* MMX or SSE register to integer */
452 64, /* size of prefetch block */
453 6, /* number of parallel prefetches */
455 5, /* cost of FADD and FSUB insns. */
456 7, /* cost of FMUL instruction. */
457 43, /* cost of FDIV instruction. */
458 2, /* cost of FABS instruction. */
459 2, /* cost of FCHS instruction. */
460 43, /* cost of FSQRT instruction. */
464 struct processor_costs nocona_cost = {
465 1, /* cost of an add instruction */
466 1, /* cost of a lea instruction */
467 1, /* variable shift costs */
468 1, /* constant shift costs */
469 {10, 10, 10, 10, 10}, /* cost of starting a multiply */
470 0, /* cost of multiply per each bit set */
471 {66, 66, 66, 66, 66}, /* cost of a divide/mod */
472 1, /* cost of movsx */
473 1, /* cost of movzx */
474 16, /* "large" insn */
476 4, /* cost for loading QImode using movzbl */
477 {4, 4, 4}, /* cost of loading integer registers
478 in QImode, HImode and SImode.
479 Relative to reg-reg move (2). */
480 {4, 4, 4}, /* cost of storing integer registers */
481 3, /* cost of reg,reg fld/fst */
482 {12, 12, 12}, /* cost of loading fp registers
483 in SFmode, DFmode and XFmode */
484 {4, 4, 4}, /* cost of storing fp registers
   in SFmode, DFmode and XFmode */
485 6, /* cost of moving MMX register */
486 {12, 12}, /* cost of loading MMX registers
487 in SImode and DImode */
488 {12, 12}, /* cost of storing MMX registers
489 in SImode and DImode */
490 6, /* cost of moving SSE register */
491 {12, 12, 12}, /* cost of loading SSE registers
492 in SImode, DImode and TImode */
493 {12, 12, 12}, /* cost of storing SSE registers
494 in SImode, DImode and TImode */
495 8, /* MMX or SSE register to integer */
496 128, /* size of prefetch block */
497 8, /* number of parallel prefetches */
499 6, /* cost of FADD and FSUB insns. */
500 8, /* cost of FMUL instruction. */
501 40, /* cost of FDIV instruction. */
502 3, /* cost of FABS instruction. */
503 3, /* cost of FCHS instruction. */
504 44, /* cost of FSQRT instruction. */
/* Cost table currently in effect for code generation; defaults to
   pentium_cost.  NOTE(review): presumably reset by option handling once
   -mtune is processed — confirm.  */
507 const struct processor_costs *ix86_cost = &pentium_cost;
509 /* Processor feature/optimization bitmasks.  Each m_* is the bit for one
   PROCESSOR_* enumerator; the x86_* tunables below are unions of them.  */
510 #define m_386 (1<<PROCESSOR_I386)
511 #define m_486 (1<<PROCESSOR_I486)
512 #define m_PENT (1<<PROCESSOR_PENTIUM)
513 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
514 #define m_K6 (1<<PROCESSOR_K6)
515 #define m_ATHLON (1<<PROCESSOR_ATHLON)
516 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
517 #define m_K8 (1<<PROCESSOR_K8)
518 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
519 #define m_NOCONA (1<<PROCESSOR_NOCONA)
/* Per-feature tuning switches: each constant is a bitmask of the
   processors (m_* above) for which the transformation is enabled.  */
521 const int x86_use_leave = m_386 | m_K6 | m_ATHLON_K8;
522 const int x86_push_memory = m_386 | m_K6 | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
523 const int x86_zero_extend_with_and = m_486 | m_PENT;
524 const int x86_movx = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA /* m_386 | m_K6 */;
525 const int x86_double_with_add = ~m_386;
526 const int x86_use_bit_test = m_386;
527 const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8 | m_K6;
528 const int x86_cmove = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
529 const int x86_fisttp = m_NOCONA;
530 const int x86_3dnow_a = m_ATHLON_K8;
531 const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
532 /* Branch hints were put in P4 based on simulation result. But
533 after P4 was made, no performance benefit was observed with
534 branch hints. It also increases the code size. As the result,
535 icc never generates branch hints. */
536 const int x86_branch_hints = 0;
537 const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4 | m_NOCONA;
538 const int x86_partial_reg_stall = m_PPRO;
539 const int x86_use_himode_fiop = m_386 | m_486 | m_K6;
540 const int x86_use_simode_fiop = ~(m_PPRO | m_ATHLON_K8 | m_PENT);
541 const int x86_use_mov0 = m_K6;
542 const int x86_use_cltd = ~(m_PENT | m_K6);
543 const int x86_read_modify_write = ~m_PENT;
544 const int x86_read_modify = ~(m_PENT | m_PPRO);
545 const int x86_split_long_moves = m_PPRO;
546 const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON_K8;
547 const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
548 const int x86_single_stringop = m_386 | m_PENT4 | m_NOCONA;
549 const int x86_qimode_math = ~(0);
550 const int x86_promote_qi_regs = 0;
551 const int x86_himode_math = ~(m_PPRO);
552 const int x86_promote_hi_regs = m_PPRO;
553 const int x86_sub_esp_4 = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA;
554 const int x86_sub_esp_8 = m_ATHLON_K8 | m_PPRO | m_386 | m_486 | m_PENT4 | m_NOCONA;
555 const int x86_add_esp_4 = m_ATHLON_K8 | m_K6 | m_PENT4 | m_NOCONA;
556 const int x86_add_esp_8 = m_ATHLON_K8 | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4 | m_NOCONA;
557 const int x86_integer_DFmode_moves = ~(m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO);
558 const int x86_partial_reg_dependency = m_ATHLON_K8 | m_PENT4 | m_NOCONA;
559 const int x86_memory_mismatch_stall = m_ATHLON_K8 | m_PENT4 | m_NOCONA;
560 const int x86_accumulate_outgoing_args = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO;
561 const int x86_prologue_using_move = m_ATHLON_K8 | m_PPRO;
562 const int x86_epilogue_using_move = m_ATHLON_K8 | m_PPRO;
563 const int x86_decompose_lea = m_PENT4 | m_NOCONA;
564 const int x86_shift1 = ~m_486;
565 const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
566 const int x86_sse_partial_reg_dependency = m_PENT4 | m_NOCONA | m_PPRO;
567 /* Set for machines where the type and dependencies are resolved on SSE
568 register parts instead of whole registers, so we may maintain just
569 lower part of scalar values in proper format leaving the upper part undefined.  */
571 const int x86_sse_split_regs = m_ATHLON_K8;
572 const int x86_sse_typeless_stores = m_ATHLON_K8;
573 const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4 | m_NOCONA;
574 const int x86_use_ffreep = m_ATHLON_K8;
575 const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_K6;
577 /* ??? Allowing interunit moves makes it all too easy for the compiler to put
578 integer data in xmm registers. Which results in pretty abysmal code. */
579 const int x86_inter_unit_moves = 0 /* ~(m_ATHLON_K8) */;
581 const int x86_ext_80387_constants = m_K6 | m_ATHLON | m_PENT4 | m_NOCONA | m_PPRO;
582 /* Some CPU cores are not able to predict more than 4 branch instructions in
583 the 16 byte window. */
584 const int x86_four_jump_limit = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
585 const int x86_schedule = m_PPRO | m_ATHLON_K8 | m_K6 | m_PENT;
586 const int x86_use_bt = m_ATHLON_K8;
587 /* Compare and exchange was added for 80486. */
588 const int x86_cmpxchg = ~m_386;
589 /* Exchange and add was added for 80486. */
590 const int x86_xadd = ~m_386;
592 /* In case the average insn count for single function invocation is
593 lower than this constant, emit fast (but longer) prologue and
   epilogue code.  */
595 #define FAST_PROLOGUE_INSN_COUNT 20
597 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
598 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
599 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
600 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
602 /* Array of the smallest class containing reg number REGNO, indexed by
603 REGNO. Used by REGNO_REG_CLASS in i386.h. */
605 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
/* ax, dx, cx, bx */
608 AREG, DREG, CREG, BREG,
/* si, di, bp, sp */
610 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
/* FP registers */
612 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
613 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
616 /* flags, fpsr, dirflag, frame */
617 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
/* SSE registers */
618 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
/* MMX registers */
620 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
/* extended integer registers r8-r15 */
622 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
623 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
/* extended SSE registers xmm8-xmm15 */
624 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
628 /* The "default" register map used in 32bit mode. */
/* Maps gcc hard-register numbers to debug-info register numbers;
   -1 means the register has no debug-info number.  */
630 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
632 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
633 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
634 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
635 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
636 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
637 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
638 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
/* gcc hard-register numbers of the x86-64 ABI integer argument
   registers, in argument order: rdi, rsi, rdx, rcx, r8, r9.  */
641 static int const x86_64_int_parameter_registers[6] =
643 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
644 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
/* Integer return-value registers (x86-64 ABI).  */
647 static int const x86_64_int_return_registers[4] =
649 0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
652 /* The "default" register map used in 64bit mode. */
/* -1 means the register has no debug-info number.  */
653 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
655 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
656 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
657 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
658 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
659 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
660 8,9,10,11,12,13,14,15, /* extended integer registers */
661 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
664 /* Define the register numbers to be used in Dwarf debugging information.
665 The SVR4 reference port C compiler uses the following register numbers
666 in its Dwarf output code:
667 0 for %eax (gcc regno = 0)
668 1 for %ecx (gcc regno = 2)
669 2 for %edx (gcc regno = 1)
670 3 for %ebx (gcc regno = 3)
671 4 for %esp (gcc regno = 7)
672 5 for %ebp (gcc regno = 6)
673 6 for %esi (gcc regno = 4)
674 7 for %edi (gcc regno = 5)
675 The following three DWARF register numbers are never generated by
676 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
677 believes these numbers have these meanings.
678 8 for %eip (no gcc equivalent)
679 9 for %eflags (gcc regno = 17)
680 10 for %trapno (no gcc equivalent)
681 It is not at all clear how we should number the FP stack registers
682 for the x86 architecture. If the version of SDB on x86/svr4 were
683 a bit less brain dead with respect to floating-point then we would
684 have a precedent to follow with respect to DWARF register numbers
685 for x86 FP registers, but the SDB on x86/svr4 is so completely
686 broken with respect to FP registers that it is hardly worth thinking
687 of it as something to strive for compatibility with.
688 The version of x86/svr4 SDB I have at the moment does (partially)
689 seem to believe that DWARF register number 11 is associated with
690 the x86 register %st(0), but that's about all. Higher DWARF
691 register numbers don't seem to be associated with anything in
692 particular, and even for DWARF regno 11, SDB only seems to under-
693 stand that it should say that a variable lives in %st(0) (when
694 asked via an `=' command) if we said it was in DWARF regno 11,
695 but SDB still prints garbage when asked for the value of the
696 variable in question (via a `/' command).
697 (Also note that the labels SDB prints for various FP stack regs
698 when doing an `x' command are all wrong.)
699 Note that these problems generally don't affect the native SVR4
700 C compiler because it doesn't allow the use of -O with -g and
701 because when it is *not* optimizing, it allocates a memory
702 location for each floating-point variable, and the memory
703 location is what gets described in the DWARF AT_location
704 attribute for the variable in question.
705 Regardless of the severe mental illness of the x86/svr4 SDB, we
706 do something sensible here and we use the following DWARF
707 register numbers. Note that these are all stack-top-relative
709 11 for %st(0) (gcc regno = 8)
710 12 for %st(1) (gcc regno = 9)
711 13 for %st(2) (gcc regno = 10)
712 14 for %st(3) (gcc regno = 11)
713 15 for %st(4) (gcc regno = 12)
714 16 for %st(5) (gcc regno = 13)
715 17 for %st(6) (gcc regno = 14)
716 18 for %st(7) (gcc regno = 15)
*/
/* SVR4/DWARF numbering (documented in the comment above): note the
   general-reg order differs from dbx_register_map — here esp=4, ebp=5,
   esi=6, edi=7.  -1 means no DWARF number.  */
718 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
720 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
721 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
722 -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
723 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
724 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
725 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
726 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
729 /* Test and compare insns in i386.md store the information needed to
730 generate branch and scc insns here. */
732 rtx ix86_compare_op0 = NULL_RTX;
733 rtx ix86_compare_op1 = NULL_RTX;
/* NOTE(review): presumably non-NULL when the comparison has already
   been emitted and its flags result can be reused — confirm in i386.md.  */
734 rtx ix86_compare_emitted = NULL_RTX;
736 /* Size of the register save area. */
737 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
739 /* Define the structure for the machine field in struct function. */
741 struct stack_local_entry GTY(())
/* Singly linked chain of stack-local slot descriptors; the remaining
   fields are not visible in this view.  */
746 struct stack_local_entry *next;
749 /* Structure describing stack frame layout.
750 Stack grows downward:
756 saved frame pointer if frame_pointer_needed
757 <- HARD_FRAME_POINTER
763 > to_allocate <- FRAME_POINTER */
775 int outgoing_arguments_size;
/* Total bytes the prologue must allocate below the saved registers.  */
778 HOST_WIDE_INT to_allocate;
779 /* The offsets relative to ARG_POINTER. */
780 HOST_WIDE_INT frame_pointer_offset;
781 HOST_WIDE_INT hard_frame_pointer_offset;
782 HOST_WIDE_INT stack_pointer_offset;
784 /* When save_regs_using_mov is set, emit prologue using
785 move instead of push instructions. */
786 bool save_regs_using_mov;
789 /* Code model option. */
790 enum cmodel ix86_cmodel;
/* Assembly dialect: AT&T (default) or Intel syntax. */
792 enum asm_dialect ix86_asm_dialect = ASM_ATT;
/* Which TLS access model to emit; defaults to the GNU dialect. */
794 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
796 /* Which unit we are generating floating point math for. */
797 enum fpmath_unit ix86_fpmath;
799 /* Which cpu are we scheduling for. */
800 enum processor_type ix86_tune;
801 /* Which instruction set architecture to use. */
802 enum processor_type ix86_arch;
804 /* true if sse prefetch instruction is not NOOP. */
805 int x86_prefetch_sse;
807 /* ix86_regparm_string as a number */
808 static int ix86_regparm;
810 /* Preferred alignment for stack boundary in bits. */
811 unsigned int ix86_preferred_stack_boundary;
813 /* Values 1-5: see jump.c */
814 int ix86_branch_cost;
816 /* Variables which are this size or smaller are put in the data/bss
817 or ldata/lbss sections. */
819 int ix86_section_threshold = 65536;
821 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
822 char internal_label_prefix[16];
823 int internal_label_prefix_len;
/* Forward declarations of file-local helpers defined later in this file.  */
825 static bool ix86_handle_option (size_t, const char *, int);
826 static void output_pic_addr_const (FILE *, rtx, int);
827 static void put_condition_code (enum rtx_code, enum machine_mode,
829 static const char *get_some_local_dynamic_name (void);
830 static int get_some_local_dynamic_name_1 (rtx *, void *);
831 static rtx ix86_expand_int_compare (enum rtx_code, rtx, rtx);
832 static enum rtx_code ix86_prepare_fp_compare_args (enum rtx_code, rtx *,
834 static bool ix86_fixed_condition_code_regs (unsigned int *, unsigned int *);
835 static enum machine_mode ix86_cc_modes_compatible (enum machine_mode,
837 static rtx get_thread_pointer (int);
838 static rtx legitimize_tls_address (rtx, enum tls_model, int);
839 static void get_pc_thunk_name (char [32], unsigned int);
840 static rtx gen_push (rtx);
841 static int ix86_flags_dependant (rtx, rtx, enum attr_type);
842 static int ix86_agi_dependant (rtx, rtx, enum attr_type);
843 static struct machine_function * ix86_init_machine_status (void);
844 static int ix86_split_to_parts (rtx, rtx *, enum machine_mode);
845 static int ix86_nsaved_regs (void);
846 static void ix86_emit_save_regs (void);
847 static void ix86_emit_save_regs_using_mov (rtx, HOST_WIDE_INT);
848 static void ix86_emit_restore_regs_using_mov (rtx, HOST_WIDE_INT, int);
849 static void ix86_output_function_epilogue (FILE *, HOST_WIDE_INT);
850 static HOST_WIDE_INT ix86_GOT_alias_set (void);
851 static void ix86_adjust_counter (rtx, HOST_WIDE_INT);
852 static rtx ix86_expand_aligntest (rtx, int);
853 static void ix86_expand_strlensi_unroll_1 (rtx, rtx, rtx);
854 static int ix86_issue_rate (void);
855 static int ix86_adjust_cost (rtx, rtx, rtx, int);
856 static int ia32_multipass_dfa_lookahead (void);
857 static void ix86_init_mmx_sse_builtins (void);
858 static rtx x86_this_parameter (tree);
859 static void x86_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
860 HOST_WIDE_INT, tree);
861 static bool x86_can_output_mi_thunk (tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
862 static void x86_file_start (void);
863 static void ix86_reorg (void);
864 static bool ix86_expand_carry_flag_compare (enum rtx_code, rtx, rtx, rtx*);
865 static tree ix86_build_builtin_va_list (void);
866 static void ix86_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
868 static tree ix86_gimplify_va_arg (tree, tree, tree *, tree *);
869 static bool ix86_vector_mode_supported_p (enum machine_mode);
871 static int ix86_address_cost (rtx);
872 static bool ix86_cannot_force_const_mem (rtx);
873 static rtx ix86_delegitimize_address (rtx);
875 static void i386_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
877 struct builtin_description;
878 static rtx ix86_expand_sse_comi (const struct builtin_description *,
880 static rtx ix86_expand_sse_compare (const struct builtin_description *,
882 static rtx ix86_expand_unop1_builtin (enum insn_code, tree, rtx);
883 static rtx ix86_expand_unop_builtin (enum insn_code, tree, rtx, int);
884 static rtx ix86_expand_binop_builtin (enum insn_code, tree, rtx);
885 static rtx ix86_expand_store_builtin (enum insn_code, tree);
886 static rtx safe_vector_operand (rtx, enum machine_mode);
887 static rtx ix86_expand_fp_compare (enum rtx_code, rtx, rtx, rtx, rtx *, rtx *);
888 static int ix86_fp_comparison_arithmetics_cost (enum rtx_code code);
889 static int ix86_fp_comparison_fcomi_cost (enum rtx_code code);
890 static int ix86_fp_comparison_sahf_cost (enum rtx_code code);
891 static int ix86_fp_comparison_cost (enum rtx_code code);
892 static unsigned int ix86_select_alt_pic_regnum (void);
893 static int ix86_save_reg (unsigned int, int);
894 static void ix86_compute_frame_layout (struct ix86_frame *);
895 static int ix86_comp_type_attributes (tree, tree);
896 static int ix86_function_regparm (tree, tree);
897 const struct attribute_spec ix86_attribute_table[];
898 static bool ix86_function_ok_for_sibcall (tree, tree);
899 static tree ix86_handle_cconv_attribute (tree *, tree, tree, int, bool *);
900 static int ix86_value_regno (enum machine_mode, tree, tree);
901 static bool contains_128bit_aligned_vector_p (tree);
902 static rtx ix86_struct_value_rtx (tree, int);
903 static bool ix86_ms_bitfield_layout_p (tree);
904 static tree ix86_handle_struct_attribute (tree *, tree, tree, int, bool *);
905 static int extended_reg_mentioned_1 (rtx *, void *);
906 static bool ix86_rtx_costs (rtx, int, int, int *);
907 static int min_insn_size (rtx);
908 static tree ix86_md_asm_clobbers (tree outputs, tree inputs, tree clobbers);
909 static bool ix86_must_pass_in_stack (enum machine_mode mode, tree type);
910 static bool ix86_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
912 static void ix86_init_builtins (void);
913 static rtx ix86_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
914 static const char *ix86_mangle_fundamental_type (tree);
915 static tree ix86_stack_protect_fail (void);
916 static rtx ix86_internal_arg_pointer (void);
917 static void ix86_dwarf_handle_frame_unspec (const char *, rtx, int);
919 /* This function is only used on Solaris. */
920 static void i386_solaris_elf_named_section (const char *, unsigned int, tree)
923 /* Register class used for passing given 64bit part of the argument.
924 These represent classes as documented by the PS ABI, with the exception
925 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
926 use SF or DFmode move instead of DImode to avoid reformatting penalties.
928 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
929 whenever possible (upper half does contain padding).
/* NOTE(review): several enumerators and the closing brace of this enum are
   absent from this listing (the embedded numbering jumps from 935 to 942);
   verify against the complete source before relying on enumerator order.  */
931 enum x86_64_reg_class
934 X86_64_INTEGER_CLASS,
935 X86_64_INTEGERSI_CLASS,
942 X86_64_COMPLEX_X87_CLASS,
/* Human-readable names for the register classes above, used in debug
   dumps; indexed by enum x86_64_reg_class value.  */
945 static const char * const x86_64_reg_class_name[] = {
946 "no", "integer", "integerSI", "sse", "sseSF", "sseDF",
947 "sseup", "x87", "x87up", "cplx87", "no"
/* Upper bound on the number of 64-bit chunks an argument is split into.  */
950 #define MAX_CLASSES 4
952 /* Table of constants used by fldpi, fldln2, etc.... */
953 static REAL_VALUE_TYPE ext_80387_constants_table [5]
954 static bool ext_80387_constants_init = 0;
955 static void init_ext_80387_constants (void);
/* Section-placement helpers for the x86-64 medium code model; definitions
   appear later in this file.  */
956 static bool ix86_in_large_data_p (tree) ATTRIBUTE_UNUSED;
957 static void ix86_encode_section_info (tree, rtx, int) ATTRIBUTE_UNUSED;
958 static void x86_64_elf_unique_section (tree decl, int reloc) ATTRIBUTE_UNUSED;
959 static void x86_64_elf_select_section (tree decl, int reloc,
960 unsigned HOST_WIDE_INT align)
963 /* Initialize the GCC target structure. */
/* NOTE(review): the matching #endif lines for the conditional blocks below
   are missing from this listing (numbering gaps); confirm against the
   complete source before editing the preprocessor structure.  */
964 #undef TARGET_ATTRIBUTE_TABLE
965 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
966 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
967 # undef TARGET_MERGE_DECL_ATTRIBUTES
968 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
971 #undef TARGET_COMP_TYPE_ATTRIBUTES
972 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
/* Builtin-function hooks.  */
974 #undef TARGET_INIT_BUILTINS
975 #define TARGET_INIT_BUILTINS ix86_init_builtins
976 #undef TARGET_EXPAND_BUILTIN
977 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
979 #undef TARGET_ASM_FUNCTION_EPILOGUE
980 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
982 #undef TARGET_ENCODE_SECTION_INFO
983 #ifndef SUBTARGET_ENCODE_SECTION_INFO
984 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
986 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
/* Assembler-syntax hooks.  */
989 #undef TARGET_ASM_OPEN_PAREN
990 #define TARGET_ASM_OPEN_PAREN ""
991 #undef TARGET_ASM_CLOSE_PAREN
992 #define TARGET_ASM_CLOSE_PAREN ""
994 #undef TARGET_ASM_ALIGNED_HI_OP
995 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
996 #undef TARGET_ASM_ALIGNED_SI_OP
997 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
999 #undef TARGET_ASM_ALIGNED_DI_OP
1000 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
/* x86 tolerates unaligned data access, so the unaligned ops reuse the
   aligned directives.  */
1003 #undef TARGET_ASM_UNALIGNED_HI_OP
1004 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
1005 #undef TARGET_ASM_UNALIGNED_SI_OP
1006 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
1007 #undef TARGET_ASM_UNALIGNED_DI_OP
1008 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
/* Instruction-scheduler hooks.  */
1010 #undef TARGET_SCHED_ADJUST_COST
1011 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
1012 #undef TARGET_SCHED_ISSUE_RATE
1013 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
1014 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
1015 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
1016 ia32_multipass_dfa_lookahead
1018 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
1019 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
1022 #undef TARGET_HAVE_TLS
1023 #define TARGET_HAVE_TLS true
1025 #undef TARGET_CANNOT_FORCE_CONST_MEM
1026 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
1028 #undef TARGET_DELEGITIMIZE_ADDRESS
1029 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
1031 #undef TARGET_MS_BITFIELD_LAYOUT_P
1032 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
/* NOTE(review): darwin_binds_local_p suggests this is inside a
   TARGET_MACHO conditional whose #if/#endif lines are missing here.  */
1035 #undef TARGET_BINDS_LOCAL_P
1036 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
1039 #undef TARGET_ASM_OUTPUT_MI_THUNK
1040 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
1041 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1042 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
1044 #undef TARGET_ASM_FILE_START
1045 #define TARGET_ASM_FILE_START x86_file_start
1047 #undef TARGET_DEFAULT_TARGET_FLAGS
1048 #define TARGET_DEFAULT_TARGET_FLAGS \
1050 | TARGET_64BIT_DEFAULT \
1051 | TARGET_SUBTARGET_DEFAULT \
1052 | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
1054 #undef TARGET_HANDLE_OPTION
1055 #define TARGET_HANDLE_OPTION ix86_handle_option
/* Cost-model hooks.  */
1057 #undef TARGET_RTX_COSTS
1058 #define TARGET_RTX_COSTS ix86_rtx_costs
1059 #undef TARGET_ADDRESS_COST
1060 #define TARGET_ADDRESS_COST ix86_address_cost
1062 #undef TARGET_FIXED_CONDITION_CODE_REGS
1063 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
1064 #undef TARGET_CC_MODES_COMPATIBLE
1065 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
1067 #undef TARGET_MACHINE_DEPENDENT_REORG
1068 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
1070 #undef TARGET_BUILD_BUILTIN_VA_LIST
1071 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
1073 #undef TARGET_MD_ASM_CLOBBERS
1074 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
/* Calling-convention hooks.  */
1076 #undef TARGET_PROMOTE_PROTOTYPES
1077 #define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
1078 #undef TARGET_STRUCT_VALUE_RTX
1079 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
1080 #undef TARGET_SETUP_INCOMING_VARARGS
1081 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
1082 #undef TARGET_MUST_PASS_IN_STACK
1083 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
1084 #undef TARGET_PASS_BY_REFERENCE
1085 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
1086 #undef TARGET_INTERNAL_ARG_POINTER
1087 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
1088 #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
1089 #define TARGET_DWARF_HANDLE_FRAME_UNSPEC ix86_dwarf_handle_frame_unspec
1091 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
1092 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
1094 #undef TARGET_VECTOR_MODE_SUPPORTED_P
1095 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
1098 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
1099 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
1102 #ifdef SUBTARGET_INSERT_ATTRIBUTES
1103 #undef TARGET_INSERT_ATTRIBUTES
1104 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
1107 #undef TARGET_MANGLE_FUNDAMENTAL_TYPE
1108 #define TARGET_MANGLE_FUNDAMENTAL_TYPE ix86_mangle_fundamental_type
1110 #undef TARGET_STACK_PROTECT_FAIL
1111 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
1113 #undef TARGET_FUNCTION_VALUE
1114 #define TARGET_FUNCTION_VALUE ix86_function_value
/* The single definition of the target hook vector, built from all of the
   TARGET_* macros defined above.  */
1116 struct gcc_target targetm = TARGET_INITIALIZER;
1119 /* The svr4 ABI for the i386 says that records and unions are returned
1121 #ifndef DEFAULT_PCC_STRUCT_RETURN
1122 #define DEFAULT_PCC_STRUCT_RETURN 1
1125 /* Implement TARGET_HANDLE_OPTION.
   Process i386-specific command-line options.  CODE identifies the option,
   VALUE is nonzero when the option is enabled.
   NOTE(review): the enclosing switch statement, its case labels, and the
   return statements are missing from this listing (numbering gaps).  Each
   pair below clears dependent ISA masks and records them as explicitly
   set -- presumably one pair per negated -mmmx/-m3dnow/-msse/-msse2 case;
   confirm against the complete source.  */
1128 ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
1135 target_flags &= ~MASK_3DNOW_A;
1136 target_flags_explicit |= MASK_3DNOW_A;
1143 target_flags &= ~(MASK_3DNOW | MASK_3DNOW_A);
1144 target_flags_explicit |= MASK_3DNOW | MASK_3DNOW_A;
1151 target_flags &= ~(MASK_SSE2 | MASK_SSE3);
1152 target_flags_explicit |= MASK_SSE2 | MASK_SSE3;
1159 target_flags &= ~MASK_SSE3;
1160 target_flags_explicit |= MASK_SSE3;
1169 /* Sometimes certain combinations of command options do not make
1170 sense on a particular target machine. You can define a macro
1171 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1172 defined, is executed once just after all the command options have
1175 Don't use this macro to turn on various extra optimizations for
1176 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
/* NOTE(review): this listing omits a number of original lines (the embedded
   numbering has gaps), so some braces, else-arms and string continuations
   are not visible.  The only code change made here is fixing the two
   mislabelled -malign-* diagnostics below; everything else is preserved
   verbatim.  */
1179 override_options (void)
1182 int ix86_tune_defaulted = 0;
1184 /* Comes from final.c -- no real reason to change it. */
1185 #define MAX_CODE_ALIGN 16
/* Per-processor tuning data: cost table, flags to force on/off, and
   default code-alignment parameters, indexed by processor_type.  */
1189 const struct processor_costs *cost; /* Processor costs */
1190 const int target_enable; /* Target flags to enable. */
1191 const int target_disable; /* Target flags to disable. */
1192 const int align_loop; /* Default alignments. */
1193 const int align_loop_max_skip;
1194 const int align_jump;
1195 const int align_jump_max_skip;
1196 const int align_func;
1198 const processor_target_table[PROCESSOR_max] =
1200 {&i386_cost, 0, 0, 4, 3, 4, 3, 4},
1201 {&i486_cost, 0, 0, 16, 15, 16, 15, 16},
1202 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
1203 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
1204 {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
1205 {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
1206 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
1207 {&k8_cost, 0, 0, 16, 7, 16, 7, 16},
1208 {&nocona_cost, 0, 0, 0, 0, 0, 0, 0}
1211 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
/* Map -march=/-mtune= names onto processor_type values plus the ISA
   feature flags (PTA_*) each CPU implies.  */
1214 const char *const name; /* processor name or nickname. */
1215 const enum processor_type processor;
1216 const enum pta_flags
1222 PTA_PREFETCH_SSE = 16,
1228 const processor_alias_table[] =
1230 {"i386", PROCESSOR_I386, 0},
1231 {"i486", PROCESSOR_I486, 0},
1232 {"i586", PROCESSOR_PENTIUM, 0},
1233 {"pentium", PROCESSOR_PENTIUM, 0},
1234 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
1235 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
1236 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1237 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1238 {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_PREFETCH_SSE | PTA_SSE},
1239 {"i686", PROCESSOR_PENTIUMPRO, 0},
1240 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
1241 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
1242 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1243 {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1244 {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE | PTA_SSE2},
1245 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1246 | PTA_MMX | PTA_PREFETCH_SSE},
1247 {"pentium4m", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1248 | PTA_MMX | PTA_PREFETCH_SSE},
1249 {"prescott", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3
1250 | PTA_MMX | PTA_PREFETCH_SSE},
1251 {"nocona", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_64BIT
1252 | PTA_MMX | PTA_PREFETCH_SSE},
1253 {"k6", PROCESSOR_K6, PTA_MMX},
1254 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1255 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1256 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1258 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
1259 | PTA_3DNOW | PTA_3DNOW_A},
1260 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1261 | PTA_3DNOW_A | PTA_SSE},
1262 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1263 | PTA_3DNOW_A | PTA_SSE},
1264 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1265 | PTA_3DNOW_A | PTA_SSE},
1266 {"x86-64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_64BIT
1267 | PTA_SSE | PTA_SSE2 },
1268 {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1269 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1270 {"opteron", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1271 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1272 {"athlon64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1273 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1274 {"athlon-fx", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1275 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1278 int const pta_size = ARRAY_SIZE (processor_alias_table);
1280 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1281 SUBTARGET_OVERRIDE_OPTIONS;
1284 /* Set the default values for switches whose default depends on TARGET_64BIT
1285 in case they weren't overwritten by command line options. */
1288 if (flag_omit_frame_pointer == 2)
1289 flag_omit_frame_pointer = 1;
1290 if (flag_asynchronous_unwind_tables == 2)
1291 flag_asynchronous_unwind_tables = 1;
1292 if (flag_pcc_struct_return == 2)
1293 flag_pcc_struct_return = 0;
1297 if (flag_omit_frame_pointer == 2)
1298 flag_omit_frame_pointer = 0;
1299 if (flag_asynchronous_unwind_tables == 2)
1300 flag_asynchronous_unwind_tables = 0;
1301 if (flag_pcc_struct_return == 2)
1302 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
/* -mtune defaults to -march when only the latter is given, otherwise to
   the configured default CPU.  */
1305 if (!ix86_tune_string && ix86_arch_string)
1306 ix86_tune_string = ix86_arch_string;
1307 if (!ix86_tune_string)
1309 ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT];
1310 ix86_tune_defaulted = 1;
1312 if (!ix86_arch_string)
1313 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
/* Parse -mcmodel=.  */
1315 if (ix86_cmodel_string != 0)
1317 if (!strcmp (ix86_cmodel_string, "small"))
1318 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1319 else if (!strcmp (ix86_cmodel_string, "medium"))
1320 ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
1322 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
1323 else if (!strcmp (ix86_cmodel_string, "32"))
1324 ix86_cmodel = CM_32;
1325 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
1326 ix86_cmodel = CM_KERNEL;
1327 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
1328 ix86_cmodel = CM_LARGE;
1330 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
1334 ix86_cmodel = CM_32;
1336 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
/* Parse -masm=.  */
1338 if (ix86_asm_string != 0)
1341 && !strcmp (ix86_asm_string, "intel"))
1342 ix86_asm_dialect = ASM_INTEL;
1343 else if (!strcmp (ix86_asm_string, "att"))
1344 ix86_asm_dialect = ASM_ATT;
1346 error ("bad value (%s) for -masm= switch", ix86_asm_string);
1348 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
1349 error ("code model %qs not supported in the %s bit mode",
1350 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
1351 if (ix86_cmodel == CM_LARGE)
1352 sorry ("code model %<large%> not supported yet");
1353 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
1354 sorry ("%i-bit mode not compiled in",
1355 (target_flags & MASK_64BIT) ? 64 : 32);
/* Resolve -march=: pick the processor and turn on the ISA extensions it
   implies, unless the user set them explicitly.  */
1357 for (i = 0; i < pta_size; i++)
1358 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
1360 ix86_arch = processor_alias_table[i].processor;
1361 /* Default cpu tuning to the architecture. */
1362 ix86_tune = ix86_arch;
1363 if (processor_alias_table[i].flags & PTA_MMX
1364 && !(target_flags_explicit & MASK_MMX))
1365 target_flags |= MASK_MMX;
1366 if (processor_alias_table[i].flags & PTA_3DNOW
1367 && !(target_flags_explicit & MASK_3DNOW))
1368 target_flags |= MASK_3DNOW;
1369 if (processor_alias_table[i].flags & PTA_3DNOW_A
1370 && !(target_flags_explicit & MASK_3DNOW_A))
1371 target_flags |= MASK_3DNOW_A;
1372 if (processor_alias_table[i].flags & PTA_SSE
1373 && !(target_flags_explicit & MASK_SSE))
1374 target_flags |= MASK_SSE;
1375 if (processor_alias_table[i].flags & PTA_SSE2
1376 && !(target_flags_explicit & MASK_SSE2))
1377 target_flags |= MASK_SSE2;
1378 if (processor_alias_table[i].flags & PTA_SSE3
1379 && !(target_flags_explicit & MASK_SSE3))
1380 target_flags |= MASK_SSE3;
1381 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1382 x86_prefetch_sse = true;
1383 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1384 error ("CPU you selected does not support x86-64 "
1390 error ("bad value (%s) for -march= switch", ix86_arch_string);
/* Resolve -mtune=.  */
1392 for (i = 0; i < pta_size; i++)
1393 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
1395 ix86_tune = processor_alias_table[i].processor;
1396 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1398 if (ix86_tune_defaulted)
1400 ix86_tune_string = "x86-64";
1401 for (i = 0; i < pta_size; i++)
1402 if (! strcmp (ix86_tune_string,
1403 processor_alias_table[i].name))
1405 ix86_tune = processor_alias_table[i].processor;
1408 error ("CPU you selected does not support x86-64 "
1411 /* Intel CPUs have always interpreted SSE prefetch instructions as
1412 NOPs; so, we can enable SSE prefetch instructions even when
1413 -mtune (rather than -march) points us to a processor that has them.
1414 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
1415 higher processors. */
1416 if (TARGET_CMOVE && (processor_alias_table[i].flags & PTA_PREFETCH_SSE))
1417 x86_prefetch_sse = true;
1421 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
1424 ix86_cost = &size_cost;
1426 ix86_cost = processor_target_table[ix86_tune].cost;
1427 target_flags |= processor_target_table[ix86_tune].target_enable;
1428 target_flags &= ~processor_target_table[ix86_tune].target_disable;
1430 /* Arrange to set up i386_stack_locals for all functions. */
1431 init_machine_status = ix86_init_machine_status;
1433 /* Validate -mregparm= value. */
1434 if (ix86_regparm_string)
1436 i = atoi (ix86_regparm_string);
1437 if (i < 0 || i > REGPARM_MAX)
1438 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
1444 ix86_regparm = REGPARM_MAX;
1446 /* If the user has provided any of the -malign-* options,
1447 warn and use that value only if -falign-* is not set.
1448 Remove this code in GCC 3.2 or later. */
1449 if (ix86_align_loops_string)
1451 warning (0, "-malign-loops is obsolete, use -falign-loops");
1452 if (align_loops == 0)
1454 i = atoi (ix86_align_loops_string);
1455 if (i < 0 || i > MAX_CODE_ALIGN)
1456 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1458 align_loops = 1 << i;
1462 if (ix86_align_jumps_string)
1464 warning (0, "-malign-jumps is obsolete, use -falign-jumps");
1465 if (align_jumps == 0)
1467 i = atoi (ix86_align_jumps_string);
1468 if (i < 0 || i > MAX_CODE_ALIGN)
/* Fixed copy-paste bug: this branch validates -malign-jumps, but the
   diagnostic previously named -malign-loops.  */
1469 error ("-malign-jumps=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1471 align_jumps = 1 << i;
1475 if (ix86_align_funcs_string)
1477 warning (0, "-malign-functions is obsolete, use -falign-functions");
1478 if (align_functions == 0)
1480 i = atoi (ix86_align_funcs_string);
1481 if (i < 0 || i > MAX_CODE_ALIGN)
/* Fixed copy-paste bug: this branch validates -malign-functions, but the
   diagnostic previously named -malign-loops.  */
1482 error ("-malign-functions=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1484 align_functions = 1 << i;
1488 /* Default align_* from the processor table. */
1489 if (align_loops == 0)
1491 align_loops = processor_target_table[ix86_tune].align_loop;
1492 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
1494 if (align_jumps == 0)
1496 align_jumps = processor_target_table[ix86_tune].align_jump;
1497 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
1499 if (align_functions == 0)
1501 align_functions = processor_target_table[ix86_tune].align_func;
1504 /* Validate -mpreferred-stack-boundary= value, or provide default.
1505 The default of 128 bits is for Pentium III's SSE __m128, but we
1506 don't want additional code to keep the stack aligned when
1507 optimizing for code size. */
1508 ix86_preferred_stack_boundary = (optimize_size
1509 ? TARGET_64BIT ? 128 : 32
1511 if (ix86_preferred_stack_boundary_string)
1513 i = atoi (ix86_preferred_stack_boundary_string);
1514 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
1515 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
1516 TARGET_64BIT ? 4 : 2);
1518 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
1521 /* Validate -mbranch-cost= value, or provide default. */
1522 ix86_branch_cost = processor_target_table[ix86_tune].cost->branch_cost;
1523 if (ix86_branch_cost_string)
1525 i = atoi (ix86_branch_cost_string);
1527 error ("-mbranch-cost=%d is not between 0 and 5", i);
1529 ix86_branch_cost = i;
1531 if (ix86_section_threshold_string)
1533 i = atoi (ix86_section_threshold_string);
1535 error ("-mlarge-data-threshold=%d is negative", i);
1537 ix86_section_threshold = i;
1540 if (ix86_tls_dialect_string)
1542 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
1543 ix86_tls_dialect = TLS_DIALECT_GNU;
1544 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
1545 ix86_tls_dialect = TLS_DIALECT_SUN;
1547 error ("bad value (%s) for -mtls-dialect= switch",
1548 ix86_tls_dialect_string);
1551 /* Keep nonleaf frame pointers. */
1552 if (flag_omit_frame_pointer)
1553 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
1554 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
1555 flag_omit_frame_pointer = 1;
1557 /* If we're doing fast math, we don't care about comparison order
1558 wrt NaNs. This lets us use a shorter comparison sequence. */
1559 if (flag_unsafe_math_optimizations)
1560 target_flags &= ~MASK_IEEE_FP;
1562 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
1563 since the insns won't need emulation. */
1564 if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
1565 target_flags &= ~MASK_NO_FANCY_MATH_387;
1567 /* Likewise, if the target doesn't have a 387, or we've specified
1568 software floating point, don't use 387 inline intrinsics. */
1570 target_flags |= MASK_NO_FANCY_MATH_387;
1572 /* Turn on SSE2 builtins for -msse3. */
1574 target_flags |= MASK_SSE2;
1576 /* Turn on SSE builtins for -msse2. */
1578 target_flags |= MASK_SSE;
1580 /* Turn on MMX builtins for -msse. */
1583 target_flags |= MASK_MMX & ~target_flags_explicit;
1584 x86_prefetch_sse = true;
1587 /* Turn on MMX builtins for 3Dnow. */
1589 target_flags |= MASK_MMX;
1593 if (TARGET_ALIGN_DOUBLE)
1594 error ("-malign-double makes no sense in the 64bit mode");
1596 error ("-mrtd calling convention not supported in the 64bit mode");
1598 /* Enable by default the SSE and MMX builtins. Do allow the user to
1599 explicitly disable any of these. In particular, disabling SSE and
1600 MMX for kernel code is extremely useful. */
1602 |= ((MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE)
1603 & ~target_flags_explicit);
1607 /* i386 ABI does not specify red zone. It still makes sense to use it
1608 when programmer takes care to stack from being destroyed. */
1609 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
1610 target_flags |= MASK_NO_RED_ZONE;
1613 /* Accept -msseregparm only if at least SSE support is enabled. */
1614 if (TARGET_SSEREGPARM
1616 error ("-msseregparm used without SSE enabled");
1618 ix86_fpmath = TARGET_FPMATH_DEFAULT;
/* Parse -mfpmath=, falling back to 387 math when the requested unit is
   unavailable.  */
1620 if (ix86_fpmath_string != 0)
1622 if (! strcmp (ix86_fpmath_string, "387"))
1623 ix86_fpmath = FPMATH_387;
1624 else if (! strcmp (ix86_fpmath_string, "sse"))
1628 warning (0, "SSE instruction set disabled, using 387 arithmetics");
1629 ix86_fpmath = FPMATH_387;
1632 ix86_fpmath = FPMATH_SSE;
1634 else if (! strcmp (ix86_fpmath_string, "387,sse")
1635 || ! strcmp (ix86_fpmath_string, "sse,387"))
1639 warning (0, "SSE instruction set disabled, using 387 arithmetics");
1640 ix86_fpmath = FPMATH_387;
1642 else if (!TARGET_80387)
1644 warning (0, "387 instruction set disabled, using SSE arithmetics");
1645 ix86_fpmath = FPMATH_SSE;
1648 ix86_fpmath = FPMATH_SSE | FPMATH_387;
1651 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
1654 /* If the i387 is disabled, then do not return values in it. */
1656 target_flags &= ~MASK_FLOAT_RETURNS;
1658 if ((x86_accumulate_outgoing_args & TUNEMASK)
1659 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
1661 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
1663 /* ??? Unwind info is not correct around the CFG unless either a frame
1664 pointer is present or M_A_O_A is set. Fixing this requires rewriting
1665 unwind info generation to be aware of the CFG and propagating states
1667 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
1668 || flag_exceptions || flag_non_call_exceptions)
1669 && flag_omit_frame_pointer
1670 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
1672 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
1673 warning (0, "unwind tables currently require either a frame pointer "
1674 "or -maccumulate-outgoing-args for correctness");
1675 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
1678 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
1681 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
1682 p = strchr (internal_label_prefix, 'X');
1683 internal_label_prefix_len = p - internal_label_prefix;
1687 /* When scheduling description is not available, disable scheduler pass
1688 so it won't slow down the compilation and make x87 code slower. */
1689 if (!TARGET_SCHEDULE)
1690 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
1693 /* switch to the appropriate section for output of DECL.
1694 DECL is either a `VAR_DECL' node or a constant of some sort.
1695 RELOC indicates whether forming the initial value of DECL requires
1696 link-time relocations.
   For the x86-64 medium code model, objects classified as "large data"
   are placed into dedicated ".ldata*" sections; everything else is
   handled by the generic ELF section selector.
   NOTE(review): break statements and some case labels of this switch are
   missing from this listing (numbering gaps).  */
1699 x86_64_elf_select_section (tree decl, int reloc,
1700 unsigned HOST_WIDE_INT align)
1702 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
1703 && ix86_in_large_data_p (decl))
1705 const char *sname = NULL;
1706 switch (categorize_decl_for_section (decl, reloc, flag_pic))
1711 case SECCAT_DATA_REL:
1712 sname = ".ldata.rel";
1714 case SECCAT_DATA_REL_LOCAL:
1715 sname = ".ldata.rel.local";
1717 case SECCAT_DATA_REL_RO:
1718 sname = ".ldata.rel.ro";
1720 case SECCAT_DATA_REL_RO_LOCAL:
1721 sname = ".ldata.rel.ro.local";
1727 case SECCAT_RODATA_MERGE_STR:
1728 case SECCAT_RODATA_MERGE_STR_INIT:
1729 case SECCAT_RODATA_MERGE_CONST:
1732 case SECCAT_SRODATA:
1739 /* We don't split these for medium model. Place them into
1740 default sections and hope for best. */
1745 named_section (decl, sname, reloc);
/* Not large data (or not medium model): use the default ELF rules.  */
1749 default_elf_select_section (decl, reloc, align);
1752 /* Build up a unique section name, expressed as a
1753 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
1754 RELOC indicates whether the initial value of EXP requires
1755 link-time relocations.
   For medium-model large data the name is an ".l"-prefixed (or
   ".gnu.linkonce.l*") section prefix followed by DECL's stripped
   assembler name; other decls use the generic implementation.
   NOTE(review): break statements and some case labels/declarations are
   missing from this listing (numbering gaps).  */
1758 x86_64_elf_unique_section (tree decl, int reloc)
1760 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
1761 && ix86_in_large_data_p (decl))
1763 const char *prefix = NULL;
1764 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
1765 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
1767 switch (categorize_decl_for_section (decl, reloc, flag_pic))
1770 case SECCAT_DATA_REL:
1771 case SECCAT_DATA_REL_LOCAL:
1772 case SECCAT_DATA_REL_RO:
1773 case SECCAT_DATA_REL_RO_LOCAL:
1774 prefix = one_only ? ".gnu.linkonce.ld." : ".ldata.";
1777 prefix = one_only ? ".gnu.linkonce.lb." : ".lbss.";
1780 case SECCAT_RODATA_MERGE_STR:
1781 case SECCAT_RODATA_MERGE_STR_INIT:
1782 case SECCAT_RODATA_MERGE_CONST:
1783 prefix = one_only ? ".gnu.linkonce.lr." : ".lrodata.";
1785 case SECCAT_SRODATA:
1792 /* We don't split these for medium model. Place them into
1793 default sections and hope for best. */
/* Concatenate prefix + stripped assembler name into a stack buffer and
   record it as the decl's section name.  */
1801 plen = strlen (prefix);
1803 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
1804 name = targetm.strip_name_encoding (name);
1805 nlen = strlen (name);
1807 string = alloca (nlen + plen + 1);
1808 memcpy (string, prefix, plen);
1809 memcpy (string + plen, name, nlen + 1);
1811 DECL_SECTION_NAME (decl) = build_string (nlen + plen, string);
1815 default_unique_section (decl, reloc);
1818 #ifdef COMMON_ASM_OP
1819 /* This says how to output assembler code to declare an
1820 uninitialized external linkage data object.
1822 For medium model x86-64 we need to use .largecomm opcode for
   objects above the -mlarge-data-threshold; smaller objects (and other
   code models) use the normal COMMON_ASM_OP directive.  Emits
   "<directive> name,size,alignment-in-bytes".  */
1825 x86_elf_aligned_common (FILE *file,
1826 const char *name, unsigned HOST_WIDE_INT size,
1829 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
1830 && size > (unsigned int)ix86_section_threshold)
1831 fprintf (file, ".largecomm\t");
1833 fprintf (file, "%s", COMMON_ASM_OP);
1834 assemble_name (file, name);
1835 fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
1836 size, align / BITS_PER_UNIT);
1839 /* Utility function for targets to use in implementing
1840 ASM_OUTPUT_ALIGNED_BSS.
   Switches to the appropriate BSS section (".lbss" for medium-model
   large objects), aligns, emits the object's label, and reserves SIZE
   bytes (at least 1, since a zero-size skip would emit no storage).
   NOTE(review): the #else line pairing with the #ifdef below is missing
   from this listing (numbering gap).  */
1843 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
1844 const char *name, unsigned HOST_WIDE_INT size,
1847 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
1848 && size > (unsigned int)ix86_section_threshold)
1849 named_section (decl, ".lbss", 0);
1852 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
1853 #ifdef ASM_DECLARE_OBJECT_NAME
1854 last_assemble_variable_decl = decl;
1855 ASM_DECLARE_OBJECT_NAME (file, name, decl);
1857 /* Standard thing is just output label for the object. */
1858 ASM_OUTPUT_LABEL (file, name);
1859 #endif /* ASM_DECLARE_OBJECT_NAME */
1860 ASM_OUTPUT_SKIP (file, size ? size : 1);
/* Implement OPTIMIZATION_OPTIONS: set i386-specific defaults for each
   optimization LEVEL before any command-line options are parsed.  */
1865 optimization_options (int level, int size ATTRIBUTE_UNUSED)
1867 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
1868 make the problem with not enough registers even worse. */
1869 #ifdef INSN_SCHEDULING
1871 flag_schedule_insns = 0;
/* NOTE(review): the TARGET_MACHO guard for the flag_errno_math override
   below appears to be missing from this listing (numbering gap).  */
1875 /* The Darwin libraries never set errno, so we might as well
1876 avoid calling them when that's the only reason we would. */
1877 flag_errno_math = 0;
1879 /* The default values of these switches depend on the TARGET_64BIT
1880 that is not known at this moment. Mark these values with 2 and
1881 let the user override these. In case there is no command line option
1882 specifying them, we will set the defaults in override_options. */
1884 flag_omit_frame_pointer = 2;
1885 flag_pcc_struct_return = 2;
1886 flag_asynchronous_unwind_tables = 2;
1887 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
1888 SUBTARGET_OPTIMIZATION_OPTIONS;
1892 /* Table of valid machine attributes.
   Consumed via TARGET_ATTRIBUTE_TABLE (defined earlier in this file);
   terminated by the all-NULL sentinel entry.
   NOTE(review): the #endif lines closing the two conditional sections
   below are missing from this listing (numbering gaps).  */
1893 const struct attribute_spec ix86_attribute_table[] =
1895 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
1896 /* Stdcall attribute says callee is responsible for popping arguments
1897 if they are not variable. */
1898 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
1899 /* Fastcall attribute says callee is responsible for popping arguments
1900 if they are not variable. */
1901 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
1902 /* Cdecl attribute says the callee is a normal C declaration */
1903 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute },
1904 /* Regparm attribute specifies how many integer arguments are to be
1905 passed in registers. */
1906 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute },
1907 /* Sseregparm attribute says we are using x86_64 calling conventions
1908 for FP arguments. */
1909 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
1910 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
1911 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
1912 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
1913 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
/* ms_struct/gcc_struct select MS- or GCC-compatible record layout.  */
1915 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
1916 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
1917 #ifdef SUBTARGET_ATTRIBUTE_TABLE
1918 SUBTARGET_ATTRIBUTE_TABLE,
/* Sentinel.  */
1920 { NULL, 0, 0, false, false, false, NULL }
1923 /* Decide whether we can make a sibling call to a function. DECL is the
1924 declaration of the function being targeted by the call and EXP is the
1925 CALL_EXPR representing the call.  DECL may be NULL for an indirect
   call. */
1928 ix86_function_ok_for_sibcall (tree decl, tree exp)
1933 /* If we are generating position-independent code, we cannot sibcall
1934 optimize any indirect call, or a direct call to a global function,
1935 as the PLT requires %ebx be live. */
1936 if (!TARGET_64BIT && flag_pic && (!decl || TREE_PUBLIC (decl)))
/* Extract the target's function type from the CALL_EXPR's callee
   operand (usually a pointer-to-function expression).  */
1943 func = TREE_TYPE (TREE_OPERAND (exp, 0));
1944 if (POINTER_TYPE_P (func))
1945 func = TREE_TYPE (func);
1948 /* Check that the return value locations are the same. Like
1949 if we are returning floats on the 80387 register stack, we cannot
1950 make a sibcall from a function that doesn't return a float to a
1951 function that does or, conversely, from a function that does return
1952 a float to a function that doesn't; the necessary stack adjustment
1953 would not be executed. This is also the place we notice
1954 differences in the return value ABI. Note that it is ok for one
1955 of the functions to have void return type as long as the return
1956 value of the other is passed in a register. */
1957 a = ix86_function_value (TREE_TYPE (exp), func, false);
1958 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
/* x87 stack-register returns need exact agreement; otherwise a void
   caller return type is tolerated as noted above.  */
1960 if (STACK_REG_P (a) || STACK_REG_P (b))
1962 if (!rtx_equal_p (a, b))
1965 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
1967 else if (!rtx_equal_p (a, b))
1970 /* If this call is indirect, we'll need to be able to use a call-clobbered
1971 register for the address of the target function. Make sure that all
1972 such registers are not used for passing parameters. */
1973 if (!decl && !TARGET_64BIT)
1977 /* We're looking at the CALL_EXPR, we need the type of the function. */
1978 type = TREE_OPERAND (exp, 0); /* pointer expression */
1979 type = TREE_TYPE (type); /* pointer type */
1980 type = TREE_TYPE (type); /* function type */
/* regparm(3) would consume all of eax/edx/ecx, leaving no
   call-clobbered register to hold the target address.  */
1982 if (ix86_function_regparm (type, NULL) >= 3)
1984 /* ??? Need to count the actual number of registers to be used,
1985 not the possible number of registers. Fix later. */
1990 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
1991 /* Dllimport'd functions are also called indirectly. */
1992 if (decl && DECL_DLLIMPORT_P (decl)
1993 && ix86_function_regparm (TREE_TYPE (decl), NULL) >= 3)
1997 /* If we forced aligned the stack, then sibcalling would unalign the
1998 stack, which may break the called function. */
1999 if (cfun->machine->force_align_arg_pointer)
2002 /* Otherwise okay. That also includes certain types of indirect calls. */
2006 /* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm"
2007 calling convention attributes;
2008 arguments as in struct attribute_spec.handler.  Sets *NO_ADD_ATTRS
   when the attribute is rejected; diagnoses mutually incompatible
   convention combinations. */
2011 ix86_handle_cconv_attribute (tree *node, tree name,
2013 int flags ATTRIBUTE_UNUSED,
/* These attributes only make sense on function types (or decls that
   carry one).  */
2016 if (TREE_CODE (*node) != FUNCTION_TYPE
2017 && TREE_CODE (*node) != METHOD_TYPE
2018 && TREE_CODE (*node) != FIELD_DECL
2019 && TREE_CODE (*node) != TYPE_DECL)
2021 warning (OPT_Wattributes, "%qs attribute only applies to functions",
2022 IDENTIFIER_POINTER (name));
2023 *no_add_attrs = true;
2027 /* Can combine regparm with all attributes but fastcall. */
2028 if (is_attribute_p ("regparm", name))
2032 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2034 error ("fastcall and regparm attributes are not compatible");
/* Validate the single argument: an integer constant no larger than
   REGPARM_MAX.  */
2037 cst = TREE_VALUE (args);
2038 if (TREE_CODE (cst) != INTEGER_CST)
2040 warning (OPT_Wattributes,
2041 "%qs attribute requires an integer constant argument",
2042 IDENTIFIER_POINTER (name));
2043 *no_add_attrs = true;
2045 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
2047 warning (OPT_Wattributes, "argument to %qs attribute larger than %d",
2048 IDENTIFIER_POINTER (name), REGPARM_MAX);
2049 *no_add_attrs = true;
/* NOTE(review): this warning path appears to cover the 64-bit case,
   where these conventions are meaningless — confirm against the
   elided context.  */
2057 warning (OPT_Wattributes, "%qs attribute ignored",
2058 IDENTIFIER_POINTER (name));
2059 *no_add_attrs = true;
2063 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
2064 if (is_attribute_p ("fastcall", name))
2066 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
2068 error ("fastcall and cdecl attributes are not compatible");
2070 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
2072 error ("fastcall and stdcall attributes are not compatible");
2074 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
2076 error ("fastcall and regparm attributes are not compatible");
2080 /* Can combine stdcall with fastcall (redundant), regparm and
2082 else if (is_attribute_p ("stdcall", name))
2084 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
2086 error ("stdcall and cdecl attributes are not compatible");
2088 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2090 error ("stdcall and fastcall attributes are not compatible");
2094 /* Can combine cdecl with regparm and sseregparm. */
2095 else if (is_attribute_p ("cdecl", name))
2097 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
2099 error ("stdcall and cdecl attributes are not compatible");
2101 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2103 error ("fastcall and cdecl attributes are not compatible");
2107 /* Can combine sseregparm with all attributes. */
2112 /* Return 0 if the attributes for two types are incompatible, 1 if they
2113 are compatible, and 2 if they are nearly compatible (which causes a
2114 warning to be generated).  Used to implement TARGET_COMP_TYPE_ATTRIBUTES. */
2117 ix86_comp_type_attributes (tree type1, tree type2)
2119 /* Check for mismatch of non-default calling convention. */
2120 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
/* Only function types carry calling-convention attributes we care about. */
2122 if (TREE_CODE (type1) != FUNCTION_TYPE)
2125 /* Check for mismatched fastcall/regparm types. */
2126 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
2127 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
2128 || (ix86_function_regparm (type1, NULL)
2129 != ix86_function_regparm (type2, NULL)))
2132 /* Check for mismatched sseregparm types. */
2133 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
2134 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
2137 /* Check for mismatched return types (cdecl vs stdcall).  RTDSTR holds
   whichever attribute deviates from the -mrtd default. */
2138 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
2139 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
2145 /* Return the regparm value for a function with the indicated TYPE and DECL.
2146 DECL may be NULL when calling function indirectly
2147 or considering a libcall. */
2150 ix86_function_regparm (tree type, tree decl)
/* Start from the global -mregparm setting; attributes override it. */
2153 int regparm = ix86_regparm;
2154 bool user_convention = false;
/* An explicit regparm(N) attribute wins.  */
2158 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
2161 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
2162 user_convention = true;
2165 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
2168 user_convention = true;
2171 /* Use register calling convention for local functions when possible. */
2172 if (!TARGET_64BIT && !user_convention && decl
2173 && flag_unit_at_a_time && !profile_flag)
2175 struct cgraph_local_info *i = cgraph_local_info (decl);
2178 int local_regparm, globals = 0, regno;
2180 /* Make sure no regparm register is taken by a global register
2182 for (local_regparm = 0; local_regparm < 3; local_regparm++)
2183 if (global_regs[local_regparm])
2185 /* We can't use regparm(3) for nested functions as these use
2186 static chain pointer in third argument. */
2187 if (local_regparm == 3
2188 && decl_function_context (decl)
2189 && !DECL_NO_STATIC_CHAIN (decl))
2191 /* Each global register variable increases register pressure,
2192 so the more global reg vars there are, the smaller regparm
2193 optimization use, unless requested by the user explicitly. */
2194 for (regno = 0; regno < 6; regno++)
2195 if (global_regs[regno])
2198 = globals < local_regparm ? local_regparm - globals : 0;
/* Only ever raise the regparm count for local functions, never
   lower a user-visible setting.  */
2200 if (local_regparm > regparm)
2201 regparm = local_regparm;
2208 /* Return 1 or 2, if we can pass up to 8 SFmode (1) and DFmode (2) arguments
2209 in SSE registers for a function with the indicated TYPE and DECL.
2210 DECL may be NULL when calling function indirectly
2211 or considering a libcall. Otherwise return 0. */
2214 ix86_function_sseregparm (tree type, tree decl)
2216 /* Use SSE registers to pass SFmode and DFmode arguments if requested
2217 by the sseregparm attribute. */
2218 if (TARGET_SSEREGPARM
2220 && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
/* sseregparm without SSE hardware support is a hard error; prefer the
   decl in the diagnostic when we have one.  */
2225 error ("Calling %qD with attribute sseregparm without "
2226 "SSE/SSE2 enabled", decl);
2228 error ("Calling %qT with attribute sseregparm without "
2229 "SSE/SSE2 enabled", type);
2236 /* For local functions, pass SFmode (and DFmode for SSE2) arguments
2237 in SSE registers even for 32-bit mode and not just 3, but up to
2238 8 SSE arguments in registers. */
2239 if (!TARGET_64BIT && decl
2240 && TARGET_SSE_MATH && flag_unit_at_a_time && !profile_flag)
2242 struct cgraph_local_info *i = cgraph_local_info (decl);
2244 return TARGET_SSE2 ? 2 : 1;
2250 /* Return true if EAX is live at the start of the function. Used by
2251 ix86_expand_prologue to determine if we need special help before
2252 calling allocate_stack_worker. */
2255 ix86_eax_live_at_start_p (void)
2257 /* Cheat. Don't bother working forward from ix86_function_regparm
2258 to the function type to whether an actual argument is located in
2259 eax. Instead just look at cfg info, which is still close enough
2260 to correct at this point. This gives false positives for broken
2261 functions that might use uninitialized data that happens to be
2262 allocated in eax, but who cares? */
/* Hard register 0 is EAX on IA-32.  */
2263 return REGNO_REG_SET_P (ENTRY_BLOCK_PTR->il.rtl->global_live_at_end, 0);
2266 /* Value is the number of bytes of arguments automatically
2267 popped when returning from a subroutine call.
2268 FUNDECL is the declaration node of the function (as a tree),
2269 FUNTYPE is the data type of the function (as a tree),
2270 or for a library call it is an identifier node for the subroutine name.
2271 SIZE is the number of bytes of arguments passed on the stack.
2273 On the 80386, the RTD insn may be used to pop them if the number
2274 of args is fixed, but if the number is variable then the caller
2275 must pop them all. RTD can't be used for library calls now
2276 because the library is compiled with the Unix compiler.
2277 Use of RTD is a selectable option, since it is incompatible with
2278 standard Unix calling sequences. If the option is not selected,
2279 the caller must always pop the args.
2281 The attribute stdcall is equivalent to RTD on a per module basis. */
2284 ix86_return_pops_args (tree fundecl, tree funtype, int size)
/* Library calls (identifier-node FUNDECL) never use RTD.  */
2286 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
2288 /* Cdecl functions override -mrtd, and never pop the stack. */
2289 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
2291 /* Stdcall and fastcall functions will pop the stack if not
2293 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
2294 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
/* The callee may only pop when the argument list is fixed, i.e.
   ends in void rather than being open-ended.  */
2298 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
2299 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
2300 == void_type_node)))
2304 /* Lose any fake structure return argument if it is passed on the stack. */
2305 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
2307 && !KEEP_AGGREGATE_RETURN_POINTER)
2309 int nregs = ix86_function_regparm (funtype, fundecl);
/* The hidden return pointer occupies one word on the stack.  */
2312 return GET_MODE_SIZE (Pmode);
2318 /* Argument support functions. */
2320 /* Return true when register may be used to pass function parameters. */
2322 ix86_function_arg_regno_p (int regno)
/* 32-bit case: the integer regparm registers plus any MMX/SSE
   parameter registers the target supports.  */
2326 return (regno < REGPARM_MAX
2327 || (TARGET_MMX && MMX_REGNO_P (regno)
2328 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
2329 || (TARGET_SSE && SSE_REGNO_P (regno)
2330 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
/* 64-bit case below: SSE argument registers, ...  */
2332 if (TARGET_SSE && SSE_REGNO_P (regno)
2333 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
2335 /* RAX is used as hidden argument to va_arg functions. */
/* ... and the x86-64 integer parameter registers (RDI, RSI, ...).  */
2338 for (i = 0; i < REGPARM_MAX; i++)
2339 if (regno == x86_64_int_parameter_registers[i])
2344 /* Return if we do not know how to pass TYPE solely in registers.
   Implements TARGET_MUST_PASS_IN_STACK. */
2347 ix86_must_pass_in_stack (enum machine_mode mode, tree type)
/* Defer to the generic size/padding test first.  */
2349 if (must_pass_in_stack_var_size_or_pad (mode, type))
2352 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
2353 The layout_type routine is crafty and tries to trick us into passing
2354 currently unsupported vector types on the stack by using TImode. */
2355 return (!TARGET_64BIT && mode == TImode
2356 && type && TREE_CODE (type) != VECTOR_TYPE);
2359 /* Initialize a variable CUM of type CUMULATIVE_ARGS
2360 for a call to a function whose data type is FNTYPE.
2361 For a library call, FNTYPE is 0. */
2364 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
2365 tree fntype, /* tree ptr for function decl */
2366 rtx libname, /* SYMBOL_REF of library name or 0 */
2369 static CUMULATIVE_ARGS zero_cum;
2370 tree param, next_param;
/* Optional tracing under -mdebug-arg.  */
2372 if (TARGET_DEBUG_ARG)
2374 fprintf (stderr, "\ninit_cumulative_args (");
2376 fprintf (stderr, "fntype code = %s, ret code = %s",
2377 tree_code_name[(int) TREE_CODE (fntype)],
2378 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
2380 fprintf (stderr, "no fntype");
2383 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
2388 /* Set up the number of registers to use for passing arguments. */
2389 cum->nregs = ix86_regparm;
2391 cum->sse_nregs = SSE_REGPARM_MAX;
2393 cum->mmx_nregs = MMX_REGPARM_MAX;
2394 cum->warn_sse = true;
2395 cum->warn_mmx = true;
2396 cum->maybe_vaarg = false;
2398 /* Use ecx and edx registers if function has fastcall attribute,
2399 else look for regparm information. */
2400 if (fntype && !TARGET_64BIT)
2402 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
2408 cum->nregs = ix86_function_regparm (fntype, fndecl);
2411 /* Set up the number of SSE registers used for passing SFmode
2412 and DFmode arguments. Warn for mismatching ABI. */
2413 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl);
2415 /* Determine if this function has variable arguments. This is
2416 indicated by the last argument being 'void_type_mode' if there
2417 are no variable arguments. If there are variable arguments, then
2418 we won't pass anything in registers in 32-bit mode. */
2420 if (cum->nregs || cum->mmx_nregs || cum->sse_nregs)
2422 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
2423 param != 0; param = next_param)
2425 next_param = TREE_CHAIN (param);
/* A parameter list not terminated by void_type_node means varargs. */
2426 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
2436 cum->float_in_sse = 0;
2438 cum->maybe_vaarg = true;
/* An unprototyped function or libcall may take variable arguments. */
2442 if ((!fntype && !libname)
2443 || (fntype && !TYPE_ARG_TYPES (fntype)))
2444 cum->maybe_vaarg = true;
2446 if (TARGET_DEBUG_ARG)
2447 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
2452 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
2453 But in the case of vector types, it is some vector mode.
2455 When we have only some of our vector isa extensions enabled, then there
2456 are some modes for which vector_mode_supported_p is false. For these
2457 modes, the generic vector support in gcc will choose some non-vector mode
2458 in order to implement the type. By computing the natural mode, we'll
2459 select the proper ABI location for the operand and not depend on whatever
2460 the middle-end decides to do with these vector types. */
2462 static enum machine_mode
2463 type_natural_mode (tree type)
2465 enum machine_mode mode = TYPE_MODE (type);
2467 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
2469 HOST_WIDE_INT size = int_size_in_bytes (type);
/* Only 8- and 16-byte vectors have ABI-relevant vector modes here. */
2470 if ((size == 8 || size == 16)
2471 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
2472 && TYPE_VECTOR_SUBPARTS (type) > 1)
2474 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
2476 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
2477 mode = MIN_MODE_VECTOR_FLOAT;
2479 mode = MIN_MODE_VECTOR_INT;
2481 /* Get the mode which has this inner mode and number of units. */
2482 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
2483 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
2484 && GET_MODE_INNER (mode) == innermode)
2494 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
2495 this may not agree with the mode that the type system has chosen for the
2496 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
2497 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
2500 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
2505 if (orig_mode != BLKmode)
2506 tmp = gen_rtx_REG (orig_mode, regno);
/* BLKmode: wrap the natural-mode register in a one-element PARALLEL
   at offset 0 so callers get a well-formed location.  */
2509 tmp = gen_rtx_REG (mode, regno);
2510 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
2511 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
2517 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
2518 of this code is to classify each 8bytes of incoming argument by the register
2519 class and assign registers accordingly. */
2521 /* Return the union class of CLASS1 and CLASS2.
2522 See the x86-64 PS ABI for details.  This implements the ABI's
   class-merging rules, applied in order. */
2524 static enum x86_64_reg_class
2525 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
2527 /* Rule #1: If both classes are equal, this is the resulting class. */
2528 if (class1 == class2)
2531 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
2533 if (class1 == X86_64_NO_CLASS)
2535 if (class2 == X86_64_NO_CLASS)
2538 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
2539 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
2540 return X86_64_MEMORY_CLASS;
2542 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
/* INTEGERSI + SSESF keeps the narrow 32-bit integer class.  */
2543 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
2544 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
2545 return X86_64_INTEGERSI_CLASS;
2546 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
2547 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
2548 return X86_64_INTEGER_CLASS;
2550 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
2552 if (class1 == X86_64_X87_CLASS
2553 || class1 == X86_64_X87UP_CLASS
2554 || class1 == X86_64_COMPLEX_X87_CLASS
2555 || class2 == X86_64_X87_CLASS
2556 || class2 == X86_64_X87UP_CLASS
2557 || class2 == X86_64_COMPLEX_X87_CLASS)
2558 return X86_64_MEMORY_CLASS;
2560 /* Rule #6: Otherwise class SSE is used. */
2561 return X86_64_SSE_CLASS;
2564 /* Classify the argument of type TYPE and mode MODE.
2565 CLASSES will be filled by the register class used to pass each word
2566 of the operand. The number of words is returned. In case the parameter
2567 should be passed in memory, 0 is returned. As a special case for zero
2568 sized containers, classes[0] will be NO_CLASS and 1 is returned.
2570 BIT_OFFSET is used internally for handling records and specifies offset
2571 of the offset in bits modulo 256 to avoid overflow cases.
2573 See the x86-64 PS ABI for details.
2577 classify_argument (enum machine_mode mode, tree type,
2578 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
2580 HOST_WIDE_INT bytes =
2581 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
/* Number of 8-byte "eightbyte" words the argument covers, accounting
   for a sub-word starting offset.  */
2582 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2584 /* Variable sized entities are always passed/returned in memory. */
2588 if (mode != VOIDmode
2589 && targetm.calls.must_pass_in_stack (mode, type))
/* Aggregates (records, unions, arrays) are classified field by field. */
2592 if (type && AGGREGATE_TYPE_P (type))
2596 enum x86_64_reg_class subclasses[MAX_CLASSES];
2598 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
2602 for (i = 0; i < words; i++)
2603 classes[i] = X86_64_NO_CLASS;
2605 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
2606 signal memory class, so handle it as special case. */
2609 classes[0] = X86_64_NO_CLASS;
2613 /* Classify each field of record and merge classes. */
2614 switch (TREE_CODE (type))
2617 /* For classes first merge in the field of the subclasses. */
2618 if (TYPE_BINFO (type))
2620 tree binfo, base_binfo;
/* Walk each C++ base class and merge its classification at the
   base's byte offset within the derived object.  */
2623 for (binfo = TYPE_BINFO (type), basenum = 0;
2624 BINFO_BASE_ITERATE (binfo, basenum, base_binfo); basenum++)
2627 int offset = tree_low_cst (BINFO_OFFSET (base_binfo), 0) * 8;
2628 tree type = BINFO_TYPE (base_binfo);
2630 num = classify_argument (TYPE_MODE (type),
2632 (offset + bit_offset) % 256);
2635 for (i = 0; i < num; i++)
2637 int pos = (offset + (bit_offset % 64)) / 8 / 8;
2639 merge_classes (subclasses[i], classes[i + pos]);
2643 /* And now merge the fields of structure. */
2644 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2646 if (TREE_CODE (field) == FIELD_DECL)
2650 /* Bitfields are always classified as integer. Handle them
2651 early, since later code would consider them to be
2652 misaligned integers. */
2653 if (DECL_BIT_FIELD (field))
2655 for (i = int_bit_position (field) / 8 / 8;
2656 i < (int_bit_position (field)
2657 + tree_low_cst (DECL_SIZE (field), 0)
2660 merge_classes (X86_64_INTEGER_CLASS,
/* Non-bitfield member: recurse and merge at its eightbyte slot. */
2665 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2666 TREE_TYPE (field), subclasses,
2667 (int_bit_position (field)
2668 + bit_offset) % 256);
2671 for (i = 0; i < num; i++)
2674 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
2676 merge_classes (subclasses[i], classes[i + pos]);
2684 /* Arrays are handled as small records. */
2687 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
2688 TREE_TYPE (type), subclasses, bit_offset);
2692 /* The partial classes are now full classes. */
2693 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
2694 subclasses[0] = X86_64_SSE_CLASS;
2695 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
2696 subclasses[0] = X86_64_INTEGER_CLASS;
/* Replicate the element classification across every eightbyte. */
2698 for (i = 0; i < words; i++)
2699 classes[i] = subclasses[i % num];
2704 case QUAL_UNION_TYPE:
2705 /* Unions are similar to RECORD_TYPE but offset is always 0.
2708 /* Unions are not derived. */
2709 gcc_assert (!TYPE_BINFO (type)
2710 || !BINFO_N_BASE_BINFOS (TYPE_BINFO (type)));
2711 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2713 if (TREE_CODE (field) == FIELD_DECL)
2716 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2717 TREE_TYPE (field), subclasses,
2721 for (i = 0; i < num; i++)
2722 classes[i] = merge_classes (subclasses[i], classes[i]);
2731 /* Final merger cleanup. */
2732 for (i = 0; i < words; i++)
2734 /* If one class is MEMORY, everything should be passed in
2736 if (classes[i] == X86_64_MEMORY_CLASS)
2739 /* The X86_64_SSEUP_CLASS should be always preceded by
2740 X86_64_SSE_CLASS. */
2741 if (classes[i] == X86_64_SSEUP_CLASS
2742 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
2743 classes[i] = X86_64_SSE_CLASS;
2745 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
2746 if (classes[i] == X86_64_X87UP_CLASS
2747 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
2748 classes[i] = X86_64_SSE_CLASS;
2753 /* Compute alignment needed. We align all types to natural boundaries with
2754 exception of XFmode that is aligned to 64bits. */
2755 if (mode != VOIDmode && mode != BLKmode)
2757 int mode_alignment = GET_MODE_BITSIZE (mode);
2760 mode_alignment = 128;
2761 else if (mode == XCmode)
2762 mode_alignment = 256;
2763 if (COMPLEX_MODE_P (mode))
2764 mode_alignment /= 2;
2765 /* Misaligned fields are always returned in memory. */
2766 if (bit_offset % mode_alignment)
2770 /* for V1xx modes, just use the base mode */
2771 if (VECTOR_MODE_P (mode)
2772 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
2773 mode = GET_MODE_INNER (mode);
2775 /* Classification of atomic types.  Each case below fills classes[]
   directly for a scalar/complex/vector mode. */
2785 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
2786 classes[0] = X86_64_INTEGERSI_CLASS;
2788 classes[0] = X86_64_INTEGER_CLASS;
2792 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2797 if (!(bit_offset % 64))
2798 classes[0] = X86_64_SSESF_CLASS;
2800 classes[0] = X86_64_SSE_CLASS;
2803 classes[0] = X86_64_SSEDF_CLASS;
2806 classes[0] = X86_64_X87_CLASS;
2807 classes[1] = X86_64_X87UP_CLASS;
2810 classes[0] = X86_64_SSE_CLASS;
2811 classes[1] = X86_64_SSEUP_CLASS;
2814 classes[0] = X86_64_SSE_CLASS;
2817 classes[0] = X86_64_SSEDF_CLASS;
2818 classes[1] = X86_64_SSEDF_CLASS;
2821 classes[0] = X86_64_COMPLEX_X87_CLASS;
2824 /* These modes are larger than 16 bytes. */
2832 classes[0] = X86_64_SSE_CLASS;
2833 classes[1] = X86_64_SSEUP_CLASS;
2839 classes[0] = X86_64_SSE_CLASS;
2845 gcc_assert (VECTOR_MODE_P (mode));
2850 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
2852 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
2853 classes[0] = X86_64_INTEGERSI_CLASS;
2855 classes[0] = X86_64_INTEGER_CLASS;
2856 classes[1] = X86_64_INTEGER_CLASS;
2857 return 1 + (bytes > 8);
2861 /* Examine the argument and return set number of register required in each
2862 class. Return 0 iff parameter should be passed in memory.  On success,
   *INT_NREGS and *SSE_NREGS receive the integer / SSE register counts. */
2864 examine_argument (enum machine_mode mode, tree type, int in_return,
2865 int *int_nregs, int *sse_nregs)
2867 enum x86_64_reg_class class[MAX_CLASSES];
2868 int n = classify_argument (mode, type, class, 0);
/* Tally register requirements per eightbyte class.  */
2874 for (n--; n >= 0; n--)
2877 case X86_64_INTEGER_CLASS:
2878 case X86_64_INTEGERSI_CLASS:
2881 case X86_64_SSE_CLASS:
2882 case X86_64_SSESF_CLASS:
2883 case X86_64_SSEDF_CLASS:
2886 case X86_64_NO_CLASS:
2887 case X86_64_SSEUP_CLASS:
/* x87 classes are only valid for return values.  */
2889 case X86_64_X87_CLASS:
2890 case X86_64_X87UP_CLASS:
2894 case X86_64_COMPLEX_X87_CLASS:
2895 return in_return ? 2 : 0;
2896 case X86_64_MEMORY_CLASS:
2902 /* Construct container for the argument used by GCC interface. See
2903 FUNCTION_ARG for the detailed description.  Returns a REG or a
   PARALLEL describing where the argument lives, or NULL-equivalent
   paths (elided here) when it goes in memory. */
2906 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
2907 tree type, int in_return, int nintregs, int nsseregs,
2908 const int *intreg, int sse_regno)
2910 enum machine_mode tmpmode;
2912 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2913 enum x86_64_reg_class class[MAX_CLASSES];
2917 int needed_sseregs, needed_intregs;
2918 rtx exp[MAX_CLASSES];
2921 n = classify_argument (mode, type, class, 0);
/* Optional -mdebug-arg tracing of the classification result.  */
2922 if (TARGET_DEBUG_ARG)
2925 fprintf (stderr, "Memory class\n");
2928 fprintf (stderr, "Classes:");
2929 for (i = 0; i < n; i++)
2931 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
2933 fprintf (stderr, "\n");
/* Give up (pass in memory) if the argument needs more registers than
   remain available.  */
2938 if (!examine_argument (mode, type, in_return, &needed_intregs,
2941 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
2944 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
2945 some less clueful developer tries to use floating-point anyway. */
2946 if (needed_sseregs && !TARGET_SSE)
2948 static bool issued_error;
2951 issued_error = true;
2953 error ("SSE register return with SSE disabled");
2955 error ("SSE register argument with SSE disabled");
2960 /* First construct simple cases. Avoid SCmode, since we want to use
2961 single register to pass this type. */
2962 if (n == 1 && mode != SCmode)
2965 case X86_64_INTEGER_CLASS:
2966 case X86_64_INTEGERSI_CLASS:
2967 return gen_rtx_REG (mode, intreg[0]);
2968 case X86_64_SSE_CLASS:
2969 case X86_64_SSESF_CLASS:
2970 case X86_64_SSEDF_CLASS:
2971 return gen_reg_or_parallel (mode, orig_mode, SSE_REGNO (sse_regno));
2972 case X86_64_X87_CLASS:
2973 case X86_64_COMPLEX_X87_CLASS:
2974 return gen_rtx_REG (mode, FIRST_STACK_REG);
2975 case X86_64_NO_CLASS:
2976 /* Zero sized array, struct or class. */
/* Two-eightbyte fast paths: a full SSE register, an x87 long double,
   or a pair of consecutive integer registers.  */
2981 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS
2983 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2985 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
2986 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
2987 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
2988 && class[1] == X86_64_INTEGER_CLASS
2989 && (mode == CDImode || mode == TImode || mode == TFmode)
2990 && intreg[0] + 1 == intreg[1])
2991 return gen_rtx_REG (mode, intreg[0]);
2993 /* Otherwise figure out the entries of the PARALLEL. */
2994 for (i = 0; i < n; i++)
2998 case X86_64_NO_CLASS:
3000 case X86_64_INTEGER_CLASS:
3001 case X86_64_INTEGERSI_CLASS:
3002 /* Merge TImodes on aligned occasions here too. */
3003 if (i * 8 + 8 > bytes)
3004 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
3005 else if (class[i] == X86_64_INTEGERSI_CLASS)
3009 /* We've requested 24 bytes we don't have mode for. Use DImode. */
3010 if (tmpmode == BLKmode)
3012 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3013 gen_rtx_REG (tmpmode, *intreg),
3017 case X86_64_SSESF_CLASS:
3018 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3019 gen_rtx_REG (SFmode,
3020 SSE_REGNO (sse_regno)),
3024 case X86_64_SSEDF_CLASS:
3025 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3026 gen_rtx_REG (DFmode,
3027 SSE_REGNO (sse_regno)),
3031 case X86_64_SSE_CLASS:
/* SSE followed by SSEUP means one 16-byte register.  */
3032 if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
3036 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3037 gen_rtx_REG (tmpmode,
3038 SSE_REGNO (sse_regno)),
3040 if (tmpmode == TImode)
3049 /* Empty aligned struct, union or class. */
/* Wrap collected pieces in a PARALLEL container.  */
3053 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
3054 for (i = 0; i < nexps; i++)
3055 XVECEXP (ret, 0, i) = exp [i];
3059 /* Update the data in CUM to advance over an argument
3060 of mode MODE and data type TYPE.
3061 (TYPE is null for libcalls where that information may not be available.) */
3064 function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3065 tree type, int named)
3068 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3069 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3072 mode = type_natural_mode (type);
3074 if (TARGET_DEBUG_ARG)
3075 fprintf (stderr, "function_adv (sz=%d, wds=%2d, nregs=%d, ssenregs=%d, "
3076 "mode=%s, named=%d)\n\n",
3077 words, cum->words, cum->nregs, cum->sse_nregs,
3078 GET_MODE_NAME (mode), named);
3082 int int_nregs, sse_nregs;
3083 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
3084 cum->words += words;
3085 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
3087 cum->nregs -= int_nregs;
3088 cum->sse_nregs -= sse_nregs;
3089 cum->regno += int_nregs;
3090 cum->sse_regno += sse_nregs;
3093 cum->words += words;
3111 cum->words += words;
3112 cum->nregs -= words;
3113 cum->regno += words;
3115 if (cum->nregs <= 0)
3123 if (cum->float_in_sse < 2)
3126 if (cum->float_in_sse < 1)
3137 if (!type || !AGGREGATE_TYPE_P (type))