/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
   2002, 2003, 2004, 2005, 2006 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to
the Free Software Foundation, 51 Franklin Street, Fifth Floor,
Boston, MA 02110-1301, USA.  */
24 #include "coretypes.h"
30 #include "hard-reg-set.h"
32 #include "insn-config.h"
33 #include "conditions.h"
35 #include "insn-codes.h"
36 #include "insn-attr.h"
44 #include "basic-block.h"
47 #include "target-def.h"
48 #include "langhooks.h"
50 #include "tree-gimple.h"
52 #include "tm-constrs.h"
/* Threshold (in bytes) for stack probing; -1 means "no limit known".
   Targets may predefine this, so guard the default.  */
#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif

/* Return index of given mode in mult and division cost tables.  The
   tables below carry five entries each — QI, HI, SI, DI and "other" —
   so any mode not matched explicitly maps to index 4.  */
#define MODE_INDEX(mode) \
  ((mode) == QImode ? 0 \
   : (mode) == HImode ? 1 \
   : (mode) == SImode ? 2 \
   : (mode) == DImode ? 3 \
   : 4)

/* Processor costs (relative to an add) */
/* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes.  */
#define COSTS_N_BYTES(N) ((N) * 2)
/* Cost table used when tuning for size: every entry is a code-size cost
   (COSTS_N_BYTES), not a cycle count.
   NOTE(review): this initializer is never closed with "};" before the next
   table begins, the embedded numbering jumps (88->91, 113->115) suggest
   entries were dropped, and each line carries a stray leading line number
   from extraction — restore from the original source before compiling.  */
71 struct processor_costs size_cost = { /* costs for tuning for size */
72 COSTS_N_BYTES (2), /* cost of an add instruction */
73 COSTS_N_BYTES (3), /* cost of a lea instruction */
74 COSTS_N_BYTES (2), /* variable shift costs */
75 COSTS_N_BYTES (3), /* constant shift costs */
76 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
77 COSTS_N_BYTES (3), /* HI */
78 COSTS_N_BYTES (3), /* SI */
79 COSTS_N_BYTES (3), /* DI */
80 COSTS_N_BYTES (5)}, /* other */
81 0, /* cost of multiply per each bit set */
82 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
83 COSTS_N_BYTES (3), /* HI */
84 COSTS_N_BYTES (3), /* SI */
85 COSTS_N_BYTES (3), /* DI */
86 COSTS_N_BYTES (5)}, /* other */
87 COSTS_N_BYTES (3), /* cost of movsx */
88 COSTS_N_BYTES (3), /* cost of movzx */
91 2, /* cost for loading QImode using movzbl */
92 {2, 2, 2}, /* cost of loading integer registers
93 in QImode, HImode and SImode.
94 Relative to reg-reg move (2). */
95 {2, 2, 2}, /* cost of storing integer registers */
96 2, /* cost of reg,reg fld/fst */
97 {2, 2, 2}, /* cost of loading fp registers
98 in SFmode, DFmode and XFmode */
99 {2, 2, 2}, /* cost of storing fp registers
100 in SFmode, DFmode and XFmode */
101 3, /* cost of moving MMX register */
102 {3, 3}, /* cost of loading MMX registers
103 in SImode and DImode */
104 {3, 3}, /* cost of storing MMX registers
105 in SImode and DImode */
106 3, /* cost of moving SSE register */
107 {3, 3, 3}, /* cost of loading SSE registers
108 in SImode, DImode and TImode */
109 {3, 3, 3}, /* cost of storing SSE registers
110 in SImode, DImode and TImode */
111 3, /* MMX or SSE register to integer */
112 0, /* size of prefetch block */
113 0, /* number of parallel prefetches */
115 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
116 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
117 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
118 COSTS_N_BYTES (2), /* cost of FABS instruction. */
119 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
120 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
123 /* Processor costs (relative to an add) */
/* Cycle-cost table for the original 80386.
   NOTE(review): initializer appears truncated in this view — no closing
   "};", numbering gaps (143->145, 167->169) where entries were dropped,
   and stray leading line numbers on every line.  */
125 struct processor_costs i386_cost = { /* 386 specific costs */
126 COSTS_N_INSNS (1), /* cost of an add instruction */
127 COSTS_N_INSNS (1), /* cost of a lea instruction */
128 COSTS_N_INSNS (3), /* variable shift costs */
129 COSTS_N_INSNS (2), /* constant shift costs */
130 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
131 COSTS_N_INSNS (6), /* HI */
132 COSTS_N_INSNS (6), /* SI */
133 COSTS_N_INSNS (6), /* DI */
134 COSTS_N_INSNS (6)}, /* other */
135 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
136 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
137 COSTS_N_INSNS (23), /* HI */
138 COSTS_N_INSNS (23), /* SI */
139 COSTS_N_INSNS (23), /* DI */
140 COSTS_N_INSNS (23)}, /* other */
141 COSTS_N_INSNS (3), /* cost of movsx */
142 COSTS_N_INSNS (2), /* cost of movzx */
143 15, /* "large" insn */
145 4, /* cost for loading QImode using movzbl */
146 {2, 4, 2}, /* cost of loading integer registers
147 in QImode, HImode and SImode.
148 Relative to reg-reg move (2). */
149 {2, 4, 2}, /* cost of storing integer registers */
150 2, /* cost of reg,reg fld/fst */
151 {8, 8, 8}, /* cost of loading fp registers
152 in SFmode, DFmode and XFmode */
153 {8, 8, 8}, /* cost of storing fp registers
154 in SFmode, DFmode and XFmode */
155 2, /* cost of moving MMX register */
156 {4, 8}, /* cost of loading MMX registers
157 in SImode and DImode */
158 {4, 8}, /* cost of storing MMX registers
159 in SImode and DImode */
160 2, /* cost of moving SSE register */
161 {4, 8, 16}, /* cost of loading SSE registers
162 in SImode, DImode and TImode */
163 {4, 8, 16}, /* cost of storing SSE registers
164 in SImode, DImode and TImode */
165 3, /* MMX or SSE register to integer */
166 0, /* size of prefetch block */
167 0, /* number of parallel prefetches */
169 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
170 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
171 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
172 COSTS_N_INSNS (22), /* cost of FABS instruction. */
173 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
174 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
/* Cycle-cost table for the 80486.
   NOTE(review): initializer appears truncated in this view — no closing
   "};", numbering gaps (196->198, 220->222) where entries were dropped,
   and stray leading line numbers on every line.  */
178 struct processor_costs i486_cost = { /* 486 specific costs */
179 COSTS_N_INSNS (1), /* cost of an add instruction */
180 COSTS_N_INSNS (1), /* cost of a lea instruction */
181 COSTS_N_INSNS (3), /* variable shift costs */
182 COSTS_N_INSNS (2), /* constant shift costs */
183 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
184 COSTS_N_INSNS (12), /* HI */
185 COSTS_N_INSNS (12), /* SI */
186 COSTS_N_INSNS (12), /* DI */
187 COSTS_N_INSNS (12)}, /* other */
188 1, /* cost of multiply per each bit set */
189 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
190 COSTS_N_INSNS (40), /* HI */
191 COSTS_N_INSNS (40), /* SI */
192 COSTS_N_INSNS (40), /* DI */
193 COSTS_N_INSNS (40)}, /* other */
194 COSTS_N_INSNS (3), /* cost of movsx */
195 COSTS_N_INSNS (2), /* cost of movzx */
196 15, /* "large" insn */
198 4, /* cost for loading QImode using movzbl */
199 {2, 4, 2}, /* cost of loading integer registers
200 in QImode, HImode and SImode.
201 Relative to reg-reg move (2). */
202 {2, 4, 2}, /* cost of storing integer registers */
203 2, /* cost of reg,reg fld/fst */
204 {8, 8, 8}, /* cost of loading fp registers
205 in SFmode, DFmode and XFmode */
206 {8, 8, 8}, /* cost of storing fp registers
207 in SFmode, DFmode and XFmode */
208 2, /* cost of moving MMX register */
209 {4, 8}, /* cost of loading MMX registers
210 in SImode and DImode */
211 {4, 8}, /* cost of storing MMX registers
212 in SImode and DImode */
213 2, /* cost of moving SSE register */
214 {4, 8, 16}, /* cost of loading SSE registers
215 in SImode, DImode and TImode */
216 {4, 8, 16}, /* cost of storing SSE registers
217 in SImode, DImode and TImode */
218 3, /* MMX or SSE register to integer */
219 0, /* size of prefetch block */
220 0, /* number of parallel prefetches */
222 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
223 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
224 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
225 COSTS_N_INSNS (3), /* cost of FABS instruction. */
226 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
227 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
/* Cycle-cost table for the Pentium (P5).
   NOTE(review): initializer appears truncated in this view — no closing
   "};", numbering gaps (249->251, 273->275) where entries were dropped,
   and stray leading line numbers on every line.  */
231 struct processor_costs pentium_cost = {
232 COSTS_N_INSNS (1), /* cost of an add instruction */
233 COSTS_N_INSNS (1), /* cost of a lea instruction */
234 COSTS_N_INSNS (4), /* variable shift costs */
235 COSTS_N_INSNS (1), /* constant shift costs */
236 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
237 COSTS_N_INSNS (11), /* HI */
238 COSTS_N_INSNS (11), /* SI */
239 COSTS_N_INSNS (11), /* DI */
240 COSTS_N_INSNS (11)}, /* other */
241 0, /* cost of multiply per each bit set */
242 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
243 COSTS_N_INSNS (25), /* HI */
244 COSTS_N_INSNS (25), /* SI */
245 COSTS_N_INSNS (25), /* DI */
246 COSTS_N_INSNS (25)}, /* other */
247 COSTS_N_INSNS (3), /* cost of movsx */
248 COSTS_N_INSNS (2), /* cost of movzx */
249 8, /* "large" insn */
251 6, /* cost for loading QImode using movzbl */
252 {2, 4, 2}, /* cost of loading integer registers
253 in QImode, HImode and SImode.
254 Relative to reg-reg move (2). */
255 {2, 4, 2}, /* cost of storing integer registers */
256 2, /* cost of reg,reg fld/fst */
257 {2, 2, 6}, /* cost of loading fp registers
258 in SFmode, DFmode and XFmode */
259 {4, 4, 6}, /* cost of storing fp registers
260 in SFmode, DFmode and XFmode */
261 8, /* cost of moving MMX register */
262 {8, 8}, /* cost of loading MMX registers
263 in SImode and DImode */
264 {8, 8}, /* cost of storing MMX registers
265 in SImode and DImode */
266 2, /* cost of moving SSE register */
267 {4, 8, 16}, /* cost of loading SSE registers
268 in SImode, DImode and TImode */
269 {4, 8, 16}, /* cost of storing SSE registers
270 in SImode, DImode and TImode */
271 3, /* MMX or SSE register to integer */
272 0, /* size of prefetch block */
273 0, /* number of parallel prefetches */
275 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
276 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
277 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
278 COSTS_N_INSNS (1), /* cost of FABS instruction. */
279 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
280 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
/* Cycle-cost table for the Pentium Pro / P6 family.
   NOTE(review): initializer appears truncated in this view — no closing
   "};", numbering gaps (302->304, 326->328) where entries were dropped,
   and stray leading line numbers on every line.  */
284 struct processor_costs pentiumpro_cost = {
285 COSTS_N_INSNS (1), /* cost of an add instruction */
286 COSTS_N_INSNS (1), /* cost of a lea instruction */
287 COSTS_N_INSNS (1), /* variable shift costs */
288 COSTS_N_INSNS (1), /* constant shift costs */
289 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
290 COSTS_N_INSNS (4), /* HI */
291 COSTS_N_INSNS (4), /* SI */
292 COSTS_N_INSNS (4), /* DI */
293 COSTS_N_INSNS (4)}, /* other */
294 0, /* cost of multiply per each bit set */
295 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
296 COSTS_N_INSNS (17), /* HI */
297 COSTS_N_INSNS (17), /* SI */
298 COSTS_N_INSNS (17), /* DI */
299 COSTS_N_INSNS (17)}, /* other */
300 COSTS_N_INSNS (1), /* cost of movsx */
301 COSTS_N_INSNS (1), /* cost of movzx */
302 8, /* "large" insn */
304 2, /* cost for loading QImode using movzbl */
305 {4, 4, 4}, /* cost of loading integer registers
306 in QImode, HImode and SImode.
307 Relative to reg-reg move (2). */
308 {2, 2, 2}, /* cost of storing integer registers */
309 2, /* cost of reg,reg fld/fst */
310 {2, 2, 6}, /* cost of loading fp registers
311 in SFmode, DFmode and XFmode */
312 {4, 4, 6}, /* cost of storing fp registers
313 in SFmode, DFmode and XFmode */
314 2, /* cost of moving MMX register */
315 {2, 2}, /* cost of loading MMX registers
316 in SImode and DImode */
317 {2, 2}, /* cost of storing MMX registers
318 in SImode and DImode */
319 2, /* cost of moving SSE register */
320 {2, 2, 8}, /* cost of loading SSE registers
321 in SImode, DImode and TImode */
322 {2, 2, 8}, /* cost of storing SSE registers
323 in SImode, DImode and TImode */
324 3, /* MMX or SSE register to integer */
325 32, /* size of prefetch block */
326 6, /* number of parallel prefetches */
328 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
329 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
330 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
331 COSTS_N_INSNS (2), /* cost of FABS instruction. */
332 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
333 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
/* Cycle-cost table for the AMD K6.
   NOTE(review): initializer appears truncated in this view — no closing
   "};", numbering gaps (355->357, 379->381) where entries were dropped,
   and stray leading line numbers on every line.  */
337 struct processor_costs k6_cost = {
338 COSTS_N_INSNS (1), /* cost of an add instruction */
339 COSTS_N_INSNS (2), /* cost of a lea instruction */
340 COSTS_N_INSNS (1), /* variable shift costs */
341 COSTS_N_INSNS (1), /* constant shift costs */
342 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
343 COSTS_N_INSNS (3), /* HI */
344 COSTS_N_INSNS (3), /* SI */
345 COSTS_N_INSNS (3), /* DI */
346 COSTS_N_INSNS (3)}, /* other */
347 0, /* cost of multiply per each bit set */
348 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
349 COSTS_N_INSNS (18), /* HI */
350 COSTS_N_INSNS (18), /* SI */
351 COSTS_N_INSNS (18), /* DI */
352 COSTS_N_INSNS (18)}, /* other */
353 COSTS_N_INSNS (2), /* cost of movsx */
354 COSTS_N_INSNS (2), /* cost of movzx */
355 8, /* "large" insn */
357 3, /* cost for loading QImode using movzbl */
358 {4, 5, 4}, /* cost of loading integer registers
359 in QImode, HImode and SImode.
360 Relative to reg-reg move (2). */
361 {2, 3, 2}, /* cost of storing integer registers */
362 4, /* cost of reg,reg fld/fst */
363 {6, 6, 6}, /* cost of loading fp registers
364 in SFmode, DFmode and XFmode */
365 {4, 4, 4}, /* cost of storing fp registers
366 in SFmode, DFmode and XFmode */
367 2, /* cost of moving MMX register */
368 {2, 2}, /* cost of loading MMX registers
369 in SImode and DImode */
370 {2, 2}, /* cost of storing MMX registers
371 in SImode and DImode */
372 2, /* cost of moving SSE register */
373 {2, 2, 8}, /* cost of loading SSE registers
374 in SImode, DImode and TImode */
375 {2, 2, 8}, /* cost of storing SSE registers
376 in SImode, DImode and TImode */
377 6, /* MMX or SSE register to integer */
378 32, /* size of prefetch block */
379 1, /* number of parallel prefetches */
381 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
382 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
383 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
384 COSTS_N_INSNS (2), /* cost of FABS instruction. */
385 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
386 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
/* Cycle-cost table for the AMD Athlon.  Note the divide costs scale
   with operand width (18/26/42/74), unlike the older uniform tables.
   NOTE(review): initializer appears truncated in this view — no closing
   "};", numbering gaps (408->410, 432->434) where entries were dropped,
   and stray leading line numbers on every line.  */
390 struct processor_costs athlon_cost = {
391 COSTS_N_INSNS (1), /* cost of an add instruction */
392 COSTS_N_INSNS (2), /* cost of a lea instruction */
393 COSTS_N_INSNS (1), /* variable shift costs */
394 COSTS_N_INSNS (1), /* constant shift costs */
395 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
396 COSTS_N_INSNS (5), /* HI */
397 COSTS_N_INSNS (5), /* SI */
398 COSTS_N_INSNS (5), /* DI */
399 COSTS_N_INSNS (5)}, /* other */
400 0, /* cost of multiply per each bit set */
401 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
402 COSTS_N_INSNS (26), /* HI */
403 COSTS_N_INSNS (42), /* SI */
404 COSTS_N_INSNS (74), /* DI */
405 COSTS_N_INSNS (74)}, /* other */
406 COSTS_N_INSNS (1), /* cost of movsx */
407 COSTS_N_INSNS (1), /* cost of movzx */
408 8, /* "large" insn */
410 4, /* cost for loading QImode using movzbl */
411 {3, 4, 3}, /* cost of loading integer registers
412 in QImode, HImode and SImode.
413 Relative to reg-reg move (2). */
414 {3, 4, 3}, /* cost of storing integer registers */
415 4, /* cost of reg,reg fld/fst */
416 {4, 4, 12}, /* cost of loading fp registers
417 in SFmode, DFmode and XFmode */
418 {6, 6, 8}, /* cost of storing fp registers
419 in SFmode, DFmode and XFmode */
420 2, /* cost of moving MMX register */
421 {4, 4}, /* cost of loading MMX registers
422 in SImode and DImode */
423 {4, 4}, /* cost of storing MMX registers
424 in SImode and DImode */
425 2, /* cost of moving SSE register */
426 {4, 4, 6}, /* cost of loading SSE registers
427 in SImode, DImode and TImode */
428 {4, 4, 5}, /* cost of storing SSE registers
429 in SImode, DImode and TImode */
430 5, /* MMX or SSE register to integer */
431 64, /* size of prefetch block */
432 6, /* number of parallel prefetches */
434 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
435 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
436 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
437 COSTS_N_INSNS (2), /* cost of FABS instruction. */
438 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
439 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
/* Cycle-cost table for the AMD K8 (Opteron/Athlon 64).
   NOTE(review): initializer appears truncated in this view — no closing
   "};", numbering gaps (461->463, 485->487) where entries were dropped,
   and stray leading line numbers on every line.  */
443 struct processor_costs k8_cost = {
444 COSTS_N_INSNS (1), /* cost of an add instruction */
445 COSTS_N_INSNS (2), /* cost of a lea instruction */
446 COSTS_N_INSNS (1), /* variable shift costs */
447 COSTS_N_INSNS (1), /* constant shift costs */
448 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
449 COSTS_N_INSNS (4), /* HI */
450 COSTS_N_INSNS (3), /* SI */
451 COSTS_N_INSNS (4), /* DI */
452 COSTS_N_INSNS (5)}, /* other */
453 0, /* cost of multiply per each bit set */
454 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
455 COSTS_N_INSNS (26), /* HI */
456 COSTS_N_INSNS (42), /* SI */
457 COSTS_N_INSNS (74), /* DI */
458 COSTS_N_INSNS (74)}, /* other */
459 COSTS_N_INSNS (1), /* cost of movsx */
460 COSTS_N_INSNS (1), /* cost of movzx */
461 8, /* "large" insn */
463 4, /* cost for loading QImode using movzbl */
464 {3, 4, 3}, /* cost of loading integer registers
465 in QImode, HImode and SImode.
466 Relative to reg-reg move (2). */
467 {3, 4, 3}, /* cost of storing integer registers */
468 4, /* cost of reg,reg fld/fst */
469 {4, 4, 12}, /* cost of loading fp registers
470 in SFmode, DFmode and XFmode */
471 {6, 6, 8}, /* cost of storing fp registers
472 in SFmode, DFmode and XFmode */
473 2, /* cost of moving MMX register */
474 {3, 3}, /* cost of loading MMX registers
475 in SImode and DImode */
476 {4, 4}, /* cost of storing MMX registers
477 in SImode and DImode */
478 2, /* cost of moving SSE register */
479 {4, 3, 6}, /* cost of loading SSE registers
480 in SImode, DImode and TImode */
481 {4, 4, 5}, /* cost of storing SSE registers
482 in SImode, DImode and TImode */
483 5, /* MMX or SSE register to integer */
484 64, /* size of prefetch block */
485 6, /* number of parallel prefetches */
487 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
488 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
489 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
490 COSTS_N_INSNS (2), /* cost of FABS instruction. */
491 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
492 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
/* Cycle-cost table for the Pentium 4 (NetBurst).  The expensive SSE
   moves/loads (12) reflect this core's long move latencies.
   NOTE(review): initializer appears truncated in this view — no closing
   "};", numbering gaps (514->516, 538->540) where entries were dropped,
   and stray leading line numbers on every line.  */
496 struct processor_costs pentium4_cost = {
497 COSTS_N_INSNS (1), /* cost of an add instruction */
498 COSTS_N_INSNS (3), /* cost of a lea instruction */
499 COSTS_N_INSNS (4), /* variable shift costs */
500 COSTS_N_INSNS (4), /* constant shift costs */
501 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
502 COSTS_N_INSNS (15), /* HI */
503 COSTS_N_INSNS (15), /* SI */
504 COSTS_N_INSNS (15), /* DI */
505 COSTS_N_INSNS (15)}, /* other */
506 0, /* cost of multiply per each bit set */
507 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
508 COSTS_N_INSNS (56), /* HI */
509 COSTS_N_INSNS (56), /* SI */
510 COSTS_N_INSNS (56), /* DI */
511 COSTS_N_INSNS (56)}, /* other */
512 COSTS_N_INSNS (1), /* cost of movsx */
513 COSTS_N_INSNS (1), /* cost of movzx */
514 16, /* "large" insn */
516 2, /* cost for loading QImode using movzbl */
517 {4, 5, 4}, /* cost of loading integer registers
518 in QImode, HImode and SImode.
519 Relative to reg-reg move (2). */
520 {2, 3, 2}, /* cost of storing integer registers */
521 2, /* cost of reg,reg fld/fst */
522 {2, 2, 6}, /* cost of loading fp registers
523 in SFmode, DFmode and XFmode */
524 {4, 4, 6}, /* cost of storing fp registers
525 in SFmode, DFmode and XFmode */
526 2, /* cost of moving MMX register */
527 {2, 2}, /* cost of loading MMX registers
528 in SImode and DImode */
529 {2, 2}, /* cost of storing MMX registers
530 in SImode and DImode */
531 12, /* cost of moving SSE register */
532 {12, 12, 12}, /* cost of loading SSE registers
533 in SImode, DImode and TImode */
534 {2, 2, 8}, /* cost of storing SSE registers
535 in SImode, DImode and TImode */
536 10, /* MMX or SSE register to integer */
537 64, /* size of prefetch block */
538 6, /* number of parallel prefetches */
540 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
541 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
542 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
543 COSTS_N_INSNS (2), /* cost of FABS instruction. */
544 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
545 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
/* Cycle-cost table for Nocona (64-bit Pentium 4 / Xeon).
   NOTE(review): initializer appears truncated in this view — no closing
   "};", numbering gaps (567->569, 591->593) where entries were dropped,
   and stray leading line numbers on every line.  */
549 struct processor_costs nocona_cost = {
550 COSTS_N_INSNS (1), /* cost of an add instruction */
551 COSTS_N_INSNS (1), /* cost of a lea instruction */
552 COSTS_N_INSNS (1), /* variable shift costs */
553 COSTS_N_INSNS (1), /* constant shift costs */
554 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
555 COSTS_N_INSNS (10), /* HI */
556 COSTS_N_INSNS (10), /* SI */
557 COSTS_N_INSNS (10), /* DI */
558 COSTS_N_INSNS (10)}, /* other */
559 0, /* cost of multiply per each bit set */
560 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
561 COSTS_N_INSNS (66), /* HI */
562 COSTS_N_INSNS (66), /* SI */
563 COSTS_N_INSNS (66), /* DI */
564 COSTS_N_INSNS (66)}, /* other */
565 COSTS_N_INSNS (1), /* cost of movsx */
566 COSTS_N_INSNS (1), /* cost of movzx */
567 16, /* "large" insn */
569 4, /* cost for loading QImode using movzbl */
570 {4, 4, 4}, /* cost of loading integer registers
571 in QImode, HImode and SImode.
572 Relative to reg-reg move (2). */
573 {4, 4, 4}, /* cost of storing integer registers */
574 3, /* cost of reg,reg fld/fst */
575 {12, 12, 12}, /* cost of loading fp registers
576 in SFmode, DFmode and XFmode */
577 {4, 4, 4}, /* cost of storing fp registers
578 in SFmode, DFmode and XFmode */
579 6, /* cost of moving MMX register */
580 {12, 12}, /* cost of loading MMX registers
581 in SImode and DImode */
582 {12, 12}, /* cost of storing MMX registers
583 in SImode and DImode */
584 6, /* cost of moving SSE register */
585 {12, 12, 12}, /* cost of loading SSE registers
586 in SImode, DImode and TImode */
587 {12, 12, 12}, /* cost of storing SSE registers
588 in SImode, DImode and TImode */
589 8, /* MMX or SSE register to integer */
590 128, /* size of prefetch block */
591 8, /* number of parallel prefetches */
593 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
594 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
595 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
596 COSTS_N_INSNS (3), /* cost of FABS instruction. */
597 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
598 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
601 /* Generic64 should produce code tuned for Nocona and K8. */
/* Blended 64-bit tuning table (used for -mtune=generic on 64-bit).
   NOTE(review): initializer appears truncated in this view — no closing
   "};", numbering gaps (625->627, 651->653: the entry the branch-cost
   remark below refers to is itself missing), and stray leading line
   numbers on every line.  */
603 struct processor_costs generic64_cost = {
604 COSTS_N_INSNS (1), /* cost of an add instruction */
605 /* On all chips taken into consideration lea is 2 cycles and more. With
606 this cost however our current implementation of synth_mult results in
607 use of unnecessary temporary registers causing regression on several
608 SPECfp benchmarks. */
609 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
610 COSTS_N_INSNS (1), /* variable shift costs */
611 COSTS_N_INSNS (1), /* constant shift costs */
612 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
613 COSTS_N_INSNS (4), /* HI */
614 COSTS_N_INSNS (3), /* SI */
615 COSTS_N_INSNS (4), /* DI */
616 COSTS_N_INSNS (2)}, /* other */
617 0, /* cost of multiply per each bit set */
618 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
619 COSTS_N_INSNS (26), /* HI */
620 COSTS_N_INSNS (42), /* SI */
621 COSTS_N_INSNS (74), /* DI */
622 COSTS_N_INSNS (74)}, /* other */
623 COSTS_N_INSNS (1), /* cost of movsx */
624 COSTS_N_INSNS (1), /* cost of movzx */
625 8, /* "large" insn */
627 4, /* cost for loading QImode using movzbl */
628 {4, 4, 4}, /* cost of loading integer registers
629 in QImode, HImode and SImode.
630 Relative to reg-reg move (2). */
631 {4, 4, 4}, /* cost of storing integer registers */
632 4, /* cost of reg,reg fld/fst */
633 {12, 12, 12}, /* cost of loading fp registers
634 in SFmode, DFmode and XFmode */
635 {6, 6, 8}, /* cost of storing fp registers
636 in SFmode, DFmode and XFmode */
637 2, /* cost of moving MMX register */
638 {8, 8}, /* cost of loading MMX registers
639 in SImode and DImode */
640 {8, 8}, /* cost of storing MMX registers
641 in SImode and DImode */
642 2, /* cost of moving SSE register */
643 {8, 8, 8}, /* cost of loading SSE registers
644 in SImode, DImode and TImode */
645 {8, 8, 8}, /* cost of storing SSE registers
646 in SImode, DImode and TImode */
647 5, /* MMX or SSE register to integer */
648 64, /* size of prefetch block */
649 6, /* number of parallel prefetches */
650 /* Benchmarks shows large regressions on K8 sixtrack benchmark when this value
651 is increased to perhaps more appropriate value of 5. */
653 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
654 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
655 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
656 COSTS_N_INSNS (8), /* cost of FABS instruction. */
657 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
658 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
661 /* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona and K8. */
/* Blended 32-bit tuning table (used for -mtune=generic on 32-bit);
   values mirror generic64_cost above.
   NOTE(review): initializer appears truncated in this view — no closing
   "};", numbering gaps (681->683, 705->707) where entries were dropped,
   and stray leading line numbers on every line.  */
663 struct processor_costs generic32_cost = {
664 COSTS_N_INSNS (1), /* cost of an add instruction */
665 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
666 COSTS_N_INSNS (1), /* variable shift costs */
667 COSTS_N_INSNS (1), /* constant shift costs */
668 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
669 COSTS_N_INSNS (4), /* HI */
670 COSTS_N_INSNS (3), /* SI */
671 COSTS_N_INSNS (4), /* DI */
672 COSTS_N_INSNS (2)}, /* other */
673 0, /* cost of multiply per each bit set */
674 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
675 COSTS_N_INSNS (26), /* HI */
676 COSTS_N_INSNS (42), /* SI */
677 COSTS_N_INSNS (74), /* DI */
678 COSTS_N_INSNS (74)}, /* other */
679 COSTS_N_INSNS (1), /* cost of movsx */
680 COSTS_N_INSNS (1), /* cost of movzx */
681 8, /* "large" insn */
683 4, /* cost for loading QImode using movzbl */
684 {4, 4, 4}, /* cost of loading integer registers
685 in QImode, HImode and SImode.
686 Relative to reg-reg move (2). */
687 {4, 4, 4}, /* cost of storing integer registers */
688 4, /* cost of reg,reg fld/fst */
689 {12, 12, 12}, /* cost of loading fp registers
690 in SFmode, DFmode and XFmode */
691 {6, 6, 8}, /* cost of storing fp registers
692 in SFmode, DFmode and XFmode */
693 2, /* cost of moving MMX register */
694 {8, 8}, /* cost of loading MMX registers
695 in SImode and DImode */
696 {8, 8}, /* cost of storing MMX registers
697 in SImode and DImode */
698 2, /* cost of moving SSE register */
699 {8, 8, 8}, /* cost of loading SSE registers
700 in SImode, DImode and TImode */
701 {8, 8, 8}, /* cost of storing SSE registers
702 in SImode, DImode and TImode */
703 5, /* MMX or SSE register to integer */
704 64, /* size of prefetch block */
705 6, /* number of parallel prefetches */
707 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
708 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
709 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
710 COSTS_N_INSNS (8), /* cost of FABS instruction. */
711 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
712 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
715 const struct processor_costs *ix86_cost = &pentium_cost;
717 /* Processor feature/optimization bitmasks. */
718 #define m_386 (1<<PROCESSOR_I386)
719 #define m_486 (1<<PROCESSOR_I486)
720 #define m_PENT (1<<PROCESSOR_PENTIUM)
721 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
722 #define m_K6 (1<<PROCESSOR_K6)
723 #define m_ATHLON (1<<PROCESSOR_ATHLON)
724 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
725 #define m_K8 (1<<PROCESSOR_K8)
726 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
727 #define m_NOCONA (1<<PROCESSOR_NOCONA)
728 #define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
729 #define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
730 #define m_GENERIC (m_GENERIC32 | m_GENERIC64)
732 /* Generic instruction choice should be common subset of supported CPUs
733 (PPro/PENT4/NOCONA/Athlon/K8). */
735 /* Leave is not affecting Nocona SPEC2000 results negatively, so enabling for
736 Generic64 seems like good code size tradeoff. We can't enable it for 32bit
737 generic because it is not working well with PPro base chips. */
738 const int x86_use_leave = m_386 | m_K6 | m_ATHLON_K8 | m_GENERIC64;
739 const int x86_push_memory = m_386 | m_K6 | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
740 const int x86_zero_extend_with_and = m_486 | m_PENT;
741 const int x86_movx = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA | m_GENERIC /* m_386 | m_K6 */;
742 const int x86_double_with_add = ~m_386;
743 const int x86_use_bit_test = m_386;
744 const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8 | m_K6 | m_GENERIC;
745 const int x86_cmove = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
746 const int x86_3dnow_a = m_ATHLON_K8;
747 const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
748 /* Branch hints were put in P4 based on simulation result. But
749 after P4 was made, no performance benefit was observed with
750 branch hints. It also increases the code size. As the result,
751 icc never generates branch hints. */
752 const int x86_branch_hints = 0;
753 const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4 | m_NOCONA | m_GENERIC32; /*m_GENERIC | m_ATHLON_K8 ? */
754 /* We probably ought to watch for partial register stalls on Generic32
755 compilation setting as well. However in current implementation the
756 partial register stalls are not eliminated very well - they can
757 be introduced via subregs synthesized by combine and can happen
758 in caller/callee saving sequences.
759 Because this option pays back little on PPro based chips and is in conflict
760 with partial reg. dependencies used by Athlon/P4 based chips, it is better
   to leave it off for generic32 for now.  */
const int x86_partial_reg_stall = m_PPRO;
const int x86_partial_flag_reg_stall = m_GENERIC;
const int x86_use_himode_fiop = m_386 | m_486 | m_K6;
const int x86_use_simode_fiop = ~(m_PPRO | m_ATHLON_K8 | m_PENT | m_GENERIC);
const int x86_use_mov0 = m_K6;
const int x86_use_cltd = ~(m_PENT | m_K6 | m_GENERIC);
const int x86_read_modify_write = ~m_PENT;
const int x86_read_modify = ~(m_PENT | m_PPRO);
const int x86_split_long_moves = m_PPRO;
const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON_K8 | m_GENERIC; /* m_PENT4 ? */
const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
const int x86_single_stringop = m_386 | m_PENT4 | m_NOCONA;
const int x86_qimode_math = ~(0);
const int x86_promote_qi_regs = 0;
/* On PPro this flag is meant to avoid partial register stalls.  Just like
   the x86_partial_reg_stall this option might be considered for Generic32
   if our scheme for avoiding partial stalls was more effective.  */
const int x86_himode_math = ~(m_PPRO);
const int x86_promote_hi_regs = m_PPRO;
const int x86_sub_esp_4 = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA | m_GENERIC;
const int x86_sub_esp_8 = m_ATHLON_K8 | m_PPRO | m_386 | m_486 | m_PENT4 | m_NOCONA | m_GENERIC;
const int x86_add_esp_4 = m_ATHLON_K8 | m_K6 | m_PENT4 | m_NOCONA | m_GENERIC;
const int x86_add_esp_8 = m_ATHLON_K8 | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4 | m_NOCONA | m_GENERIC;
const int x86_integer_DFmode_moves = ~(m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_GENERIC);
const int x86_partial_reg_dependency = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
const int x86_memory_mismatch_stall = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
const int x86_accumulate_outgoing_args = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_GENERIC;
const int x86_prologue_using_move = m_ATHLON_K8 | m_PPRO | m_GENERIC;
const int x86_epilogue_using_move = m_ATHLON_K8 | m_PPRO | m_GENERIC;
const int x86_shift1 = ~m_486;
const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
/* In the Generic model we have a conflict here between PPro/Pentium4 based
   chips that treat 128bit SSE registers as single units and K8 based chips
   that divide SSE registers into two 64bit halves.
   x86_sse_partial_reg_dependency promotes all store destinations to be
   128bit to allow register renaming on 128bit SSE units, but usually
   results in one extra microop on 64bit SSE units.  Experimental results
   show that disabling this option on P4 brings over 20% SPECfp regression,
   while enabling it on K8 brings roughly 2.4% regression that can be
   partly masked by careful scheduling of moves.  */
const int x86_sse_partial_reg_dependency = m_PENT4 | m_NOCONA | m_PPRO | m_GENERIC;
/* Set for machines where the type and dependencies are resolved on SSE
   register parts instead of whole registers, so we may maintain just
   lower part of scalar values in proper format leaving the upper part
   undefined.  */
const int x86_sse_split_regs = m_ATHLON_K8;
const int x86_sse_typeless_stores = m_ATHLON_K8;
const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4 | m_NOCONA;
const int x86_use_ffreep = m_ATHLON_K8;
const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_K6;
const int x86_use_incdec = ~(m_PENT4 | m_NOCONA | m_GENERIC);

/* ??? Allowing interunit moves makes it all too easy for the compiler to put
   integer data in xmm registers.  Which results in pretty abysmal code.  */
const int x86_inter_unit_moves = 0 /* ~(m_ATHLON_K8) */;

const int x86_ext_80387_constants = m_K6 | m_ATHLON | m_PENT4 | m_NOCONA | m_PPRO | m_GENERIC32;
/* Some CPU cores are not able to predict more than 4 branch instructions in
   the 16 byte window.  */
const int x86_four_jump_limit = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
const int x86_schedule = m_PPRO | m_ATHLON_K8 | m_K6 | m_PENT | m_GENERIC;
const int x86_use_bt = m_ATHLON_K8;
/* Compare and exchange was added for 80486.  */
const int x86_cmpxchg = ~m_386;
/* Compare and exchange 8 bytes was added for pentium.  */
const int x86_cmpxchg8b = ~(m_386 | m_486);
/* Compare and exchange 16 bytes was added for nocona.  */
const int x86_cmpxchg16b = m_NOCONA;
/* Exchange and add was added for 80486.  */
const int x86_xadd = ~m_386;
const int x86_pad_returns = m_ATHLON_K8 | m_GENERIC;

/* In case the average insn count for single function invocation is
   lower than this constant, emit fast (but longer) prologue and
   epilogue.  */
#define FAST_PROLOGUE_INSN_COUNT 20
/* Names for 8 (low), 8 (high), and 16-bit registers, respectively,
   expanded from the QI/HI *_REGISTER_NAMES target macros; indexed by
   register number.  */
static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
844 /* Array of the smallest class containing reg number REGNO, indexed by
845 REGNO. Used by REGNO_REG_CLASS in i386.h. */
847 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
850 AREG, DREG, CREG, BREG,
852 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
854 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
855 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
858 /* flags, fpsr, dirflag, frame */
859 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
860 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
862 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
864 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
865 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
866 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
870 /* The "default" register map used in 32bit mode. */
872 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
874 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
875 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
876 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
877 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
878 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
879 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
880 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
883 static int const x86_64_int_parameter_registers[6] =
885 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
886 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
/* GCC register numbers used to return integer values in 64bit mode:
   %rax, %rdx, %rdi, %rsi.  The second comment previously read "RDI",
   but GCC regno 1 is %edx/%rdx (see the DWARF numbering comment
   below: "2 for %edx (gcc regno = 1)"); fixed.  Initializer braces,
   missing in this copy, have been restored; values are unchanged.  */
static int const x86_64_int_return_registers[4] =
{
  0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
};
894 /* The "default" register map used in 64bit mode. */
895 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
897 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
898 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
899 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
900 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
901 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
902 8,9,10,11,12,13,14,15, /* extended integer registers */
903 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
906 /* Define the register numbers to be used in Dwarf debugging information.
907 The SVR4 reference port C compiler uses the following register numbers
908 in its Dwarf output code:
909 0 for %eax (gcc regno = 0)
910 1 for %ecx (gcc regno = 2)
911 2 for %edx (gcc regno = 1)
912 3 for %ebx (gcc regno = 3)
913 4 for %esp (gcc regno = 7)
914 5 for %ebp (gcc regno = 6)
915 6 for %esi (gcc regno = 4)
916 7 for %edi (gcc regno = 5)
917 The following three DWARF register numbers are never generated by
918 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
919 believes these numbers have these meanings.
920 8 for %eip (no gcc equivalent)
921 9 for %eflags (gcc regno = 17)
922 10 for %trapno (no gcc equivalent)
923 It is not at all clear how we should number the FP stack registers
924 for the x86 architecture. If the version of SDB on x86/svr4 were
925 a bit less brain dead with respect to floating-point then we would
926 have a precedent to follow with respect to DWARF register numbers
927 for x86 FP registers, but the SDB on x86/svr4 is so completely
928 broken with respect to FP registers that it is hardly worth thinking
929 of it as something to strive for compatibility with.
930 The version of x86/svr4 SDB I have at the moment does (partially)
931 seem to believe that DWARF register number 11 is associated with
932 the x86 register %st(0), but that's about all. Higher DWARF
933 register numbers don't seem to be associated with anything in
934 particular, and even for DWARF regno 11, SDB only seems to under-
935 stand that it should say that a variable lives in %st(0) (when
936 asked via an `=' command) if we said it was in DWARF regno 11,
937 but SDB still prints garbage when asked for the value of the
938 variable in question (via a `/' command).
939 (Also note that the labels SDB prints for various FP stack regs
940 when doing an `x' command are all wrong.)
941 Note that these problems generally don't affect the native SVR4
942 C compiler because it doesn't allow the use of -O with -g and
943 because when it is *not* optimizing, it allocates a memory
944 location for each floating-point variable, and the memory
945 location is what gets described in the DWARF AT_location
946 attribute for the variable in question.
947 Regardless of the severe mental illness of the x86/svr4 SDB, we
948 do something sensible here and we use the following DWARF
   register numbers.  Note that these are all stack-top-relative
   numbers:
	11 for %st(0) (gcc regno = 8)
	12 for %st(1) (gcc regno = 9)
	13 for %st(2) (gcc regno = 10)
	14 for %st(3) (gcc regno = 11)
	15 for %st(4) (gcc regno = 12)
	16 for %st(5) (gcc regno = 13)
	17 for %st(6) (gcc regno = 14)
	18 for %st(7) (gcc regno = 15)
*/
960 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
962 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
963 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
964 -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
965 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
966 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
967 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
968 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
/* Test and compare insns in i386.md store the information needed to
   generate branch and scc insns here.  */

rtx ix86_compare_op0 = NULL_RTX;
rtx ix86_compare_op1 = NULL_RTX;
/* NOTE(review): presumably a comparison result rtx already emitted, to
   be used instead of op0/op1 when non-null -- confirm against the
   i386.md users of this variable.  */
rtx ix86_compare_emitted = NULL_RTX;
/* Size of the register save area used for x86-64 varargs: room for
   REGPARM_MAX word-sized integer registers plus SSE_REGPARM_MAX
   16-byte SSE registers.  */
#define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
981 /* Define the structure for the machine field in struct function. */
983 struct stack_local_entry GTY(())
988 struct stack_local_entry *next;
/* Structure describing stack frame layout.
   Stack grows downward:

   [saved regs]
   saved frame pointer if frame_pointer_needed
					      <- HARD_FRAME_POINTER
   [padding / locals]	\
   [va_arg registers]	 > to_allocate	      <- FRAME_POINTER
   [frame]		/
  */
1015 HOST_WIDE_INT frame;
1017 int outgoing_arguments_size;
1020 HOST_WIDE_INT to_allocate;
1021 /* The offsets relative to ARG_POINTER. */
1022 HOST_WIDE_INT frame_pointer_offset;
1023 HOST_WIDE_INT hard_frame_pointer_offset;
1024 HOST_WIDE_INT stack_pointer_offset;
1026 /* When save_regs_using_mov is set, emit prologue using
1027 move instead of push instructions. */
1028 bool save_regs_using_mov;
/* Code model option.  */
enum cmodel ix86_cmodel;
/* Assembler dialect to emit (AT&T by default).  */
enum asm_dialect ix86_asm_dialect = ASM_ATT;
/* TLS access dialect (GNU by default).  */
enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;

/* Which unit we are generating floating point math for.  */
enum fpmath_unit ix86_fpmath;

/* Which cpu are we scheduling for.  */
enum processor_type ix86_tune;
/* Which instruction set architecture to use.  */
enum processor_type ix86_arch;

/* true if sse prefetch instruction is not NOOP.  */
int x86_prefetch_sse;

/* ix86_regparm_string as a number.  */
static int ix86_regparm;

/* -mstackrealign option.  */
extern int ix86_force_align_arg_pointer;
static const char ix86_force_align_arg_pointer_string[] = "force_align_arg_pointer";

/* Preferred alignment for stack boundary in bits.  */
unsigned int ix86_preferred_stack_boundary;

/* Values 1-5: see jump.c.  */
int ix86_branch_cost;

/* Variables which are this size or smaller are put in the data/bss
   or ldata/lbss sections.  */
int ix86_section_threshold = 65536;

/* Prefix built by ASM_GENERATE_INTERNAL_LABEL, and its length.  */
char internal_label_prefix[16];
int internal_label_prefix_len;
1071 static bool ix86_handle_option (size_t, const char *, int);
1072 static void output_pic_addr_const (FILE *, rtx, int);
1073 static void put_condition_code (enum rtx_code, enum machine_mode,
1075 static const char *get_some_local_dynamic_name (void);
1076 static int get_some_local_dynamic_name_1 (rtx *, void *);
1077 static rtx ix86_expand_int_compare (enum rtx_code, rtx, rtx);
1078 static enum rtx_code ix86_prepare_fp_compare_args (enum rtx_code, rtx *,
1080 static bool ix86_fixed_condition_code_regs (unsigned int *, unsigned int *);
1081 static enum machine_mode ix86_cc_modes_compatible (enum machine_mode,
1083 static rtx get_thread_pointer (int);
1084 static rtx legitimize_tls_address (rtx, enum tls_model, int);
1085 static void get_pc_thunk_name (char [32], unsigned int);
1086 static rtx gen_push (rtx);
1087 static int ix86_flags_dependent (rtx, rtx, enum attr_type);
1088 static int ix86_agi_dependent (rtx, rtx, enum attr_type);
1089 static struct machine_function * ix86_init_machine_status (void);
1090 static int ix86_split_to_parts (rtx, rtx *, enum machine_mode);
1091 static int ix86_nsaved_regs (void);
1092 static void ix86_emit_save_regs (void);
1093 static void ix86_emit_save_regs_using_mov (rtx, HOST_WIDE_INT);
1094 static void ix86_emit_restore_regs_using_mov (rtx, HOST_WIDE_INT, int);
1095 static void ix86_output_function_epilogue (FILE *, HOST_WIDE_INT);
1096 static HOST_WIDE_INT ix86_GOT_alias_set (void);
1097 static void ix86_adjust_counter (rtx, HOST_WIDE_INT);
1098 static rtx ix86_expand_aligntest (rtx, int);
1099 static void ix86_expand_strlensi_unroll_1 (rtx, rtx, rtx);
1100 static int ix86_issue_rate (void);
1101 static int ix86_adjust_cost (rtx, rtx, rtx, int);
1102 static int ia32_multipass_dfa_lookahead (void);
1103 static void ix86_init_mmx_sse_builtins (void);
1104 static rtx x86_this_parameter (tree);
1105 static void x86_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
1106 HOST_WIDE_INT, tree);
1107 static bool x86_can_output_mi_thunk (tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
1108 static void x86_file_start (void);
1109 static void ix86_reorg (void);
1110 static bool ix86_expand_carry_flag_compare (enum rtx_code, rtx, rtx, rtx*);
1111 static tree ix86_build_builtin_va_list (void);
1112 static void ix86_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
1114 static tree ix86_gimplify_va_arg (tree, tree, tree *, tree *);
1115 static bool ix86_scalar_mode_supported_p (enum machine_mode);
1116 static bool ix86_vector_mode_supported_p (enum machine_mode);
1118 static int ix86_address_cost (rtx);
1119 static bool ix86_cannot_force_const_mem (rtx);
1120 static rtx ix86_delegitimize_address (rtx);
1122 static void i386_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
1124 struct builtin_description;
1125 static rtx ix86_expand_sse_comi (const struct builtin_description *,
1127 static rtx ix86_expand_sse_compare (const struct builtin_description *,
1129 static rtx ix86_expand_unop1_builtin (enum insn_code, tree, rtx);
1130 static rtx ix86_expand_unop_builtin (enum insn_code, tree, rtx, int);
1131 static rtx ix86_expand_binop_builtin (enum insn_code, tree, rtx);
1132 static rtx ix86_expand_store_builtin (enum insn_code, tree);
1133 static rtx safe_vector_operand (rtx, enum machine_mode);
1134 static rtx ix86_expand_fp_compare (enum rtx_code, rtx, rtx, rtx, rtx *, rtx *);
1135 static int ix86_fp_comparison_arithmetics_cost (enum rtx_code code);
1136 static int ix86_fp_comparison_fcomi_cost (enum rtx_code code);
1137 static int ix86_fp_comparison_sahf_cost (enum rtx_code code);
1138 static int ix86_fp_comparison_cost (enum rtx_code code);
1139 static unsigned int ix86_select_alt_pic_regnum (void);
1140 static int ix86_save_reg (unsigned int, int);
1141 static void ix86_compute_frame_layout (struct ix86_frame *);
1142 static int ix86_comp_type_attributes (tree, tree);
1143 static int ix86_function_regparm (tree, tree);
1144 const struct attribute_spec ix86_attribute_table[];
1145 static bool ix86_function_ok_for_sibcall (tree, tree);
1146 static tree ix86_handle_cconv_attribute (tree *, tree, tree, int, bool *);
1147 static int ix86_value_regno (enum machine_mode, tree, tree);
1148 static bool contains_128bit_aligned_vector_p (tree);
1149 static rtx ix86_struct_value_rtx (tree, int);
1150 static bool ix86_ms_bitfield_layout_p (tree);
1151 static tree ix86_handle_struct_attribute (tree *, tree, tree, int, bool *);
1152 static int extended_reg_mentioned_1 (rtx *, void *);
1153 static bool ix86_rtx_costs (rtx, int, int, int *);
1154 static int min_insn_size (rtx);
1155 static tree ix86_md_asm_clobbers (tree outputs, tree inputs, tree clobbers);
1156 static bool ix86_must_pass_in_stack (enum machine_mode mode, tree type);
1157 static bool ix86_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
1159 static void ix86_init_builtins (void);
1160 static rtx ix86_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
1161 static const char *ix86_mangle_fundamental_type (tree);
1162 static tree ix86_stack_protect_fail (void);
1163 static rtx ix86_internal_arg_pointer (void);
1164 static void ix86_dwarf_handle_frame_unspec (const char *, rtx, int);
1166 /* This function is only used on Solaris. */
1167 static void i386_solaris_elf_named_section (const char *, unsigned int, tree)
1170 /* Register class used for passing given 64bit part of the argument.
1171 These represent classes as documented by the PS ABI, with the exception
1172 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
1173 use SF or DFmode move instead of DImode to avoid reformatting penalties.
1175 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
1176 whenever possible (upper half does contain padding).
1178 enum x86_64_reg_class
1181 X86_64_INTEGER_CLASS,
1182 X86_64_INTEGERSI_CLASS,
1189 X86_64_COMPLEX_X87_CLASS,
1192 static const char * const x86_64_reg_class_name[] = {
1193 "no", "integer", "integerSI", "sse", "sseSF", "sseDF",
1194 "sseup", "x87", "x87up", "cplx87", "no"
1197 #define MAX_CLASSES 4
1199 /* Table of constants used by fldpi, fldln2, etc.... */
1200 static REAL_VALUE_TYPE ext_80387_constants_table [5];
1201 static bool ext_80387_constants_init = 0;
1202 static void init_ext_80387_constants (void);
1203 static bool ix86_in_large_data_p (tree) ATTRIBUTE_UNUSED;
1204 static void ix86_encode_section_info (tree, rtx, int) ATTRIBUTE_UNUSED;
1205 static void x86_64_elf_unique_section (tree decl, int reloc) ATTRIBUTE_UNUSED;
1206 static section *x86_64_elf_select_section (tree decl, int reloc,
1207 unsigned HOST_WIDE_INT align)
1210 /* Initialize the GCC target structure. */
1211 #undef TARGET_ATTRIBUTE_TABLE
1212 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
1213 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
1214 # undef TARGET_MERGE_DECL_ATTRIBUTES
1215 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
1218 #undef TARGET_COMP_TYPE_ATTRIBUTES
1219 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
1221 #undef TARGET_INIT_BUILTINS
1222 #define TARGET_INIT_BUILTINS ix86_init_builtins
1223 #undef TARGET_EXPAND_BUILTIN
1224 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
1226 #undef TARGET_ASM_FUNCTION_EPILOGUE
1227 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
1229 #undef TARGET_ENCODE_SECTION_INFO
1230 #ifndef SUBTARGET_ENCODE_SECTION_INFO
1231 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
1233 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
1236 #undef TARGET_ASM_OPEN_PAREN
1237 #define TARGET_ASM_OPEN_PAREN ""
1238 #undef TARGET_ASM_CLOSE_PAREN
1239 #define TARGET_ASM_CLOSE_PAREN ""
1241 #undef TARGET_ASM_ALIGNED_HI_OP
1242 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
1243 #undef TARGET_ASM_ALIGNED_SI_OP
1244 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
1246 #undef TARGET_ASM_ALIGNED_DI_OP
1247 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
1250 #undef TARGET_ASM_UNALIGNED_HI_OP
1251 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
1252 #undef TARGET_ASM_UNALIGNED_SI_OP
1253 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
1254 #undef TARGET_ASM_UNALIGNED_DI_OP
1255 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
1257 #undef TARGET_SCHED_ADJUST_COST
1258 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
1259 #undef TARGET_SCHED_ISSUE_RATE
1260 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
1261 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
1262 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
1263 ia32_multipass_dfa_lookahead
1265 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
1266 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
1269 #undef TARGET_HAVE_TLS
1270 #define TARGET_HAVE_TLS true
1272 #undef TARGET_CANNOT_FORCE_CONST_MEM
1273 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
1274 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
1275 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_rtx_true
1277 #undef TARGET_DELEGITIMIZE_ADDRESS
1278 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
1280 #undef TARGET_MS_BITFIELD_LAYOUT_P
1281 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
1284 #undef TARGET_BINDS_LOCAL_P
1285 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
1288 #undef TARGET_ASM_OUTPUT_MI_THUNK
1289 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
1290 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1291 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
1293 #undef TARGET_ASM_FILE_START
1294 #define TARGET_ASM_FILE_START x86_file_start
1296 #undef TARGET_DEFAULT_TARGET_FLAGS
1297 #define TARGET_DEFAULT_TARGET_FLAGS \
1299 | TARGET_64BIT_DEFAULT \
1300 | TARGET_SUBTARGET_DEFAULT \
1301 | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
1303 #undef TARGET_HANDLE_OPTION
1304 #define TARGET_HANDLE_OPTION ix86_handle_option
1306 #undef TARGET_RTX_COSTS
1307 #define TARGET_RTX_COSTS ix86_rtx_costs
1308 #undef TARGET_ADDRESS_COST
1309 #define TARGET_ADDRESS_COST ix86_address_cost
1311 #undef TARGET_FIXED_CONDITION_CODE_REGS
1312 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
1313 #undef TARGET_CC_MODES_COMPATIBLE
1314 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
1316 #undef TARGET_MACHINE_DEPENDENT_REORG
1317 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
1319 #undef TARGET_BUILD_BUILTIN_VA_LIST
1320 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
1322 #undef TARGET_MD_ASM_CLOBBERS
1323 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
1325 #undef TARGET_PROMOTE_PROTOTYPES
1326 #define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
1327 #undef TARGET_STRUCT_VALUE_RTX
1328 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
1329 #undef TARGET_SETUP_INCOMING_VARARGS
1330 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
1331 #undef TARGET_MUST_PASS_IN_STACK
1332 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
1333 #undef TARGET_PASS_BY_REFERENCE
1334 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
1335 #undef TARGET_INTERNAL_ARG_POINTER
1336 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
1337 #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
1338 #define TARGET_DWARF_HANDLE_FRAME_UNSPEC ix86_dwarf_handle_frame_unspec
1340 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
1341 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
1343 #undef TARGET_SCALAR_MODE_SUPPORTED_P
1344 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
1346 #undef TARGET_VECTOR_MODE_SUPPORTED_P
1347 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
1350 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
1351 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
1354 #ifdef SUBTARGET_INSERT_ATTRIBUTES
1355 #undef TARGET_INSERT_ATTRIBUTES
1356 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
1359 #undef TARGET_MANGLE_FUNDAMENTAL_TYPE
1360 #define TARGET_MANGLE_FUNDAMENTAL_TYPE ix86_mangle_fundamental_type
1362 #undef TARGET_STACK_PROTECT_FAIL
1363 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
1365 #undef TARGET_FUNCTION_VALUE
1366 #define TARGET_FUNCTION_VALUE ix86_function_value
/* The GCC target vector, built via TARGET_INITIALIZER from the
   TARGET_* macro overrides above.  */
struct gcc_target targetm = TARGET_INITIALIZER;
/* The svr4 ABI for the i386 says that records and unions are returned
   in memory.  */
1373 #ifndef DEFAULT_PCC_STRUCT_RETURN
1374 #define DEFAULT_PCC_STRUCT_RETURN 1
1377 /* Implement TARGET_HANDLE_OPTION. */
1380 ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
1387 target_flags &= ~MASK_3DNOW_A;
1388 target_flags_explicit |= MASK_3DNOW_A;
1395 target_flags &= ~(MASK_3DNOW | MASK_3DNOW_A);
1396 target_flags_explicit |= MASK_3DNOW | MASK_3DNOW_A;
1403 target_flags &= ~(MASK_SSE2 | MASK_SSE3);
1404 target_flags_explicit |= MASK_SSE2 | MASK_SSE3;
1411 target_flags &= ~MASK_SSE3;
1412 target_flags_explicit |= MASK_SSE3;
1421 /* Sometimes certain combinations of command options do not make
1422 sense on a particular target machine. You can define a macro
1423 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1424 defined, is executed once just after all the command options have
1427 Don't use this macro to turn on various extra optimizations for
1428 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1431 override_options (void)
1434 int ix86_tune_defaulted = 0;
1436 /* Comes from final.c -- no real reason to change it. */
1437 #define MAX_CODE_ALIGN 16
1441 const struct processor_costs *cost; /* Processor costs */
1442 const int target_enable; /* Target flags to enable. */
1443 const int target_disable; /* Target flags to disable. */
1444 const int align_loop; /* Default alignments. */
1445 const int align_loop_max_skip;
1446 const int align_jump;
1447 const int align_jump_max_skip;
1448 const int align_func;
1450 const processor_target_table[PROCESSOR_max] =
1452 {&i386_cost, 0, 0, 4, 3, 4, 3, 4},
1453 {&i486_cost, 0, 0, 16, 15, 16, 15, 16},
1454 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
1455 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
1456 {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
1457 {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
1458 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
1459 {&k8_cost, 0, 0, 16, 7, 16, 7, 16},
1460 {&nocona_cost, 0, 0, 0, 0, 0, 0, 0},
1461 {&generic32_cost, 0, 0, 16, 7, 16, 7, 16},
1462 {&generic64_cost, 0, 0, 16, 7, 16, 7, 16}
1465 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
1468 const char *const name; /* processor name or nickname. */
1469 const enum processor_type processor;
1470 const enum pta_flags
1476 PTA_PREFETCH_SSE = 16,
1482 const processor_alias_table[] =
1484 {"i386", PROCESSOR_I386, 0},
1485 {"i486", PROCESSOR_I486, 0},
1486 {"i586", PROCESSOR_PENTIUM, 0},
1487 {"pentium", PROCESSOR_PENTIUM, 0},
1488 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
1489 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
1490 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1491 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1492 {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_PREFETCH_SSE | PTA_SSE},
1493 {"i686", PROCESSOR_PENTIUMPRO, 0},
1494 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
1495 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
1496 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1497 {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1498 {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE | PTA_SSE2},
1499 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1500 | PTA_MMX | PTA_PREFETCH_SSE},
1501 {"pentium4m", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1502 | PTA_MMX | PTA_PREFETCH_SSE},
1503 {"prescott", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3
1504 | PTA_MMX | PTA_PREFETCH_SSE},
1505 {"nocona", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_64BIT
1506 | PTA_MMX | PTA_PREFETCH_SSE},
1507 {"k6", PROCESSOR_K6, PTA_MMX},
1508 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1509 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1510 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1512 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
1513 | PTA_3DNOW | PTA_3DNOW_A},
1514 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1515 | PTA_3DNOW_A | PTA_SSE},
1516 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1517 | PTA_3DNOW_A | PTA_SSE},
1518 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1519 | PTA_3DNOW_A | PTA_SSE},
1520 {"x86-64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_64BIT
1521 | PTA_SSE | PTA_SSE2 },
1522 {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1523 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1524 {"opteron", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1525 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1526 {"athlon64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1527 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1528 {"athlon-fx", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1529 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1530 {"generic32", PROCESSOR_GENERIC32, 0 /* flags are only used for -march switch. */ },
1531 {"generic64", PROCESSOR_GENERIC64, PTA_64BIT /* flags are only used for -march switch. */ },
1534 int const pta_size = ARRAY_SIZE (processor_alias_table);
1536 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1537 SUBTARGET_OVERRIDE_OPTIONS;
1540 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
1541 SUBSUBTARGET_OVERRIDE_OPTIONS;
1544 /* -fPIC is the default for x86_64. */
1545 if (TARGET_MACHO && TARGET_64BIT)
1548 /* Set the default values for switches whose default depends on TARGET_64BIT
1549 in case they weren't overwritten by command line options. */
1552 /* Mach-O doesn't support omitting the frame pointer for now. */
1553 if (flag_omit_frame_pointer == 2)
1554 flag_omit_frame_pointer = (TARGET_MACHO ? 0 : 1);
1555 if (flag_asynchronous_unwind_tables == 2)
1556 flag_asynchronous_unwind_tables = 1;
1557 if (flag_pcc_struct_return == 2)
1558 flag_pcc_struct_return = 0;
1562 if (flag_omit_frame_pointer == 2)
1563 flag_omit_frame_pointer = 0;
1564 if (flag_asynchronous_unwind_tables == 2)
1565 flag_asynchronous_unwind_tables = 0;
1566 if (flag_pcc_struct_return == 2)
1567 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
1570 /* Need to check -mtune=generic first. */
1571 if (ix86_tune_string)
1573 if (!strcmp (ix86_tune_string, "generic")
1574 || !strcmp (ix86_tune_string, "i686")
1575 /* As special support for cross compilers we read -mtune=native
1576 as -mtune=generic. With native compilers we won't see the
1577 -mtune=native, as it was changed by the driver. */
1578 || !strcmp (ix86_tune_string, "native"))
1581 ix86_tune_string = "generic64";
1583 ix86_tune_string = "generic32";
1585 else if (!strncmp (ix86_tune_string, "generic", 7))
1586 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
1590 if (ix86_arch_string)
1591 ix86_tune_string = ix86_arch_string;
1592 if (!ix86_tune_string)
1594 ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT];
1595 ix86_tune_defaulted = 1;
1598 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
1599 need to use a sensible tune option. */
1600 if (!strcmp (ix86_tune_string, "generic")
1601 || !strcmp (ix86_tune_string, "x86-64")
1602 || !strcmp (ix86_tune_string, "i686"))
1605 ix86_tune_string = "generic64";
1607 ix86_tune_string = "generic32";
1610 if (!strcmp (ix86_tune_string, "x86-64"))
1611 warning (OPT_Wdeprecated, "-mtune=x86-64 is deprecated. Use -mtune=k8 or "
1612 "-mtune=generic instead as appropriate.");
1614 if (!ix86_arch_string)
1615 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
1616 if (!strcmp (ix86_arch_string, "generic"))
1617 error ("generic CPU can be used only for -mtune= switch");
1618 if (!strncmp (ix86_arch_string, "generic", 7))
1619 error ("bad value (%s) for -march= switch", ix86_arch_string);
1621 if (ix86_cmodel_string != 0)
1623 if (!strcmp (ix86_cmodel_string, "small"))
1624 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1625 else if (!strcmp (ix86_cmodel_string, "medium"))
1626 ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
1628 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
1629 else if (!strcmp (ix86_cmodel_string, "32"))
1630 ix86_cmodel = CM_32;
1631 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
1632 ix86_cmodel = CM_KERNEL;
1633 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
1634 ix86_cmodel = CM_LARGE;
1636 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
1640 ix86_cmodel = CM_32;
1642 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1644 if (ix86_asm_string != 0)
1647 && !strcmp (ix86_asm_string, "intel"))
1648 ix86_asm_dialect = ASM_INTEL;
1649 else if (!strcmp (ix86_asm_string, "att"))
1650 ix86_asm_dialect = ASM_ATT;
1652 error ("bad value (%s) for -masm= switch", ix86_asm_string);
1654 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
1655 error ("code model %qs not supported in the %s bit mode",
1656 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
1657 if (ix86_cmodel == CM_LARGE)
1658 sorry ("code model %<large%> not supported yet");
1659 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
1660 sorry ("%i-bit mode not compiled in",
1661 (target_flags & MASK_64BIT) ? 64 : 32);
1663 for (i = 0; i < pta_size; i++)
1664 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
1666 ix86_arch = processor_alias_table[i].processor;
1667 /* Default cpu tuning to the architecture. */
1668 ix86_tune = ix86_arch;
1669 if (processor_alias_table[i].flags & PTA_MMX
1670 && !(target_flags_explicit & MASK_MMX))
1671 target_flags |= MASK_MMX;
1672 if (processor_alias_table[i].flags & PTA_3DNOW
1673 && !(target_flags_explicit & MASK_3DNOW))
1674 target_flags |= MASK_3DNOW;
1675 if (processor_alias_table[i].flags & PTA_3DNOW_A
1676 && !(target_flags_explicit & MASK_3DNOW_A))
1677 target_flags |= MASK_3DNOW_A;
1678 if (processor_alias_table[i].flags & PTA_SSE
1679 && !(target_flags_explicit & MASK_SSE))
1680 target_flags |= MASK_SSE;
1681 if (processor_alias_table[i].flags & PTA_SSE2
1682 && !(target_flags_explicit & MASK_SSE2))
1683 target_flags |= MASK_SSE2;
1684 if (processor_alias_table[i].flags & PTA_SSE3
1685 && !(target_flags_explicit & MASK_SSE3))
1686 target_flags |= MASK_SSE3;
1687 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1688 x86_prefetch_sse = true;
1689 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1690 error ("CPU you selected does not support x86-64 "
1696 error ("bad value (%s) for -march= switch", ix86_arch_string);
1698 for (i = 0; i < pta_size; i++)
1699 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
1701 ix86_tune = processor_alias_table[i].processor;
1702 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1704 if (ix86_tune_defaulted)
1706 ix86_tune_string = "x86-64";
1707 for (i = 0; i < pta_size; i++)
1708 if (! strcmp (ix86_tune_string,
1709 processor_alias_table[i].name))
1711 ix86_tune = processor_alias_table[i].processor;
1714 error ("CPU you selected does not support x86-64 "
1717 /* Intel CPUs have always interpreted SSE prefetch instructions as
1718 NOPs; so, we can enable SSE prefetch instructions even when
1719 -mtune (rather than -march) points us to a processor that has them.
1720 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
1721 higher processors. */
1722 if (TARGET_CMOVE && (processor_alias_table[i].flags & PTA_PREFETCH_SSE))
1723 x86_prefetch_sse = true;
1727 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
1730 ix86_cost = &size_cost;
1732 ix86_cost = processor_target_table[ix86_tune].cost;
1733 target_flags |= processor_target_table[ix86_tune].target_enable;
1734 target_flags &= ~processor_target_table[ix86_tune].target_disable;
1736 /* Arrange to set up i386_stack_locals for all functions. */
1737 init_machine_status = ix86_init_machine_status;
1739 /* Validate -mregparm= value. */
1740 if (ix86_regparm_string)
1742 i = atoi (ix86_regparm_string);
1743 if (i < 0 || i > REGPARM_MAX)
1744 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
1750 ix86_regparm = REGPARM_MAX;
1752 /* If the user has provided any of the -malign-* options,
1753 warn and use that value only if -falign-* is not set.
1754 Remove this code in GCC 3.2 or later. */
1755 if (ix86_align_loops_string)
1757 warning (0, "-malign-loops is obsolete, use -falign-loops");
1758 if (align_loops == 0)
1760 i = atoi (ix86_align_loops_string);
1761 if (i < 0 || i > MAX_CODE_ALIGN)
1762 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1764 align_loops = 1 << i;
1768 if (ix86_align_jumps_string)
1770 warning (0, "-malign-jumps is obsolete, use -falign-jumps");
1771 if (align_jumps == 0)
1773 i = atoi (ix86_align_jumps_string);
1774 if (i < 0 || i > MAX_CODE_ALIGN)
1775 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1777 align_jumps = 1 << i;
1781 if (ix86_align_funcs_string)
1783 warning (0, "-malign-functions is obsolete, use -falign-functions");
1784 if (align_functions == 0)
1786 i = atoi (ix86_align_funcs_string);
1787 if (i < 0 || i > MAX_CODE_ALIGN)
1788 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1790 align_functions = 1 << i;
1794 /* Default align_* from the processor table. */
1795 if (align_loops == 0)
1797 align_loops = processor_target_table[ix86_tune].align_loop;
1798 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
1800 if (align_jumps == 0)
1802 align_jumps = processor_target_table[ix86_tune].align_jump;
1803 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
1805 if (align_functions == 0)
1807 align_functions = processor_target_table[ix86_tune].align_func;
1810 /* Validate -mbranch-cost= value, or provide default. */
1811 ix86_branch_cost = ix86_cost->branch_cost;
1812 if (ix86_branch_cost_string)
1814 i = atoi (ix86_branch_cost_string);
1816 error ("-mbranch-cost=%d is not between 0 and 5", i);
1818 ix86_branch_cost = i;
1820 if (ix86_section_threshold_string)
1822 i = atoi (ix86_section_threshold_string);
1824 error ("-mlarge-data-threshold=%d is negative", i);
1826 ix86_section_threshold = i;
1829 if (ix86_tls_dialect_string)
1831 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
1832 ix86_tls_dialect = TLS_DIALECT_GNU;
1833 else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
1834 ix86_tls_dialect = TLS_DIALECT_GNU2;
1835 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
1836 ix86_tls_dialect = TLS_DIALECT_SUN;
1838 error ("bad value (%s) for -mtls-dialect= switch",
1839 ix86_tls_dialect_string);
1842 /* Keep nonleaf frame pointers. */
1843 if (flag_omit_frame_pointer)
1844 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
1845 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
1846 flag_omit_frame_pointer = 1;
1848 /* If we're doing fast math, we don't care about comparison order
1849 wrt NaNs. This lets us use a shorter comparison sequence. */
1850 if (flag_finite_math_only)
1851 target_flags &= ~MASK_IEEE_FP;
1853 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
1854 since the insns won't need emulation. */
1855 if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
1856 target_flags &= ~MASK_NO_FANCY_MATH_387;
1858 /* Likewise, if the target doesn't have a 387, or we've specified
1859 software floating point, don't use 387 inline intrinsics. */
1861 target_flags |= MASK_NO_FANCY_MATH_387;
1863 /* Turn on SSE2 builtins for -msse3. */
1865 target_flags |= MASK_SSE2;
1867 /* Turn on SSE builtins for -msse2. */
1869 target_flags |= MASK_SSE;
1871 /* Turn on MMX builtins for -msse. */
1874 target_flags |= MASK_MMX & ~target_flags_explicit;
1875 x86_prefetch_sse = true;
1878 /* Turn on MMX builtins for 3Dnow. */
1880 target_flags |= MASK_MMX;
1884 if (TARGET_ALIGN_DOUBLE)
1885 error ("-malign-double makes no sense in the 64bit mode");
1887 error ("-mrtd calling convention not supported in the 64bit mode");
1889 /* Enable by default the SSE and MMX builtins. Do allow the user to
1890 explicitly disable any of these. In particular, disabling SSE and
1891 MMX for kernel code is extremely useful. */
1893 |= ((MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE)
1894 & ~target_flags_explicit);
1898 /* i386 ABI does not specify red zone. It still makes sense to use it
1899 when the programmer takes care to keep the stack from being destroyed. */
1900 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
1901 target_flags |= MASK_NO_RED_ZONE;
1904 /* Validate -mpreferred-stack-boundary= value, or provide default.
1905 The default of 128 bits is for Pentium III's SSE __m128. We can't
1906 change it because of optimize_size. Otherwise, we can't mix object
1907 files compiled with -Os and -On. */
1908 ix86_preferred_stack_boundary = 128;
1909 if (ix86_preferred_stack_boundary_string)
1911 i = atoi (ix86_preferred_stack_boundary_string);
1912 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
1913 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
1914 TARGET_64BIT ? 4 : 2);
1916 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
1919 /* Accept -msseregparm only if at least SSE support is enabled. */
1920 if (TARGET_SSEREGPARM
1922 error ("-msseregparm used without SSE enabled");
1924 ix86_fpmath = TARGET_FPMATH_DEFAULT;
1926 if (ix86_fpmath_string != 0)
1928 if (! strcmp (ix86_fpmath_string, "387"))
1929 ix86_fpmath = FPMATH_387;
1930 else if (! strcmp (ix86_fpmath_string, "sse"))
1934 warning (0, "SSE instruction set disabled, using 387 arithmetics");
1935 ix86_fpmath = FPMATH_387;
1938 ix86_fpmath = FPMATH_SSE;
1940 else if (! strcmp (ix86_fpmath_string, "387,sse")
1941 || ! strcmp (ix86_fpmath_string, "sse,387"))
1945 warning (0, "SSE instruction set disabled, using 387 arithmetics");
1946 ix86_fpmath = FPMATH_387;
1948 else if (!TARGET_80387)
1950 warning (0, "387 instruction set disabled, using SSE arithmetics");
1951 ix86_fpmath = FPMATH_SSE;
1954 ix86_fpmath = FPMATH_SSE | FPMATH_387;
1957 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
1960 /* If the i387 is disabled, then do not return values in it. */
1962 target_flags &= ~MASK_FLOAT_RETURNS;
1964 if ((x86_accumulate_outgoing_args & TUNEMASK)
1965 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
1967 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
1969 /* ??? Unwind info is not correct around the CFG unless either a frame
1970 pointer is present or M_A_O_A is set. Fixing this requires rewriting
1971 unwind info generation to be aware of the CFG and propagating states
1973 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
1974 || flag_exceptions || flag_non_call_exceptions)
1975 && flag_omit_frame_pointer
1976 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
1978 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
1979 warning (0, "unwind tables currently require either a frame pointer "
1980 "or -maccumulate-outgoing-args for correctness");
1981 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
1984 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
1987 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
1988 p = strchr (internal_label_prefix, 'X');
1989 internal_label_prefix_len = p - internal_label_prefix;
1993 /* When scheduling description is not available, disable scheduler pass
1994 so it won't slow down the compilation and make x87 code slower. */
1995 if (!TARGET_SCHEDULE)
1996 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
1999 /* Switch to the appropriate section for output of DECL.
2000 DECL is either a `VAR_DECL' node or a constant of some sort.
2001 RELOC indicates whether forming the initial value of DECL requires
2002 link-time relocations. */
/* NOTE(review): interior lines of this function are elided in this
   listing (see the jumps in the embedded line numbers) -- the return
   type, braces, break statements and some case/default labels are
   not visible here.  */
2005 x86_64_elf_select_section (tree decl, int reloc,
2006 unsigned HOST_WIDE_INT align)
/* Under the medium code models, data that ix86_in_large_data_p accepts
   is routed into ".l"-prefixed large-data sections.  */
2008 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2009 && ix86_in_large_data_p (decl))
2011 const char *sname = NULL;
2012 unsigned int flags = SECTION_WRITE;
2013 switch (categorize_decl_for_section (decl, reloc, flag_pic))
2018 case SECCAT_DATA_REL:
2019 sname = ".ldata.rel";
2021 case SECCAT_DATA_REL_LOCAL:
2022 sname = ".ldata.rel.local";
2024 case SECCAT_DATA_REL_RO:
2025 sname = ".ldata.rel.ro";
2027 case SECCAT_DATA_REL_RO_LOCAL:
2028 sname = ".ldata.rel.ro.local";
2032 flags |= SECTION_BSS;
2035 case SECCAT_RODATA_MERGE_STR:
2036 case SECCAT_RODATA_MERGE_STR_INIT:
2037 case SECCAT_RODATA_MERGE_CONST:
2041 case SECCAT_SRODATA:
2048 /* We don't split these for medium model. Place them into
2049 default sections and hope for the best. */
2054 /* We might get called with string constants, but get_named_section
2055 doesn't like them as they are not DECLs. Also, we need to set
2056 flags in that case. */
2058 return get_section (sname, flags, NULL);
2059 return get_named_section (decl, sname, reloc);
/* Not medium-model large data: use the generic ELF selection logic.  */
2062 return default_elf_select_section (decl, reloc, align);
2065 /* Build up a unique section name, expressed as a
2066 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
2067 RELOC indicates whether the initial value of EXP requires
2068 link-time relocations. */
/* NOTE(review): interior lines are elided in this listing -- the
   return type, local declarations of plen/nlen/name/string, braces
   and break statements are not visible here.  */
2071 x86_64_elf_unique_section (tree decl, int reloc)
/* Mirror of x86_64_elf_select_section for uniquely-named sections:
   medium-model large data gets an ".l"-prefixed section name.  */
2073 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2074 && ix86_in_large_data_p (decl))
2076 const char *prefix = NULL;
2077 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
2078 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
2080 switch (categorize_decl_for_section (decl, reloc, flag_pic))
2083 case SECCAT_DATA_REL:
2084 case SECCAT_DATA_REL_LOCAL:
2085 case SECCAT_DATA_REL_RO:
2086 case SECCAT_DATA_REL_RO_LOCAL:
2087 prefix = one_only ? ".gnu.linkonce.ld." : ".ldata.";
2090 prefix = one_only ? ".gnu.linkonce.lb." : ".lbss.";
2093 case SECCAT_RODATA_MERGE_STR:
2094 case SECCAT_RODATA_MERGE_STR_INIT:
2095 case SECCAT_RODATA_MERGE_CONST:
2096 prefix = one_only ? ".gnu.linkonce.lr." : ".lrodata.";
2098 case SECCAT_SRODATA:
2105 /* We don't split these for medium model. Place them into
2106 default sections and hope for the best. */
/* Concatenate PREFIX with the decl's stripped assembler name to form
   the unique section name.  */
2114 plen = strlen (prefix);
2116 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
2117 name = targetm.strip_name_encoding (name);
2118 nlen = strlen (name);
2120 string = alloca (nlen + plen + 1);
2121 memcpy (string, prefix, plen);
2122 memcpy (string + plen, name, nlen + 1);
2124 DECL_SECTION_NAME (decl) = build_string (nlen + plen, string);
/* Otherwise fall back to the generic unique-section logic.  */
2128 default_unique_section (decl, reloc);
2131 #ifdef COMMON_ASM_OP
2132 /* This says how to output assembler code to declare an
2133 uninitialized external linkage data object.
2135 For medium model x86-64 we need to use .largecomm opcode for
/* NOTE(review): lines are elided in this listing -- the return type,
   the ALIGN parameter declaration, braces and the else branch of the
   if below are not visible here.  */
2138 x86_elf_aligned_common (FILE *file,
2139 const char *name, unsigned HOST_WIDE_INT size,
/* Objects above the -mlarge-data-threshold under the medium code
   models are declared with ".largecomm" rather than COMMON_ASM_OP.  */
2142 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2143 && size > (unsigned int)ix86_section_threshold)
2144 fprintf (file, ".largecomm\t");
2146 fprintf (file, "%s", COMMON_ASM_OP);
2147 assemble_name (file, name);
/* Emit ",SIZE,ALIGN-IN-BYTES" after the symbol name.  */
2148 fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
2149 size, align / BITS_PER_UNIT);
2152 /* Utility function for targets to use in implementing
2153 ASM_OUTPUT_ALIGNED_BSS. */
/* NOTE(review): the return type, the tail of the parameter list and
   braces are elided from this listing.  */
2156 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
2157 const char *name, unsigned HOST_WIDE_INT size,
/* Large BSS objects under the medium code models go to ".lbss";
   everything else goes to the regular bss section.  */
2160 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2161 && size > (unsigned int)ix86_section_threshold)
2162 switch_to_section (get_named_section (decl, ".lbss", 0));
2164 switch_to_section (bss_section);
2165 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
2166 #ifdef ASM_DECLARE_OBJECT_NAME
2167 last_assemble_variable_decl = decl;
2168 ASM_DECLARE_OBJECT_NAME (file, name, decl);
2170 /* Standard thing is just output label for the object. */
2171 ASM_OUTPUT_LABEL (file, name);
2172 #endif /* ASM_DECLARE_OBJECT_NAME */
/* Skip SIZE bytes; at least one even for zero-sized objects.  */
2173 ASM_OUTPUT_SKIP (file, size ? size : 1);
/* Set target-specific defaults that depend on the -O level.  LEVEL is
   the optimization level.  NOTE(review): the return type, braces and
   the conditions guarding some assignments are elided here.  */
2178 optimization_options (int level, int size ATTRIBUTE_UNUSED)
2180 /* For -O2 and beyond, turn off -fschedule-insns by default.  It tends to
2181 make the problem with not enough registers even worse. */
2182 #ifdef INSN_SCHEDULING
2184 flag_schedule_insns = 0;
2188 /* The Darwin libraries never set errno, so we might as well
2189 avoid calling them when that's the only reason we would. */
2190 flag_errno_math = 0;
2192 /* The default values of these switches depend on TARGET_64BIT,
2193 which is not known at this moment. Mark these values with 2 and
2194 let the user override them. In case there is no command line option
2195 specifying them, we will set the defaults in override_options. */
2197 flag_omit_frame_pointer = 2;
2198 flag_pcc_struct_return = 2;
2199 flag_asynchronous_unwind_tables = 2;
2200 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
2201 SUBTARGET_OPTIMIZATION_OPTIONS;
2205 /* Table of valid machine attributes. */
/* NOTE(review): the opening/closing braces of the initializer and the
   matching #endif lines for the #if/#ifdef below are elided in this
   listing.  */
2206 const struct attribute_spec ix86_attribute_table[] =
2208 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
2209 /* Stdcall attribute says callee is responsible for popping arguments
2210 if they are not variable. */
2211 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2212 /* Fastcall attribute says callee is responsible for popping arguments
2213 if they are not variable. */
2214 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2215 /* Cdecl attribute says the callee is a normal C declaration. */
2216 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2217 /* Regparm attribute specifies how many integer arguments are to be
2218 passed in registers. */
2219 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute },
2220 /* Sseregparm attribute says we are using x86_64 calling conventions
2221 for FP arguments. */
2222 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2223 /* force_align_arg_pointer says this function realigns the stack at entry. */
2224 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
2225 false, true, true, ix86_handle_cconv_attribute },
2226 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
2227 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
2228 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
2229 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
2231 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
2232 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
2233 #ifdef SUBTARGET_ATTRIBUTE_TABLE
2234 SUBTARGET_ATTRIBUTE_TABLE,
/* Sentinel entry terminating the table.  */
2236 { NULL, 0, 0, false, false, false, NULL }
2239 /* Decide whether we can make a sibling call to a function. DECL is the
2240 declaration of the function being targeted by the call and EXP is the
2241 CALL_EXPR representing the call. */
/* NOTE(review): the return type, local declarations (a, b, func, type),
   braces and most return statements are elided from this listing.  */
2244 ix86_function_ok_for_sibcall (tree decl, tree exp)
2249 /* If we are generating position-independent code, we cannot sibcall
2250 optimize any indirect call, or a direct call to a global function,
2251 as the PLT requires %ebx be live. */
2252 if (!TARGET_64BIT && flag_pic && (!decl || !targetm.binds_local_p (decl)))
/* Extract the callee's function type from the CALL_EXPR.  */
2259 func = TREE_TYPE (TREE_OPERAND (exp, 0));
2260 if (POINTER_TYPE_P (func))
2261 func = TREE_TYPE (func);
2264 /* Check that the return value locations are the same. Like
2265 if we are returning floats on the 80387 register stack, we cannot
2266 make a sibcall from a function that doesn't return a float to a
2267 function that does or, conversely, from a function that does return
2268 a float to a function that doesn't; the necessary stack adjustment
2269 would not be executed. This is also the place we notice
2270 differences in the return value ABI. Note that it is ok for one
2271 of the functions to have void return type as long as the return
2272 value of the other is passed in a register. */
2273 a = ix86_function_value (TREE_TYPE (exp), func, false);
2274 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
2276 if (STACK_REG_P (a) || STACK_REG_P (b))
2278 if (!rtx_equal_p (a, b))
2281 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
2283 else if (!rtx_equal_p (a, b))
2286 /* If this call is indirect, we'll need to be able to use a call-clobbered
2287 register for the address of the target function. Make sure that all
2288 such registers are not used for passing parameters. */
2289 if (!decl && !TARGET_64BIT)
2293 /* We're looking at the CALL_EXPR, we need the type of the function. */
2294 type = TREE_OPERAND (exp, 0); /* pointer expression */
2295 type = TREE_TYPE (type); /* pointer type */
2296 type = TREE_TYPE (type); /* function type */
2298 if (ix86_function_regparm (type, NULL) >= 3)
2300 /* ??? Need to count the actual number of registers to be used,
2301 not the possible number of registers. Fix later. */
2306 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
2307 /* Dllimport'd functions are also called indirectly. */
2308 if (decl && DECL_DLLIMPORT_P (decl)
2309 && ix86_function_regparm (TREE_TYPE (decl), NULL) >= 3)
2313 /* If we force-aligned the stack, then sibcalling would unalign the
2314 stack, which may break the called function. */
2315 if (cfun->machine->force_align_arg_pointer)
2318 /* Otherwise okay. That also includes certain types of indirect calls. */
2322 /* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm"
2323 calling convention attributes;
2324 arguments as in struct attribute_spec.handler. */
/* NOTE(review): the return type, the ARGS parameter, braces and the
   return statements (presumably NULL_TREE -- TODO confirm against the
   full source) are elided from this listing.  */
2327 ix86_handle_cconv_attribute (tree *node, tree name,
2329 int flags ATTRIBUTE_UNUSED,
/* These attributes apply only to function types (or DECLs wrapping
   them); warn and drop the attribute otherwise.  */
2332 if (TREE_CODE (*node) != FUNCTION_TYPE
2333 && TREE_CODE (*node) != METHOD_TYPE
2334 && TREE_CODE (*node) != FIELD_DECL
2335 && TREE_CODE (*node) != TYPE_DECL)
2337 warning (OPT_Wattributes, "%qs attribute only applies to functions",
2338 IDENTIFIER_POINTER (name));
2339 *no_add_attrs = true;
2343 /* Can combine regparm with all attributes but fastcall. */
2344 if (is_attribute_p ("regparm", name))
2348 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2350 error ("fastcall and regparm attributes are not compatible");
/* Validate the regparm count: it must be an integer constant no
   larger than REGPARM_MAX.  */
2353 cst = TREE_VALUE (args);
2354 if (TREE_CODE (cst) != INTEGER_CST)
2356 warning (OPT_Wattributes,
2357 "%qs attribute requires an integer constant argument",
2358 IDENTIFIER_POINTER (name));
2359 *no_add_attrs = true;
2361 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
2363 warning (OPT_Wattributes, "argument to %qs attribute larger than %d",
2364 IDENTIFIER_POINTER (name), REGPARM_MAX);
2365 *no_add_attrs = true;
/* Stack-realigning functions clobber a register in the prologue, so
   they support one fewer register parameter.  */
2369 && lookup_attribute (ix86_force_align_arg_pointer_string,
2370 TYPE_ATTRIBUTES (*node))
2371 && compare_tree_int (cst, REGPARM_MAX-1))
2373 error ("%s functions limited to %d register parameters",
2374 ix86_force_align_arg_pointer_string, REGPARM_MAX-1);
2382 warning (OPT_Wattributes, "%qs attribute ignored",
2383 IDENTIFIER_POINTER (name));
2384 *no_add_attrs = true;
2388 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
2389 if (is_attribute_p ("fastcall", name))
2391 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
2393 error ("fastcall and cdecl attributes are not compatible");
2395 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
2397 error ("fastcall and stdcall attributes are not compatible");
2399 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
2401 error ("fastcall and regparm attributes are not compatible");
2405 /* Can combine stdcall with fastcall (redundant), regparm and
2407 else if (is_attribute_p ("stdcall", name))
2409 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
2411 error ("stdcall and cdecl attributes are not compatible");
2413 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2415 error ("stdcall and fastcall attributes are not compatible");
2419 /* Can combine cdecl with regparm and sseregparm. */
2420 else if (is_attribute_p ("cdecl", name))
2422 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
2424 error ("stdcall and cdecl attributes are not compatible");
2426 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2428 error ("fastcall and cdecl attributes are not compatible");
2432 /* Can combine sseregparm with all attributes. */
2437 /* Return 0 if the attributes for two types are incompatible, 1 if they
2438 are compatible, and 2 if they are nearly compatible (which causes a
2439 warning to be generated). */
/* NOTE(review): the return type, braces and the "return" keywords of
   the early exits are elided from this listing.  */
2442 ix86_comp_type_attributes (tree type1, tree type2)
2444 /* Check for mismatch of non-default calling convention. */
2445 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
/* Non-function types have no calling-convention attributes to compare.  */
2447 if (TREE_CODE (type1) != FUNCTION_TYPE)
2450 /* Check for mismatched fastcall/regparm types. */
2451 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
2452 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
2453 || (ix86_function_regparm (type1, NULL)
2454 != ix86_function_regparm (type2, NULL)))
2457 /* Check for mismatched sseregparm types. */
2458 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
2459 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
2462 /* Check for mismatched return types (cdecl vs stdcall). */
2463 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
2464 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
2470 /* Return the regparm value for a function with the indicated TYPE and DECL.
2471 DECL may be NULL when calling function indirectly
2472 or considering a libcall. */
/* NOTE(review): the return type, local declarations (attr, the
   cgraph-local check), several braces and the final return are elided
   from this listing.  */
2475 ix86_function_regparm (tree type, tree decl)
2478 int regparm = ix86_regparm;
2479 bool user_convention = false;
/* An explicit regparm attribute overrides the -mregparm default.  */
2483 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type))
2486 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
2487 user_convention = true;
2490 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
2493 user_convention = true;
2496 /* Use register calling convention for local functions when possible. */
2497 if (!TARGET_64BIT && !user_convention && decl
2498 && flag_unit_at_a_time && !profile_flag)
2500 struct cgraph_local_info *i = cgraph_local_info (decl);
2503 int local_regparm, globals = 0, regno;
2505 /* Make sure no regparm register is taken by a global register
2507 for (local_regparm = 0; local_regparm < 3; local_regparm++)
2508 if (global_regs[local_regparm])
2510 /* We can't use regparm(3) for nested functions as these use
2511 static chain pointer in third argument. */
2512 if (local_regparm == 3
2513 && decl_function_context (decl)
2514 && !DECL_NO_STATIC_CHAIN (decl))
2516 /* If the function realigns its stack pointer, the
2517 prologue will clobber %ecx. If we've already
2518 generated code for the callee, the callee
2519 DECL_STRUCT_FUNCTION is gone, so we fall back to
2520 scanning the attributes for the self-realigning
2522 if ((DECL_STRUCT_FUNCTION (decl)
2523 && DECL_STRUCT_FUNCTION (decl)->machine->force_align_arg_pointer)
2524 || (!DECL_STRUCT_FUNCTION (decl)
2525 && lookup_attribute (ix86_force_align_arg_pointer_string,
2526 TYPE_ATTRIBUTES (TREE_TYPE (decl)))))
2528 /* Each global register variable increases register pressure,
2529 so the more global reg vars there are, the less useful the
2530 regparm optimization is, unless requested by the user explicitly. */
2531 for (regno = 0; regno < 6; regno++)
2532 if (global_regs[regno])
2535 = globals < local_regparm ? local_regparm - globals : 0;
2537 if (local_regparm > regparm)
2538 regparm = local_regparm;
2545 /* Return 1 or 2, if we can pass up to 8 SFmode (1) and DFmode (2) arguments
2546 in SSE registers for a function with the indicated TYPE and DECL.
2547 DECL may be NULL when calling function indirectly
2548 or considering a libcall. Otherwise return 0. */
/* NOTE(review): the return type, parts of the conditions, braces and
   the TARGET_64BIT / final return paths are elided from this
   listing.  */
2551 ix86_function_sseregparm (tree type, tree decl)
2553 /* Use SSE registers to pass SFmode and DFmode arguments if requested
2554 by the sseregparm attribute. */
2555 if (TARGET_SSEREGPARM
2557 && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
/* sseregparm without SSE support cannot be honored; report an error
   against the DECL when we have one, else against the TYPE.  */
2562 error ("Calling %qD with attribute sseregparm without "
2563 "SSE/SSE2 enabled", decl);
2565 error ("Calling %qT with attribute sseregparm without "
2566 "SSE/SSE2 enabled", type);
2573 /* For local functions, pass SFmode (and DFmode for SSE2) arguments
2574 in SSE registers even for 32-bit mode and not just 3, but up to
2575 8 SSE arguments in registers. */
2576 if (!TARGET_64BIT && decl
2577 && TARGET_SSE_MATH && flag_unit_at_a_time && !profile_flag)
2579 struct cgraph_local_info *i = cgraph_local_info (decl);
2581 return TARGET_SSE2 ? 2 : 1;
2587 /* Return true if EAX is live at the start of the function. Used by
2588 ix86_expand_prologue to determine if we need special help before
2589 calling allocate_stack_worker. */
/* NOTE(review): the return type line and braces are elided from this
   listing.  */
2592 ix86_eax_live_at_start_p (void)
2594 /* Cheat. Don't bother working forward from ix86_function_regparm
2595 to the function type to whether an actual argument is located in
2596 eax. Instead just look at cfg info, which is still close enough
2597 to correct at this point. This gives false positives for broken
2598 functions that might use uninitialized data that happens to be
2599 allocated in eax, but who cares? */
2600 return REGNO_REG_SET_P (ENTRY_BLOCK_PTR->il.rtl->global_live_at_end, 0);
2603 /* Value is the number of bytes of arguments automatically
2604 popped when returning from a subroutine call.
2605 FUNDECL is the declaration node of the function (as a tree),
2606 FUNTYPE is the data type of the function (as a tree),
2607 or for a library call it is an identifier node for the subroutine name.
2608 SIZE is the number of bytes of arguments passed on the stack.
2610 On the 80386, the RTD insn may be used to pop them if the number
2611 of args is fixed, but if the number is variable then the caller
2612 must pop them all. RTD can't be used for library calls now
2613 because the library is compiled with the Unix compiler.
2614 Use of RTD is a selectable option, since it is incompatible with
2615 standard Unix calling sequences. If the option is not selected,
2616 the caller must always pop the args.
2618 The attribute stdcall is equivalent to RTD on a per module basis. */
/* NOTE(review): the return type, several "return size;"/"return 0;"
   lines, braces and part of a comment are elided from this listing.  */
2621 ix86_return_pops_args (tree fundecl, tree funtype, int size)
2623 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
2625 /* Cdecl functions override -mrtd, and never pop the stack. */
2626 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
2628 /* Stdcall and fastcall functions will pop the stack if not
2630 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
2631 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
/* With -mrtd, pop the args only for fixed-argument prototypes (the
   last parameter entry is void_type_node, or no prototype).  */
2635 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
2636 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
2637 == void_type_node)))
2641 /* Lose any fake structure return argument if it is passed on the stack. */
2642 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
2644 && !KEEP_AGGREGATE_RETURN_POINTER)
2646 int nregs = ix86_function_regparm (funtype, fundecl);
/* The hidden aggregate-return pointer occupies one word on the stack.  */
2649 return GET_MODE_SIZE (Pmode);
2655 /* Argument support functions. */
2657 /* Return true when register may be used to pass function parameters. */
/* NOTE(review): the return type, the TARGET_64BIT branch condition,
   local declaration of i, return statements and braces are elided
   from this listing.  */
2659 ix86_function_arg_regno_p (int regno)
/* 32-bit case: integer regparm registers, MMX and SSE argument
   registers.  */
2663 return (regno < REGPARM_MAX
2664 || (TARGET_MMX && MMX_REGNO_P (regno)
2665 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
2666 || (TARGET_SSE && SSE_REGNO_P (regno)
2667 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
/* 64-bit case: SSE argument registers, RAX, and the integer parameter
   registers from the x86-64 ABI table.  */
2669 if (TARGET_SSE && SSE_REGNO_P (regno)
2670 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
2672 /* RAX is used as hidden argument to va_arg functions. */
2675 for (i = 0; i < REGPARM_MAX; i++)
2676 if (regno == x86_64_int_parameter_registers[i])
2681 /* Return nonzero if we do not know how to pass TYPE solely in registers. */
/* NOTE(review): the return type line, the "return true;" of the first
   check and braces are elided from this listing.  */
2684 ix86_must_pass_in_stack (enum machine_mode mode, tree type)
2686 if (must_pass_in_stack_var_size_or_pad (mode, type))
2689 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
2690 The layout_type routine is crafty and tries to trick us into passing
2691 currently unsupported vector types on the stack by using TImode. */
2692 return (!TARGET_64BIT && mode == TImode
2693 && type && TREE_CODE (type) != VECTOR_TYPE);
2696 /* Initialize a variable CUM of type CUMULATIVE_ARGS
2697 for a call to a function whose data type is FNTYPE.
2698 For a library call, FNTYPE is 0. */
/* NOTE(review): the return type, the fndecl parameter line, the
   "*cum = zero_cum;" initialization, several TARGET_64BIT guards,
   braces and else branches are elided from this listing.  */
2701 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
2702 tree fntype, /* tree ptr for function decl */
2703 rtx libname, /* SYMBOL_REF of library name or 0 */
2706 static CUMULATIVE_ARGS zero_cum;
2707 tree param, next_param;
/* Optional debug trace of the incoming function type / libname.  */
2709 if (TARGET_DEBUG_ARG)
2711 fprintf (stderr, "\ninit_cumulative_args (");
2713 fprintf (stderr, "fntype code = %s, ret code = %s",
2714 tree_code_name[(int) TREE_CODE (fntype)],
2715 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
2717 fprintf (stderr, "no fntype");
2720 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
2725 /* Set up the number of registers to use for passing arguments. */
2726 cum->nregs = ix86_regparm;
2728 cum->sse_nregs = SSE_REGPARM_MAX;
2730 cum->mmx_nregs = MMX_REGPARM_MAX;
2731 cum->warn_sse = true;
2732 cum->warn_mmx = true;
2733 cum->maybe_vaarg = false;
2735 /* Use ecx and edx registers if function has fastcall attribute,
2736 else look for regparm information. */
2737 if (fntype && !TARGET_64BIT)
2739 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
2745 cum->nregs = ix86_function_regparm (fntype, fndecl);
2748 /* Set up the number of SSE registers used for passing SFmode
2749 and DFmode arguments. Warn for mismatching ABI. */
2750 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl);
2752 /* Determine if this function has variable arguments. This is
2753 indicated by the last argument being 'void_type_node' if there
2754 are no variable arguments. If there are variable arguments, then
2755 we won't pass anything in registers in 32-bit mode. */
2757 if (cum->nregs || cum->mmx_nregs || cum->sse_nregs)
2759 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
2760 param != 0; param = next_param)
2762 next_param = TREE_CHAIN (param);
2763 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
2773 cum->float_in_sse = 0;
2775 cum->maybe_vaarg = true;
/* No prototype at all (or a libcall with no type info): assume the
   callee may be variadic.  */
2779 if ((!fntype && !libname)
2780 || (fntype && !TYPE_ARG_TYPES (fntype)))
2781 cum->maybe_vaarg = true;
2783 if (TARGET_DEBUG_ARG)
2784 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
2789 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
2790 But in the case of vector types, it is some vector mode.
2792 When we have only some of our vector isa extensions enabled, then there
2793 are some modes for which vector_mode_supported_p is false. For these
2794 modes, the generic vector support in gcc will choose some non-vector mode
2795 in order to implement the type. By computing the natural mode, we'll
2796 select the proper ABI location for the operand and not depend on whatever
2797 the middle-end decides to do with these vector types. */
2799 static enum machine_mode
2800 type_natural_mode (tree type)
2802 enum machine_mode mode = TYPE_MODE (type);
/* Only vector types whose chosen mode is not already a vector mode
   need fixing up.  */
2804 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
2806 HOST_WIDE_INT size = int_size_in_bytes (type);
/* Only 8- and 16-byte vectors are re-mapped here.  */
2807 if ((size == 8 || size == 16)
2808 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
2809 && TYPE_VECTOR_SUBPARTS (type) > 1)
2811 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
/* Start scanning from the smallest float or integer vector mode,
   depending on the element type.  */
2813 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
2814 mode = MIN_MODE_VECTOR_FLOAT;
2816 mode = MIN_MODE_VECTOR_INT;
2818 /* Get the mode which has this inner mode and number of units. */
2819 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
2820 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
2821 && GET_MODE_INNER (mode) == innermode)
/* NOTE(review): the matching-return and fallthrough-return lines are
   elided in this listing; code text kept byte-identical.  */
2831 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
2832 this may not agree with the mode that the type system has chosen for the
2833 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
2834 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
2837 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
/* Simple case: the type system already picked a usable register mode.  */
2842 if (orig_mode != BLKmode)
2843 tmp = gen_rtx_REG (orig_mode, regno);
/* Otherwise describe the value as a one-entry PARALLEL holding the
   natural-mode register at byte offset zero.
   NOTE(review): the else keyword, braces and final return are elided in
   this listing; code text kept byte-identical.  */
2846 tmp = gen_rtx_REG (mode, regno);
2847 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
2848 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
2854 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
2855 of this code is to classify each 8bytes of incoming argument by the register
2856 class and assign registers accordingly. */
2858 /* Return the union class of CLASS1 and CLASS2.
2859 See the x86-64 PS ABI for details. */
2861 static enum x86_64_reg_class
2862 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
2864 /* Rule #1: If both classes are equal, this is the resulting class. */
2865 if (class1 == class2)
2868 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
2870 if (class1 == X86_64_NO_CLASS)
2872 if (class2 == X86_64_NO_CLASS)
2875 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
2876 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
2877 return X86_64_MEMORY_CLASS;
2879 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
/* Special case first: INTEGERSI merged with a 4-byte SSE single still
   fits in 32 bits, so stay INTEGERSI.  */
2880 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
2881 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
2882 return X86_64_INTEGERSI_CLASS;
2883 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
2884 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
2885 return X86_64_INTEGER_CLASS;
2887 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
2889 if (class1 == X86_64_X87_CLASS
2890 || class1 == X86_64_X87UP_CLASS
2891 || class1 == X86_64_COMPLEX_X87_CLASS
2892 || class2 == X86_64_X87_CLASS
2893 || class2 == X86_64_X87UP_CLASS
2894 || class2 == X86_64_COMPLEX_X87_CLASS)
2895 return X86_64_MEMORY_CLASS;
2897 /* Rule #6: Otherwise class SSE is used. */
2898 return X86_64_SSE_CLASS;
2901 /* Classify the argument of type TYPE and mode MODE.
2902 CLASSES will be filled by the register class used to pass each word
2903 of the operand. The number of words is returned. In case the parameter
2904 should be passed in memory, 0 is returned. As a special case for zero
2905 sized containers, classes[0] will be NO_CLASS and 1 is returned.
2907 BIT_OFFSET is used internally for handling records and specifies offset
2908 of the offset in bits modulo 256 to avoid overflow cases.
2910 See the x86-64 PS ABI for details.
/* NOTE(review): this listing has many interior lines elided — several
   case labels, braces, else branches and returns are not visible.
   Code text below is kept byte-identical to the original.  */
2914 classify_argument (enum machine_mode mode, tree type,
2915 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
/* Size of the argument in bytes, and the number of 8-byte words it
   occupies once the sub-word bit offset is folded in.  */
2917 HOST_WIDE_INT bytes =
2918 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2919 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2921 /* Variable sized entities are always passed/returned in memory. */
2925 if (mode != VOIDmode
2926 && targetm.calls.must_pass_in_stack (mode, type))
2929 if (type && AGGREGATE_TYPE_P (type))
2933 enum x86_64_reg_class subclasses[MAX_CLASSES];
2935 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
/* Start with every word unclassified; field classes are merged in
   below.  */
2939 for (i = 0; i < words; i++)
2940 classes[i] = X86_64_NO_CLASS;
2942 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
2943 signalize memory class, so handle it as special case. */
2946 classes[0] = X86_64_NO_CLASS;
2950 /* Classify each field of record and merge classes. */
2951 switch (TREE_CODE (type))
2954 /* And now merge the fields of structure. */
2955 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2957 if (TREE_CODE (field) == FIELD_DECL)
/* Skip fields whose type failed to lay out.  */
2961 if (TREE_TYPE (field) == error_mark_node)
2964 /* Bitfields are always classified as integer. Handle them
2965 early, since later code would consider them to be
2966 misaligned integers. */
2967 if (DECL_BIT_FIELD (field))
/* Mark every 8-byte word the bitfield overlaps as INTEGER.  */
2969 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
2970 i < ((int_bit_position (field) + (bit_offset % 64))
2971 + tree_low_cst (DECL_SIZE (field), 0)
2974 merge_classes (X86_64_INTEGER_CLASS,
/* Non-bitfield: recursively classify the field, then merge its
   word classes into the record at the field's word position.  */
2979 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2980 TREE_TYPE (field), subclasses,
2981 (int_bit_position (field)
2982 + bit_offset) % 256);
2985 for (i = 0; i < num; i++)
2988 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
2990 merge_classes (subclasses[i], classes[i + pos]);
2998 /* Arrays are handled as small records. */
/* Classify one element, then replicate across all words.  */
3001 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
3002 TREE_TYPE (type), subclasses, bit_offset);
3006 /* The partial classes are now full classes. */
3007 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
3008 subclasses[0] = X86_64_SSE_CLASS;
3009 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
3010 subclasses[0] = X86_64_INTEGER_CLASS;
3012 for (i = 0; i < words; i++)
3013 classes[i] = subclasses[i % num];
3018 case QUAL_UNION_TYPE:
3019 /* Unions are similar to RECORD_TYPE but offset is always 0.
3021 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3023 if (TREE_CODE (field) == FIELD_DECL)
3027 if (TREE_TYPE (field) == error_mark_node)
3030 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3031 TREE_TYPE (field), subclasses,
/* Union members overlap, so merge each member's classes at word 0.  */
3035 for (i = 0; i < num; i++)
3036 classes[i] = merge_classes (subclasses[i], classes[i]);
3045 /* Final merger cleanup. */
3046 for (i = 0; i < words; i++)
3048 /* If one class is MEMORY, everything should be passed in
3050 if (classes[i] == X86_64_MEMORY_CLASS)
3053 /* The X86_64_SSEUP_CLASS should be always preceded by
3054 X86_64_SSE_CLASS. */
3055 if (classes[i] == X86_64_SSEUP_CLASS
3056 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
3057 classes[i] = X86_64_SSE_CLASS;
3059 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
3060 if (classes[i] == X86_64_X87UP_CLASS
3061 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
3062 classes[i] = X86_64_SSE_CLASS;
3067 /* Compute alignment needed. We align all types to natural boundaries with
3068 exception of XFmode that is aligned to 64bits. */
3069 if (mode != VOIDmode && mode != BLKmode)
3071 int mode_alignment = GET_MODE_BITSIZE (mode);
3074 mode_alignment = 128;
3075 else if (mode == XCmode)
3076 mode_alignment = 256;
3077 if (COMPLEX_MODE_P (mode))
3078 mode_alignment /= 2;
3079 /* Misaligned fields are always returned in memory. */
3080 if (bit_offset % mode_alignment)
3084 /* for V1xx modes, just use the base mode */
3085 if (VECTOR_MODE_P (mode)
3086 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
3087 mode = GET_MODE_INNER (mode);
3089 /* Classification of atomic types. */
/* NOTE(review): the switch statement and its case labels for the
   mode-by-mode classification below are elided in this listing; the
   surviving assignments select the word classes (SSE, integer, x87,
   complex) for each scalar/vector mode case.  */
3094 classes[0] = X86_64_SSE_CLASS;
3097 classes[0] = X86_64_SSE_CLASS;
3098 classes[1] = X86_64_SSEUP_CLASS;
/* Small integers: 32 bits or less that fit the low half of a word
   classify as INTEGERSI, otherwise INTEGER.  */
3107 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3108 classes[0] = X86_64_INTEGERSI_CLASS;
3110 classes[0] = X86_64_INTEGER_CLASS;
3114 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
/* A float aligned on an 8-byte boundary occupies only the low half
   of an SSE word (SSESF); otherwise use the full SSE class.  */
3119 if (!(bit_offset % 64))
3120 classes[0] = X86_64_SSESF_CLASS;
3122 classes[0] = X86_64_SSE_CLASS;
3125 classes[0] = X86_64_SSEDF_CLASS;
3128 classes[0] = X86_64_X87_CLASS;
3129 classes[1] = X86_64_X87UP_CLASS;
3132 classes[0] = X86_64_SSE_CLASS;
3133 classes[1] = X86_64_SSEUP_CLASS;
3136 classes[0] = X86_64_SSE_CLASS;
3139 classes[0] = X86_64_SSEDF_CLASS;
3140 classes[1] = X86_64_SSEDF_CLASS;
3143 classes[0] = X86_64_COMPLEX_X87_CLASS;
3146 /* This modes is larger than 16 bytes. */
3154 classes[0] = X86_64_SSE_CLASS;
3155 classes[1] = X86_64_SSEUP_CLASS;
3161 classes[0] = X86_64_SSE_CLASS;
3167 gcc_assert (VECTOR_MODE_P (mode));
3172 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
3174 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3175 classes[0] = X86_64_INTEGERSI_CLASS;
3177 classes[0] = X86_64_INTEGER_CLASS;
3178 classes[1] = X86_64_INTEGER_CLASS;
/* One word, or two when the value spans more than 8 bytes.  */
3179 return 1 + (bytes > 8);
3183 /* Examine the argument and return set number of register required in each
3184 class. Return 0 iff parameter should be passed in memory. */
3186 examine_argument (enum machine_mode mode, tree type, int in_return,
3187 int *int_nregs, int *sse_nregs)
3189 enum x86_64_reg_class class[MAX_CLASSES];
3190 int n = classify_argument (mode, type, class, 0);
/* Walk the classified words, counting GPRs and SSE registers needed.
   NOTE(review): the switch statement, the accumulating statements
   (e.g. (*int_nregs)++) and the early memory-return are elided in this
   listing; code text kept byte-identical.  */
3196 for (n--; n >= 0; n--)
3199 case X86_64_INTEGER_CLASS:
3200 case X86_64_INTEGERSI_CLASS:
3203 case X86_64_SSE_CLASS:
3204 case X86_64_SSESF_CLASS:
3205 case X86_64_SSEDF_CLASS:
3208 case X86_64_NO_CLASS:
3209 case X86_64_SSEUP_CLASS:
3211 case X86_64_X87_CLASS:
3212 case X86_64_X87UP_CLASS:
/* x87 values can only be used in return position, never passed in
   registers as arguments.  */
3216 case X86_64_COMPLEX_X87_CLASS:
3217 return in_return ? 2 : 0;
3218 case X86_64_MEMORY_CLASS:
3224 /* Construct container for the argument used by GCC interface. See
3225 FUNCTION_ARG for the detailed description. */
/* NOTE(review): this listing has interior lines elided (function return
   type, several braces, else branches, case labels and returns are not
   visible); code text kept byte-identical.  */
3228 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
3229 tree type, int in_return, int nintregs, int nsseregs,
3230 const int *intreg, int sse_regno)
3232 /* The following variables hold the static issued_error state. */
/* Statics: each ABI-mismatch diagnostic is issued at most once per
   compilation.  */
3233 static bool issued_sse_arg_error;
3234 static bool issued_sse_ret_error;
3235 static bool issued_x87_ret_error;
3237 enum machine_mode tmpmode;
3239 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3240 enum x86_64_reg_class class[MAX_CLASSES];
3244 int needed_sseregs, needed_intregs;
3245 rtx exp[MAX_CLASSES];
3248 n = classify_argument (mode, type, class, 0);
3249 if (TARGET_DEBUG_ARG)
3252 fprintf (stderr, "Memory class\n");
3255 fprintf (stderr, "Classes:");
3256 for (i = 0; i < n; i++)
3258 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
3260 fprintf (stderr, "\n");
/* Fall back to memory if classification failed or not enough registers
   remain for all the words.  */
3265 if (!examine_argument (mode, type, in_return, &needed_intregs,
3268 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
3271 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
3272 some less clueful developer tries to use floating-point anyway. */
3273 if (needed_sseregs && !TARGET_SSE)
3277 if (!issued_sse_ret_error)
3279 error ("SSE register return with SSE disabled");
3280 issued_sse_ret_error = true;
3283 else if (!issued_sse_arg_error)
3285 error ("SSE register argument with SSE disabled");
3286 issued_sse_arg_error = true;
3291 /* Likewise, error if the ABI requires us to return values in the
3292 x87 registers and the user specified -mno-80387. */
3293 if (!TARGET_80387 && in_return)
3294 for (i = 0; i < n; i++)
3295 if (class[i] == X86_64_X87_CLASS
3296 || class[i] == X86_64_X87UP_CLASS
3297 || class[i] == X86_64_COMPLEX_X87_CLASS)
3299 if (!issued_x87_ret_error)
3301 error ("x87 register return with x87 disabled");
3302 issued_x87_ret_error = true;
3307 /* First construct simple cases. Avoid SCmode, since we want to use
3308 single register to pass this type. */
3309 if (n == 1 && mode != SCmode)
3312 case X86_64_INTEGER_CLASS:
3313 case X86_64_INTEGERSI_CLASS:
3314 return gen_rtx_REG (mode, intreg[0]);
3315 case X86_64_SSE_CLASS:
3316 case X86_64_SSESF_CLASS:
3317 case X86_64_SSEDF_CLASS:
3318 return gen_reg_or_parallel (mode, orig_mode, SSE_REGNO (sse_regno));
3319 case X86_64_X87_CLASS:
3320 case X86_64_COMPLEX_X87_CLASS:
3321 return gen_rtx_REG (mode, FIRST_STACK_REG);
3322 case X86_64_NO_CLASS:
3323 /* Zero sized array, struct or class. */
/* Simple two-word cases that still fit a single hard register.  */
3328 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS
3330 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
3332 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
3333 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
3334 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
3335 && class[1] == X86_64_INTEGER_CLASS
3336 && (mode == CDImode || mode == TImode || mode == TFmode)
3337 && intreg[0] + 1 == intreg[1])
3338 return gen_rtx_REG (mode, intreg[0]);
3340 /* Otherwise figure out the entries of the PARALLEL. */
3341 for (i = 0; i < n; i++)
3345 case X86_64_NO_CLASS:
3347 case X86_64_INTEGER_CLASS:
3348 case X86_64_INTEGERSI_CLASS:
3349 /* Merge TImodes on aligned occasions here too. */
3350 if (i * 8 + 8 > bytes)
3351 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
3352 else if (class[i] == X86_64_INTEGERSI_CLASS)
3356 /* We've requested 24 bytes we don't have mode for. Use DImode. */
3357 if (tmpmode == BLKmode)
3359 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3360 gen_rtx_REG (tmpmode, *intreg),
3364 case X86_64_SSESF_CLASS:
3365 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3366 gen_rtx_REG (SFmode,
3367 SSE_REGNO (sse_regno)),
3371 case X86_64_SSEDF_CLASS:
3372 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3373 gen_rtx_REG (DFmode,
3374 SSE_REGNO (sse_regno)),
3378 case X86_64_SSE_CLASS:
/* A following SSEUP word means this entry covers 16 bytes.  */
3379 if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
3383 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3384 gen_rtx_REG (tmpmode,
3385 SSE_REGNO (sse_regno)),
3387 if (tmpmode == TImode)
3396 /* Empty aligned struct, union or class. */
/* Assemble the collected EXPR_LISTs into the final PARALLEL.  */
3400 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
3401 for (i = 0; i < nexps; i++)
3402 XVECEXP (ret, 0, i) = exp [i];
3406 /* Update the data in CUM to advance over an argument
3407 of mode MODE and data type TYPE.
3408 (TYPE is null for libcalls where that information may not be available.) */
/* NOTE(review): interior lines of this listing are elided (the 64-bit /
   32-bit branch structure and several mode cases are not fully
   visible); code text kept byte-identical.  */
3411 function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3412 tree type, int named)
3415 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3416 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3419 mode = type_natural_mode (type);
3421 if (TARGET_DEBUG_ARG)
3422 fprintf (stderr, "function_adv (sz=%d, wds=%2d, nregs=%d, ssenregs=%d, "
3423 "mode=%s, named=%d)\n\n",
3424 words, cum->words, cum->nregs, cum->sse_nregs,
3425 GET_MODE_NAME (mode), named);
/* x86-64 path: consume the GPR/SSE registers the argument needs, or
   advance the stack word count if it goes in memory.  */
3429 int int_nregs, sse_nregs;
3430 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
3431 cum->words += words;
3432 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
3434 cum->nregs -= int_nregs;
3435 cum->sse_nregs -= sse_nregs;
3436 cum->regno += int_nregs;
3437 cum->sse_regno += sse_nregs;
3440 cum->words += words;
/* 32-bit integer-register path: consume whole words of regparm
   registers.  */
3458 cum->words += words;
3459 cum->nregs -= words;
3460 cum->regno += words;
3462 if (cum->nregs <= 0)
3470 if (cum->float_in_sse < 2)
3473 if (cum->float_in_sse < 1)
/* 32-bit SSE-register path (non-aggregate vector/float args).  */
3484 if (!type || !AGGREGATE_TYPE_P (type))
3486 cum->sse_words += words;
3487 cum->sse_nregs -= 1;
3488 cum->sse_regno += 1;
3489 if (cum->sse_nregs <= 0)
/* 32-bit MMX-register path (non-aggregate MMX vector args).  */
3501 if (!type || !AGGREGATE_TYPE_P (type))
3503 cum->mmx_words += words;
3504 cum->mmx_nregs -= 1;
3505 cum->mmx_regno += 1;
3506 if (cum->mmx_nregs <= 0)
3517 /* Define where to put the arguments to a function.
3518 Value is zero to push the argument on the stack,
3519 or a hard register in which to store the argument.
3521 MODE is the argument's machine mode.
3522 TYPE is the data type of the argument (as a tree).
3523 This is null for libcalls where that information may
3525 CUM is a variable of type CUMULATIVE_ARGS which gives info about
3526 the preceding args and about the function being called.
3527 NAMED is nonzero if this argument is a named parameter
3528 (otherwise it is an extra parameter matching an ellipsis). */
3531 function_arg (CUMULATIVE_ARGS *cum, enum machine_mode orig_mode,
3532 tree type, int named)
3534 enum machine_mode mode = orig_mode;
3537 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode)