1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to
19 the Free Software Foundation, 51 Franklin Street, Fifth Floor,
20 Boston, MA 02110-1301, USA. */
24 #include "coretypes.h"
30 #include "hard-reg-set.h"
32 #include "insn-config.h"
33 #include "conditions.h"
35 #include "insn-codes.h"
36 #include "insn-attr.h"
44 #include "basic-block.h"
47 #include "target-def.h"
48 #include "langhooks.h"
50 #include "tree-gimple.h"
52 #include "tm-constrs.h"
54 #ifndef CHECK_STACK_LIMIT
55 #define CHECK_STACK_LIMIT (-1)
56 #endif
58 /* Return index of given mode in mult and division cost tables; indices
   0..3 are QI/HI/SI/DImode, 4 is the "other" entry of the 5-element arrays. */
59 #define MODE_INDEX(mode) \
60 ((mode) == QImode ? 0 \
61 : (mode) == HImode ? 1 \
62 : (mode) == SImode ? 2 \
63 : (mode) == DImode ? 3 \
64 : 4)
66 /* Processor costs (relative to an add) */
67 /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
68 #define COSTS_N_BYTES(N) ((N) * 2)
71 struct processor_costs size_cost = { /* costs for tuning for size */
72 COSTS_N_BYTES (2), /* cost of an add instruction */
73 COSTS_N_BYTES (3), /* cost of a lea instruction */
74 COSTS_N_BYTES (2), /* variable shift costs */
75 COSTS_N_BYTES (3), /* constant shift costs */
76 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
77 COSTS_N_BYTES (3), /* HI */
78 COSTS_N_BYTES (3), /* SI */
79 COSTS_N_BYTES (3), /* DI */
80 COSTS_N_BYTES (5)}, /* other */
81 0, /* cost of multiply per each bit set */
82 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
83 COSTS_N_BYTES (3), /* HI */
84 COSTS_N_BYTES (3), /* SI */
85 COSTS_N_BYTES (3), /* DI */
86 COSTS_N_BYTES (5)}, /* other */
87 COSTS_N_BYTES (3), /* cost of movsx */
88 COSTS_N_BYTES (3), /* cost of movzx */
91 2, /* cost for loading QImode using movzbl */
92 {2, 2, 2}, /* cost of loading integer registers
93 in QImode, HImode and SImode.
94 Relative to reg-reg move (2). */
95 {2, 2, 2}, /* cost of storing integer registers */
96 2, /* cost of reg,reg fld/fst */
97 {2, 2, 2}, /* cost of loading fp registers
98 in SFmode, DFmode and XFmode */
99 {2, 2, 2}, /* cost of storing fp registers
100 in SFmode, DFmode and XFmode */
101 3, /* cost of moving MMX register */
102 {3, 3}, /* cost of loading MMX registers
103 in SImode and DImode */
104 {3, 3}, /* cost of storing MMX registers
105 in SImode and DImode */
106 3, /* cost of moving SSE register */
107 {3, 3, 3}, /* cost of loading SSE registers
108 in SImode, DImode and TImode */
109 {3, 3, 3}, /* cost of storing SSE registers
110 in SImode, DImode and TImode */
111 3, /* MMX or SSE register to integer */
112 0, /* size of prefetch block */
113 0, /* number of parallel prefetches */
115 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
116 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
117 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
118 COSTS_N_BYTES (2), /* cost of FABS instruction. */
119 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
120 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
123 /* Processor costs (relative to an add) */
125 struct processor_costs i386_cost = { /* 386 specific costs */
126 COSTS_N_INSNS (1), /* cost of an add instruction */
127 COSTS_N_INSNS (1), /* cost of a lea instruction */
128 COSTS_N_INSNS (3), /* variable shift costs */
129 COSTS_N_INSNS (2), /* constant shift costs */
130 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
131 COSTS_N_INSNS (6), /* HI */
132 COSTS_N_INSNS (6), /* SI */
133 COSTS_N_INSNS (6), /* DI */
134 COSTS_N_INSNS (6)}, /* other */
135 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
136 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
137 COSTS_N_INSNS (23), /* HI */
138 COSTS_N_INSNS (23), /* SI */
139 COSTS_N_INSNS (23), /* DI */
140 COSTS_N_INSNS (23)}, /* other */
141 COSTS_N_INSNS (3), /* cost of movsx */
142 COSTS_N_INSNS (2), /* cost of movzx */
143 15, /* "large" insn */
145 4, /* cost for loading QImode using movzbl */
146 {2, 4, 2}, /* cost of loading integer registers
147 in QImode, HImode and SImode.
148 Relative to reg-reg move (2). */
149 {2, 4, 2}, /* cost of storing integer registers */
150 2, /* cost of reg,reg fld/fst */
151 {8, 8, 8}, /* cost of loading fp registers
152 in SFmode, DFmode and XFmode */
153 {8, 8, 8}, /* cost of storing fp registers
154 in SFmode, DFmode and XFmode */
155 2, /* cost of moving MMX register */
156 {4, 8}, /* cost of loading MMX registers
157 in SImode and DImode */
158 {4, 8}, /* cost of storing MMX registers
159 in SImode and DImode */
160 2, /* cost of moving SSE register */
161 {4, 8, 16}, /* cost of loading SSE registers
162 in SImode, DImode and TImode */
163 {4, 8, 16}, /* cost of storing SSE registers
164 in SImode, DImode and TImode */
165 3, /* MMX or SSE register to integer */
166 0, /* size of prefetch block */
167 0, /* number of parallel prefetches */
169 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
170 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
171 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
172 COSTS_N_INSNS (22), /* cost of FABS instruction. */
173 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
174 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
178 struct processor_costs i486_cost = { /* 486 specific costs */
179 COSTS_N_INSNS (1), /* cost of an add instruction */
180 COSTS_N_INSNS (1), /* cost of a lea instruction */
181 COSTS_N_INSNS (3), /* variable shift costs */
182 COSTS_N_INSNS (2), /* constant shift costs */
183 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
184 COSTS_N_INSNS (12), /* HI */
185 COSTS_N_INSNS (12), /* SI */
186 COSTS_N_INSNS (12), /* DI */
187 COSTS_N_INSNS (12)}, /* other */
188 1, /* cost of multiply per each bit set */
189 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
190 COSTS_N_INSNS (40), /* HI */
191 COSTS_N_INSNS (40), /* SI */
192 COSTS_N_INSNS (40), /* DI */
193 COSTS_N_INSNS (40)}, /* other */
194 COSTS_N_INSNS (3), /* cost of movsx */
195 COSTS_N_INSNS (2), /* cost of movzx */
196 15, /* "large" insn */
198 4, /* cost for loading QImode using movzbl */
199 {2, 4, 2}, /* cost of loading integer registers
200 in QImode, HImode and SImode.
201 Relative to reg-reg move (2). */
202 {2, 4, 2}, /* cost of storing integer registers */
203 2, /* cost of reg,reg fld/fst */
204 {8, 8, 8}, /* cost of loading fp registers
205 in SFmode, DFmode and XFmode */
206 {8, 8, 8}, /* cost of storing fp registers
207 in SFmode, DFmode and XFmode */
208 2, /* cost of moving MMX register */
209 {4, 8}, /* cost of loading MMX registers
210 in SImode and DImode */
211 {4, 8}, /* cost of storing MMX registers
212 in SImode and DImode */
213 2, /* cost of moving SSE register */
214 {4, 8, 16}, /* cost of loading SSE registers
215 in SImode, DImode and TImode */
216 {4, 8, 16}, /* cost of storing SSE registers
217 in SImode, DImode and TImode */
218 3, /* MMX or SSE register to integer */
219 0, /* size of prefetch block */
220 0, /* number of parallel prefetches */
222 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
223 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
224 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
225 COSTS_N_INSNS (3), /* cost of FABS instruction. */
226 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
227 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
231 struct processor_costs pentium_cost = {
232 COSTS_N_INSNS (1), /* cost of an add instruction */
233 COSTS_N_INSNS (1), /* cost of a lea instruction */
234 COSTS_N_INSNS (4), /* variable shift costs */
235 COSTS_N_INSNS (1), /* constant shift costs */
236 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
237 COSTS_N_INSNS (11), /* HI */
238 COSTS_N_INSNS (11), /* SI */
239 COSTS_N_INSNS (11), /* DI */
240 COSTS_N_INSNS (11)}, /* other */
241 0, /* cost of multiply per each bit set */
242 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
243 COSTS_N_INSNS (25), /* HI */
244 COSTS_N_INSNS (25), /* SI */
245 COSTS_N_INSNS (25), /* DI */
246 COSTS_N_INSNS (25)}, /* other */
247 COSTS_N_INSNS (3), /* cost of movsx */
248 COSTS_N_INSNS (2), /* cost of movzx */
249 8, /* "large" insn */
251 6, /* cost for loading QImode using movzbl */
252 {2, 4, 2}, /* cost of loading integer registers
253 in QImode, HImode and SImode.
254 Relative to reg-reg move (2). */
255 {2, 4, 2}, /* cost of storing integer registers */
256 2, /* cost of reg,reg fld/fst */
257 {2, 2, 6}, /* cost of loading fp registers
258 in SFmode, DFmode and XFmode */
259 {4, 4, 6}, /* cost of storing fp registers
260 in SFmode, DFmode and XFmode */
261 8, /* cost of moving MMX register */
262 {8, 8}, /* cost of loading MMX registers
263 in SImode and DImode */
264 {8, 8}, /* cost of storing MMX registers
265 in SImode and DImode */
266 2, /* cost of moving SSE register */
267 {4, 8, 16}, /* cost of loading SSE registers
268 in SImode, DImode and TImode */
269 {4, 8, 16}, /* cost of storing SSE registers
270 in SImode, DImode and TImode */
271 3, /* MMX or SSE register to integer */
272 0, /* size of prefetch block */
273 0, /* number of parallel prefetches */
275 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
276 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
277 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
278 COSTS_N_INSNS (1), /* cost of FABS instruction. */
279 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
280 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
284 struct processor_costs pentiumpro_cost = {
285 COSTS_N_INSNS (1), /* cost of an add instruction */
286 COSTS_N_INSNS (1), /* cost of a lea instruction */
287 COSTS_N_INSNS (1), /* variable shift costs */
288 COSTS_N_INSNS (1), /* constant shift costs */
289 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
290 COSTS_N_INSNS (4), /* HI */
291 COSTS_N_INSNS (4), /* SI */
292 COSTS_N_INSNS (4), /* DI */
293 COSTS_N_INSNS (4)}, /* other */
294 0, /* cost of multiply per each bit set */
295 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
296 COSTS_N_INSNS (17), /* HI */
297 COSTS_N_INSNS (17), /* SI */
298 COSTS_N_INSNS (17), /* DI */
299 COSTS_N_INSNS (17)}, /* other */
300 COSTS_N_INSNS (1), /* cost of movsx */
301 COSTS_N_INSNS (1), /* cost of movzx */
302 8, /* "large" insn */
304 2, /* cost for loading QImode using movzbl */
305 {4, 4, 4}, /* cost of loading integer registers
306 in QImode, HImode and SImode.
307 Relative to reg-reg move (2). */
308 {2, 2, 2}, /* cost of storing integer registers */
309 2, /* cost of reg,reg fld/fst */
310 {2, 2, 6}, /* cost of loading fp registers
311 in SFmode, DFmode and XFmode */
312 {4, 4, 6}, /* cost of storing fp registers
313 in SFmode, DFmode and XFmode */
314 2, /* cost of moving MMX register */
315 {2, 2}, /* cost of loading MMX registers
316 in SImode and DImode */
317 {2, 2}, /* cost of storing MMX registers
318 in SImode and DImode */
319 2, /* cost of moving SSE register */
320 {2, 2, 8}, /* cost of loading SSE registers
321 in SImode, DImode and TImode */
322 {2, 2, 8}, /* cost of storing SSE registers
323 in SImode, DImode and TImode */
324 3, /* MMX or SSE register to integer */
325 32, /* size of prefetch block */
326 6, /* number of parallel prefetches */
328 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
329 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
330 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
331 COSTS_N_INSNS (2), /* cost of FABS instruction. */
332 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
333 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
337 struct processor_costs k6_cost = {
338 COSTS_N_INSNS (1), /* cost of an add instruction */
339 COSTS_N_INSNS (2), /* cost of a lea instruction */
340 COSTS_N_INSNS (1), /* variable shift costs */
341 COSTS_N_INSNS (1), /* constant shift costs */
342 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
343 COSTS_N_INSNS (3), /* HI */
344 COSTS_N_INSNS (3), /* SI */
345 COSTS_N_INSNS (3), /* DI */
346 COSTS_N_INSNS (3)}, /* other */
347 0, /* cost of multiply per each bit set */
348 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
349 COSTS_N_INSNS (18), /* HI */
350 COSTS_N_INSNS (18), /* SI */
351 COSTS_N_INSNS (18), /* DI */
352 COSTS_N_INSNS (18)}, /* other */
353 COSTS_N_INSNS (2), /* cost of movsx */
354 COSTS_N_INSNS (2), /* cost of movzx */
355 8, /* "large" insn */
357 3, /* cost for loading QImode using movzbl */
358 {4, 5, 4}, /* cost of loading integer registers
359 in QImode, HImode and SImode.
360 Relative to reg-reg move (2). */
361 {2, 3, 2}, /* cost of storing integer registers */
362 4, /* cost of reg,reg fld/fst */
363 {6, 6, 6}, /* cost of loading fp registers
364 in SFmode, DFmode and XFmode */
365 {4, 4, 4}, /* cost of storing fp registers
366 in SFmode, DFmode and XFmode */
367 2, /* cost of moving MMX register */
368 {2, 2}, /* cost of loading MMX registers
369 in SImode and DImode */
370 {2, 2}, /* cost of storing MMX registers
371 in SImode and DImode */
372 2, /* cost of moving SSE register */
373 {2, 2, 8}, /* cost of loading SSE registers
374 in SImode, DImode and TImode */
375 {2, 2, 8}, /* cost of storing SSE registers
376 in SImode, DImode and TImode */
377 6, /* MMX or SSE register to integer */
378 32, /* size of prefetch block */
379 1, /* number of parallel prefetches */
381 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
382 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
383 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
384 COSTS_N_INSNS (2), /* cost of FABS instruction. */
385 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
386 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
390 struct processor_costs athlon_cost = {
391 COSTS_N_INSNS (1), /* cost of an add instruction */
392 COSTS_N_INSNS (2), /* cost of a lea instruction */
393 COSTS_N_INSNS (1), /* variable shift costs */
394 COSTS_N_INSNS (1), /* constant shift costs */
395 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
396 COSTS_N_INSNS (5), /* HI */
397 COSTS_N_INSNS (5), /* SI */
398 COSTS_N_INSNS (5), /* DI */
399 COSTS_N_INSNS (5)}, /* other */
400 0, /* cost of multiply per each bit set */
401 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
402 COSTS_N_INSNS (26), /* HI */
403 COSTS_N_INSNS (42), /* SI */
404 COSTS_N_INSNS (74), /* DI */
405 COSTS_N_INSNS (74)}, /* other */
406 COSTS_N_INSNS (1), /* cost of movsx */
407 COSTS_N_INSNS (1), /* cost of movzx */
408 8, /* "large" insn */
410 4, /* cost for loading QImode using movzbl */
411 {3, 4, 3}, /* cost of loading integer registers
412 in QImode, HImode and SImode.
413 Relative to reg-reg move (2). */
414 {3, 4, 3}, /* cost of storing integer registers */
415 4, /* cost of reg,reg fld/fst */
416 {4, 4, 12}, /* cost of loading fp registers
417 in SFmode, DFmode and XFmode */
418 {6, 6, 8}, /* cost of storing fp registers
419 in SFmode, DFmode and XFmode */
420 2, /* cost of moving MMX register */
421 {4, 4}, /* cost of loading MMX registers
422 in SImode and DImode */
423 {4, 4}, /* cost of storing MMX registers
424 in SImode and DImode */
425 2, /* cost of moving SSE register */
426 {4, 4, 6}, /* cost of loading SSE registers
427 in SImode, DImode and TImode */
428 {4, 4, 5}, /* cost of storing SSE registers
429 in SImode, DImode and TImode */
430 5, /* MMX or SSE register to integer */
431 64, /* size of prefetch block */
432 6, /* number of parallel prefetches */
434 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
435 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
436 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
437 COSTS_N_INSNS (2), /* cost of FABS instruction. */
438 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
439 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
443 struct processor_costs k8_cost = {
444 COSTS_N_INSNS (1), /* cost of an add instruction */
445 COSTS_N_INSNS (2), /* cost of a lea instruction */
446 COSTS_N_INSNS (1), /* variable shift costs */
447 COSTS_N_INSNS (1), /* constant shift costs */
448 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
449 COSTS_N_INSNS (4), /* HI */
450 COSTS_N_INSNS (3), /* SI */
451 COSTS_N_INSNS (4), /* DI */
452 COSTS_N_INSNS (5)}, /* other */
453 0, /* cost of multiply per each bit set */
454 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
455 COSTS_N_INSNS (26), /* HI */
456 COSTS_N_INSNS (42), /* SI */
457 COSTS_N_INSNS (74), /* DI */
458 COSTS_N_INSNS (74)}, /* other */
459 COSTS_N_INSNS (1), /* cost of movsx */
460 COSTS_N_INSNS (1), /* cost of movzx */
461 8, /* "large" insn */
463 4, /* cost for loading QImode using movzbl */
464 {3, 4, 3}, /* cost of loading integer registers
465 in QImode, HImode and SImode.
466 Relative to reg-reg move (2). */
467 {3, 4, 3}, /* cost of storing integer registers */
468 4, /* cost of reg,reg fld/fst */
469 {4, 4, 12}, /* cost of loading fp registers
470 in SFmode, DFmode and XFmode */
471 {6, 6, 8}, /* cost of storing fp registers
472 in SFmode, DFmode and XFmode */
473 2, /* cost of moving MMX register */
474 {3, 3}, /* cost of loading MMX registers
475 in SImode and DImode */
476 {4, 4}, /* cost of storing MMX registers
477 in SImode and DImode */
478 2, /* cost of moving SSE register */
479 {4, 3, 6}, /* cost of loading SSE registers
480 in SImode, DImode and TImode */
481 {4, 4, 5}, /* cost of storing SSE registers
482 in SImode, DImode and TImode */
483 5, /* MMX or SSE register to integer */
484 64, /* size of prefetch block */
485 6, /* number of parallel prefetches */
487 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
488 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
489 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
490 COSTS_N_INSNS (2), /* cost of FABS instruction. */
491 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
492 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
496 struct processor_costs pentium4_cost = {
497 COSTS_N_INSNS (1), /* cost of an add instruction */
498 COSTS_N_INSNS (3), /* cost of a lea instruction */
499 COSTS_N_INSNS (4), /* variable shift costs */
500 COSTS_N_INSNS (4), /* constant shift costs */
501 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
502 COSTS_N_INSNS (15), /* HI */
503 COSTS_N_INSNS (15), /* SI */
504 COSTS_N_INSNS (15), /* DI */
505 COSTS_N_INSNS (15)}, /* other */
506 0, /* cost of multiply per each bit set */
507 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
508 COSTS_N_INSNS (56), /* HI */
509 COSTS_N_INSNS (56), /* SI */
510 COSTS_N_INSNS (56), /* DI */
511 COSTS_N_INSNS (56)}, /* other */
512 COSTS_N_INSNS (1), /* cost of movsx */
513 COSTS_N_INSNS (1), /* cost of movzx */
514 16, /* "large" insn */
516 2, /* cost for loading QImode using movzbl */
517 {4, 5, 4}, /* cost of loading integer registers
518 in QImode, HImode and SImode.
519 Relative to reg-reg move (2). */
520 {2, 3, 2}, /* cost of storing integer registers */
521 2, /* cost of reg,reg fld/fst */
522 {2, 2, 6}, /* cost of loading fp registers
523 in SFmode, DFmode and XFmode */
524 {4, 4, 6}, /* cost of storing fp registers
525 in SFmode, DFmode and XFmode */
526 2, /* cost of moving MMX register */
527 {2, 2}, /* cost of loading MMX registers
528 in SImode and DImode */
529 {2, 2}, /* cost of storing MMX registers
530 in SImode and DImode */
531 12, /* cost of moving SSE register */
532 {12, 12, 12}, /* cost of loading SSE registers
533 in SImode, DImode and TImode */
534 {2, 2, 8}, /* cost of storing SSE registers
535 in SImode, DImode and TImode */
536 10, /* MMX or SSE register to integer */
537 64, /* size of prefetch block */
538 6, /* number of parallel prefetches */
540 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
541 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
542 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
543 COSTS_N_INSNS (2), /* cost of FABS instruction. */
544 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
545 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
549 struct processor_costs nocona_cost = {
550 COSTS_N_INSNS (1), /* cost of an add instruction */
551 COSTS_N_INSNS (1), /* cost of a lea instruction */
552 COSTS_N_INSNS (1), /* variable shift costs */
553 COSTS_N_INSNS (1), /* constant shift costs */
554 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
555 COSTS_N_INSNS (10), /* HI */
556 COSTS_N_INSNS (10), /* SI */
557 COSTS_N_INSNS (10), /* DI */
558 COSTS_N_INSNS (10)}, /* other */
559 0, /* cost of multiply per each bit set */
560 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
561 COSTS_N_INSNS (66), /* HI */
562 COSTS_N_INSNS (66), /* SI */
563 COSTS_N_INSNS (66), /* DI */
564 COSTS_N_INSNS (66)}, /* other */
565 COSTS_N_INSNS (1), /* cost of movsx */
566 COSTS_N_INSNS (1), /* cost of movzx */
567 16, /* "large" insn */
569 4, /* cost for loading QImode using movzbl */
570 {4, 4, 4}, /* cost of loading integer registers
571 in QImode, HImode and SImode.
572 Relative to reg-reg move (2). */
573 {4, 4, 4}, /* cost of storing integer registers */
574 3, /* cost of reg,reg fld/fst */
575 {12, 12, 12}, /* cost of loading fp registers
576 in SFmode, DFmode and XFmode */
577 {4, 4, 4}, /* cost of storing fp registers
578 in SFmode, DFmode and XFmode */
579 6, /* cost of moving MMX register */
580 {12, 12}, /* cost of loading MMX registers
581 in SImode and DImode */
582 {12, 12}, /* cost of storing MMX registers
583 in SImode and DImode */
584 6, /* cost of moving SSE register */
585 {12, 12, 12}, /* cost of loading SSE registers
586 in SImode, DImode and TImode */
587 {12, 12, 12}, /* cost of storing SSE registers
588 in SImode, DImode and TImode */
589 8, /* MMX or SSE register to integer */
590 128, /* size of prefetch block */
591 8, /* number of parallel prefetches */
593 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
594 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
595 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
596 COSTS_N_INSNS (3), /* cost of FABS instruction. */
597 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
598 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
601 /* Generic64 should produce code tuned for Nocona and K8. */
603 struct processor_costs generic64_cost = {
604 COSTS_N_INSNS (1), /* cost of an add instruction */
605 /* On all chips taken into consideration lea is 2 cycles and more. With
606 this cost however our current implementation of synth_mult results in
607 use of unnecessary temporary registers causing regression on several
608 SPECfp benchmarks. */
609 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
610 COSTS_N_INSNS (1), /* variable shift costs */
611 COSTS_N_INSNS (1), /* constant shift costs */
612 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
613 COSTS_N_INSNS (4), /* HI */
614 COSTS_N_INSNS (3), /* SI */
615 COSTS_N_INSNS (4), /* DI */
616 COSTS_N_INSNS (2)}, /* other */
617 0, /* cost of multiply per each bit set */
618 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
619 COSTS_N_INSNS (26), /* HI */
620 COSTS_N_INSNS (42), /* SI */
621 COSTS_N_INSNS (74), /* DI */
622 COSTS_N_INSNS (74)}, /* other */
623 COSTS_N_INSNS (1), /* cost of movsx */
624 COSTS_N_INSNS (1), /* cost of movzx */
625 8, /* "large" insn */
627 4, /* cost for loading QImode using movzbl */
628 {4, 4, 4}, /* cost of loading integer registers
629 in QImode, HImode and SImode.
630 Relative to reg-reg move (2). */
631 {4, 4, 4}, /* cost of storing integer registers */
632 4, /* cost of reg,reg fld/fst */
633 {12, 12, 12}, /* cost of loading fp registers
634 in SFmode, DFmode and XFmode */
635 {6, 6, 8}, /* cost of storing fp registers
636 in SFmode, DFmode and XFmode */
637 2, /* cost of moving MMX register */
638 {8, 8}, /* cost of loading MMX registers
639 in SImode and DImode */
640 {8, 8}, /* cost of storing MMX registers
641 in SImode and DImode */
642 2, /* cost of moving SSE register */
643 {8, 8, 8}, /* cost of loading SSE registers
644 in SImode, DImode and TImode */
645 {8, 8, 8}, /* cost of storing SSE registers
646 in SImode, DImode and TImode */
647 5, /* MMX or SSE register to integer */
648 64, /* size of prefetch block */
649 6, /* number of parallel prefetches */
650 /* Benchmarks show large regressions on the K8 sixtrack benchmark when this value
651 is increased to a perhaps more appropriate value of 5. */
653 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
654 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
655 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
656 COSTS_N_INSNS (8), /* cost of FABS instruction. */
657 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
658 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
661 /* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona and K8. */
663 struct processor_costs generic32_cost = {
664 COSTS_N_INSNS (1), /* cost of an add instruction */
665 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
666 COSTS_N_INSNS (1), /* variable shift costs */
667 COSTS_N_INSNS (1), /* constant shift costs */
668 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
669 COSTS_N_INSNS (4), /* HI */
670 COSTS_N_INSNS (3), /* SI */
671 COSTS_N_INSNS (4), /* DI */
672 COSTS_N_INSNS (2)}, /* other */
673 0, /* cost of multiply per each bit set */
674 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
675 COSTS_N_INSNS (26), /* HI */
676 COSTS_N_INSNS (42), /* SI */
677 COSTS_N_INSNS (74), /* DI */
678 COSTS_N_INSNS (74)}, /* other */
679 COSTS_N_INSNS (1), /* cost of movsx */
680 COSTS_N_INSNS (1), /* cost of movzx */
681 8, /* "large" insn */
683 4, /* cost for loading QImode using movzbl */
684 {4, 4, 4}, /* cost of loading integer registers
685 in QImode, HImode and SImode.
686 Relative to reg-reg move (2). */
687 {4, 4, 4}, /* cost of storing integer registers */
688 4, /* cost of reg,reg fld/fst */
689 {12, 12, 12}, /* cost of loading fp registers
690 in SFmode, DFmode and XFmode */
691 {6, 6, 8}, /* cost of storing fp registers
692 in SFmode, DFmode and XFmode */
693 2, /* cost of moving MMX register */
694 {8, 8}, /* cost of loading MMX registers
695 in SImode and DImode */
696 {8, 8}, /* cost of storing MMX registers
697 in SImode and DImode */
698 2, /* cost of moving SSE register */
699 {8, 8, 8}, /* cost of loading SSE registers
700 in SImode, DImode and TImode */
701 {8, 8, 8}, /* cost of storing SSE registers
702 in SImode, DImode and TImode */
703 5, /* MMX or SSE register to integer */
704 64, /* size of prefetch block */
705 6, /* number of parallel prefetches */
707 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
708 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
709 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
710 COSTS_N_INSNS (8), /* cost of FABS instruction. */
711 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
712 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
715 const struct processor_costs *ix86_cost = &pentium_cost;
717 /* Processor feature/optimization bitmasks. */
718 #define m_386 (1<<PROCESSOR_I386)
719 #define m_486 (1<<PROCESSOR_I486)
720 #define m_PENT (1<<PROCESSOR_PENTIUM)
721 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
722 #define m_K6 (1<<PROCESSOR_K6)
723 #define m_ATHLON (1<<PROCESSOR_ATHLON)
724 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
725 #define m_K8 (1<<PROCESSOR_K8)
726 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
727 #define m_NOCONA (1<<PROCESSOR_NOCONA)
728 #define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
729 #define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
730 #define m_GENERIC (m_GENERIC32 | m_GENERIC64)
732 /* Generic instruction choice should be common subset of supported CPUs
733 (PPro/PENT4/NOCONA/Athlon/K8). */
735 /* Leave is not affecting Nocona SPEC2000 results negatively, so enabling for
736 Generic64 seems like good code size tradeoff. We can't enable it for 32bit
737 generic because it is not working well with PPro base chips. */
738 const int x86_use_leave = m_386 | m_K6 | m_ATHLON_K8 | m_GENERIC64;
739 const int x86_push_memory = m_386 | m_K6 | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
740 const int x86_zero_extend_with_and = m_486 | m_PENT;
741 const int x86_movx = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA | m_GENERIC /* m_386 | m_K6 */;
742 const int x86_double_with_add = ~m_386;
743 const int x86_use_bit_test = m_386;
744 const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8 | m_K6 | m_GENERIC;
745 const int x86_cmove = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
746 const int x86_fisttp = m_NOCONA;
747 const int x86_3dnow_a = m_ATHLON_K8;
748 const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
749 /* Branch hints were put in P4 based on simulation result. But
750 after P4 was made, no performance benefit was observed with
751 branch hints. It also increases the code size. As the result,
752 icc never generates branch hints. */
753 const int x86_branch_hints = 0;
754 const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4 | m_NOCONA | m_GENERIC32; /*m_GENERIC | m_ATHLON_K8 ? */
755 /* We probably ought to watch for partial register stalls on Generic32
756 compilation setting as well. However in current implementation the
757 partial register stalls are not eliminated very well - they can
758 be introduced via subregs synthesized by combine and can happen
759 in caller/callee saving sequences.
760 Because this option pays back little on PPro based chips and is in conflict
761 with partial reg. dependencies used by Athlon/P4 based chips, it is better
762 to leave it off for generic32 for now. */
/* Per-CPU tuning flags: each is a bitmask of m_* processor masks selecting
   the CPUs on which the corresponding feature or workaround applies.  */
763 const int x86_partial_reg_stall = m_PPRO;
764 const int x86_partial_flag_reg_stall = m_GENERIC;
765 const int x86_use_himode_fiop = m_386 | m_486 | m_K6;
766 const int x86_use_simode_fiop = ~(m_PPRO | m_ATHLON_K8 | m_PENT | m_GENERIC);
767 const int x86_use_mov0 = m_K6;
768 const int x86_use_cltd = ~(m_PENT | m_K6 | m_GENERIC);
769 const int x86_read_modify_write = ~m_PENT;
770 const int x86_read_modify = ~(m_PENT | m_PPRO);
771 const int x86_split_long_moves = m_PPRO;
772 const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON_K8 | m_GENERIC; /* m_PENT4 ? */
773 const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
774 const int x86_single_stringop = m_386 | m_PENT4 | m_NOCONA;
775 const int x86_qimode_math = ~(0);
776 const int x86_promote_qi_regs = 0;
777 /* On PPro this flag is meant to avoid partial register stalls. Just like
778 the x86_partial_reg_stall this option might be considered for Generic32
779 if our scheme for avoiding partial stalls was more effective. */
780 const int x86_himode_math = ~(m_PPRO);
781 const int x86_promote_hi_regs = m_PPRO;
782 const int x86_sub_esp_4 = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA | m_GENERIC;
783 const int x86_sub_esp_8 = m_ATHLON_K8 | m_PPRO | m_386 | m_486 | m_PENT4 | m_NOCONA | m_GENERIC;
784 const int x86_add_esp_4 = m_ATHLON_K8 | m_K6 | m_PENT4 | m_NOCONA | m_GENERIC;
785 const int x86_add_esp_8 = m_ATHLON_K8 | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4 | m_NOCONA | m_GENERIC;
786 const int x86_integer_DFmode_moves = ~(m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_GENERIC);
787 const int x86_partial_reg_dependency = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
788 const int x86_memory_mismatch_stall = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
789 const int x86_accumulate_outgoing_args = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_GENERIC;
790 const int x86_prologue_using_move = m_ATHLON_K8 | m_PPRO | m_GENERIC;
791 const int x86_epilogue_using_move = m_ATHLON_K8 | m_PPRO | m_GENERIC;
792 const int x86_shift1 = ~m_486;
793 const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
794 /* In Generic model we have a conflict here in between PPro/Pentium4 based chips
795 that treat 128bit SSE registers as single units versus K8 based chips that
796 divide SSE registers to two 64bit halves.
797 x86_sse_partial_reg_dependency promotes all store destinations to be 128bit
798 to allow register renaming on 128bit SSE units, but usually results in one
799 extra microop on 64bit SSE units. Experimental results show that disabling
800 this option on P4 brings over 20% SPECfp regression, while enabling it on
801 K8 brings roughly 2.4% regression that can be partly masked by careful scheduling.  */
803 const int x86_sse_partial_reg_dependency = m_PENT4 | m_NOCONA | m_PPRO | m_GENERIC;
804 /* Set for machines where the type and dependencies are resolved on SSE
805 register parts instead of whole registers, so we may maintain just
806 lower part of scalar values in proper format leaving the upper part undefined.  */
808 const int x86_sse_split_regs = m_ATHLON_K8;
809 const int x86_sse_typeless_stores = m_ATHLON_K8;
810 const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4 | m_NOCONA;
811 const int x86_use_ffreep = m_ATHLON_K8;
812 const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_K6;
813 const int x86_use_incdec = ~(m_PENT4 | m_NOCONA | m_GENERIC);
815 /* ??? Allowing interunit moves makes it all too easy for the compiler to put
816 integer data in xmm registers. Which results in pretty abysmal code. */
817 const int x86_inter_unit_moves = 0 /* ~(m_ATHLON_K8) */;
819 const int x86_ext_80387_constants = m_K6 | m_ATHLON | m_PENT4 | m_NOCONA | m_PPRO | m_GENERIC32;
820 /* Some CPU cores are not able to predict more than 4 branch instructions in
821 the 16 byte window. */
822 const int x86_four_jump_limit = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
823 const int x86_schedule = m_PPRO | m_ATHLON_K8 | m_K6 | m_PENT | m_GENERIC;
824 const int x86_use_bt = m_ATHLON_K8;
825 /* Compare and exchange was added for 80486. */
826 const int x86_cmpxchg = ~m_386;
827 /* Compare and exchange 8 bytes was added for pentium. */
828 const int x86_cmpxchg8b = ~(m_386 | m_486);
829 /* Compare and exchange 16 bytes was added for nocona. */
830 const int x86_cmpxchg16b = m_NOCONA;
831 /* Exchange and add was added for 80486. */
832 const int x86_xadd = ~m_386;
833 const int x86_pad_returns = m_ATHLON_K8 | m_GENERIC;
835 /* In case the average insn count for single function invocation is
836 lower than this constant, emit fast (but longer) prologue and epilogue code.  */
838 #define FAST_PROLOGUE_INSN_COUNT 20
840 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
841 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
842 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
843 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
845 /* Array of the smallest class containing reg number REGNO, indexed by
846 REGNO. Used by REGNO_REG_CLASS in i386.h. */
848 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
/* ax, dx, cx, bx */
851 AREG, DREG, CREG, BREG,
/* si, di, bp, sp */
853 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
/* FP stack registers */
855 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
856 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
859 /* flags, fpsr, dirflag, frame */
860 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
/* SSE registers */
861 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
/* MMX registers */
863 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
/* extended (REX) integer registers */
865 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
866 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
/* extended SSE registers */
867 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
871 /* The "default" register map used in 32bit mode. */
/* Maps GCC hard register numbers to debugger register numbers;
   -1 marks registers with no assigned debug register number.  */
873 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
875 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
876 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
877 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
878 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
879 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
880 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
881 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
/* Hard register numbers used to pass integer arguments in 64bit mode,
   in ABI order: RDI, RSI, RDX, RCX, R8, R9.  */
884 static int const x86_64_int_parameter_registers[6] =
886 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
887 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
/* Hard register numbers used to return integer values in 64bit mode.
   Note: hard reg 1 is RDX (DREG), not RDI -- the previous comment was wrong;
   compare the parameter-register table above (1 /*RDX*/).  */
890 static int const x86_64_int_return_registers[4] =
892 0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
895 /* The "default" register map used in 64bit mode. */
/* Unlike the 32bit map, the REX integer and extended SSE registers get real
   debug register numbers here; -1 still marks unnumbered registers.  */
896 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
898 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
899 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
900 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
901 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
902 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
903 8,9,10,11,12,13,14,15, /* extended integer registers */
904 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
907 /* Define the register numbers to be used in Dwarf debugging information.
908 The SVR4 reference port C compiler uses the following register numbers
909 in its Dwarf output code:
910 0 for %eax (gcc regno = 0)
911 1 for %ecx (gcc regno = 2)
912 2 for %edx (gcc regno = 1)
913 3 for %ebx (gcc regno = 3)
914 4 for %esp (gcc regno = 7)
915 5 for %ebp (gcc regno = 6)
916 6 for %esi (gcc regno = 4)
917 7 for %edi (gcc regno = 5)
918 The following three DWARF register numbers are never generated by
919 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
920 believes these numbers have these meanings.
921 8 for %eip (no gcc equivalent)
922 9 for %eflags (gcc regno = 17)
923 10 for %trapno (no gcc equivalent)
924 It is not at all clear how we should number the FP stack registers
925 for the x86 architecture. If the version of SDB on x86/svr4 were
926 a bit less brain dead with respect to floating-point then we would
927 have a precedent to follow with respect to DWARF register numbers
928 for x86 FP registers, but the SDB on x86/svr4 is so completely
929 broken with respect to FP registers that it is hardly worth thinking
930 of it as something to strive for compatibility with.
931 The version of x86/svr4 SDB I have at the moment does (partially)
932 seem to believe that DWARF register number 11 is associated with
933 the x86 register %st(0), but that's about all. Higher DWARF
934 register numbers don't seem to be associated with anything in
935 particular, and even for DWARF regno 11, SDB only seems to under-
936 stand that it should say that a variable lives in %st(0) (when
937 asked via an `=' command) if we said it was in DWARF regno 11,
938 but SDB still prints garbage when asked for the value of the
939 variable in question (via a `/' command).
940 (Also note that the labels SDB prints for various FP stack regs
941 when doing an `x' command are all wrong.)
942 Note that these problems generally don't affect the native SVR4
943 C compiler because it doesn't allow the use of -O with -g and
944 because when it is *not* optimizing, it allocates a memory
945 location for each floating-point variable, and the memory
946 location is what gets described in the DWARF AT_location
947 attribute for the variable in question.
948 Regardless of the severe mental illness of the x86/svr4 SDB, we
949 do something sensible here and we use the following DWARF
950 register numbers. Note that these are all stack-top-relative
952 11 for %st(0) (gcc regno = 8)
953 12 for %st(1) (gcc regno = 9)
954 13 for %st(2) (gcc regno = 10)
955 14 for %st(3) (gcc regno = 11)
956 15 for %st(4) (gcc regno = 12)
957 16 for %st(5) (gcc regno = 13)
958 17 for %st(6) (gcc regno = 14)
959 18 for %st(7) (gcc regno = 15)
*/
961 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
963 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
964 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
965 -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
966 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
967 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
968 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
969 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
972 /* Test and compare insns in i386.md store the information needed to
973 generate branch and scc insns here. */
975 rtx ix86_compare_op0 = NULL_RTX;
976 rtx ix86_compare_op1 = NULL_RTX;
/* NOTE(review): name suggests this holds an already-emitted flags result for
   the pending comparison -- confirm against the i386.md expanders that set it.  */
977 rtx ix86_compare_emitted = NULL_RTX;
979 /* Size of the register save area. */
980 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
982 /* Define the structure for the machine field in struct function. */
984 struct stack_local_entry GTY(())
989 struct stack_local_entry *next;
992 /* Structure describing stack frame layout.
993 Stack grows downward:
999 saved frame pointer if frame_pointer_needed
1000 <- HARD_FRAME_POINTER
1005 [va_arg registers] (
1006 > to_allocate <- FRAME_POINTER
1016 HOST_WIDE_INT frame;
1018 int outgoing_arguments_size;
1021 HOST_WIDE_INT to_allocate;
1022 /* The offsets relative to ARG_POINTER. */
1023 HOST_WIDE_INT frame_pointer_offset;
1024 HOST_WIDE_INT hard_frame_pointer_offset;
1025 HOST_WIDE_INT stack_pointer_offset;
1027 /* When save_regs_using_mov is set, emit prologue using
1028 move instead of push instructions. */
1029 bool save_regs_using_mov;
1032 /* Code model option. */
1033 enum cmodel ix86_cmodel;
/* Assembler output syntax (-masm=): AT&T by default.  */
1035 enum asm_dialect ix86_asm_dialect = ASM_ATT;
/* Which TLS access dialect to emit code for.  */
1037 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
1039 /* Which unit we are generating floating point math for. */
1040 enum fpmath_unit ix86_fpmath;
1042 /* Which cpu are we scheduling for. */
1043 enum processor_type ix86_tune;
1044 /* Which instruction set architecture to use. */
1045 enum processor_type ix86_arch;
1047 /* true if sse prefetch instruction is not NOOP. */
1048 int x86_prefetch_sse;
1050 /* ix86_regparm_string as a number */
1051 static int ix86_regparm;
1053 /* -mstackrealign option */
1054 extern int ix86_force_align_arg_pointer;
1055 static const char ix86_force_align_arg_pointer_string[] = "force_align_arg_pointer";
1057 /* Preferred alignment for stack boundary in bits. */
1058 unsigned int ix86_preferred_stack_boundary;
1060 /* Values 1-5: see jump.c */
1061 int ix86_branch_cost;
1063 /* Variables which are this size or smaller are put in the data/bss
1064 or ldata/lbss sections. */
1066 int ix86_section_threshold = 65536;
1068 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
1069 char internal_label_prefix[16];
1070 int internal_label_prefix_len;
1072 static bool ix86_handle_option (size_t, const char *, int);
1073 static void output_pic_addr_const (FILE *, rtx, int);
1074 static void put_condition_code (enum rtx_code, enum machine_mode,
1076 static const char *get_some_local_dynamic_name (void);
1077 static int get_some_local_dynamic_name_1 (rtx *, void *);
1078 static rtx ix86_expand_int_compare (enum rtx_code, rtx, rtx);
1079 static enum rtx_code ix86_prepare_fp_compare_args (enum rtx_code, rtx *,
1081 static bool ix86_fixed_condition_code_regs (unsigned int *, unsigned int *);
1082 static enum machine_mode ix86_cc_modes_compatible (enum machine_mode,
1084 static rtx get_thread_pointer (int);
1085 static rtx legitimize_tls_address (rtx, enum tls_model, int);
1086 static void get_pc_thunk_name (char [32], unsigned int);
1087 static rtx gen_push (rtx);
1088 static int ix86_flags_dependent (rtx, rtx, enum attr_type);
1089 static int ix86_agi_dependent (rtx, rtx, enum attr_type);
1090 static struct machine_function * ix86_init_machine_status (void);
1091 static int ix86_split_to_parts (rtx, rtx *, enum machine_mode);
1092 static int ix86_nsaved_regs (void);
1093 static void ix86_emit_save_regs (void);
1094 static void ix86_emit_save_regs_using_mov (rtx, HOST_WIDE_INT);
1095 static void ix86_emit_restore_regs_using_mov (rtx, HOST_WIDE_INT, int);
1096 static void ix86_output_function_epilogue (FILE *, HOST_WIDE_INT);
1097 static HOST_WIDE_INT ix86_GOT_alias_set (void);
1098 static void ix86_adjust_counter (rtx, HOST_WIDE_INT);
1099 static rtx ix86_expand_aligntest (rtx, int);
1100 static void ix86_expand_strlensi_unroll_1 (rtx, rtx, rtx);
1101 static int ix86_issue_rate (void);
1102 static int ix86_adjust_cost (rtx, rtx, rtx, int);
1103 static int ia32_multipass_dfa_lookahead (void);
1104 static void ix86_init_mmx_sse_builtins (void);
1105 static rtx x86_this_parameter (tree);
1106 static void x86_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
1107 HOST_WIDE_INT, tree);
1108 static bool x86_can_output_mi_thunk (tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
1109 static void x86_file_start (void);
1110 static void ix86_reorg (void);
1111 static bool ix86_expand_carry_flag_compare (enum rtx_code, rtx, rtx, rtx*);
1112 static tree ix86_build_builtin_va_list (void);
1113 static void ix86_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
1115 static tree ix86_gimplify_va_arg (tree, tree, tree *, tree *);
1116 static bool ix86_scalar_mode_supported_p (enum machine_mode);
1117 static bool ix86_vector_mode_supported_p (enum machine_mode);
1119 static int ix86_address_cost (rtx);
1120 static bool ix86_cannot_force_const_mem (rtx);
1121 static rtx ix86_delegitimize_address (rtx);
1123 static void i386_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
1125 struct builtin_description;
1126 static rtx ix86_expand_sse_comi (const struct builtin_description *,
1128 static rtx ix86_expand_sse_compare (const struct builtin_description *,
1130 static rtx ix86_expand_unop1_builtin (enum insn_code, tree, rtx);
1131 static rtx ix86_expand_unop_builtin (enum insn_code, tree, rtx, int);
1132 static rtx ix86_expand_binop_builtin (enum insn_code, tree, rtx);
1133 static rtx ix86_expand_store_builtin (enum insn_code, tree);
1134 static rtx safe_vector_operand (rtx, enum machine_mode);
1135 static rtx ix86_expand_fp_compare (enum rtx_code, rtx, rtx, rtx, rtx *, rtx *);
1136 static int ix86_fp_comparison_arithmetics_cost (enum rtx_code code);
1137 static int ix86_fp_comparison_fcomi_cost (enum rtx_code code);
1138 static int ix86_fp_comparison_sahf_cost (enum rtx_code code);
1139 static int ix86_fp_comparison_cost (enum rtx_code code);
1140 static unsigned int ix86_select_alt_pic_regnum (void);
1141 static int ix86_save_reg (unsigned int, int);
1142 static void ix86_compute_frame_layout (struct ix86_frame *);
1143 static int ix86_comp_type_attributes (tree, tree);
1144 static int ix86_function_regparm (tree, tree);
1145 const struct attribute_spec ix86_attribute_table[];
1146 static bool ix86_function_ok_for_sibcall (tree, tree);
1147 static tree ix86_handle_cconv_attribute (tree *, tree, tree, int, bool *);
1148 static int ix86_value_regno (enum machine_mode, tree, tree);
1149 static bool contains_128bit_aligned_vector_p (tree);
1150 static rtx ix86_struct_value_rtx (tree, int);
1151 static bool ix86_ms_bitfield_layout_p (tree);
1152 static tree ix86_handle_struct_attribute (tree *, tree, tree, int, bool *);
1153 static int extended_reg_mentioned_1 (rtx *, void *);
1154 static bool ix86_rtx_costs (rtx, int, int, int *);
1155 static int min_insn_size (rtx);
1156 static tree ix86_md_asm_clobbers (tree outputs, tree inputs, tree clobbers);
1157 static bool ix86_must_pass_in_stack (enum machine_mode mode, tree type);
1158 static bool ix86_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
1160 static void ix86_init_builtins (void);
1161 static rtx ix86_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
1162 static const char *ix86_mangle_fundamental_type (tree);
1163 static tree ix86_stack_protect_fail (void);
1164 static rtx ix86_internal_arg_pointer (void);
1165 static void ix86_dwarf_handle_frame_unspec (const char *, rtx, int);
1167 /* This function is only used on Solaris. */
1168 static void i386_solaris_elf_named_section (const char *, unsigned int, tree)
1171 /* Register class used for passing given 64bit part of the argument.
1172 These represent classes as documented by the PS ABI, with the exception
1173 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
1174 use SF or DFmode move instead of DImode to avoid reformatting penalties.
1176 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
1177 whenever possible (upper half does contain padding).
*/
1179 enum x86_64_reg_class
1182 X86_64_INTEGER_CLASS,
1183 X86_64_INTEGERSI_CLASS,
1190 X86_64_COMPLEX_X87_CLASS,
1193 static const char * const x86_64_reg_class_name[] = {
1194 "no", "integer", "integerSI", "sse", "sseSF", "sseDF",
1195 "sseup", "x87", "x87up", "cplx87", "no"
1198 #define MAX_CLASSES 4
1200 /* Table of constants used by fldpi, fldln2, etc.... */
1201 static REAL_VALUE_TYPE ext_80387_constants_table [5];
1202 static bool ext_80387_constants_init = 0;
1203 static void init_ext_80387_constants (void);
1204 static bool ix86_in_large_data_p (tree) ATTRIBUTE_UNUSED;
1205 static void ix86_encode_section_info (tree, rtx, int) ATTRIBUTE_UNUSED;
1206 static void x86_64_elf_unique_section (tree decl, int reloc) ATTRIBUTE_UNUSED;
1207 static section *x86_64_elf_select_section (tree decl, int reloc,
1208 unsigned HOST_WIDE_INT align)
1211 /* Initialize the GCC target structure. */
1212 #undef TARGET_ATTRIBUTE_TABLE
1213 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
1214 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
1215 # undef TARGET_MERGE_DECL_ATTRIBUTES
1216 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
1219 #undef TARGET_COMP_TYPE_ATTRIBUTES
1220 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
1222 #undef TARGET_INIT_BUILTINS
1223 #define TARGET_INIT_BUILTINS ix86_init_builtins
1224 #undef TARGET_EXPAND_BUILTIN
1225 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
1227 #undef TARGET_ASM_FUNCTION_EPILOGUE
1228 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
1230 #undef TARGET_ENCODE_SECTION_INFO
1231 #ifndef SUBTARGET_ENCODE_SECTION_INFO
1232 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
1234 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
1237 #undef TARGET_ASM_OPEN_PAREN
1238 #define TARGET_ASM_OPEN_PAREN ""
1239 #undef TARGET_ASM_CLOSE_PAREN
1240 #define TARGET_ASM_CLOSE_PAREN ""
1242 #undef TARGET_ASM_ALIGNED_HI_OP
1243 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
1244 #undef TARGET_ASM_ALIGNED_SI_OP
1245 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
1247 #undef TARGET_ASM_ALIGNED_DI_OP
1248 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
1251 #undef TARGET_ASM_UNALIGNED_HI_OP
1252 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
1253 #undef TARGET_ASM_UNALIGNED_SI_OP
1254 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
1255 #undef TARGET_ASM_UNALIGNED_DI_OP
1256 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
1258 #undef TARGET_SCHED_ADJUST_COST
1259 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
1260 #undef TARGET_SCHED_ISSUE_RATE
1261 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
1262 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
1263 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
1264 ia32_multipass_dfa_lookahead
1266 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
1267 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
1270 #undef TARGET_HAVE_TLS
1271 #define TARGET_HAVE_TLS true
1273 #undef TARGET_CANNOT_FORCE_CONST_MEM
1274 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
1275 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
1276 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_rtx_true
1278 #undef TARGET_DELEGITIMIZE_ADDRESS
1279 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
1281 #undef TARGET_MS_BITFIELD_LAYOUT_P
1282 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
1285 #undef TARGET_BINDS_LOCAL_P
1286 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
1289 #undef TARGET_ASM_OUTPUT_MI_THUNK
1290 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
1291 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1292 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
1294 #undef TARGET_ASM_FILE_START
1295 #define TARGET_ASM_FILE_START x86_file_start
1297 #undef TARGET_DEFAULT_TARGET_FLAGS
1298 #define TARGET_DEFAULT_TARGET_FLAGS \
1300 | TARGET_64BIT_DEFAULT \
1301 | TARGET_SUBTARGET_DEFAULT \
1302 | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
1304 #undef TARGET_HANDLE_OPTION
1305 #define TARGET_HANDLE_OPTION ix86_handle_option
1307 #undef TARGET_RTX_COSTS
1308 #define TARGET_RTX_COSTS ix86_rtx_costs
1309 #undef TARGET_ADDRESS_COST
1310 #define TARGET_ADDRESS_COST ix86_address_cost
1312 #undef TARGET_FIXED_CONDITION_CODE_REGS
1313 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
1314 #undef TARGET_CC_MODES_COMPATIBLE
1315 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
1317 #undef TARGET_MACHINE_DEPENDENT_REORG
1318 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
1320 #undef TARGET_BUILD_BUILTIN_VA_LIST
1321 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
1323 #undef TARGET_MD_ASM_CLOBBERS
1324 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
1326 #undef TARGET_PROMOTE_PROTOTYPES
1327 #define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
1328 #undef TARGET_STRUCT_VALUE_RTX
1329 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
1330 #undef TARGET_SETUP_INCOMING_VARARGS
1331 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
1332 #undef TARGET_MUST_PASS_IN_STACK
1333 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
1334 #undef TARGET_PASS_BY_REFERENCE
1335 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
1336 #undef TARGET_INTERNAL_ARG_POINTER
1337 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
1338 #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
1339 #define TARGET_DWARF_HANDLE_FRAME_UNSPEC ix86_dwarf_handle_frame_unspec
1341 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
1342 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
1344 #undef TARGET_SCALAR_MODE_SUPPORTED_P
1345 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
1347 #undef TARGET_VECTOR_MODE_SUPPORTED_P
1348 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
1351 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
1352 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
1355 #ifdef SUBTARGET_INSERT_ATTRIBUTES
1356 #undef TARGET_INSERT_ATTRIBUTES
1357 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
1360 #undef TARGET_MANGLE_FUNDAMENTAL_TYPE
1361 #define TARGET_MANGLE_FUNDAMENTAL_TYPE ix86_mangle_fundamental_type
1363 #undef TARGET_STACK_PROTECT_FAIL
1364 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
1366 #undef TARGET_FUNCTION_VALUE
1367 #define TARGET_FUNCTION_VALUE ix86_function_value
1369 struct gcc_target targetm = TARGET_INITIALIZER;
1372 /* The svr4 ABI for the i386 says that records and unions are returned
1374 #ifndef DEFAULT_PCC_STRUCT_RETURN
1375 #define DEFAULT_PCC_STRUCT_RETURN 1
1378 /* Implement TARGET_HANDLE_OPTION. */
1381 ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
1388 target_flags &= ~MASK_3DNOW_A;
1389 target_flags_explicit |= MASK_3DNOW_A;
1396 target_flags &= ~(MASK_3DNOW | MASK_3DNOW_A);
1397 target_flags_explicit |= MASK_3DNOW | MASK_3DNOW_A;
1404 target_flags &= ~(MASK_SSE2 | MASK_SSE3);
1405 target_flags_explicit |= MASK_SSE2 | MASK_SSE3;
1412 target_flags &= ~MASK_SSE3;
1413 target_flags_explicit |= MASK_SSE3;
1422 /* Sometimes certain combinations of command options do not make
1423 sense on a particular target machine. You can define a macro
1424 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1425 defined, is executed once just after all the command options have
1428 Don't use this macro to turn on various extra optimizations for
1429 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1432 override_options (void)
1435 int ix86_tune_defaulted = 0;
1437 /* Comes from final.c -- no real reason to change it. */
1438 #define MAX_CODE_ALIGN 16
1442 const struct processor_costs *cost; /* Processor costs */
1443 const int target_enable; /* Target flags to enable. */
1444 const int target_disable; /* Target flags to disable. */
1445 const int align_loop; /* Default alignments. */
1446 const int align_loop_max_skip;
1447 const int align_jump;
1448 const int align_jump_max_skip;
1449 const int align_func;
1451 const processor_target_table[PROCESSOR_max] =
1453 {&i386_cost, 0, 0, 4, 3, 4, 3, 4},
1454 {&i486_cost, 0, 0, 16, 15, 16, 15, 16},
1455 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
1456 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
1457 {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
1458 {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
1459 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
1460 {&k8_cost, 0, 0, 16, 7, 16, 7, 16},
1461 {&nocona_cost, 0, 0, 0, 0, 0, 0, 0},
1462 {&generic32_cost, 0, 0, 16, 7, 16, 7, 16},
1463 {&generic64_cost, 0, 0, 16, 7, 16, 7, 16}
1466 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
1469 const char *const name; /* processor name or nickname. */
1470 const enum processor_type processor;
1471 const enum pta_flags
1477 PTA_PREFETCH_SSE = 16,
1483 const processor_alias_table[] =
1485 {"i386", PROCESSOR_I386, 0},
1486 {"i486", PROCESSOR_I486, 0},
1487 {"i586", PROCESSOR_PENTIUM, 0},
1488 {"pentium", PROCESSOR_PENTIUM, 0},
1489 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
1490 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
1491 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1492 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1493 {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_PREFETCH_SSE | PTA_SSE},
1494 {"i686", PROCESSOR_PENTIUMPRO, 0},
1495 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
1496 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
1497 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1498 {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1499 {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE | PTA_SSE2},
1500 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1501 | PTA_MMX | PTA_PREFETCH_SSE},
1502 {"pentium4m", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1503 | PTA_MMX | PTA_PREFETCH_SSE},
1504 {"prescott", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3
1505 | PTA_MMX | PTA_PREFETCH_SSE},
1506 {"nocona", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_64BIT
1507 | PTA_MMX | PTA_PREFETCH_SSE},
1508 {"k6", PROCESSOR_K6, PTA_MMX},
1509 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1510 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1511 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1513 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
1514 | PTA_3DNOW | PTA_3DNOW_A},
1515 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1516 | PTA_3DNOW_A | PTA_SSE},
1517 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1518 | PTA_3DNOW_A | PTA_SSE},
1519 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1520 | PTA_3DNOW_A | PTA_SSE},
1521 {"x86-64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_64BIT
1522 | PTA_SSE | PTA_SSE2 },
1523 {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1524 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1525 {"opteron", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1526 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1527 {"athlon64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1528 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1529 {"athlon-fx", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1530 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1531 {"generic32", PROCESSOR_GENERIC32, 0 /* flags are only used for -march switch. */ },
1532 {"generic64", PROCESSOR_GENERIC64, PTA_64BIT /* flags are only used for -march switch. */ },
1535 int const pta_size = ARRAY_SIZE (processor_alias_table);
1537 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1538 SUBTARGET_OVERRIDE_OPTIONS;
1541 /* Set the default values for switches whose default depends on TARGET_64BIT
1542 in case they weren't overwritten by command line options. */
1545 if (flag_omit_frame_pointer == 2)
1546 flag_omit_frame_pointer = 1;
1547 if (flag_asynchronous_unwind_tables == 2)
1548 flag_asynchronous_unwind_tables = 1;
1549 if (flag_pcc_struct_return == 2)
1550 flag_pcc_struct_return = 0;
1554 if (flag_omit_frame_pointer == 2)
1555 flag_omit_frame_pointer = 0;
1556 if (flag_asynchronous_unwind_tables == 2)
1557 flag_asynchronous_unwind_tables = 0;
1558 if (flag_pcc_struct_return == 2)
1559 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
1562 /* Need to check -mtune=generic first. */
1563 if (ix86_tune_string)
1565 if (!strcmp (ix86_tune_string, "generic")
1566 || !strcmp (ix86_tune_string, "i686")
1567 /* As special support for cross compilers we read -mtune=native
1568 as -mtune=generic. With native compilers we won't see the
1569 -mtune=native, as it was changed by the driver. */
1570 || !strcmp (ix86_tune_string, "native"))
1573 ix86_tune_string = "generic64";
1575 ix86_tune_string = "generic32";
1577 else if (!strncmp (ix86_tune_string, "generic", 7))
1578 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
1582 if (ix86_arch_string)
1583 ix86_tune_string = ix86_arch_string;
1584 if (!ix86_tune_string)
1586 ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT];
1587 ix86_tune_defaulted = 1;
1590 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
1591 need to use a sensible tune option. */
1592 if (!strcmp (ix86_tune_string, "generic")
1593 || !strcmp (ix86_tune_string, "x86-64")
1594 || !strcmp (ix86_tune_string, "i686"))
1597 ix86_tune_string = "generic64";
1599 ix86_tune_string = "generic32";
1602 if (!strcmp (ix86_tune_string, "x86-64"))
1603 warning (OPT_Wdeprecated, "-mtune=x86-64 is deprecated. Use -mtune=k8 or "
1604 "-mtune=generic instead as appropriate.");
1606 if (!ix86_arch_string)
1607 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
1608 if (!strcmp (ix86_arch_string, "generic"))
1609 error ("generic CPU can be used only for -mtune= switch");
1610 if (!strncmp (ix86_arch_string, "generic", 7))
1611 error ("bad value (%s) for -march= switch", ix86_arch_string);
1613 if (ix86_cmodel_string != 0)
1615 if (!strcmp (ix86_cmodel_string, "small"))
1616 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1617 else if (!strcmp (ix86_cmodel_string, "medium"))
1618 ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
1620 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
1621 else if (!strcmp (ix86_cmodel_string, "32"))
1622 ix86_cmodel = CM_32;
1623 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
1624 ix86_cmodel = CM_KERNEL;
1625 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
1626 ix86_cmodel = CM_LARGE;
1628 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
1632 ix86_cmodel = CM_32;
1634 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1636 if (ix86_asm_string != 0)
1639 && !strcmp (ix86_asm_string, "intel"))
1640 ix86_asm_dialect = ASM_INTEL;
1641 else if (!strcmp (ix86_asm_string, "att"))
1642 ix86_asm_dialect = ASM_ATT;
1644 error ("bad value (%s) for -masm= switch", ix86_asm_string);
1646 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
1647 error ("code model %qs not supported in the %s bit mode",
1648 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
1649 if (ix86_cmodel == CM_LARGE)
1650 sorry ("code model %<large%> not supported yet");
1651 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
1652 sorry ("%i-bit mode not compiled in",
1653 (target_flags & MASK_64BIT) ? 64 : 32);
1655 for (i = 0; i < pta_size; i++)
1656 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
1658 ix86_arch = processor_alias_table[i].processor;
1659 /* Default cpu tuning to the architecture. */
1660 ix86_tune = ix86_arch;
1661 if (processor_alias_table[i].flags & PTA_MMX
1662 && !(target_flags_explicit & MASK_MMX))
1663 target_flags |= MASK_MMX;
1664 if (processor_alias_table[i].flags & PTA_3DNOW
1665 && !(target_flags_explicit & MASK_3DNOW))
1666 target_flags |= MASK_3DNOW;
1667 if (processor_alias_table[i].flags & PTA_3DNOW_A
1668 && !(target_flags_explicit & MASK_3DNOW_A))
1669 target_flags |= MASK_3DNOW_A;
1670 if (processor_alias_table[i].flags & PTA_SSE
1671 && !(target_flags_explicit & MASK_SSE))
1672 target_flags |= MASK_SSE;
1673 if (processor_alias_table[i].flags & PTA_SSE2
1674 && !(target_flags_explicit & MASK_SSE2))
1675 target_flags |= MASK_SSE2;
1676 if (processor_alias_table[i].flags & PTA_SSE3
1677 && !(target_flags_explicit & MASK_SSE3))
1678 target_flags |= MASK_SSE3;
1679 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1680 x86_prefetch_sse = true;
1681 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1682 error ("CPU you selected does not support x86-64 "
1688 error ("bad value (%s) for -march= switch", ix86_arch_string);
1690 for (i = 0; i < pta_size; i++)
1691 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
1693 ix86_tune = processor_alias_table[i].processor;
1694 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1696 if (ix86_tune_defaulted)
1698 ix86_tune_string = "x86-64";
1699 for (i = 0; i < pta_size; i++)
1700 if (! strcmp (ix86_tune_string,
1701 processor_alias_table[i].name))
1703 ix86_tune = processor_alias_table[i].processor;
1706 error ("CPU you selected does not support x86-64 "
1709 /* Intel CPUs have always interpreted SSE prefetch instructions as
1710 NOPs; so, we can enable SSE prefetch instructions even when
1711 -mtune (rather than -march) points us to a processor that has them.
1712 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
1713 higher processors. */
1714 if (TARGET_CMOVE && (processor_alias_table[i].flags & PTA_PREFETCH_SSE))
1715 x86_prefetch_sse = true;
1719 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
1722 ix86_cost = &size_cost;
1724 ix86_cost = processor_target_table[ix86_tune].cost;
1725 target_flags |= processor_target_table[ix86_tune].target_enable;
1726 target_flags &= ~processor_target_table[ix86_tune].target_disable;
1728 /* Arrange to set up i386_stack_locals for all functions. */
1729 init_machine_status = ix86_init_machine_status;
1731 /* Validate -mregparm= value. */
1732 if (ix86_regparm_string)
1734 i = atoi (ix86_regparm_string);
1735 if (i < 0 || i > REGPARM_MAX)
1736 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
1742 ix86_regparm = REGPARM_MAX;
1744 /* If the user has provided any of the -malign-* options,
1745 warn and use that value only if -falign-* is not set.
1746 Remove this code in GCC 3.2 or later. */
1747 if (ix86_align_loops_string)
1749 warning (0, "-malign-loops is obsolete, use -falign-loops");
1750 if (align_loops == 0)
1752 i = atoi (ix86_align_loops_string);
1753 if (i < 0 || i > MAX_CODE_ALIGN)
1754 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1756 align_loops = 1 << i;
1760 if (ix86_align_jumps_string)
1762 warning (0, "-malign-jumps is obsolete, use -falign-jumps");
1763 if (align_jumps == 0)
1765 i = atoi (ix86_align_jumps_string);
1766 if (i < 0 || i > MAX_CODE_ALIGN)
/* Fixed copy-paste bug: this diagnostic previously named -malign-loops.  */
1767 error ("-malign-jumps=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1769 align_jumps = 1 << i;
1773 if (ix86_align_funcs_string)
1775 warning (0, "-malign-functions is obsolete, use -falign-functions");
1776 if (align_functions == 0)
1778 i = atoi (ix86_align_funcs_string);
1779 if (i < 0 || i > MAX_CODE_ALIGN)
/* Fixed copy-paste bug: this diagnostic previously named -malign-loops.  */
1780 error ("-malign-functions=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1782 align_functions = 1 << i;
1786 /* Default align_* from the processor table. */
1787 if (align_loops == 0)
1789 align_loops = processor_target_table[ix86_tune].align_loop;
1790 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
1792 if (align_jumps == 0)
1794 align_jumps = processor_target_table[ix86_tune].align_jump;
1795 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
1797 if (align_functions == 0)
1799 align_functions = processor_target_table[ix86_tune].align_func;
1802 /* Validate -mpreferred-stack-boundary= value, or provide default.
1803 The default of 128 bits is for Pentium III's SSE __m128, but we
1804 don't want additional code to keep the stack aligned when
1805 optimizing for code size. */
1806 ix86_preferred_stack_boundary = ((TARGET_64BIT || TARGET_MACHO || !optimize_size)
1808 if (ix86_preferred_stack_boundary_string)
1810 i = atoi (ix86_preferred_stack_boundary_string);
1811 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
1812 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
1813 TARGET_64BIT ? 4 : 2);
1815 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
1818 /* Validate -mbranch-cost= value, or provide default. */
1819 ix86_branch_cost = ix86_cost->branch_cost;
1820 if (ix86_branch_cost_string)
1822 i = atoi (ix86_branch_cost_string);
1824 error ("-mbranch-cost=%d is not between 0 and 5", i);
1826 ix86_branch_cost = i;
1828 if (ix86_section_threshold_string)
1830 i = atoi (ix86_section_threshold_string);
1832 error ("-mlarge-data-threshold=%d is negative", i);
1834 ix86_section_threshold = i;
1837 if (ix86_tls_dialect_string)
1839 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
1840 ix86_tls_dialect = TLS_DIALECT_GNU;
1841 else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
1842 ix86_tls_dialect = TLS_DIALECT_GNU2;
1843 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
1844 ix86_tls_dialect = TLS_DIALECT_SUN;
1846 error ("bad value (%s) for -mtls-dialect= switch",
1847 ix86_tls_dialect_string);
1850 /* Keep nonleaf frame pointers. */
1851 if (flag_omit_frame_pointer)
1852 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
1853 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
1854 flag_omit_frame_pointer = 1;
1856 /* If we're doing fast math, we don't care about comparison order
1857 wrt NaNs. This lets us use a shorter comparison sequence. */
1858 if (flag_unsafe_math_optimizations)
1859 target_flags &= ~MASK_IEEE_FP;
1861 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
1862 since the insns won't need emulation. */
1863 if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
1864 target_flags &= ~MASK_NO_FANCY_MATH_387;
1866 /* Likewise, if the target doesn't have a 387, or we've specified
1867 software floating point, don't use 387 inline intrinsics. */
1869 target_flags |= MASK_NO_FANCY_MATH_387;
1871 /* Turn on SSE2 builtins for -msse3. */
1873 target_flags |= MASK_SSE2;
1875 /* Turn on SSE builtins for -msse2. */
1877 target_flags |= MASK_SSE;
1879 /* Turn on MMX builtins for -msse. */
1882 target_flags |= MASK_MMX & ~target_flags_explicit;
1883 x86_prefetch_sse = true;
1886 /* Turn on MMX builtins for 3Dnow. */
1888 target_flags |= MASK_MMX;
1892 if (TARGET_ALIGN_DOUBLE)
1893 error ("-malign-double makes no sense in the 64bit mode");
1895 error ("-mrtd calling convention not supported in the 64bit mode");
1897 /* Enable by default the SSE and MMX builtins. Do allow the user to
1898 explicitly disable any of these. In particular, disabling SSE and
1899 MMX for kernel code is extremely useful. */
1901 |= ((MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE)
1902 & ~target_flags_explicit);
1906 /* i386 ABI does not specify red zone. It still makes sense to use it
1907 when programmer takes care to stack from being destroyed. */
1908 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
1909 target_flags |= MASK_NO_RED_ZONE;
1912 /* Accept -msseregparm only if at least SSE support is enabled. */
1913 if (TARGET_SSEREGPARM
1915 error ("-msseregparm used without SSE enabled");
1917 ix86_fpmath = TARGET_FPMATH_DEFAULT;
1919 if (ix86_fpmath_string != 0)
1921 if (! strcmp (ix86_fpmath_string, "387"))
1922 ix86_fpmath = FPMATH_387;
1923 else if (! strcmp (ix86_fpmath_string, "sse"))
1927 warning (0, "SSE instruction set disabled, using 387 arithmetics");
1928 ix86_fpmath = FPMATH_387;
1931 ix86_fpmath = FPMATH_SSE;
1933 else if (! strcmp (ix86_fpmath_string, "387,sse")
1934 || ! strcmp (ix86_fpmath_string, "sse,387"))
1938 warning (0, "SSE instruction set disabled, using 387 arithmetics");
1939 ix86_fpmath = FPMATH_387;
1941 else if (!TARGET_80387)
1943 warning (0, "387 instruction set disabled, using SSE arithmetics");
1944 ix86_fpmath = FPMATH_SSE;
1947 ix86_fpmath = FPMATH_SSE | FPMATH_387;
1950 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
1953 /* If the i387 is disabled, then do not return values in it. */
1955 target_flags &= ~MASK_FLOAT_RETURNS;
1957 if ((x86_accumulate_outgoing_args & TUNEMASK)
1958 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
1960 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
1962 /* ??? Unwind info is not correct around the CFG unless either a frame
1963 pointer is present or M_A_O_A is set. Fixing this requires rewriting
1964 unwind info generation to be aware of the CFG and propagating states
1966 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
1967 || flag_exceptions || flag_non_call_exceptions)
1968 && flag_omit_frame_pointer
1969 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
1971 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
1972 warning (0, "unwind tables currently require either a frame pointer "
1973 "or -maccumulate-outgoing-args for correctness");
1974 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
1977 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
1980 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
1981 p = strchr (internal_label_prefix, 'X');
1982 internal_label_prefix_len = p - internal_label_prefix;
1986 /* When scheduling description is not available, disable scheduler pass
1987 so it won't slow down the compilation and make x87 code slower. */
1988 if (!TARGET_SCHEDULE)
1989 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
1992 /* switch to the appropriate section for output of DECL.
1993 DECL is either a `VAR_DECL' node or a constant of some sort.
1994 RELOC indicates whether forming the initial value of DECL requires
1995 link-time relocations. */
1998 x86_64_elf_select_section (tree decl, int reloc,
1999 unsigned HOST_WIDE_INT align)
/* For the x86-64 medium code model, objects classified as "large data"
   are routed into ".l"-prefixed large-data sections; everything else
   falls through to the default ELF section selection below.
   NOTE(review): several listing lines (braces, break statements) are
   elided in this excerpt.  */
2001 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2002 && ix86_in_large_data_p (decl))
2004 const char *sname = NULL;
2005 unsigned int flags = SECTION_WRITE;
2006 switch (categorize_decl_for_section (decl, reloc, flag_pic))
2011 case SECCAT_DATA_REL:
2012 sname = ".ldata.rel";
2014 case SECCAT_DATA_REL_LOCAL:
2015 sname = ".ldata.rel.local";
2017 case SECCAT_DATA_REL_RO:
2018 sname = ".ldata.rel.ro";
2020 case SECCAT_DATA_REL_RO_LOCAL:
2021 sname = ".ldata.rel.ro.local";
2025 flags |= SECTION_BSS;
2028 case SECCAT_RODATA_MERGE_STR:
2029 case SECCAT_RODATA_MERGE_STR_INIT:
2030 case SECCAT_RODATA_MERGE_CONST:
2034 case SECCAT_SRODATA:
2041 /* We don't split these for medium model. Place them into
2042 default sections and hope for best. */
2047 /* We might get called with string constants, but get_named_section
2048 doesn't like them as they are not DECLs. Also, we need to set
2049 flags in that case. */
2051 return get_section (sname, flags, NULL);
2052 return get_named_section (decl, sname, reloc);
2055 return default_elf_select_section (decl, reloc, align);
2058 /* Build up a unique section name, expressed as a
2059 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
2060 RELOC indicates whether the initial value of EXP requires
2061 link-time relocations. */
2064 x86_64_elf_unique_section (tree decl, int reloc)
/* Medium-model large data gets a unique ".l"-prefixed section name
   (".gnu.linkonce.l*" when one-only semantics are needed and COMDAT
   groups are unavailable); otherwise defer to default_unique_section.  */
2066 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2067 && ix86_in_large_data_p (decl))
2069 const char *prefix = NULL;
2070 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
2071 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
2073 switch (categorize_decl_for_section (decl, reloc, flag_pic))
2076 case SECCAT_DATA_REL:
2077 case SECCAT_DATA_REL_LOCAL:
2078 case SECCAT_DATA_REL_RO:
2079 case SECCAT_DATA_REL_RO_LOCAL:
2080 prefix = one_only ? ".gnu.linkonce.ld." : ".ldata.";
2083 prefix = one_only ? ".gnu.linkonce.lb." : ".lbss.";
2086 case SECCAT_RODATA_MERGE_STR:
2087 case SECCAT_RODATA_MERGE_STR_INIT:
2088 case SECCAT_RODATA_MERGE_CONST:
2089 prefix = one_only ? ".gnu.linkonce.lr." : ".lrodata.";
2091 case SECCAT_SRODATA:
2098 /* We don't split these for medium model. Place them into
2099 default sections and hope for best. */
/* The final name is PREFIX followed by the decl's stripped assembler
   name, built in a stack (alloca) buffer and copied into a STRING_CST. */
2107 plen = strlen (prefix);
2109 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
2110 name = targetm.strip_name_encoding (name);
2111 nlen = strlen (name);
2113 string = alloca (nlen + plen + 1);
2114 memcpy (string, prefix, plen);
2115 memcpy (string + plen, name, nlen + 1);
2117 DECL_SECTION_NAME (decl) = build_string (nlen + plen, string);
2121 default_unique_section (decl, reloc);
2124 #ifdef COMMON_ASM_OP
2125 /* This says how to output assembler code to declare an
2126 uninitialized external linkage data object.
2128 For medium model x86-64 we need to use .largecomm opcode for
2131 x86_elf_aligned_common (FILE *file,
2132 const char *name, unsigned HOST_WIDE_INT size,
/* Objects larger than ix86_section_threshold under the medium code
   model are emitted with ".largecomm"; everything else uses the normal
   COMMON_ASM_OP directive.  Alignment is printed in bytes.  */
2135 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2136 && size > (unsigned int)ix86_section_threshold)
2137 fprintf (file, ".largecomm\t");
2139 fprintf (file, "%s", COMMON_ASM_OP);
2140 assemble_name (file, name);
2141 fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
2142 size, align / BITS_PER_UNIT);
2145 /* Utility function for targets to use in implementing
2146 ASM_OUTPUT_ALIGNED_BSS. */
2149 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
2150 const char *name, unsigned HOST_WIDE_INT size,
/* Large objects under the medium code model go to the ".lbss" large-BSS
   section; everything else to the regular BSS section.  */
2153 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2154 && size > (unsigned int)ix86_section_threshold)
2155 switch_to_section (get_named_section (decl, ".lbss", 0));
2157 switch_to_section (bss_section);
2158 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
2159 #ifdef ASM_DECLARE_OBJECT_NAME
2160 last_assemble_variable_decl = decl;
2161 ASM_DECLARE_OBJECT_NAME (file, name, decl);
2163 /* Standard thing is just output label for the object. */
2164 ASM_OUTPUT_LABEL (file, name);
2165 #endif /* ASM_DECLARE_OBJECT_NAME */
/* Reserve at least one byte so the label refers to real storage.  */
2166 ASM_OUTPUT_SKIP (file, size ? size : 1);
/* Set default optimization-dependent options.  Called once per
   compilation from the option-handling machinery.  */
2171 optimization_options (int level, int size ATTRIBUTE_UNUSED)
2173 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
2174 make the problem with not enough registers even worse. */
2175 #ifdef INSN_SCHEDULING
2177 flag_schedule_insns = 0;
2181 /* The Darwin libraries never set errno, so we might as well
2182 avoid calling them when that's the only reason we would. */
2183 flag_errno_math = 0;
2185 /* The default values of these switches depend on the TARGET_64BIT
2186 that is not known at this moment. Mark these values with 2 and
2187 let the user override these. In case there is no command line option
2188 specifying them, we will set the defaults in override_options. */
2190 flag_omit_frame_pointer = 2;
2191 flag_pcc_struct_return = 2;
2192 flag_asynchronous_unwind_tables = 2;
2193 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
2194 SUBTARGET_OPTIMIZATION_OPTIONS;
2198 /* Table of valid machine attributes. */
2199 const struct attribute_spec ix86_attribute_table[] =
2201 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
2202 /* Stdcall attribute says callee is responsible for popping arguments
2203 if they are not variable. */
2204 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2205 /* Fastcall attribute says callee is responsible for popping arguments
2206 if they are not variable. */
2207 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2208 /* Cdecl attribute says the callee is a normal C declaration */
2209 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2210 /* Regparm attribute specifies how many integer arguments are to be
2211 passed in registers. */
2212 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute },
2213 /* Sseregparm attribute says we are using x86_64 calling conventions
2214 for FP arguments. */
2215 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2216 /* force_align_arg_pointer says this function realigns the stack at entry. */
/* The attribute name here is read from a runtime variable rather than a
   string literal, hence the cast of its address.  */
2217 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
2218 false, true, true, ix86_handle_cconv_attribute },
2219 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
2220 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
2221 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
2222 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
2224 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
2225 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
2226 #ifdef SUBTARGET_ATTRIBUTE_TABLE
2227 SUBTARGET_ATTRIBUTE_TABLE,
/* Sentinel entry terminating the table.  */
2229 { NULL, 0, 0, false, false, false, NULL }
2232 /* Decide whether we can make a sibling call to a function. DECL is the
2233 declaration of the function being targeted by the call and EXP is the
2234 CALL_EXPR representing the call. */
2237 ix86_function_ok_for_sibcall (tree decl, tree exp)
2242 /* If we are generating position-independent code, we cannot sibcall
2243 optimize any indirect call, or a direct call to a global function,
2244 as the PLT requires %ebx be live. */
2245 if (!TARGET_64BIT && flag_pic && (!decl || !targetm.binds_local_p (decl)))
/* Determine the type of the called function, looking through a
   pointer type if the call is indirect.  */
2252 func = TREE_TYPE (TREE_OPERAND (exp, 0));
2253 if (POINTER_TYPE_P (func))
2254 func = TREE_TYPE (func);
2257 /* Check that the return value locations are the same. Like
2258 if we are returning floats on the 80387 register stack, we cannot
2259 make a sibcall from a function that doesn't return a float to a
2260 function that does or, conversely, from a function that does return
2261 a float to a function that doesn't; the necessary stack adjustment
2262 would not be executed. This is also the place we notice
2263 differences in the return value ABI. Note that it is ok for one
2264 of the functions to have void return type as long as the return
2265 value of the other is passed in a register. */
2266 a = ix86_function_value (TREE_TYPE (exp), func, false);
2267 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
2269 if (STACK_REG_P (a) || STACK_REG_P (b))
2271 if (!rtx_equal_p (a, b))
2274 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
2276 else if (!rtx_equal_p (a, b))
2279 /* If this call is indirect, we'll need to be able to use a call-clobbered
2280 register for the address of the target function. Make sure that all
2281 such registers are not used for passing parameters. */
2282 if (!decl && !TARGET_64BIT)
2286 /* We're looking at the CALL_EXPR, we need the type of the function. */
2287 type = TREE_OPERAND (exp, 0); /* pointer expression */
2288 type = TREE_TYPE (type); /* pointer type */
2289 type = TREE_TYPE (type); /* function type */
2291 if (ix86_function_regparm (type, NULL) >= 3)
2293 /* ??? Need to count the actual number of registers to be used,
2294 not the possible number of registers. Fix later. */
2299 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
2300 /* Dllimport'd functions are also called indirectly. */
2301 if (decl && DECL_DLLIMPORT_P (decl)
2302 && ix86_function_regparm (TREE_TYPE (decl), NULL) >= 3)
2306 /* If we forced aligned the stack, then sibcalling would unalign the
2307 stack, which may break the called function. */
2308 if (cfun->machine->force_align_arg_pointer)
2311 /* Otherwise okay. That also includes certain types of indirect calls. */
2315 /* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm"
2316 calling convention attributes;
2317 arguments as in struct attribute_spec.handler. */
2320 ix86_handle_cconv_attribute (tree *node, tree name,
2322 int flags ATTRIBUTE_UNUSED,
/* Reject the attribute on anything that is not a function type (or a
   decl that carries one); set *no_add_attrs to drop it silently.  */
2325 if (TREE_CODE (*node) != FUNCTION_TYPE
2326 && TREE_CODE (*node) != METHOD_TYPE
2327 && TREE_CODE (*node) != FIELD_DECL
2328 && TREE_CODE (*node) != TYPE_DECL)
2330 warning (OPT_Wattributes, "%qs attribute only applies to functions",
2331 IDENTIFIER_POINTER (name));
2332 *no_add_attrs = true;
2336 /* Can combine regparm with all attributes but fastcall. */
2337 if (is_attribute_p ("regparm", name))
2341 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2343 error ("fastcall and regparm attributes are not compatible");
/* Validate the regparm argument: must be an integer constant in
   [0, REGPARM_MAX].  */
2346 cst = TREE_VALUE (args);
2347 if (TREE_CODE (cst) != INTEGER_CST)
2349 warning (OPT_Wattributes,
2350 "%qs attribute requires an integer constant argument",
2351 IDENTIFIER_POINTER (name));
2352 *no_add_attrs = true;
2354 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
2356 warning (OPT_Wattributes, "argument to %qs attribute larger than %d",
2357 IDENTIFIER_POINTER (name), REGPARM_MAX);
2358 *no_add_attrs = true;
/* A stack-realigning function clobbers %ecx in its prologue, so it
   cannot use all REGPARM_MAX registers for arguments.  */
2362 && lookup_attribute (ix86_force_align_arg_pointer_string,
2363 TYPE_ATTRIBUTES (*node))
2364 && compare_tree_int (cst, REGPARM_MAX-1))
2366 error ("%s functions limited to %d register parameters",
2367 ix86_force_align_arg_pointer_string, REGPARM_MAX-1);
2375 warning (OPT_Wattributes, "%qs attribute ignored",
2376 IDENTIFIER_POINTER (name));
2377 *no_add_attrs = true;
2381 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
2382 if (is_attribute_p ("fastcall", name))
2384 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
2386 error ("fastcall and cdecl attributes are not compatible");
2388 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
2390 error ("fastcall and stdcall attributes are not compatible");
2392 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
2394 error ("fastcall and regparm attributes are not compatible");
2398 /* Can combine stdcall with fastcall (redundant), regparm and
2400 else if (is_attribute_p ("stdcall", name))
2402 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
2404 error ("stdcall and cdecl attributes are not compatible");
2406 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2408 error ("stdcall and fastcall attributes are not compatible");
2412 /* Can combine cdecl with regparm and sseregparm. */
2413 else if (is_attribute_p ("cdecl", name))
2415 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
2417 error ("stdcall and cdecl attributes are not compatible");
2419 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2421 error ("fastcall and cdecl attributes are not compatible");
2425 /* Can combine sseregparm with all attributes. */
2430 /* Return 0 if the attributes for two types are incompatible, 1 if they
2431 are compatible, and 2 if they are nearly compatible (which causes a
2432 warning to be generated). */
2435 ix86_comp_type_attributes (tree type1, tree type2)
2437 /* Check for mismatch of non-default calling convention. */
2438 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
2440 if (TREE_CODE (type1) != FUNCTION_TYPE)
2443 /* Check for mismatched fastcall/regparm types. */
/* The "! != !" pattern normalizes both lookups to booleans so that any
   presence/absence mismatch is detected.  */
2444 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
2445 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
2446 || (ix86_function_regparm (type1, NULL)
2447 != ix86_function_regparm (type2, NULL)))
2450 /* Check for mismatched sseregparm types. */
2451 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
2452 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
2455 /* Check for mismatched return types (cdecl vs stdcall). */
2456 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
2457 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
2463 /* Return the regparm value for a function with the indicated TYPE and DECL.
2464 DECL may be NULL when calling function indirectly
2465 or considering a libcall. */
2468 ix86_function_regparm (tree type, tree decl)
2471 int regparm = ix86_regparm;
2472 bool user_convention = false;
/* An explicit regparm attribute overrides the global -mregparm value.  */
2476 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
2479 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
2480 user_convention = true;
2483 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
2486 user_convention = true;
2489 /* Use register calling convention for local functions when possible. */
2490 if (!TARGET_64BIT && !user_convention && decl
2491 && flag_unit_at_a_time && !profile_flag)
2493 struct cgraph_local_info *i = cgraph_local_info (decl);
2496 int local_regparm, globals = 0, regno;
2498 /* Make sure no regparm register is taken by a global register
2500 for (local_regparm = 0; local_regparm < 3; local_regparm++)
2501 if (global_regs[local_regparm])
2503 /* We can't use regparm(3) for nested functions as these use
2504 static chain pointer in third argument. */
2505 if (local_regparm == 3
2506 && decl_function_context (decl)
2507 && !DECL_NO_STATIC_CHAIN (decl))
2509 /* If the function realigns its stackpointer, the
2510 prologue will clobber %ecx. If we've already
2511 generated code for the callee, the callee
2512 DECL_STRUCT_FUNCTION is gone, so we fall back to
2513 scanning the attributes for the self-realigning
2515 if ((DECL_STRUCT_FUNCTION (decl)
2516 && DECL_STRUCT_FUNCTION (decl)->machine->force_align_arg_pointer)
2517 || (!DECL_STRUCT_FUNCTION (decl)
2518 && lookup_attribute (ix86_force_align_arg_pointer_string,
2519 TYPE_ATTRIBUTES (TREE_TYPE (decl)))))
2521 /* Each global register variable increases register pressure,
2522 so the more global reg vars there are, the smaller regparm
2523 optimization use, unless requested by the user explicitly. */
2524 for (regno = 0; regno < 6; regno++)
2525 if (global_regs[regno])
2528 = globals < local_regparm ? local_regparm - globals : 0;
2530 if (local_regparm > regparm)
2531 regparm = local_regparm;
2538 /* Return 1 or 2, if we can pass up to 8 SFmode (1) and DFmode (2) arguments
2539 in SSE registers for a function with the indicated TYPE and DECL.
2540 DECL may be NULL when calling function indirectly
2541 or considering a libcall. Otherwise return 0. */
2544 ix86_function_sseregparm (tree type, tree decl)
2546 /* Use SSE registers to pass SFmode and DFmode arguments if requested
2547 by the sseregparm attribute. */
2548 if (TARGET_SSEREGPARM
2550 && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
2555 error ("Calling %qD with attribute sseregparm without "
2556 "SSE/SSE2 enabled", decl);
2558 error ("Calling %qT with attribute sseregparm without "
2559 "SSE/SSE2 enabled", type);
2566 /* For local functions, pass SFmode (and DFmode for SSE2) arguments
2567 in SSE registers even for 32-bit mode and not just 3, but up to
2568 8 SSE arguments in registers. */
2569 if (!TARGET_64BIT && decl
2570 && TARGET_SSE_MATH && flag_unit_at_a_time && !profile_flag)
2572 struct cgraph_local_info *i = cgraph_local_info (decl);
2574 return TARGET_SSE2 ? 2 : 1;
2580 /* Return true if EAX is live at the start of the function. Used by
2581 ix86_expand_prologue to determine if we need special help before
2582 calling allocate_stack_worker. */
2585 ix86_eax_live_at_start_p (void)
2587 /* Cheat. Don't bother working forward from ix86_function_regparm
2588 to the function type to whether an actual argument is located in
2589 eax. Instead just look at cfg info, which is still close enough
2590 to correct at this point. This gives false positives for broken
2591 functions that might use uninitialized data that happens to be
2592 allocated in eax, but who cares? */
/* Register 0 is %eax in the i386 register numbering.  */
2593 return REGNO_REG_SET_P (ENTRY_BLOCK_PTR->il.rtl->global_live_at_end, 0);
2596 /* Value is the number of bytes of arguments automatically
2597 popped when returning from a subroutine call.
2598 FUNDECL is the declaration node of the function (as a tree),
2599 FUNTYPE is the data type of the function (as a tree),
2600 or for a library call it is an identifier node for the subroutine name.
2601 SIZE is the number of bytes of arguments passed on the stack.
2603 On the 80386, the RTD insn may be used to pop them if the number
2604 of args is fixed, but if the number is variable then the caller
2605 must pop them all. RTD can't be used for library calls now
2606 because the library is compiled with the Unix compiler.
2607 Use of RTD is a selectable option, since it is incompatible with
2608 standard Unix calling sequences. If the option is not selected,
2609 the caller must always pop the args.
2611 The attribute stdcall is equivalent to RTD on a per module basis. */
2614 ix86_return_pops_args (tree fundecl, tree funtype, int size)
/* -mrtd applies only to real function types, never to libcall
   identifier nodes.  */
2616 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
2618 /* Cdecl functions override -mrtd, and never pop the stack. */
2619 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
2621 /* Stdcall and fastcall functions will pop the stack if not
2623 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
2624 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
/* A trailing void_type_node in the argument list means the function
   takes a fixed number of arguments, so RTD may pop them.  */
2628 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
2629 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
2630 == void_type_node)))
2634 /* Lose any fake structure return argument if it is passed on the stack. */
2635 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
2637 && !KEEP_AGGREGATE_RETURN_POINTER)
2639 int nregs = ix86_function_regparm (funtype, fundecl);
2642 return GET_MODE_SIZE (Pmode);
2648 /* Argument support functions. */
2650 /* Return true when register may be used to pass function parameters. */
2652 ix86_function_arg_regno_p (int regno)
/* 32-bit: integer regparm registers, plus MMX/SSE argument registers
   when the corresponding ISA is enabled.  */
2656 return (regno < REGPARM_MAX
2657 || (TARGET_MMX && MMX_REGNO_P (regno)
2658 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
2659 || (TARGET_SSE && SSE_REGNO_P (regno)
2660 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
2662 if (TARGET_SSE && SSE_REGNO_P (regno)
2663 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
2665 /* RAX is used as hidden argument to va_arg functions. */
/* 64-bit: check the x86-64 integer parameter register list.  */
2668 for (i = 0; i < REGPARM_MAX; i++)
2669 if (regno == x86_64_int_parameter_registers[i])
2674 /* Return if we do not know how to pass TYPE solely in registers. */
2677 ix86_must_pass_in_stack (enum machine_mode mode, tree type)
2679 if (must_pass_in_stack_var_size_or_pad (mode, type))
2682 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
2683 The layout_type routine is crafty and tries to trick us into passing
2684 currently unsupported vector types on the stack by using TImode. */
2685 return (!TARGET_64BIT && mode == TImode
2686 && type && TREE_CODE (type) != VECTOR_TYPE);
2689 /* Initialize a variable CUM of type CUMULATIVE_ARGS
2690 for a call to a function whose data type is FNTYPE.
2691 For a library call, FNTYPE is 0. */
2694 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
2695 tree fntype, /* tree ptr for function decl */
2696 rtx libname, /* SYMBOL_REF of library name or 0 */
2699 static CUMULATIVE_ARGS zero_cum;
2700 tree param, next_param;
2702 if (TARGET_DEBUG_ARG)
2704 fprintf (stderr, "\ninit_cumulative_args (");
2706 fprintf (stderr, "fntype code = %s, ret code = %s",
2707 tree_code_name[(int) TREE_CODE (fntype)],
2708 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
2710 fprintf (stderr, "no fntype");
2713 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
2718 /* Set up the number of registers to use for passing arguments. */
2719 cum->nregs = ix86_regparm;
2721 cum->sse_nregs = SSE_REGPARM_MAX;
2723 cum->mmx_nregs = MMX_REGPARM_MAX;
2724 cum->warn_sse = true;
2725 cum->warn_mmx = true;
2726 cum->maybe_vaarg = false;
2728 /* Use ecx and edx registers if function has fastcall attribute,
2729 else look for regparm information. */
2730 if (fntype && !TARGET_64BIT)
2732 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
2738 cum->nregs = ix86_function_regparm (fntype, fndecl);
2741 /* Set up the number of SSE registers used for passing SFmode
2742 and DFmode arguments. Warn for mismatching ABI. */
2743 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl);
2745 /* Determine if this function has variable arguments. This is
2746 indicated by the last argument being 'void_type_node' if there
2747 are no variable arguments. If there are variable arguments, then
2748 we won't pass anything in registers in 32-bit mode. */
2750 if (cum->nregs || cum->mmx_nregs || cum->sse_nregs)
2752 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
2753 param != 0; param = next_param)
2755 next_param = TREE_CHAIN (param);
2756 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
2766 cum->float_in_sse = 0;
2768 cum->maybe_vaarg = true;
/* A missing prototype (or a libcall with no type info) may be
   variadic; be conservative.  */
2772 if ((!fntype && !libname)
2773 || (fntype && !TYPE_ARG_TYPES (fntype)))
2774 cum->maybe_vaarg = true;
2776 if (TARGET_DEBUG_ARG)
2777 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
2782 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
2783 But in the case of vector types, it is some vector mode.
2785 When we have only some of our vector isa extensions enabled, then there
2786 are some modes for which vector_mode_supported_p is false. For these
2787 modes, the generic vector support in gcc will choose some non-vector mode
2788 in order to implement the type. By computing the natural mode, we'll
2789 select the proper ABI location for the operand and not depend on whatever
2790 the middle-end decides to do with these vector types. */
2792 static enum machine_mode
2793 type_natural_mode (tree type)
2795 enum machine_mode mode = TYPE_MODE (type);
/* Only intervene when the middle-end picked a non-vector mode for a
   genuine vector type; otherwise TYPE_MODE already is the natural mode.  */
2797 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
2799 HOST_WIDE_INT size = int_size_in_bytes (type);
/* Only 8- and 16-byte vectors have a register-sized natural mode here.  */
2800 if ((size == 8 || size == 16)
2801 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
2802 && TYPE_VECTOR_SUBPARTS (type) > 1)
2804 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
/* Start scanning at the smallest vector mode of the matching element
   kind (float vs. integer).  */
2806 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
2807 mode = MIN_MODE_VECTOR_FLOAT;
2809 mode = MIN_MODE_VECTOR_INT;
2811 /* Get the mode which has this inner mode and number of units. */
2812 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
2813 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
2814 && GET_MODE_INNER (mode) == innermode)
/* NOTE(review): the return of the matching mode and the fallback path are
   on lines elided from this listing -- confirm against the full source.  */
2824 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
2825 this may not agree with the mode that the type system has chosen for the
2826 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
2827 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
2830 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
/* Simple case: the type system chose a real mode, so a plain REG works.  */
2835 if (orig_mode != BLKmode)
2836 tmp = gen_rtx_REG (orig_mode, regno);
/* BLKmode: wrap a REG in natural mode MODE inside a one-element PARALLEL,
   with the piece placed at byte offset 0 (const0_rtx).  */
2839 tmp = gen_rtx_REG (mode, regno);
2840 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
2841 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
2847 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
2848 of this code is to classify each 8bytes of incoming argument by the register
2849 class and assign registers accordingly. */
2851 /* Return the union class of CLASS1 and CLASS2.
2852 See the x86-64 PS ABI for details. */
2854 static enum x86_64_reg_class
2855 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
2857 /* Rule #1: If both classes are equal, this is the resulting class. */
2858 if (class1 == class2)
2861 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
   the other class.  */
2863 if (class1 == X86_64_NO_CLASS)
2865 if (class2 == X86_64_NO_CLASS)
2868 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
2869 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
2870 return X86_64_MEMORY_CLASS;
2872 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
2873 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
2874 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
2875 return X86_64_INTEGERSI_CLASS;
2876 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
2877 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
2878 return X86_64_INTEGER_CLASS;
2880 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
   MEMORY class is used.  */
2882 if (class1 == X86_64_X87_CLASS
2883 || class1 == X86_64_X87UP_CLASS
2884 || class1 == X86_64_COMPLEX_X87_CLASS
2885 || class2 == X86_64_X87_CLASS
2886 || class2 == X86_64_X87UP_CLASS
2887 || class2 == X86_64_COMPLEX_X87_CLASS)
2888 return X86_64_MEMORY_CLASS;
2890 /* Rule #6: Otherwise class SSE is used. */
2891 return X86_64_SSE_CLASS;
2894 /* Classify the argument of type TYPE and mode MODE.
2895 CLASSES will be filled by the register class used to pass each word
2896 of the operand. The number of words is returned. In case the parameter
2897 should be passed in memory, 0 is returned. As a special case for zero
2898 sized containers, classes[0] will be NO_CLASS and 1 is returned.
2900 BIT_OFFSET is used internally for handling records and specifies the
2901 offset in bits modulo 256 to avoid overflow cases.
2903 See the x86-64 PS ABI for details.
2907 classify_argument (enum machine_mode mode, tree type,
2908 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
2910 HOST_WIDE_INT bytes =
2911 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2912 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2914 /* Variable sized entities are always passed/returned in memory. */
2918 if (mode != VOIDmode
2919 && targetm.calls.must_pass_in_stack (mode, type))
2922 if (type && AGGREGATE_TYPE_P (type))
2926 enum x86_64_reg_class subclasses[MAX_CLASSES];
2928 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
2932 for (i = 0; i < words; i++)
2933 classes[i] = X86_64_NO_CLASS;
2935 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
2936 signalize memory class, so handle it as special case. */
2939 classes[0] = X86_64_NO_CLASS;
2943 /* Classify each field of record and merge classes. */
2944 switch (TREE_CODE (type))
2947 /* For classes first merge in the field of the subclasses. */
2948 if (TYPE_BINFO (type))
2950 tree binfo, base_binfo;
2953 for (binfo = TYPE_BINFO (type), basenum = 0;
2954 BINFO_BASE_ITERATE (binfo, basenum, base_binfo); basenum++)
2957 int offset = tree_low_cst (BINFO_OFFSET (base_binfo), 0) * 8;
2958 tree type = BINFO_TYPE (base_binfo);
2960 num = classify_argument (TYPE_MODE (type),
2962 (offset + bit_offset) % 256);
2965 for (i = 0; i < num; i++)
2967 int pos = (offset + (bit_offset % 64)) / 8 / 8;
2969 merge_classes (subclasses[i], classes[i + pos]);
2973 /* And now merge the fields of structure. */
2974 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2976 if (TREE_CODE (field) == FIELD_DECL)
2980 if (TREE_TYPE (field) == error_mark_node)
2983 /* Bitfields are always classified as integer. Handle them
2984 early, since later code would consider them to be
2985 misaligned integers. */
2986 if (DECL_BIT_FIELD (field))
2988 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
2989 i < ((int_bit_position (field) + (bit_offset % 64))
2990 + tree_low_cst (DECL_SIZE (field), 0)
2993 merge_classes (X86_64_INTEGER_CLASS,
2998 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2999 TREE_TYPE (field), subclasses,
3000 (int_bit_position (field)
3001 + bit_offset) % 256);
3004 for (i = 0; i < num; i++)
3007 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
3009 merge_classes (subclasses[i], classes[i + pos]);
3017 /* Arrays are handled as small records. */
3020 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
3021 TREE_TYPE (type), subclasses, bit_offset);