1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to
19 the Free Software Foundation, 51 Franklin Street, Fifth Floor,
20 Boston, MA 02110-1301, USA. */
24 #include "coretypes.h"
30 #include "hard-reg-set.h"
32 #include "insn-config.h"
33 #include "conditions.h"
35 #include "insn-codes.h"
36 #include "insn-attr.h"
44 #include "basic-block.h"
47 #include "target-def.h"
48 #include "langhooks.h"
50 #include "tree-gimple.h"
53 #ifndef CHECK_STACK_LIMIT
54 #define CHECK_STACK_LIMIT (-1)
57 /* Return index of given mode in mult and division cost tables. */
58 #define MODE_INDEX(mode) \
59 ((mode) == QImode ? 0 \
60 : (mode) == HImode ? 1 \
61 : (mode) == SImode ? 2 \
62 : (mode) == DImode ? 3 \
65 /* Processor costs (relative to an add) */
66 /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
67 #define COSTS_N_BYTES(N) ((N) * 2)
70 struct processor_costs size_cost = { /* costs for tuning for size */
71 COSTS_N_BYTES (2), /* cost of an add instruction */
72 COSTS_N_BYTES (3), /* cost of a lea instruction */
73 COSTS_N_BYTES (2), /* variable shift costs */
74 COSTS_N_BYTES (3), /* constant shift costs */
75 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
76 COSTS_N_BYTES (3), /* HI */
77 COSTS_N_BYTES (3), /* SI */
78 COSTS_N_BYTES (3), /* DI */
79 COSTS_N_BYTES (5)}, /* other */
80 0, /* cost of multiply per each bit set */
81 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
82 COSTS_N_BYTES (3), /* HI */
83 COSTS_N_BYTES (3), /* SI */
84 COSTS_N_BYTES (3), /* DI */
85 COSTS_N_BYTES (5)}, /* other */
86 COSTS_N_BYTES (3), /* cost of movsx */
87 COSTS_N_BYTES (3), /* cost of movzx */
90 2, /* cost for loading QImode using movzbl */
91 {2, 2, 2}, /* cost of loading integer registers
92 in QImode, HImode and SImode.
93 Relative to reg-reg move (2). */
94 {2, 2, 2}, /* cost of storing integer registers */
95 2, /* cost of reg,reg fld/fst */
96 {2, 2, 2}, /* cost of loading fp registers
97 in SFmode, DFmode and XFmode */
98 {2, 2, 2}, /* cost of storing fp registers */
99 3, /* cost of moving MMX register */
100 {3, 3}, /* cost of loading MMX registers
101 in SImode and DImode */
102 {3, 3}, /* cost of storing MMX registers
103 in SImode and DImode */
104 3, /* cost of moving SSE register */
105 {3, 3, 3}, /* cost of loading SSE registers
106 in SImode, DImode and TImode */
107 {3, 3, 3}, /* cost of storing SSE registers
108 in SImode, DImode and TImode */
109 3, /* MMX or SSE register to integer */
110 0, /* size of prefetch block */
111 0, /* number of parallel prefetches */
113 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
114 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
115 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
116 COSTS_N_BYTES (2), /* cost of FABS instruction. */
117 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
118 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
121 /* Processor costs (relative to an add) */
123 struct processor_costs i386_cost = { /* 386 specific costs */
124 COSTS_N_INSNS (1), /* cost of an add instruction */
125 COSTS_N_INSNS (1), /* cost of a lea instruction */
126 COSTS_N_INSNS (3), /* variable shift costs */
127 COSTS_N_INSNS (2), /* constant shift costs */
128 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
129 COSTS_N_INSNS (6), /* HI */
130 COSTS_N_INSNS (6), /* SI */
131 COSTS_N_INSNS (6), /* DI */
132 COSTS_N_INSNS (6)}, /* other */
133 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
134 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
135 COSTS_N_INSNS (23), /* HI */
136 COSTS_N_INSNS (23), /* SI */
137 COSTS_N_INSNS (23), /* DI */
138 COSTS_N_INSNS (23)}, /* other */
139 COSTS_N_INSNS (3), /* cost of movsx */
140 COSTS_N_INSNS (2), /* cost of movzx */
141 15, /* "large" insn */
143 4, /* cost for loading QImode using movzbl */
144 {2, 4, 2}, /* cost of loading integer registers
145 in QImode, HImode and SImode.
146 Relative to reg-reg move (2). */
147 {2, 4, 2}, /* cost of storing integer registers */
148 2, /* cost of reg,reg fld/fst */
149 {8, 8, 8}, /* cost of loading fp registers
150 in SFmode, DFmode and XFmode */
151 {8, 8, 8}, /* cost of storing fp registers */
152 2, /* cost of moving MMX register */
153 {4, 8}, /* cost of loading MMX registers
154 in SImode and DImode */
155 {4, 8}, /* cost of storing MMX registers
156 in SImode and DImode */
157 2, /* cost of moving SSE register */
158 {4, 8, 16}, /* cost of loading SSE registers
159 in SImode, DImode and TImode */
160 {4, 8, 16}, /* cost of storing SSE registers
161 in SImode, DImode and TImode */
162 3, /* MMX or SSE register to integer */
163 0, /* size of prefetch block */
164 0, /* number of parallel prefetches */
166 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
167 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
168 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
169 COSTS_N_INSNS (22), /* cost of FABS instruction. */
170 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
171 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
175 struct processor_costs i486_cost = { /* 486 specific costs */
176 COSTS_N_INSNS (1), /* cost of an add instruction */
177 COSTS_N_INSNS (1), /* cost of a lea instruction */
178 COSTS_N_INSNS (3), /* variable shift costs */
179 COSTS_N_INSNS (2), /* constant shift costs */
180 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
181 COSTS_N_INSNS (12), /* HI */
182 COSTS_N_INSNS (12), /* SI */
183 COSTS_N_INSNS (12), /* DI */
184 COSTS_N_INSNS (12)}, /* other */
185 1, /* cost of multiply per each bit set */
186 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
187 COSTS_N_INSNS (40), /* HI */
188 COSTS_N_INSNS (40), /* SI */
189 COSTS_N_INSNS (40), /* DI */
190 COSTS_N_INSNS (40)}, /* other */
191 COSTS_N_INSNS (3), /* cost of movsx */
192 COSTS_N_INSNS (2), /* cost of movzx */
193 15, /* "large" insn */
195 4, /* cost for loading QImode using movzbl */
196 {2, 4, 2}, /* cost of loading integer registers
197 in QImode, HImode and SImode.
198 Relative to reg-reg move (2). */
199 {2, 4, 2}, /* cost of storing integer registers */
200 2, /* cost of reg,reg fld/fst */
201 {8, 8, 8}, /* cost of loading fp registers
202 in SFmode, DFmode and XFmode */
203 {8, 8, 8}, /* cost of storing fp registers */
204 2, /* cost of moving MMX register */
205 {4, 8}, /* cost of loading MMX registers
206 in SImode and DImode */
207 {4, 8}, /* cost of storing MMX registers
208 in SImode and DImode */
209 2, /* cost of moving SSE register */
210 {4, 8, 16}, /* cost of loading SSE registers
211 in SImode, DImode and TImode */
212 {4, 8, 16}, /* cost of storing SSE registers
213 in SImode, DImode and TImode */
214 3, /* MMX or SSE register to integer */
215 0, /* size of prefetch block */
216 0, /* number of parallel prefetches */
218 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
219 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
220 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
221 COSTS_N_INSNS (3), /* cost of FABS instruction. */
222 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
223 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
227 struct processor_costs pentium_cost = {
228 COSTS_N_INSNS (1), /* cost of an add instruction */
229 COSTS_N_INSNS (1), /* cost of a lea instruction */
230 COSTS_N_INSNS (4), /* variable shift costs */
231 COSTS_N_INSNS (1), /* constant shift costs */
232 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
233 COSTS_N_INSNS (11), /* HI */
234 COSTS_N_INSNS (11), /* SI */
235 COSTS_N_INSNS (11), /* DI */
236 COSTS_N_INSNS (11)}, /* other */
237 0, /* cost of multiply per each bit set */
238 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
239 COSTS_N_INSNS (25), /* HI */
240 COSTS_N_INSNS (25), /* SI */
241 COSTS_N_INSNS (25), /* DI */
242 COSTS_N_INSNS (25)}, /* other */
243 COSTS_N_INSNS (3), /* cost of movsx */
244 COSTS_N_INSNS (2), /* cost of movzx */
245 8, /* "large" insn */
247 6, /* cost for loading QImode using movzbl */
248 {2, 4, 2}, /* cost of loading integer registers
249 in QImode, HImode and SImode.
250 Relative to reg-reg move (2). */
251 {2, 4, 2}, /* cost of storing integer registers */
252 2, /* cost of reg,reg fld/fst */
253 {2, 2, 6}, /* cost of loading fp registers
254 in SFmode, DFmode and XFmode */
255 {4, 4, 6}, /* cost of storing fp registers */
256 8, /* cost of moving MMX register */
257 {8, 8}, /* cost of loading MMX registers
258 in SImode and DImode */
259 {8, 8}, /* cost of storing MMX registers
260 in SImode and DImode */
261 2, /* cost of moving SSE register */
262 {4, 8, 16}, /* cost of loading SSE registers
263 in SImode, DImode and TImode */
264 {4, 8, 16}, /* cost of storing SSE registers
265 in SImode, DImode and TImode */
266 3, /* MMX or SSE register to integer */
267 0, /* size of prefetch block */
268 0, /* number of parallel prefetches */
270 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
271 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
272 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
273 COSTS_N_INSNS (1), /* cost of FABS instruction. */
274 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
275 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
279 struct processor_costs pentiumpro_cost = {
280 COSTS_N_INSNS (1), /* cost of an add instruction */
281 COSTS_N_INSNS (1), /* cost of a lea instruction */
282 COSTS_N_INSNS (1), /* variable shift costs */
283 COSTS_N_INSNS (1), /* constant shift costs */
284 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
285 COSTS_N_INSNS (4), /* HI */
286 COSTS_N_INSNS (4), /* SI */
287 COSTS_N_INSNS (4), /* DI */
288 COSTS_N_INSNS (4)}, /* other */
289 0, /* cost of multiply per each bit set */
290 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
291 COSTS_N_INSNS (17), /* HI */
292 COSTS_N_INSNS (17), /* SI */
293 COSTS_N_INSNS (17), /* DI */
294 COSTS_N_INSNS (17)}, /* other */
295 COSTS_N_INSNS (1), /* cost of movsx */
296 COSTS_N_INSNS (1), /* cost of movzx */
297 8, /* "large" insn */
299 2, /* cost for loading QImode using movzbl */
300 {4, 4, 4}, /* cost of loading integer registers
301 in QImode, HImode and SImode.
302 Relative to reg-reg move (2). */
303 {2, 2, 2}, /* cost of storing integer registers */
304 2, /* cost of reg,reg fld/fst */
305 {2, 2, 6}, /* cost of loading fp registers
306 in SFmode, DFmode and XFmode */
307 {4, 4, 6}, /* cost of storing fp registers */
308 2, /* cost of moving MMX register */
309 {2, 2}, /* cost of loading MMX registers
310 in SImode and DImode */
311 {2, 2}, /* cost of storing MMX registers
312 in SImode and DImode */
313 2, /* cost of moving SSE register */
314 {2, 2, 8}, /* cost of loading SSE registers
315 in SImode, DImode and TImode */
316 {2, 2, 8}, /* cost of storing SSE registers
317 in SImode, DImode and TImode */
318 3, /* MMX or SSE register to integer */
319 32, /* size of prefetch block */
320 6, /* number of parallel prefetches */
322 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
323 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
324 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
325 COSTS_N_INSNS (2), /* cost of FABS instruction. */
326 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
327 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
331 struct processor_costs k6_cost = {
332 COSTS_N_INSNS (1), /* cost of an add instruction */
333 COSTS_N_INSNS (2), /* cost of a lea instruction */
334 COSTS_N_INSNS (1), /* variable shift costs */
335 COSTS_N_INSNS (1), /* constant shift costs */
336 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
337 COSTS_N_INSNS (3), /* HI */
338 COSTS_N_INSNS (3), /* SI */
339 COSTS_N_INSNS (3), /* DI */
340 COSTS_N_INSNS (3)}, /* other */
341 0, /* cost of multiply per each bit set */
342 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
343 COSTS_N_INSNS (18), /* HI */
344 COSTS_N_INSNS (18), /* SI */
345 COSTS_N_INSNS (18), /* DI */
346 COSTS_N_INSNS (18)}, /* other */
347 COSTS_N_INSNS (2), /* cost of movsx */
348 COSTS_N_INSNS (2), /* cost of movzx */
349 8, /* "large" insn */
351 3, /* cost for loading QImode using movzbl */
352 {4, 5, 4}, /* cost of loading integer registers
353 in QImode, HImode and SImode.
354 Relative to reg-reg move (2). */
355 {2, 3, 2}, /* cost of storing integer registers */
356 4, /* cost of reg,reg fld/fst */
357 {6, 6, 6}, /* cost of loading fp registers
358 in SFmode, DFmode and XFmode */
359 {4, 4, 4}, /* cost of storing fp registers */
360 2, /* cost of moving MMX register */
361 {2, 2}, /* cost of loading MMX registers
362 in SImode and DImode */
363 {2, 2}, /* cost of storing MMX registers
364 in SImode and DImode */
365 2, /* cost of moving SSE register */
366 {2, 2, 8}, /* cost of loading SSE registers
367 in SImode, DImode and TImode */
368 {2, 2, 8}, /* cost of storing SSE registers
369 in SImode, DImode and TImode */
370 6, /* MMX or SSE register to integer */
371 32, /* size of prefetch block */
372 1, /* number of parallel prefetches */
374 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
375 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
376 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
377 COSTS_N_INSNS (2), /* cost of FABS instruction. */
378 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
379 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
383 struct processor_costs athlon_cost = {
384 COSTS_N_INSNS (1), /* cost of an add instruction */
385 COSTS_N_INSNS (2), /* cost of a lea instruction */
386 COSTS_N_INSNS (1), /* variable shift costs */
387 COSTS_N_INSNS (1), /* constant shift costs */
388 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
389 COSTS_N_INSNS (5), /* HI */
390 COSTS_N_INSNS (5), /* SI */
391 COSTS_N_INSNS (5), /* DI */
392 COSTS_N_INSNS (5)}, /* other */
393 0, /* cost of multiply per each bit set */
394 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
395 COSTS_N_INSNS (26), /* HI */
396 COSTS_N_INSNS (42), /* SI */
397 COSTS_N_INSNS (74), /* DI */
398 COSTS_N_INSNS (74)}, /* other */
399 COSTS_N_INSNS (1), /* cost of movsx */
400 COSTS_N_INSNS (1), /* cost of movzx */
401 8, /* "large" insn */
403 4, /* cost for loading QImode using movzbl */
404 {3, 4, 3}, /* cost of loading integer registers
405 in QImode, HImode and SImode.
406 Relative to reg-reg move (2). */
407 {3, 4, 3}, /* cost of storing integer registers */
408 4, /* cost of reg,reg fld/fst */
409 {4, 4, 12}, /* cost of loading fp registers
410 in SFmode, DFmode and XFmode */
411 {6, 6, 8}, /* cost of storing fp registers */
412 2, /* cost of moving MMX register */
413 {4, 4}, /* cost of loading MMX registers
414 in SImode and DImode */
415 {4, 4}, /* cost of storing MMX registers
416 in SImode and DImode */
417 2, /* cost of moving SSE register */
418 {4, 4, 6}, /* cost of loading SSE registers
419 in SImode, DImode and TImode */
420 {4, 4, 5}, /* cost of storing SSE registers
421 in SImode, DImode and TImode */
422 5, /* MMX or SSE register to integer */
423 64, /* size of prefetch block */
424 6, /* number of parallel prefetches */
426 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
427 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
428 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
429 COSTS_N_INSNS (2), /* cost of FABS instruction. */
430 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
431 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
435 struct processor_costs k8_cost = {
436 COSTS_N_INSNS (1), /* cost of an add instruction */
437 COSTS_N_INSNS (2), /* cost of a lea instruction */
438 COSTS_N_INSNS (1), /* variable shift costs */
439 COSTS_N_INSNS (1), /* constant shift costs */
440 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
441 COSTS_N_INSNS (4), /* HI */
442 COSTS_N_INSNS (3), /* SI */
443 COSTS_N_INSNS (4), /* DI */
444 COSTS_N_INSNS (5)}, /* other */
445 0, /* cost of multiply per each bit set */
446 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
447 COSTS_N_INSNS (26), /* HI */
448 COSTS_N_INSNS (42), /* SI */
449 COSTS_N_INSNS (74), /* DI */
450 COSTS_N_INSNS (74)}, /* other */
451 COSTS_N_INSNS (1), /* cost of movsx */
452 COSTS_N_INSNS (1), /* cost of movzx */
453 8, /* "large" insn */
455 4, /* cost for loading QImode using movzbl */
456 {3, 4, 3}, /* cost of loading integer registers
457 in QImode, HImode and SImode.
458 Relative to reg-reg move (2). */
459 {3, 4, 3}, /* cost of storing integer registers */
460 4, /* cost of reg,reg fld/fst */
461 {4, 4, 12}, /* cost of loading fp registers
462 in SFmode, DFmode and XFmode */
463 {6, 6, 8}, /* cost of storing fp registers */
464 2, /* cost of moving MMX register */
465 {3, 3}, /* cost of loading MMX registers
466 in SImode and DImode */
467 {4, 4}, /* cost of storing MMX registers
468 in SImode and DImode */
469 2, /* cost of moving SSE register */
470 {4, 3, 6}, /* cost of loading SSE registers
471 in SImode, DImode and TImode */
472 {4, 4, 5}, /* cost of storing SSE registers
473 in SImode, DImode and TImode */
474 5, /* MMX or SSE register to integer */
475 64, /* size of prefetch block */
476 6, /* number of parallel prefetches */
478 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
479 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
480 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
481 COSTS_N_INSNS (2), /* cost of FABS instruction. */
482 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
483 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
487 struct processor_costs pentium4_cost = {
488 COSTS_N_INSNS (1), /* cost of an add instruction */
489 COSTS_N_INSNS (3), /* cost of a lea instruction */
490 COSTS_N_INSNS (4), /* variable shift costs */
491 COSTS_N_INSNS (4), /* constant shift costs */
492 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
493 COSTS_N_INSNS (15), /* HI */
494 COSTS_N_INSNS (15), /* SI */
495 COSTS_N_INSNS (15), /* DI */
496 COSTS_N_INSNS (15)}, /* other */
497 0, /* cost of multiply per each bit set */
498 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
499 COSTS_N_INSNS (56), /* HI */
500 COSTS_N_INSNS (56), /* SI */
501 COSTS_N_INSNS (56), /* DI */
502 COSTS_N_INSNS (56)}, /* other */
503 COSTS_N_INSNS (1), /* cost of movsx */
504 COSTS_N_INSNS (1), /* cost of movzx */
505 16, /* "large" insn */
507 2, /* cost for loading QImode using movzbl */
508 {4, 5, 4}, /* cost of loading integer registers
509 in QImode, HImode and SImode.
510 Relative to reg-reg move (2). */
511 {2, 3, 2}, /* cost of storing integer registers */
512 2, /* cost of reg,reg fld/fst */
513 {2, 2, 6}, /* cost of loading fp registers
514 in SFmode, DFmode and XFmode */
515 {4, 4, 6}, /* cost of storing fp registers */
516 2, /* cost of moving MMX register */
517 {2, 2}, /* cost of loading MMX registers
518 in SImode and DImode */
519 {2, 2}, /* cost of storing MMX registers
520 in SImode and DImode */
521 12, /* cost of moving SSE register */
522 {12, 12, 12}, /* cost of loading SSE registers
523 in SImode, DImode and TImode */
524 {2, 2, 8}, /* cost of storing SSE registers
525 in SImode, DImode and TImode */
526 10, /* MMX or SSE register to integer */
527 64, /* size of prefetch block */
528 6, /* number of parallel prefetches */
530 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
531 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
532 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
533 COSTS_N_INSNS (2), /* cost of FABS instruction. */
534 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
535 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
539 struct processor_costs nocona_cost = {
540 COSTS_N_INSNS (1), /* cost of an add instruction */
541 COSTS_N_INSNS (1), /* cost of a lea instruction */
542 COSTS_N_INSNS (1), /* variable shift costs */
543 COSTS_N_INSNS (1), /* constant shift costs */
544 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
545 COSTS_N_INSNS (10), /* HI */
546 COSTS_N_INSNS (10), /* SI */
547 COSTS_N_INSNS (10), /* DI */
548 COSTS_N_INSNS (10)}, /* other */
549 0, /* cost of multiply per each bit set */
550 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
551 COSTS_N_INSNS (66), /* HI */
552 COSTS_N_INSNS (66), /* SI */
553 COSTS_N_INSNS (66), /* DI */
554 COSTS_N_INSNS (66)}, /* other */
555 COSTS_N_INSNS (1), /* cost of movsx */
556 COSTS_N_INSNS (1), /* cost of movzx */
557 16, /* "large" insn */
559 4, /* cost for loading QImode using movzbl */
560 {4, 4, 4}, /* cost of loading integer registers
561 in QImode, HImode and SImode.
562 Relative to reg-reg move (2). */
563 {4, 4, 4}, /* cost of storing integer registers */
564 3, /* cost of reg,reg fld/fst */
565 {12, 12, 12}, /* cost of loading fp registers
566 in SFmode, DFmode and XFmode */
567 {4, 4, 4}, /* cost of storing fp registers */
568 6, /* cost of moving MMX register */
569 {12, 12}, /* cost of loading MMX registers
570 in SImode and DImode */
571 {12, 12}, /* cost of storing MMX registers
572 in SImode and DImode */
573 6, /* cost of moving SSE register */
574 {12, 12, 12}, /* cost of loading SSE registers
575 in SImode, DImode and TImode */
576 {12, 12, 12}, /* cost of storing SSE registers
577 in SImode, DImode and TImode */
578 8, /* MMX or SSE register to integer */
579 128, /* size of prefetch block */
580 8, /* number of parallel prefetches */
582 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
583 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
584 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
585 COSTS_N_INSNS (3), /* cost of FABS instruction. */
586 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
587 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
590 /* Generic64 should produce code tuned for Nocona and K8. */
592 struct processor_costs generic64_cost = {
593 COSTS_N_INSNS (1), /* cost of an add instruction */
594 /* On all chips taken into consideration lea is 2 cycles and more. With
595 this cost however our current implementation of synth_mult results in
596 use of unnecessary temporary registers causing regression on several
597 SPECfp benchmarks. */
598 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
599 COSTS_N_INSNS (1), /* variable shift costs */
600 COSTS_N_INSNS (1), /* constant shift costs */
601 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
602 COSTS_N_INSNS (4), /* HI */
603 COSTS_N_INSNS (3), /* SI */
604 COSTS_N_INSNS (4), /* DI */
605 COSTS_N_INSNS (2)}, /* other */
606 0, /* cost of multiply per each bit set */
607 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
608 COSTS_N_INSNS (26), /* HI */
609 COSTS_N_INSNS (42), /* SI */
610 COSTS_N_INSNS (74), /* DI */
611 COSTS_N_INSNS (74)}, /* other */
612 COSTS_N_INSNS (1), /* cost of movsx */
613 COSTS_N_INSNS (1), /* cost of movzx */
614 8, /* "large" insn */
616 4, /* cost for loading QImode using movzbl */
617 {4, 4, 4}, /* cost of loading integer registers
618 in QImode, HImode and SImode.
619 Relative to reg-reg move (2). */
620 {4, 4, 4}, /* cost of storing integer registers */
621 4, /* cost of reg,reg fld/fst */
622 {12, 12, 12}, /* cost of loading fp registers
623 in SFmode, DFmode and XFmode */
624 {6, 6, 8}, /* cost of storing fp registers */
625 2, /* cost of moving MMX register */
626 {8, 8}, /* cost of loading MMX registers
627 in SImode and DImode */
628 {8, 8}, /* cost of storing MMX registers
629 in SImode and DImode */
630 2, /* cost of moving SSE register */
631 {8, 8, 8}, /* cost of loading SSE registers
632 in SImode, DImode and TImode */
633 {8, 8, 8}, /* cost of storing SSE registers
634 in SImode, DImode and TImode */
635 5, /* MMX or SSE register to integer */
636 64, /* size of prefetch block */
637 6, /* number of parallel prefetches */
638 /* Benchmarks show large regressions on K8 sixtrack benchmark when this value
639 is increased to perhaps more appropriate value of 5. */
641 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
642 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
643 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
644 COSTS_N_INSNS (8), /* cost of FABS instruction. */
645 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
646 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
649 /* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona and K8. */
651 struct processor_costs generic32_cost = {
652 COSTS_N_INSNS (1), /* cost of an add instruction */
653 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
654 COSTS_N_INSNS (1), /* variable shift costs */
655 COSTS_N_INSNS (1), /* constant shift costs */
656 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
657 COSTS_N_INSNS (4), /* HI */
658 COSTS_N_INSNS (3), /* SI */
659 COSTS_N_INSNS (4), /* DI */
660 COSTS_N_INSNS (2)}, /* other */
661 0, /* cost of multiply per each bit set */
662 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
663 COSTS_N_INSNS (26), /* HI */
664 COSTS_N_INSNS (42), /* SI */
665 COSTS_N_INSNS (74), /* DI */
666 COSTS_N_INSNS (74)}, /* other */
667 COSTS_N_INSNS (1), /* cost of movsx */
668 COSTS_N_INSNS (1), /* cost of movzx */
669 8, /* "large" insn */
671 4, /* cost for loading QImode using movzbl */
672 {4, 4, 4}, /* cost of loading integer registers
673 in QImode, HImode and SImode.
674 Relative to reg-reg move (2). */
675 {4, 4, 4}, /* cost of storing integer registers */
676 4, /* cost of reg,reg fld/fst */
677 {12, 12, 12}, /* cost of loading fp registers
678 in SFmode, DFmode and XFmode */
679 {6, 6, 8}, /* cost of storing fp registers */
680 2, /* cost of moving MMX register */
681 {8, 8}, /* cost of loading MMX registers
682 in SImode and DImode */
683 {8, 8}, /* cost of storing MMX registers
684 in SImode and DImode */
685 2, /* cost of moving SSE register */
686 {8, 8, 8}, /* cost of loading SSE registers
687 in SImode, DImode and TImode */
688 {8, 8, 8}, /* cost of storing SSE registers
689 in SImode, DImode and TImode */
690 5, /* MMX or SSE register to integer */
691 64, /* size of prefetch block */
692 6, /* number of parallel prefetches */
694 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
695 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
696 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
697 COSTS_N_INSNS (8), /* cost of FABS instruction. */
698 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
699 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
702 const struct processor_costs *ix86_cost = &pentium_cost;
704 /* Processor feature/optimization bitmasks. */
705 #define m_386 (1<<PROCESSOR_I386)
706 #define m_486 (1<<PROCESSOR_I486)
707 #define m_PENT (1<<PROCESSOR_PENTIUM)
708 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
709 #define m_K6 (1<<PROCESSOR_K6)
710 #define m_ATHLON (1<<PROCESSOR_ATHLON)
711 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
712 #define m_K8 (1<<PROCESSOR_K8)
713 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
714 #define m_NOCONA (1<<PROCESSOR_NOCONA)
715 #define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
716 #define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
717 #define m_GENERIC (m_GENERIC32 | m_GENERIC64)
719 /* Generic instruction choice should be common subset of supported CPUs
720 (PPro/PENT4/NOCONA/Athlon/K8). */
722 /* Leave is not affecting Nocona SPEC2000 results negatively, so enabling for
723 Generic64 seems like good code size tradeoff. We can't enable it for 32bit
724 generic because it is not working well with PPro base chips. */
725 const int x86_use_leave = m_386 | m_K6 | m_ATHLON_K8 | m_GENERIC64;
726 const int x86_push_memory = m_386 | m_K6 | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
727 const int x86_zero_extend_with_and = m_486 | m_PENT;
728 const int x86_movx = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA | m_GENERIC /* m_386 | m_K6 */;
729 const int x86_double_with_add = ~m_386;
730 const int x86_use_bit_test = m_386;
731 const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8 | m_K6 | m_GENERIC;
732 const int x86_cmove = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
733 const int x86_fisttp = m_NOCONA;
734 const int x86_3dnow_a = m_ATHLON_K8;
735 const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
736 /* Branch hints were put in P4 based on simulation result. But
737 after P4 was made, no performance benefit was observed with
738 branch hints. It also increases the code size. As the result,
739 icc never generates branch hints. */
740 const int x86_branch_hints = 0;
741 const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4 | m_NOCONA | m_GENERIC32; /*m_GENERIC | m_ATHLON_K8 ? */
742 /* We probably ought to watch for partial register stalls on Generic32
743 compilation setting as well. However in current implementation the
744 partial register stalls are not eliminated very well - they can
745 be introduced via subregs synthesized by combine and can happen
746 in caller/callee saving sequences.
747 Because this option pays back little on PPro based chips and is in conflict
748 with partial reg. dependencies used by Athlon/P4 based chips, it is better
749 to leave it off for generic32 for now. */
750 const int x86_partial_reg_stall = m_PPRO;
751 const int x86_use_himode_fiop = m_386 | m_486 | m_K6;
752 const int x86_use_simode_fiop = ~(m_PPRO | m_ATHLON_K8 | m_PENT | m_GENERIC);
753 const int x86_use_mov0 = m_K6;
754 const int x86_use_cltd = ~(m_PENT | m_K6 | m_GENERIC);
755 const int x86_read_modify_write = ~m_PENT;
756 const int x86_read_modify = ~(m_PENT | m_PPRO);
757 const int x86_split_long_moves = m_PPRO;
758 const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON_K8 | m_GENERIC; /* m_PENT4 ? */
759 const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
760 const int x86_single_stringop = m_386 | m_PENT4 | m_NOCONA;
761 const int x86_qimode_math = ~(0);
762 const int x86_promote_qi_regs = 0;
763 /* On PPro this flag is meant to avoid partial register stalls. Just like
764 the x86_partial_reg_stall this option might be considered for Generic32
765 if our scheme for avoiding partial stalls was more effective. */
766 const int x86_himode_math = ~(m_PPRO);
767 const int x86_promote_hi_regs = m_PPRO;
768 const int x86_sub_esp_4 = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA | m_GENERIC;
769 const int x86_sub_esp_8 = m_ATHLON_K8 | m_PPRO | m_386 | m_486 | m_PENT4 | m_NOCONA | m_GENERIC;
770 const int x86_add_esp_4 = m_ATHLON_K8 | m_K6 | m_PENT4 | m_NOCONA | m_GENERIC;
771 const int x86_add_esp_8 = m_ATHLON_K8 | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4 | m_NOCONA | m_GENERIC;
772 const int x86_integer_DFmode_moves = ~(m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_GENERIC);
773 const int x86_partial_reg_dependency = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
774 const int x86_memory_mismatch_stall = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
775 const int x86_accumulate_outgoing_args = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_GENERIC;
776 const int x86_prologue_using_move = m_ATHLON_K8 | m_PPRO | m_GENERIC;
777 const int x86_epilogue_using_move = m_ATHLON_K8 | m_PPRO | m_GENERIC;
778 const int x86_shift1 = ~m_486;
779 const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
780 /* In the Generic model we have a conflict here between PPro/Pentium4 based chips
781 that treat 128bit SSE registers as single units versus K8 based chips that
782 divide SSE registers into two 64bit halves.
783 x86_sse_partial_reg_dependency promotes all store destinations to be 128bit
784 to allow register renaming on 128bit SSE units, but usually results in one
785 extra microop on 64bit SSE units. Experimental results show that disabling
786 this option on P4 brings over 20% SPECfp regression, while enabling it on
787 K8 brings roughly 2.4% regression that can be partly masked by careful scheduling
789 const int x86_sse_partial_reg_dependency = m_PENT4 | m_NOCONA | m_PPRO | m_GENERIC;
790 /* Set for machines where the type and dependencies are resolved on SSE
791 register parts instead of whole registers, so we may maintain just the
792 lower part of scalar values in proper format leaving the upper part
794 const int x86_sse_split_regs = m_ATHLON_K8;
795 const int x86_sse_typeless_stores = m_ATHLON_K8;
796 const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4 | m_NOCONA;
797 const int x86_use_ffreep = m_ATHLON_K8;
798 const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_K6;
799 const int x86_use_incdec = ~(m_PENT4 | m_NOCONA | m_GENERIC);
801 /* ??? Allowing interunit moves makes it all too easy for the compiler to put
802 integer data in xmm registers. Which results in pretty abysmal code. */
803 const int x86_inter_unit_moves = 0 /* ~(m_ATHLON_K8) */;
805 const int x86_ext_80387_constants = m_K6 | m_ATHLON | m_PENT4 | m_NOCONA | m_PPRO | m_GENERIC32;
806 /* Some CPU cores are not able to predict more than 4 branch instructions in
807 the 16 byte window. */
808 const int x86_four_jump_limit = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
809 const int x86_schedule = m_PPRO | m_ATHLON_K8 | m_K6 | m_PENT | m_GENERIC;
810 const int x86_use_bt = m_ATHLON_K8;
811 /* Compare and exchange was added for 80486. */
812 const int x86_cmpxchg = ~m_386;
813 /* Compare and exchange 8 bytes was added for pentium. */
814 const int x86_cmpxchg8b = ~(m_386 | m_486);
815 /* Compare and exchange 16 bytes was added for nocona. */
816 const int x86_cmpxchg16b = m_NOCONA;
817 /* Exchange and add was added for 80486. */
818 const int x86_xadd = ~m_386;
819 const int x86_pad_returns = m_ATHLON_K8 | m_GENERIC;
821 /* In case the average insn count for single function invocation is
822 lower than this constant, emit fast (but longer) prologue and
824 #define FAST_PROLOGUE_INSN_COUNT 20
826 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
827 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
828 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
829 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
831 /* Array of the smallest class containing reg number REGNO, indexed by
832 REGNO. Used by REGNO_REG_CLASS in i386.h. */
834 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
837 AREG, DREG, CREG, BREG,
839 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
841 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
842 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
845 /* flags, fpsr, dirflag, frame */
846 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
847 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
849 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
851 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
852 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
853 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
857 /* The "default" register map used in 32bit mode. */
859 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
861 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
862 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
863 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
864 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
865 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
866 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
867 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
870 static int const x86_64_int_parameter_registers[6] =
872 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
873 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
876 static int const x86_64_int_return_registers[4] =
878 0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
881 /* The "default" register map used in 64bit mode. */
882 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
884 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
885 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
886 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
887 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
888 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
889 8,9,10,11,12,13,14,15, /* extended integer registers */
890 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
893 /* Define the register numbers to be used in Dwarf debugging information.
894 The SVR4 reference port C compiler uses the following register numbers
895 in its Dwarf output code:
896 0 for %eax (gcc regno = 0)
897 1 for %ecx (gcc regno = 2)
898 2 for %edx (gcc regno = 1)
899 3 for %ebx (gcc regno = 3)
900 4 for %esp (gcc regno = 7)
901 5 for %ebp (gcc regno = 6)
902 6 for %esi (gcc regno = 4)
903 7 for %edi (gcc regno = 5)
904 The following three DWARF register numbers are never generated by
905 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
906 believes these numbers have these meanings.
907 8 for %eip (no gcc equivalent)
908 9 for %eflags (gcc regno = 17)
909 10 for %trapno (no gcc equivalent)
910 It is not at all clear how we should number the FP stack registers
911 for the x86 architecture. If the version of SDB on x86/svr4 were
912 a bit less brain dead with respect to floating-point then we would
913 have a precedent to follow with respect to DWARF register numbers
914 for x86 FP registers, but the SDB on x86/svr4 is so completely
915 broken with respect to FP registers that it is hardly worth thinking
916 of it as something to strive for compatibility with.
917 The version of x86/svr4 SDB I have at the moment does (partially)
918 seem to believe that DWARF register number 11 is associated with
919 the x86 register %st(0), but that's about all. Higher DWARF
920 register numbers don't seem to be associated with anything in
921 particular, and even for DWARF regno 11, SDB only seems to under-
922 stand that it should say that a variable lives in %st(0) (when
923 asked via an `=' command) if we said it was in DWARF regno 11,
924 but SDB still prints garbage when asked for the value of the
925 variable in question (via a `/' command).
926 (Also note that the labels SDB prints for various FP stack regs
927 when doing an `x' command are all wrong.)
928 Note that these problems generally don't affect the native SVR4
929 C compiler because it doesn't allow the use of -O with -g and
930 because when it is *not* optimizing, it allocates a memory
931 location for each floating-point variable, and the memory
932 location is what gets described in the DWARF AT_location
933 attribute for the variable in question.
934 Regardless of the severe mental illness of the x86/svr4 SDB, we
935 do something sensible here and we use the following DWARF
936 register numbers. Note that these are all stack-top-relative
938 11 for %st(0) (gcc regno = 8)
939 12 for %st(1) (gcc regno = 9)
940 13 for %st(2) (gcc regno = 10)
941 14 for %st(3) (gcc regno = 11)
942 15 for %st(4) (gcc regno = 12)
943 16 for %st(5) (gcc regno = 13)
944 17 for %st(6) (gcc regno = 14)
945 18 for %st(7) (gcc regno = 15)
947 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
949 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
950 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
951 -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
952 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
953 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
954 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
955 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
958 /* Test and compare insns in i386.md store the information needed to
959 generate branch and scc insns here. */
961 rtx ix86_compare_op0 = NULL_RTX;	/* First operand of the pending comparison.  */
962 rtx ix86_compare_op1 = NULL_RTX;	/* Second operand of the pending comparison.  */
963 rtx ix86_compare_emitted = NULL_RTX;	/* NOTE(review): presumably holds the flags rtx when the compare insn has already been emitted -- confirm against i386.md users.  */
965 /* Size of the register save area. */
966 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
968 /* Define the structure for the machine field in struct function. */
970 struct stack_local_entry GTY(())
975 struct stack_local_entry *next;
978 /* Structure describing stack frame layout.
979 Stack grows downward:
985 saved frame pointer if frame_pointer_needed
986 <- HARD_FRAME_POINTER
992 > to_allocate <- FRAME_POINTER
1002 HOST_WIDE_INT frame;
1004 int outgoing_arguments_size;
1007 HOST_WIDE_INT to_allocate;
1008 /* The offsets relative to ARG_POINTER. */
1009 HOST_WIDE_INT frame_pointer_offset;
1010 HOST_WIDE_INT hard_frame_pointer_offset;
1011 HOST_WIDE_INT stack_pointer_offset;
1013 /* When save_regs_using_mov is set, emit prologue using
1014 move instead of push instructions. */
1015 bool save_regs_using_mov;
1018 /* Code model option. */
1019 enum cmodel ix86_cmodel;
1021 enum asm_dialect ix86_asm_dialect = ASM_ATT;
1023 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
1025 /* Which unit we are generating floating point math for. */
1026 enum fpmath_unit ix86_fpmath;
1028 /* Which cpu are we scheduling for. */
1029 enum processor_type ix86_tune;
1030 /* Which instruction set architecture to use. */
1031 enum processor_type ix86_arch;
1033 /* true if sse prefetch instruction is not NOOP. */
1034 int x86_prefetch_sse;
1036 /* ix86_regparm_string as a number */
1037 static int ix86_regparm;
1039 /* Preferred alignment for stack boundary in bits. */
1040 unsigned int ix86_preferred_stack_boundary;
1042 /* Values 1-5: see jump.c */
1043 int ix86_branch_cost;
1045 /* Variables which are this size or smaller are put in the data/bss
1046 or ldata/lbss sections. */
1048 int ix86_section_threshold = 65536;
1050 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
1051 char internal_label_prefix[16];
1052 int internal_label_prefix_len;
1054 /* Table for BUILT_IN_NORMAL to BUILT_IN_MD mapping. */
1055 static GTY(()) tree ix86_builtin_function_variants[(int) END_BUILTINS];
1057 static bool ix86_handle_option (size_t, const char *, int);
1058 static void output_pic_addr_const (FILE *, rtx, int);
1059 static void put_condition_code (enum rtx_code, enum machine_mode,
1061 static const char *get_some_local_dynamic_name (void);
1062 static int get_some_local_dynamic_name_1 (rtx *, void *);
1063 static rtx ix86_expand_int_compare (enum rtx_code, rtx, rtx);
1064 static enum rtx_code ix86_prepare_fp_compare_args (enum rtx_code, rtx *,
1066 static bool ix86_fixed_condition_code_regs (unsigned int *, unsigned int *);
1067 static enum machine_mode ix86_cc_modes_compatible (enum machine_mode,
1069 static rtx get_thread_pointer (int);
1070 static rtx legitimize_tls_address (rtx, enum tls_model, int);
1071 static void get_pc_thunk_name (char [32], unsigned int);
1072 static rtx gen_push (rtx);
1073 static int ix86_flags_dependant (rtx, rtx, enum attr_type);
1074 static int ix86_agi_dependant (rtx, rtx, enum attr_type);
1075 static struct machine_function * ix86_init_machine_status (void);
1076 static int ix86_split_to_parts (rtx, rtx *, enum machine_mode);
1077 static int ix86_nsaved_regs (void);
1078 static void ix86_emit_save_regs (void);
1079 static void ix86_emit_save_regs_using_mov (rtx, HOST_WIDE_INT);
1080 static void ix86_emit_restore_regs_using_mov (rtx, HOST_WIDE_INT, int);
1081 static void ix86_output_function_epilogue (FILE *, HOST_WIDE_INT);
1082 static HOST_WIDE_INT ix86_GOT_alias_set (void);
1083 static void ix86_adjust_counter (rtx, HOST_WIDE_INT);
1084 static rtx ix86_expand_aligntest (rtx, int);
1085 static void ix86_expand_strlensi_unroll_1 (rtx, rtx, rtx);
1086 static int ix86_issue_rate (void);
1087 static int ix86_adjust_cost (rtx, rtx, rtx, int);
1088 static int ia32_multipass_dfa_lookahead (void);
1089 static void ix86_init_mmx_sse_builtins (void);
1090 static void ix86_init_sse_abi_builtins (void);
1091 static rtx x86_this_parameter (tree);
1092 static void x86_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
1093 HOST_WIDE_INT, tree);
1094 static bool x86_can_output_mi_thunk (tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
1095 static void x86_file_start (void);
1096 static void ix86_reorg (void);
1097 static bool ix86_expand_carry_flag_compare (enum rtx_code, rtx, rtx, rtx*);
1098 static tree ix86_build_builtin_va_list (void);
1099 static void ix86_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
1101 static tree ix86_gimplify_va_arg (tree, tree, tree *, tree *);
1102 static bool ix86_scalar_mode_supported_p (enum machine_mode);
1103 static bool ix86_vector_mode_supported_p (enum machine_mode);
1105 static int ix86_address_cost (rtx);
1106 static bool ix86_cannot_force_const_mem (rtx);
1107 static rtx ix86_delegitimize_address (rtx);
1109 static void i386_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
1111 struct builtin_description;
1112 static rtx ix86_expand_sse_comi (const struct builtin_description *,
1114 static rtx ix86_expand_sse_compare (const struct builtin_description *,
1116 static rtx ix86_expand_unop1_builtin (enum insn_code, tree, rtx);
1117 static rtx ix86_expand_unop_builtin (enum insn_code, tree, rtx, int);
1118 static rtx ix86_expand_binop_builtin (enum insn_code, tree, rtx);
1119 static rtx ix86_expand_store_builtin (enum insn_code, tree);
1120 static rtx safe_vector_operand (rtx, enum machine_mode);
1121 static rtx ix86_expand_fp_compare (enum rtx_code, rtx, rtx, rtx, rtx *, rtx *);
1122 static int ix86_fp_comparison_arithmetics_cost (enum rtx_code code);
1123 static int ix86_fp_comparison_fcomi_cost (enum rtx_code code);
1124 static int ix86_fp_comparison_sahf_cost (enum rtx_code code);
1125 static int ix86_fp_comparison_cost (enum rtx_code code);
1126 static unsigned int ix86_select_alt_pic_regnum (void);
1127 static int ix86_save_reg (unsigned int, int);
1128 static void ix86_compute_frame_layout (struct ix86_frame *);
1129 static int ix86_comp_type_attributes (tree, tree);
1130 static int ix86_function_regparm (tree, tree);
1131 const struct attribute_spec ix86_attribute_table[];
1132 static bool ix86_function_ok_for_sibcall (tree, tree);
1133 static tree ix86_handle_cconv_attribute (tree *, tree, tree, int, bool *);
1134 static int ix86_value_regno (enum machine_mode, tree, tree);
1135 static bool contains_128bit_aligned_vector_p (tree);
1136 static rtx ix86_struct_value_rtx (tree, int);
1137 static bool ix86_ms_bitfield_layout_p (tree);
1138 static tree ix86_handle_struct_attribute (tree *, tree, tree, int, bool *);
1139 static int extended_reg_mentioned_1 (rtx *, void *);
1140 static bool ix86_rtx_costs (rtx, int, int, int *);
1141 static int min_insn_size (rtx);
1142 static tree ix86_md_asm_clobbers (tree outputs, tree inputs, tree clobbers);
1143 static bool ix86_must_pass_in_stack (enum machine_mode mode, tree type);
1144 static bool ix86_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
1146 static void ix86_init_builtins (void);
1147 static rtx ix86_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
1148 static rtx ix86_expand_library_builtin (tree, rtx, rtx, enum machine_mode, int);
1149 static const char *ix86_mangle_fundamental_type (tree);
1150 static tree ix86_stack_protect_fail (void);
1151 static rtx ix86_internal_arg_pointer (void);
1152 static void ix86_dwarf_handle_frame_unspec (const char *, rtx, int);
1154 /* This function is only used on Solaris. */
1155 static void i386_solaris_elf_named_section (const char *, unsigned int, tree)
1158 /* Register class used for passing given 64bit part of the argument.
1159 These represent classes as documented by the PS ABI, with the exception
1160 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
1161 use SF or DFmode move instead of DImode to avoid reformatting penalties.
1163 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
1164 whenever possible (upper half does contain padding).
1166 enum x86_64_reg_class
1169 X86_64_INTEGER_CLASS,
1170 X86_64_INTEGERSI_CLASS,
1177 X86_64_COMPLEX_X87_CLASS,
1180 static const char * const x86_64_reg_class_name[] = {
1181 "no", "integer", "integerSI", "sse", "sseSF", "sseDF",
1182 "sseup", "x87", "x87up", "cplx87", "no"
1185 #define MAX_CLASSES 4
1187 /* Table of constants used by fldpi, fldln2, etc.... */
1188 static REAL_VALUE_TYPE ext_80387_constants_table [5];
1189 static bool ext_80387_constants_init = 0;
1190 static void init_ext_80387_constants (void);
1191 static bool ix86_in_large_data_p (tree) ATTRIBUTE_UNUSED;
1192 static void ix86_encode_section_info (tree, rtx, int) ATTRIBUTE_UNUSED;
1193 static void x86_64_elf_unique_section (tree decl, int reloc) ATTRIBUTE_UNUSED;
1194 static section *x86_64_elf_select_section (tree decl, int reloc,
1195 unsigned HOST_WIDE_INT align)
1198 /* Initialize the GCC target structure. */
1199 #undef TARGET_ATTRIBUTE_TABLE
1200 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
1201 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
1202 # undef TARGET_MERGE_DECL_ATTRIBUTES
1203 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
1206 #undef TARGET_COMP_TYPE_ATTRIBUTES
1207 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
1209 #undef TARGET_INIT_BUILTINS
1210 #define TARGET_INIT_BUILTINS ix86_init_builtins
1211 #undef TARGET_EXPAND_BUILTIN
1212 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
1213 #undef TARGET_EXPAND_LIBRARY_BUILTIN
1214 #define TARGET_EXPAND_LIBRARY_BUILTIN ix86_expand_library_builtin
1216 #undef TARGET_ASM_FUNCTION_EPILOGUE
1217 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
1219 #undef TARGET_ENCODE_SECTION_INFO
1220 #ifndef SUBTARGET_ENCODE_SECTION_INFO
1221 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
1223 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
1226 #undef TARGET_ASM_OPEN_PAREN
1227 #define TARGET_ASM_OPEN_PAREN ""
1228 #undef TARGET_ASM_CLOSE_PAREN
1229 #define TARGET_ASM_CLOSE_PAREN ""
1231 #undef TARGET_ASM_ALIGNED_HI_OP
1232 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
1233 #undef TARGET_ASM_ALIGNED_SI_OP
1234 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
1236 #undef TARGET_ASM_ALIGNED_DI_OP
1237 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
1240 #undef TARGET_ASM_UNALIGNED_HI_OP
1241 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
1242 #undef TARGET_ASM_UNALIGNED_SI_OP
1243 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
1244 #undef TARGET_ASM_UNALIGNED_DI_OP
1245 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
1247 #undef TARGET_SCHED_ADJUST_COST
1248 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
1249 #undef TARGET_SCHED_ISSUE_RATE
1250 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
1251 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
1252 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
1253 ia32_multipass_dfa_lookahead
1255 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
1256 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
1259 #undef TARGET_HAVE_TLS
1260 #define TARGET_HAVE_TLS true
1262 #undef TARGET_CANNOT_FORCE_CONST_MEM
1263 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
1265 #undef TARGET_DELEGITIMIZE_ADDRESS
1266 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
1268 #undef TARGET_MS_BITFIELD_LAYOUT_P
1269 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
1272 #undef TARGET_BINDS_LOCAL_P
1273 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
1276 #undef TARGET_ASM_OUTPUT_MI_THUNK
1277 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
1278 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1279 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
1281 #undef TARGET_ASM_FILE_START
1282 #define TARGET_ASM_FILE_START x86_file_start
1284 #undef TARGET_DEFAULT_TARGET_FLAGS
1285 #define TARGET_DEFAULT_TARGET_FLAGS \
1287 | TARGET_64BIT_DEFAULT \
1288 | TARGET_SUBTARGET_DEFAULT \
1289 | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
1291 #undef TARGET_HANDLE_OPTION
1292 #define TARGET_HANDLE_OPTION ix86_handle_option
1294 #undef TARGET_RTX_COSTS
1295 #define TARGET_RTX_COSTS ix86_rtx_costs
1296 #undef TARGET_ADDRESS_COST
1297 #define TARGET_ADDRESS_COST ix86_address_cost
1299 #undef TARGET_FIXED_CONDITION_CODE_REGS
1300 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
1301 #undef TARGET_CC_MODES_COMPATIBLE
1302 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
1304 #undef TARGET_MACHINE_DEPENDENT_REORG
1305 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
1307 #undef TARGET_BUILD_BUILTIN_VA_LIST
1308 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
1310 #undef TARGET_MD_ASM_CLOBBERS
1311 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
1313 #undef TARGET_PROMOTE_PROTOTYPES
1314 #define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
1315 #undef TARGET_STRUCT_VALUE_RTX
1316 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
1317 #undef TARGET_SETUP_INCOMING_VARARGS
1318 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
1319 #undef TARGET_MUST_PASS_IN_STACK
1320 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
1321 #undef TARGET_PASS_BY_REFERENCE
1322 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
1323 #undef TARGET_INTERNAL_ARG_POINTER
1324 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
1325 #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
1326 #define TARGET_DWARF_HANDLE_FRAME_UNSPEC ix86_dwarf_handle_frame_unspec
1328 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
1329 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
1331 #undef TARGET_SCALAR_MODE_SUPPORTED_P
1332 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
1334 #undef TARGET_VECTOR_MODE_SUPPORTED_P
1335 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
1338 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
1339 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
1342 #ifdef SUBTARGET_INSERT_ATTRIBUTES
1343 #undef TARGET_INSERT_ATTRIBUTES
1344 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
1347 #undef TARGET_MANGLE_FUNDAMENTAL_TYPE
1348 #define TARGET_MANGLE_FUNDAMENTAL_TYPE ix86_mangle_fundamental_type
1350 #undef TARGET_STACK_PROTECT_FAIL
1351 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
1353 #undef TARGET_FUNCTION_VALUE
1354 #define TARGET_FUNCTION_VALUE ix86_function_value
1356 struct gcc_target targetm = TARGET_INITIALIZER;
1359 /* The svr4 ABI for the i386 says that records and unions are returned
1361 #ifndef DEFAULT_PCC_STRUCT_RETURN
1362 #define DEFAULT_PCC_STRUCT_RETURN 1
1365 /* Implement TARGET_HANDLE_OPTION. */
1368 ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
1375 target_flags &= ~MASK_3DNOW_A;
1376 target_flags_explicit |= MASK_3DNOW_A;
1383 target_flags &= ~(MASK_3DNOW | MASK_3DNOW_A);
1384 target_flags_explicit |= MASK_3DNOW | MASK_3DNOW_A;
1391 target_flags &= ~(MASK_SSE2 | MASK_SSE3);
1392 target_flags_explicit |= MASK_SSE2 | MASK_SSE3;
1399 target_flags &= ~MASK_SSE3;
1400 target_flags_explicit |= MASK_SSE3;
1409 /* Sometimes certain combinations of command options do not make
1410 sense on a particular target machine. You can define a macro
1411 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1412 defined, is executed once just after all the command options have
1415 Don't use this macro to turn on various extra optimizations for
1416 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1419 override_options (void)
1422 int ix86_tune_defaulted = 0;
1424 /* Comes from final.c -- no real reason to change it. */
1425 #define MAX_CODE_ALIGN 16
1429 const struct processor_costs *cost; /* Processor costs */
1430 const int target_enable; /* Target flags to enable. */
1431 const int target_disable; /* Target flags to disable. */
1432 const int align_loop; /* Default alignments. */
1433 const int align_loop_max_skip;
1434 const int align_jump;
1435 const int align_jump_max_skip;
1436 const int align_func;
1438 const processor_target_table[PROCESSOR_max] =
1440 {&i386_cost, 0, 0, 4, 3, 4, 3, 4},
1441 {&i486_cost, 0, 0, 16, 15, 16, 15, 16},
1442 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
1443 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
1444 {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
1445 {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
1446 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
1447 {&k8_cost, 0, 0, 16, 7, 16, 7, 16},
1448 {&nocona_cost, 0, 0, 0, 0, 0, 0, 0},
1449 {&generic32_cost, 0, 0, 16, 7, 16, 7, 16},
1450 {&generic64_cost, 0, 0, 16, 7, 16, 7, 16}
1453 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
1456 const char *const name; /* processor name or nickname. */
1457 const enum processor_type processor;
1458 const enum pta_flags
1464 PTA_PREFETCH_SSE = 16,
1470 const processor_alias_table[] =
1472 {"i386", PROCESSOR_I386, 0},
1473 {"i486", PROCESSOR_I486, 0},
1474 {"i586", PROCESSOR_PENTIUM, 0},
1475 {"pentium", PROCESSOR_PENTIUM, 0},
1476 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
1477 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
1478 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1479 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1480 {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_PREFETCH_SSE | PTA_SSE},
1481 {"i686", PROCESSOR_PENTIUMPRO, 0},
1482 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
1483 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
1484 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1485 {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1486 {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE | PTA_SSE2},
1487 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1488 | PTA_MMX | PTA_PREFETCH_SSE},
1489 {"pentium4m", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1490 | PTA_MMX | PTA_PREFETCH_SSE},
1491 {"prescott", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3
1492 | PTA_MMX | PTA_PREFETCH_SSE},
1493 {"nocona", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_64BIT
1494 | PTA_MMX | PTA_PREFETCH_SSE},
1495 {"k6", PROCESSOR_K6, PTA_MMX},
1496 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1497 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1498 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1500 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
1501 | PTA_3DNOW | PTA_3DNOW_A},
1502 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1503 | PTA_3DNOW_A | PTA_SSE},
1504 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1505 | PTA_3DNOW_A | PTA_SSE},
1506 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1507 | PTA_3DNOW_A | PTA_SSE},
1508 {"x86-64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_64BIT
1509 | PTA_SSE | PTA_SSE2 },
1510 {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1511 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1512 {"opteron", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1513 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1514 {"athlon64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1515 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1516 {"athlon-fx", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1517 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1518 {"generic32", PROCESSOR_GENERIC32, 0 /* flags are only used for -march switch. */ },
1519 {"generic64", PROCESSOR_GENERIC64, PTA_64BIT /* flags are only used for -march switch. */ },
1522 int const pta_size = ARRAY_SIZE (processor_alias_table);
1524 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1525 SUBTARGET_OVERRIDE_OPTIONS;
1528 /* Set the default values for switches whose default depends on TARGET_64BIT
1529 in case they weren't overwritten by command line options. */
1532 if (flag_omit_frame_pointer == 2)
1533 flag_omit_frame_pointer = 1;
1534 if (flag_asynchronous_unwind_tables == 2)
1535 flag_asynchronous_unwind_tables = 1;
1536 if (flag_pcc_struct_return == 2)
1537 flag_pcc_struct_return = 0;
1541 if (flag_omit_frame_pointer == 2)
1542 flag_omit_frame_pointer = 0;
1543 if (flag_asynchronous_unwind_tables == 2)
1544 flag_asynchronous_unwind_tables = 0;
1545 if (flag_pcc_struct_return == 2)
1546 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
1549 /* Need to check -mtune=generic first. */
1550 if (ix86_tune_string)
1552 if (!strcmp (ix86_tune_string, "generic")
1553 || !strcmp (ix86_tune_string, "i686"))
1556 ix86_tune_string = "generic64";
1558 ix86_tune_string = "generic32";
1560 else if (!strncmp (ix86_tune_string, "generic", 7))
1561 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
1565 if (ix86_arch_string)
1566 ix86_tune_string = ix86_arch_string;
1567 if (!ix86_tune_string)
1569 ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT];
1570 ix86_tune_defaulted = 1;
1573 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
1574 need to use a sensible tune option. */
1575 if (!strcmp (ix86_tune_string, "generic")
1576 || !strcmp (ix86_tune_string, "x86-64")
1577 || !strcmp (ix86_tune_string, "i686"))
1580 ix86_tune_string = "generic64";
1582 ix86_tune_string = "generic32";
1585 if (!strcmp (ix86_tune_string, "x86-64"))
1586 warning (OPT_Wdeprecated, "-mtune=x86-64 is deprecated. Use -mtune=k8 or "
1587 "-mtune=generic instead as appropriate.");
1589 if (!ix86_arch_string)
1590 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
1591 if (!strcmp (ix86_arch_string, "generic"))
1592 error ("generic CPU can be used only for -mtune= switch");
1593 if (!strncmp (ix86_arch_string, "generic", 7))
1594 error ("bad value (%s) for -march= switch", ix86_arch_string);
1596 if (ix86_cmodel_string != 0)
1598 if (!strcmp (ix86_cmodel_string, "small"))
1599 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1600 else if (!strcmp (ix86_cmodel_string, "medium"))
1601 ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
1603 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
1604 else if (!strcmp (ix86_cmodel_string, "32"))
1605 ix86_cmodel = CM_32;
1606 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
1607 ix86_cmodel = CM_KERNEL;
1608 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
1609 ix86_cmodel = CM_LARGE;
1611 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
1615 ix86_cmodel = CM_32;
1617 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1619 if (ix86_asm_string != 0)
1622 && !strcmp (ix86_asm_string, "intel"))
1623 ix86_asm_dialect = ASM_INTEL;
1624 else if (!strcmp (ix86_asm_string, "att"))
1625 ix86_asm_dialect = ASM_ATT;
1627 error ("bad value (%s) for -masm= switch", ix86_asm_string);
1629 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
1630 error ("code model %qs not supported in the %s bit mode",
1631 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
1632 if (ix86_cmodel == CM_LARGE)
1633 sorry ("code model %<large%> not supported yet");
1634 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
1635 sorry ("%i-bit mode not compiled in",
1636 (target_flags & MASK_64BIT) ? 64 : 32);
1638 for (i = 0; i < pta_size; i++)
1639 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
1641 ix86_arch = processor_alias_table[i].processor;
1642 /* Default cpu tuning to the architecture. */
1643 ix86_tune = ix86_arch;
1644 if (processor_alias_table[i].flags & PTA_MMX
1645 && !(target_flags_explicit & MASK_MMX))
1646 target_flags |= MASK_MMX;
1647 if (processor_alias_table[i].flags & PTA_3DNOW
1648 && !(target_flags_explicit & MASK_3DNOW))
1649 target_flags |= MASK_3DNOW;
1650 if (processor_alias_table[i].flags & PTA_3DNOW_A
1651 && !(target_flags_explicit & MASK_3DNOW_A))
1652 target_flags |= MASK_3DNOW_A;
1653 if (processor_alias_table[i].flags & PTA_SSE
1654 && !(target_flags_explicit & MASK_SSE))
1655 target_flags |= MASK_SSE;
1656 if (processor_alias_table[i].flags & PTA_SSE2
1657 && !(target_flags_explicit & MASK_SSE2))
1658 target_flags |= MASK_SSE2;
1659 if (processor_alias_table[i].flags & PTA_SSE3
1660 && !(target_flags_explicit & MASK_SSE3))
1661 target_flags |= MASK_SSE3;
1662 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1663 x86_prefetch_sse = true;
1664 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1665 error ("CPU you selected does not support x86-64 "
1671 error ("bad value (%s) for -march= switch", ix86_arch_string);
1673 for (i = 0; i < pta_size; i++)
1674 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
1676 ix86_tune = processor_alias_table[i].processor;
1677 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1679 if (ix86_tune_defaulted)
1681 ix86_tune_string = "x86-64";
1682 for (i = 0; i < pta_size; i++)
1683 if (! strcmp (ix86_tune_string,
1684 processor_alias_table[i].name))
1686 ix86_tune = processor_alias_table[i].processor;
1689 error ("CPU you selected does not support x86-64 "
1692 /* Intel CPUs have always interpreted SSE prefetch instructions as
1693 NOPs; so, we can enable SSE prefetch instructions even when
1694 -mtune (rather than -march) points us to a processor that has them.
1695 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
1696 higher processors. */
1697 if (TARGET_CMOVE && (processor_alias_table[i].flags & PTA_PREFETCH_SSE))
1698 x86_prefetch_sse = true;
1702 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
1705 ix86_cost = &size_cost;
1707 ix86_cost = processor_target_table[ix86_tune].cost;
1708 target_flags |= processor_target_table[ix86_tune].target_enable;
1709 target_flags &= ~processor_target_table[ix86_tune].target_disable;
1711 /* Arrange to set up i386_stack_locals for all functions. */
1712 init_machine_status = ix86_init_machine_status;
1714 /* Validate -mregparm= value. */
1715 if (ix86_regparm_string)
1717 i = atoi (ix86_regparm_string);
1718 if (i < 0 || i > REGPARM_MAX)
1719 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
1725 ix86_regparm = REGPARM_MAX;
1727 /* If the user has provided any of the -malign-* options,
1728 warn and use that value only if -falign-* is not set.
1729 Remove this code in GCC 3.2 or later. */
1730 if (ix86_align_loops_string)
1732 warning (0, "-malign-loops is obsolete, use -falign-loops");
1733 if (align_loops == 0)
1735 i = atoi (ix86_align_loops_string);
1736 if (i < 0 || i > MAX_CODE_ALIGN)
1737 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1739 align_loops = 1 << i;
1743 if (ix86_align_jumps_string)
1745 warning (0, "-malign-jumps is obsolete, use -falign-jumps");
1746 if (align_jumps == 0)
1748 i = atoi (ix86_align_jumps_string);
1749 if (i < 0 || i > MAX_CODE_ALIGN)
1750 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1752 align_jumps = 1 << i;
1756 if (ix86_align_funcs_string)
1758 warning (0, "-malign-functions is obsolete, use -falign-functions");
1759 if (align_functions == 0)
1761 i = atoi (ix86_align_funcs_string);
1762 if (i < 0 || i > MAX_CODE_ALIGN)
1763 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1765 align_functions = 1 << i;
1769 /* Default align_* from the processor table. */
1770 if (align_loops == 0)
1772 align_loops = processor_target_table[ix86_tune].align_loop;
1773 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
1775 if (align_jumps == 0)
1777 align_jumps = processor_target_table[ix86_tune].align_jump;
1778 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
1780 if (align_functions == 0)
1782 align_functions = processor_target_table[ix86_tune].align_func;
1785 /* Validate -mpreferred-stack-boundary= value, or provide default.
1786 The default of 128 bits is for Pentium III's SSE __m128, but we
1787 don't want additional code to keep the stack aligned when
1788 optimizing for code size. */
1789 ix86_preferred_stack_boundary = ((TARGET_64BIT || TARGET_MACHO || !optimize_size)
1791 if (ix86_preferred_stack_boundary_string)
1793 i = atoi (ix86_preferred_stack_boundary_string);
1794 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
1795 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
1796 TARGET_64BIT ? 4 : 2);
1798 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
1801 /* Validate -mbranch-cost= value, or provide default. */
1802 ix86_branch_cost = ix86_cost->branch_cost;
1803 if (ix86_branch_cost_string)
1805 i = atoi (ix86_branch_cost_string);
1807 error ("-mbranch-cost=%d is not between 0 and 5", i);
1809 ix86_branch_cost = i;
1811 if (ix86_section_threshold_string)
1813 i = atoi (ix86_section_threshold_string);
1815 error ("-mlarge-data-threshold=%d is negative", i);
1817 ix86_section_threshold = i;
1820 if (ix86_tls_dialect_string)
1822 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
1823 ix86_tls_dialect = TLS_DIALECT_GNU;
1824 else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
1825 ix86_tls_dialect = TLS_DIALECT_GNU2;
1826 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
1827 ix86_tls_dialect = TLS_DIALECT_SUN;
1829 error ("bad value (%s) for -mtls-dialect= switch",
1830 ix86_tls_dialect_string);
1833 /* Keep nonleaf frame pointers. */
1834 if (flag_omit_frame_pointer)
1835 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
1836 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
1837 flag_omit_frame_pointer = 1;
1839 /* If we're doing fast math, we don't care about comparison order
1840 wrt NaNs. This lets us use a shorter comparison sequence. */
1841 if (flag_unsafe_math_optimizations)
1842 target_flags &= ~MASK_IEEE_FP;
1844 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
1845 since the insns won't need emulation. */
1846 if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
1847 target_flags &= ~MASK_NO_FANCY_MATH_387;
1849 /* Likewise, if the target doesn't have a 387, or we've specified
1850 software floating point, don't use 387 inline intrinsics. */
1852 target_flags |= MASK_NO_FANCY_MATH_387;
1854 /* Turn on SSE2 builtins for -msse3. */
1856 target_flags |= MASK_SSE2;
1858 /* Turn on SSE builtins for -msse2. */
1860 target_flags |= MASK_SSE;
1862 /* Turn on MMX builtins for -msse. */
1865 target_flags |= MASK_MMX & ~target_flags_explicit;
1866 x86_prefetch_sse = true;
1869 /* Turn on MMX builtins for 3Dnow. */
1871 target_flags |= MASK_MMX;
1875 if (TARGET_ALIGN_DOUBLE)
1876 error ("-malign-double makes no sense in the 64bit mode");
1878 error ("-mrtd calling convention not supported in the 64bit mode");
1880 /* Enable by default the SSE and MMX builtins. Do allow the user to
1881 explicitly disable any of these. In particular, disabling SSE and
1882 MMX for kernel code is extremely useful. */
1884 |= ((MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE)
1885 & ~target_flags_explicit);
1889 /* i386 ABI does not specify red zone. It still makes sense to use it
1890 when programmer takes care to stack from being destroyed. */
1891 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
1892 target_flags |= MASK_NO_RED_ZONE;
1895 /* Accept -msseregparm only if at least SSE support is enabled. */
1896 if (TARGET_SSEREGPARM
1898 error ("-msseregparm used without SSE enabled");
1900 /* Accept -msselibm only if at least SSE support is enabled. */
1903 error ("-msselibm used without SSE2 enabled");
1905 /* Ignore -msselibm on 64bit targets. */
1908 error ("-msselibm used on a 64bit target");
1910 ix86_fpmath = TARGET_FPMATH_DEFAULT;
1912 if (ix86_fpmath_string != 0)
1914 if (! strcmp (ix86_fpmath_string, "387"))
1915 ix86_fpmath = FPMATH_387;
1916 else if (! strcmp (ix86_fpmath_string, "sse"))
1920 warning (0, "SSE instruction set disabled, using 387 arithmetics");
1921 ix86_fpmath = FPMATH_387;
1924 ix86_fpmath = FPMATH_SSE;
1926 else if (! strcmp (ix86_fpmath_string, "387,sse")
1927 || ! strcmp (ix86_fpmath_string, "sse,387"))
1931 warning (0, "SSE instruction set disabled, using 387 arithmetics");
1932 ix86_fpmath = FPMATH_387;
1934 else if (!TARGET_80387)
1936 warning (0, "387 instruction set disabled, using SSE arithmetics");
1937 ix86_fpmath = FPMATH_SSE;
1940 ix86_fpmath = FPMATH_SSE | FPMATH_387;
1943 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
1946 /* If the i387 is disabled, then do not return values in it. */
1948 target_flags &= ~MASK_FLOAT_RETURNS;
1950 if ((x86_accumulate_outgoing_args & TUNEMASK)
1951 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
1953 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
1955 /* ??? Unwind info is not correct around the CFG unless either a frame
1956 pointer is present or M_A_O_A is set. Fixing this requires rewriting
1957 unwind info generation to be aware of the CFG and propagating states
1959 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
1960 || flag_exceptions || flag_non_call_exceptions)
1961 && flag_omit_frame_pointer
1962 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
1964 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
1965 warning (0, "unwind tables currently require either a frame pointer "
1966 "or -maccumulate-outgoing-args for correctness");
1967 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
1970 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
1973 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
1974 p = strchr (internal_label_prefix, 'X');
1975 internal_label_prefix_len = p - internal_label_prefix;
1979 /* When scheduling description is not available, disable scheduler pass
1980 so it won't slow down the compilation and make x87 code slower. */
1981 if (!TARGET_SCHEDULE)
1982 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
1985 /* switch to the appropriate section for output of DECL.
1986 DECL is either a `VAR_DECL' node or a constant of some sort.
1987 RELOC indicates whether forming the initial value of DECL requires
1988 link-time relocations. */
/* NOTE(review): this extract is missing interior source lines (the embedded
   line numbers jump), so the break/fall-through structure of the switch
   below cannot be fully confirmed from this view alone.  */
1991 x86_64_elf_select_section (tree decl, int reloc,
1992 unsigned HOST_WIDE_INT align)
/* For the medium code models, large data objects go into separate
   .ldata*/.lbss sections instead of the default ones.  */
1994 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
1995 && ix86_in_large_data_p (decl))
1997 const char *sname = NULL;
1998 unsigned int flags = SECTION_WRITE;
1999 switch (categorize_decl_for_section (decl, reloc, flag_pic))
2004 case SECCAT_DATA_REL:
2005 sname = ".ldata.rel";
2007 case SECCAT_DATA_REL_LOCAL:
2008 sname = ".ldata.rel.local";
2010 case SECCAT_DATA_REL_RO:
2011 sname = ".ldata.rel.ro";
2013 case SECCAT_DATA_REL_RO_LOCAL:
2014 sname = ".ldata.rel.ro.local";
2018 flags |= SECTION_BSS;
2021 case SECCAT_RODATA_MERGE_STR:
2022 case SECCAT_RODATA_MERGE_STR_INIT:
2023 case SECCAT_RODATA_MERGE_CONST:
2027 case SECCAT_SRODATA:
2034 /* We don't split these for medium model. Place them into
2035 default sections and hope for best. */
2040 /* We might get called with string constants, but get_named_section
2041 doesn't like them as they are not DECLs. Also, we need to set
2042 flags in that case. */
2044 return get_section (sname, flags, NULL);
2045 return get_named_section (decl, sname, reloc);
/* Not medium-model large data: defer to the generic ELF selection.  */
2048 return default_elf_select_section (decl, reloc, align);
2051 /* Build up a unique section name, expressed as a
2052 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
2053 RELOC indicates whether the initial value of EXP requires
2054 link-time relocations. */
2057 x86_64_elf_unique_section (tree decl, int reloc)
/* Mirror of x86_64_elf_select_section: medium-model large data gets a
   per-decl section whose name is built from an .ldata/.lbss/.lrodata
   (or .gnu.linkonce.l*) prefix plus the decl's assembler name.  */
2059 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2060 && ix86_in_large_data_p (decl))
2062 const char *prefix = NULL;
2063 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
2064 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
2066 switch (categorize_decl_for_section (decl, reloc, flag_pic))
2069 case SECCAT_DATA_REL:
2070 case SECCAT_DATA_REL_LOCAL:
2071 case SECCAT_DATA_REL_RO:
2072 case SECCAT_DATA_REL_RO_LOCAL:
2073 prefix = one_only ? ".gnu.linkonce.ld." : ".ldata.";
2076 prefix = one_only ? ".gnu.linkonce.lb." : ".lbss.";
2079 case SECCAT_RODATA_MERGE_STR:
2080 case SECCAT_RODATA_MERGE_STR_INIT:
2081 case SECCAT_RODATA_MERGE_CONST:
2082 prefix = one_only ? ".gnu.linkonce.lr." : ".lrodata.";
2084 case SECCAT_SRODATA:
2091 /* We don't split these for medium model. Place them into
2092 default sections and hope for best. */
/* Concatenate PREFIX and the stripped assembler name on the stack;
   build_string copies it, so the alloca buffer need not outlive this
   function.  */
2100 plen = strlen (prefix);
2102 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
2103 name = targetm.strip_name_encoding (name);
2104 nlen = strlen (name);
2106 string = alloca (nlen + plen + 1);
2107 memcpy (string, prefix, plen);
2108 memcpy (string + plen, name, nlen + 1);
2110 DECL_SECTION_NAME (decl) = build_string (nlen + plen, string);
2114 default_unique_section (decl, reloc);
2117 #ifdef COMMON_ASM_OP
2118 /* This says how to output assembler code to declare an
2119 uninitialized external linkage data object.
2121 For medium model x86-64 we need to use .largecomm opcode for
2124 x86_elf_aligned_common (FILE *file,
2125 const char *name, unsigned HOST_WIDE_INT size,
/* NOTE(review): SIZE is unsigned HOST_WIDE_INT but the threshold is cast
   to unsigned int before the comparison; this relies on
   ix86_section_threshold being a non-negative value that fits in
   32 bits -- confirm against the option parsing in override_options.  */
2128 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2129 && size > (unsigned int)ix86_section_threshold)
2130 fprintf (file, ".largecomm\t");
2132 fprintf (file, "%s", COMMON_ASM_OP);
2133 assemble_name (file, name);
/* Emit ",<size>,<alignment in bytes>" after the symbol name.  */
2134 fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
2135 size, align / BITS_PER_UNIT);
2138 /* Utility function for targets to use in implementing
2139 ASM_OUTPUT_ALIGNED_BSS. */
/* NOTE(review): DECL is marked ATTRIBUTE_UNUSED yet it is referenced below
   (get_named_section and last_assemble_variable_decl); the attribute is
   harmless but misleading and could be dropped.  */
2142 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
2143 const char *name, unsigned HOST_WIDE_INT size,
/* Medium-model large BSS objects go to .lbss; everything else to .bss.  */
2146 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2147 && size > (unsigned int)ix86_section_threshold)
2148 switch_to_section (get_named_section (decl, ".lbss", 0));
2150 switch_to_section (bss_section);
2151 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
2152 #ifdef ASM_DECLARE_OBJECT_NAME
2153 last_assemble_variable_decl = decl;
2154 ASM_DECLARE_OBJECT_NAME (file, name, decl);
2156 /* Standard thing is just output label for the object. */
2157 ASM_OUTPUT_LABEL (file, name);
2158 #endif /* ASM_DECLARE_OBJECT_NAME */
/* Reserve at least one byte so the label refers to real storage.  */
2159 ASM_OUTPUT_SKIP (file, size ? size : 1);
/* Set default optimization-dependent flags; called once per compilation
   with the -O LEVEL before the target options are finalized.  */
2164 optimization_options (int level, int size ATTRIBUTE_UNUSED)
2166 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
2167 make the problem with not enough registers even worse. */
2168 #ifdef INSN_SCHEDULING
2170 flag_schedule_insns = 0;
2174 /* The Darwin libraries never set errno, so we might as well
2175 avoid calling them when that's the only reason we would. */
2176 flag_errno_math = 0;
2178 /* The default values of these switches depend on the TARGET_64BIT
2179 that is not known at this moment. Mark these values with 2 and
2180 let the user override these. In case there is no command line option
2181 specifying them, we will set the defaults in override_options. */
2183 flag_omit_frame_pointer = 2;
2184 flag_pcc_struct_return = 2;
2185 flag_asynchronous_unwind_tables = 2;
2186 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
2187 SUBTARGET_OPTIMIZATION_OPTIONS;
2191 /* Table of valid machine attributes. */
2192 const struct attribute_spec ix86_attribute_table[] =
2194 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
2195 /* Stdcall attribute says callee is responsible for popping arguments
2196 if they are not variable. */
2197 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2198 /* Fastcall attribute says callee is responsible for popping arguments
2199 if they are not variable. */
2200 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2201 /* Cdecl attribute says the callee is a normal C declaration */
2202 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2203 /* Regparm attribute specifies how many integer arguments are to be
2204 passed in registers. */
2205 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute },
2206 /* Sseregparm attribute says we are using x86_64 calling conventions
2207 for FP arguments. */
2208 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2209 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
/* Windows DLL import/export and shared-section attributes.  */
2210 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
2211 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
2212 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
/* Struct-layout attributes selecting MSVC vs. GCC field packing.  */
2214 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
2215 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
2216 #ifdef SUBTARGET_ATTRIBUTE_TABLE
2217 SUBTARGET_ATTRIBUTE_TABLE,
/* Sentinel entry terminating the table.  */
2219 { NULL, 0, 0, false, false, false, NULL }
2222 /* Decide whether we can make a sibling call to a function. DECL is the
2223 declaration of the function being targeted by the call and EXP is the
2224 CALL_EXPR representing the call. */
/* NOTE(review): interior lines of this function are missing from this
   extract (the embedded line numbers jump); the comments below only
   describe what the visible lines establish.  */
2227 ix86_function_ok_for_sibcall (tree decl, tree exp)
2232 /* If we are generating position-independent code, we cannot sibcall
2233 optimize any indirect call, or a direct call to a global function,
2234 as the PLT requires %ebx be live. */
2235 if (!TARGET_64BIT && flag_pic && (!decl || !targetm.binds_local_p (decl)))
/* Fetch the called function's type, looking through a pointer type when
   the callee is reached via a function pointer.  */
2242 func = TREE_TYPE (TREE_OPERAND (exp, 0));
2243 if (POINTER_TYPE_P (func))
2244 func = TREE_TYPE (func);
2247 /* Check that the return value locations are the same. Like
2248 if we are returning floats on the 80387 register stack, we cannot
2249 make a sibcall from a function that doesn't return a float to a
2250 function that does or, conversely, from a function that does return
2251 a float to a function that doesn't; the necessary stack adjustment
2252 would not be executed. This is also the place we notice
2253 differences in the return value ABI. Note that it is ok for one
2254 of the functions to have void return type as long as the return
2255 value of the other is passed in a register. */
2256 a = ix86_function_value (TREE_TYPE (exp), func, false);
2257 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
2259 if (STACK_REG_P (a) || STACK_REG_P (b))
2261 if (!rtx_equal_p (a, b))
2264 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
2266 else if (!rtx_equal_p (a, b))
2269 /* If this call is indirect, we'll need to be able to use a call-clobbered
2270 register for the address of the target function. Make sure that all
2271 such registers are not used for passing parameters. */
2272 if (!decl && !TARGET_64BIT)
2276 /* We're looking at the CALL_EXPR, we need the type of the function. */
2277 type = TREE_OPERAND (exp, 0); /* pointer expression */
2278 type = TREE_TYPE (type); /* pointer type */
2279 type = TREE_TYPE (type); /* function type */
2281 if (ix86_function_regparm (type, NULL) >= 3)
2283 /* ??? Need to count the actual number of registers to be used,
2284 not the possible number of registers. Fix later. */
2289 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
2290 /* Dllimport'd functions are also called indirectly. */
2291 if (decl && DECL_DLLIMPORT_P (decl)
2292 && ix86_function_regparm (TREE_TYPE (decl), NULL) >= 3)
2296 /* If we forced aligned the stack, then sibcalling would unalign the
2297 stack, which may break the called function. */
2298 if (cfun->machine->force_align_arg_pointer)
2301 /* Otherwise okay. That also includes certain types of indirect calls. */
2305 /* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm"
2306 calling convention attributes;
2307 arguments as in struct attribute_spec.handler. */
2310 ix86_handle_cconv_attribute (tree *node, tree name,
2312 int flags ATTRIBUTE_UNUSED,
/* Reject the attribute outright on anything that is not a function or
   method type (or a FIELD_DECL/TYPE_DECL wrapping one).  */
2315 if (TREE_CODE (*node) != FUNCTION_TYPE
2316 && TREE_CODE (*node) != METHOD_TYPE
2317 && TREE_CODE (*node) != FIELD_DECL
2318 && TREE_CODE (*node) != TYPE_DECL)
2320 warning (OPT_Wattributes, "%qs attribute only applies to functions",
2321 IDENTIFIER_POINTER (name));
2322 *no_add_attrs = true;
2326 /* Can combine regparm with all attributes but fastcall. */
2327 if (is_attribute_p ("regparm", name))
2331 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2333 error ("fastcall and regparm attributes are not compatible");
/* Validate regparm's single argument: it must be an integer constant
   no larger than REGPARM_MAX.  */
2336 cst = TREE_VALUE (args);
2337 if (TREE_CODE (cst) != INTEGER_CST)
2339 warning (OPT_Wattributes,
2340 "%qs attribute requires an integer constant argument",
2341 IDENTIFIER_POINTER (name));
2342 *no_add_attrs = true;
2344 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
2346 warning (OPT_Wattributes, "argument to %qs attribute larger than %d",
2347 IDENTIFIER_POINTER (name), REGPARM_MAX);
2348 *no_add_attrs = true;
2356 warning (OPT_Wattributes, "%qs attribute ignored",
2357 IDENTIFIER_POINTER (name));
2358 *no_add_attrs = true;
2362 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
2363 if (is_attribute_p ("fastcall", name))
2365 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
2367 error ("fastcall and cdecl attributes are not compatible");
2369 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
2371 error ("fastcall and stdcall attributes are not compatible");
2373 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
2375 error ("fastcall and regparm attributes are not compatible");
2379 /* Can combine stdcall with fastcall (redundant), regparm and
2381 else if (is_attribute_p ("stdcall", name))
2383 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
2385 error ("stdcall and cdecl attributes are not compatible");
2387 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2389 error ("stdcall and fastcall attributes are not compatible");
2393 /* Can combine cdecl with regparm and sseregparm. */
2394 else if (is_attribute_p ("cdecl", name))
2396 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
2398 error ("stdcall and cdecl attributes are not compatible");
2400 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2402 error ("fastcall and cdecl attributes are not compatible");
2406 /* Can combine sseregparm with all attributes. */
2411 /* Return 0 if the attributes for two types are incompatible, 1 if they
2412 are compatible, and 2 if they are nearly compatible (which causes a
2413 warning to be generated). */
2416 ix86_comp_type_attributes (tree type1, tree type2)
2418 /* Check for mismatch of non-default calling convention. */
2419 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
/* Non-function types carry none of these attributes.  */
2421 if (TREE_CODE (type1) != FUNCTION_TYPE)
2424 /* Check for mismatched fastcall/regparm types. */
/* The double negation (!lookup != !lookup) compares presence of the
   attribute on each type, ignoring the attribute trees themselves.  */
2425 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
2426 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
2427 || (ix86_function_regparm (type1, NULL)
2428 != ix86_function_regparm (type2, NULL)))
2431 /* Check for mismatched sseregparm types. */
2432 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
2433 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
2436 /* Check for mismatched return types (cdecl vs stdcall). */
2437 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
2438 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
2444 /* Return the regparm value for a function with the indicated TYPE and DECL.
2445 DECL may be NULL when calling function indirectly
2446 or considering a libcall. */
2449 ix86_function_regparm (tree type, tree decl)
2452 int regparm = ix86_regparm;
2453 bool user_convention = false;
/* An explicit regparm(N) attribute overrides the -mregparm default.  */
2457 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
2460 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
2461 user_convention = true;
2464 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
2467 user_convention = true;
2470 /* Use register calling convention for local functions when possible. */
2471 if (!TARGET_64BIT && !user_convention && decl
2472 && flag_unit_at_a_time && !profile_flag)
2474 struct cgraph_local_info *i = cgraph_local_info (decl)
2477 int local_regparm, globals = 0, regno;
2479 /* Make sure no regparm register is taken by a global register
2481 for (local_regparm = 0; local_regparm < 3; local_regparm++)
2482 if (global_regs[local_regparm])
2484 /* We can't use regparm(3) for nested functions as these use
2485 static chain pointer in third argument. */
2486 if (local_regparm == 3
2487 && decl_function_context (decl)
2488 && !DECL_NO_STATIC_CHAIN (decl))
2490 /* Each global register variable increases register pressure,
2491 so the more global reg vars there are, the smaller regparm
2492 optimization use, unless requested by the user explicitly. */
2493 for (regno = 0; regno < 6; regno++)
2494 if (global_regs[regno])
2497 = globals < local_regparm ? local_regparm - globals : 0;
2499 if (local_regparm > regparm)
2500 regparm = local_regparm;
2507 /* Return 1 or 2, if we can pass up to 8 SFmode (1) and DFmode (2) arguments
2508 in SSE registers for a function with the indicated TYPE and DECL.
2509 DECL may be NULL when calling function indirectly
2510 or considering a libcall. Otherwise return 0. */
2513 ix86_function_sseregparm (tree type, tree decl)
2515 /* Use SSE registers to pass SFmode and DFmode arguments if requested
2516 by the sseregparm attribute. */
2517 if (TARGET_SSEREGPARM
2519 && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
/* NOTE(review): these diagnostics start with a capital letter, which is
   against GCC diagnostic conventions; they are runtime strings, so left
   untouched here -- flagged for a separate fix.  */
2524 error ("Calling %qD with attribute sseregparm without "
2525 "SSE/SSE2 enabled", decl);
2527 error ("Calling %qT with attribute sseregparm without "
2528 "SSE/SSE2 enabled", type);
2535 /* For local functions, pass SFmode (and DFmode for SSE2) arguments
2536 in SSE registers even for 32-bit mode and not just 3, but up to
2537 8 SSE arguments in registers. */
2538 if (!TARGET_64BIT && decl
2539 && TARGET_SSE_MATH && flag_unit_at_a_time && !profile_flag)
2541 struct cgraph_local_info *i = cgraph_local_info (decl);
2543 return TARGET_SSE2 ? 2 : 1;
2549 /* Return true if EAX is live at the start of the function. Used by
2550 ix86_expand_prologue to determine if we need special help before
2551 calling allocate_stack_worker. */
2554 ix86_eax_live_at_start_p (void)
2556 /* Cheat. Don't bother working forward from ix86_function_regparm
2557 to the function type to whether an actual argument is located in
2558 eax. Instead just look at cfg info, which is still close enough
2559 to correct at this point. This gives false positives for broken
2560 functions that might use uninitialized data that happens to be
2561 allocated in eax, but who cares? */
/* Register 0 is AX on this target; query liveness at the entry block.  */
2562 return REGNO_REG_SET_P (ENTRY_BLOCK_PTR->il.rtl->global_live_at_end, 0);
2565 /* Value is the number of bytes of arguments automatically
2566 popped when returning from a subroutine call.
2567 FUNDECL is the declaration node of the function (as a tree),
2568 FUNTYPE is the data type of the function (as a tree),
2569 or for a library call it is an identifier node for the subroutine name.
2570 SIZE is the number of bytes of arguments passed on the stack.
2572 On the 80386, the RTD insn may be used to pop them if the number
2573 of args is fixed, but if the number is variable then the caller
2574 must pop them all. RTD can't be used for library calls now
2575 because the library is compiled with the Unix compiler.
2576 Use of RTD is a selectable option, since it is incompatible with
2577 standard Unix calling sequences. If the option is not selected,
2578 the caller must always pop the args.
2580 The attribute stdcall is equivalent to RTD on a per module basis. */
2583 ix86_return_pops_args (tree fundecl, tree funtype, int size)
/* -mrtd only applies to real functions, not library-call identifiers.  */
2585 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
2587 /* Cdecl functions override -mrtd, and never pop the stack. */
2588 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
2590 /* Stdcall and fastcall functions will pop the stack if not
2592 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
2593 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
/* A fixed argument list ends in void_type_node; only then may the
   callee pop.  */
2597 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
2598 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
2599 == void_type_node)))
2603 /* Lose any fake structure return argument if it is passed on the stack. */
2604 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
2606 && !KEEP_AGGREGATE_RETURN_POINTER)
2608 int nregs = ix86_function_regparm (funtype, fundecl);
/* The hidden return pointer occupies one word on the stack.  */
2611 return GET_MODE_SIZE (Pmode);
2617 /* Argument support functions. */
2619 /* Return true when register may be used to pass function parameters. */
2621 ix86_function_arg_regno_p (int regno)
/* 32-bit case: integer regparm registers, plus MMX/SSE argument
   registers when the corresponding ISA is enabled.  */
2625 return (regno < REGPARM_MAX
2626 || (TARGET_MMX && MMX_REGNO_P (regno)
2627 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
2628 || (TARGET_SSE && SSE_REGNO_P (regno)
2629 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
/* 64-bit case below.  */
2631 if (TARGET_SSE && SSE_REGNO_P (regno)
2632 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
2634 /* RAX is used as hidden argument to va_arg functions. */
/* Otherwise check the x86-64 integer parameter register list.  */
2637 for (i = 0; i < REGPARM_MAX; i++)
2638 if (regno == x86_64_int_parameter_registers[i])
2643 /* Return if we do not know how to pass TYPE solely in registers. */
2646 ix86_must_pass_in_stack (enum machine_mode mode, tree type)
/* Defer to the generic test first (variable size / padding rules).  */
2648 if (must_pass_in_stack_var_size_or_pad (mode, type))
2651 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
2652 The layout_type routine is crafty and tries to trick us into passing
2653 currently unsupported vector types on the stack by using TImode. */
2654 return (!TARGET_64BIT && mode == TImode
2655 && type && TREE_CODE (type) != VECTOR_TYPE);
2658 /* Initialize a variable CUM of type CUMULATIVE_ARGS
2659 for a call to a function whose data type is FNTYPE.
2660 For a library call, FNTYPE is 0. */
2663 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
2664 tree fntype, /* tree ptr for function decl */
2665 rtx libname, /* SYMBOL_REF of library name or 0 */
2668 static CUMULATIVE_ARGS zero_cum;
2669 tree param, next_param;
/* Optional tracing of argument-passing decisions under -mdebug-arg.  */
2671 if (TARGET_DEBUG_ARG)
2673 fprintf (stderr, "\ninit_cumulative_args (");
2675 fprintf (stderr, "fntype code = %s, ret code = %s",
2676 tree_code_name[(int) TREE_CODE (fntype)],
2677 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
2679 fprintf (stderr, "no fntype");
2682 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
2687 /* Set up the number of registers to use for passing arguments. */
2688 cum->nregs = ix86_regparm;
2690 cum->sse_nregs = SSE_REGPARM_MAX;
2692 cum->mmx_nregs = MMX_REGPARM_MAX;
2693 cum->warn_sse = true;
2694 cum->warn_mmx = true;
2695 cum->maybe_vaarg = false;
2697 /* Use ecx and edx registers if function has fastcall attribute,
2698 else look for regparm information. */
2699 if (fntype && !TARGET_64BIT)
2701 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
2707 cum->nregs = ix86_function_regparm (fntype, fndecl);
2710 /* Set up the number of SSE registers used for passing SFmode
2711 and DFmode arguments. Warn for mismatching ABI. */
2712 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl);
2714 /* Determine if this function has variable arguments. This is
2715 indicated by the last argument being 'void_type_mode' if there
2716 are no variable arguments. If there are variable arguments, then
2717 we won't pass anything in registers in 32-bit mode. */
2719 if (cum->nregs || cum->mmx_nregs || cum->sse_nregs)
2721 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
2722 param != 0; param = next_param)
2724 next_param = TREE_CHAIN (param);
/* Last parameter entry not being void_type_node means varargs.  */
2725 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
2735 cum->float_in_sse = 0;
2737 cum->maybe_vaarg = true;
/* No prototype at all also forces the conservative varargs path.  */
2741 if ((!fntype && !libname)
2742 || (fntype && !TYPE_ARG_TYPES (fntype)))
2743 cum->maybe_vaarg = true;
2745 if (TARGET_DEBUG_ARG)
2746 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
2751 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
2752 But in the case of vector types, it is some vector mode.
2754 When we have only some of our vector isa extensions enabled, then there
2755 are some modes for which vector_mode_supported_p is false. For these
2756 modes, the generic vector support in gcc will choose some non-vector mode
2757 in order to implement the type. By computing the natural mode, we'll
2758 select the proper ABI location for the operand and not depend on whatever
2759 the middle-end decides to do with these vector types. */
2761 static enum machine_mode
2762 type_natural_mode (tree type)
2764 enum machine_mode mode = TYPE_MODE (type);
/* Only reconsider vector types the middle-end mapped to a non-vector
   mode; 8- and 16-byte vectors are the ABI-relevant sizes.  */
2766 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
2768 HOST_WIDE_INT size = int_size_in_bytes (type);
2769 if ((size == 8 || size == 16)
2770 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
2771 && TYPE_VECTOR_SUBPARTS (type) > 1)
2773 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
2775 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
2776 mode = MIN_MODE_VECTOR_FLOAT;
2778 mode = MIN_MODE_VECTOR_INT;
2780 /* Get the mode which has this inner mode and number of units. */
2781 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
2782 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
2783 && GET_MODE_INNER (mode) == innermode)
2793 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
2794 this may not agree with the mode that the type system has chosen for the
2795 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
2796 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
/* NOTE(review): the excerpt drops the 'static rtx' return-type line, the
   second signature line carrying the REGNO parameter, the declaration of
   'tmp', the 'else', and the trailing 'return tmp;' -- verify against the
   full i386.c.  */
2799 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
/* If the type system produced a usable (non-BLK) mode, a plain REG in
   ORIG_MODE suffices.  */
2804 if (orig_mode != BLKmode)
2805 tmp = gen_rtx_REG (orig_mode, regno);
/* Otherwise build a one-element PARALLEL: a REG in the natural MODE at
   byte offset 0 (const0_rtx), wrapped so the BLKmode value is still
   described as living in that register.  */
2808 tmp = gen_rtx_REG (mode, regno);
2809 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
2810 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
2816 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
2817 of this code is to classify each 8bytes of incoming argument by the register
2818 class and assign registers accordingly. */
2820 /* Return the union class of CLASS1 and CLASS2.
2821 See the x86-64 PS ABI for details. */
2823 static enum x86_64_reg_class
2824 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
/* NOTE(review): the 'return' statements for rules #1 and #2 (upstream
   lines 2828, 2833, 2835: return class1/class2/class1) are missing from
   this excerpt -- confirm control flow against the full i386.c.  */
2826 /* Rule #1: If both classes are equal, this is the resulting class. */
2827 if (class1 == class2)
/* Rule #2's comment continuation ("the other class.") was dropped here.  */
2830 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
2832 if (class1 == X86_64_NO_CLASS)
2834 if (class2 == X86_64_NO_CLASS)
2837 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
2838 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
2839 return X86_64_MEMORY_CLASS;
2841 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
/* Special case of rule #4: merging INTEGERSI with SSESF (both 4-byte
   wide) stays in the narrow INTEGERSI class rather than widening.  */
2842 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
2843 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
2844 return X86_64_INTEGERSI_CLASS;
2845 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
2846 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
2847 return X86_64_INTEGER_CLASS;
/* Rule #5's comment continuation ("MEMORY is used.") was dropped here.  */
2849 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
2851 if (class1 == X86_64_X87_CLASS
2852 || class1 == X86_64_X87UP_CLASS
2853 || class1 == X86_64_COMPLEX_X87_CLASS
2854 || class2 == X86_64_X87_CLASS
2855 || class2 == X86_64_X87UP_CLASS
2856 || class2 == X86_64_COMPLEX_X87_CLASS)
2857 return X86_64_MEMORY_CLASS;
2859 /* Rule #6: Otherwise class SSE is used. */
2860 return X86_64_SSE_CLASS;
2863 /* Classify the argument of type TYPE and mode MODE.
2864 CLASSES will be filled by the register class used to pass each word
2865 of the operand. The number of words is returned. In case the parameter
2866 should be passed in memory, 0 is returned. As a special case for zero
2867 sized containers, classes[0] will be NO_CLASS and 1 is returned.
2869 BIT_OFFSET is used internally for handling records and specifies the
2870 offset in bits modulo 256 to avoid overflow cases.
2872 See the x86-64 PS ABI for details.
2876 classify_argument (enum machine_mode mode, tree type,
2877 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
2879 HOST_WIDE_INT bytes =
2880 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2881 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2883 /* Variable sized entities are always passed/returned in memory. */
2887 if (mode != VOIDmode
2888 && targetm.calls.must_pass_in_stack (mode, type))
2891 if (type && AGGREGATE_TYPE_P (type))
2895 enum x86_64_reg_class subclasses[MAX_CLASSES];
2897 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
2901 for (i = 0; i < words; i++)
2902 classes[i] = X86_64_NO_CLASS;
2904 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
2905 signalize memory class, so handle it as special case. */
2908 classes[0] = X86_64_NO_CLASS;
2912 /* Classify each field of record and merge classes. */
2913 switch (TREE_CODE (type))
2916 /* For classes first merge in the field of the subclasses. */
2917 if (TYPE_BINFO (type))
2919 tree binfo, base_binfo;
2922 for (binfo = TYPE_BINFO (type), basenum = 0;
2923 BINFO_BASE_ITERATE (binfo, basenum, base_binfo); basenum++)
2926 int offset = tree_low_cst (BINFO_OFFSET (base_binfo), 0) * 8;
2927 tree type = BINFO_TYPE (base_binfo);
2929 num = classify_argument (TYPE_MODE (type),
2931 (offset + bit_offset) % 256);
2934 for (i = 0; i < num; i++)
2936 int pos = (offset + (bit_offset % 64)) / 8 / 8;
2938 merge_classes (subclasses[i], classes[i + pos]);
2942 /* And now merge the fields of structure. */
2943 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2945 if (TREE_CODE (field) == FIELD_DECL)
2949 /* Bitfields are always classified as integer. Handle them
2950 early, since later code would consider them to be
2951 misaligned integers. */
2952 if (DECL_BIT_FIELD (field))
2954 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
2955 i < ((int_bit_position (field) + (bit_offset % 64))
2956 + tree_low_cst (DECL_SIZE (field), 0)
2959 merge_classes (X86_64_INTEGER_CLASS,
2964 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2965 TREE_TYPE (field), subclasses,
2966 (int_bit_position (field)
2967 + bit_offset) % 256);
2970 for (i = 0; i < num; i++)
2973 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
2975 merge_classes (subclasses[i], classes[i + pos]);
2983 /* Arrays are handled as small records. */
2986 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
2987 TREE_TYPE (type), subclasses, bit_offset);
2991 /* The partial classes are now full classes. */
2992 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
2993 subclasses[0] = X86_64_SSE_CLASS;
2994 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
2995 subclasses[0] = X86_64_INTEGER_CLASS;
2997 for (i = 0; i < words; i++)
2998 classes[i] = subclasses[i % num];
3003 case QUAL_UNION_TYPE:
3004 /* Unions are similar to RECORD_TYPE but offset is always 0.
3007 /* Unions are not derived. */
3008 gcc_assert (!TYPE_BINFO (type)
3009 || !BINFO_N_BASE_BINFOS (TYPE_BINFO (type)));
3010 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3012 if (TREE_CODE (field) == FIELD_DECL)
3015 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3016 TREE_TYPE (field), subclasses,
3020 for (i = 0; i < num; i++)
3021 classes[i] = merge_classes (subclasses[i], classes[i]);
3030 /* Final merger cleanup. */
3031 for (i = 0; i < words; i++)