1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to
19 the Free Software Foundation, 51 Franklin Street, Fifth Floor,
20 Boston, MA 02110-1301, USA. */
24 #include "coretypes.h"
30 #include "hard-reg-set.h"
32 #include "insn-config.h"
33 #include "conditions.h"
35 #include "insn-codes.h"
36 #include "insn-attr.h"
44 #include "basic-block.h"
47 #include "target-def.h"
48 #include "langhooks.h"
50 #include "tree-gimple.h"
52 #include "tm-constrs.h"
55 #ifndef CHECK_STACK_LIMIT
56 #define CHECK_STACK_LIMIT (-1)
57 #endif
59 /* Return index of given mode in mult and division cost tables. */
60 #define MODE_INDEX(mode) \
61 ((mode) == QImode ? 0 \
62 : (mode) == HImode ? 1 \
63 : (mode) == SImode ? 2 \
64 : (mode) == DImode ? 3 \
65 : 4)
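/* A sketch of how MODE_INDEX is meant to be used with the per-mode cost
   arrays in the tables below (the field names mult_init/mult_bit are
   assumed from the processor_costs layout these initializers follow):

     int cost = ix86_cost->mult_init[MODE_INDEX (mode)]
                + nbits * ix86_cost->mult_bit;

   i.e. a multiply is charged a mode-dependent startup cost plus a
   per-set-bit cost for constant multipliers.  */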
67 /* Processor costs (relative to an add) */
68 /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
69 #define COSTS_N_BYTES(N) ((N) * 2)
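/* Worked example of the size metric, assuming COSTS_N_INSNS (N) == (N) * 4:
   COSTS_N_BYTES (2) == 4 == COSTS_N_INSNS (1), so a 2-byte instruction
   (an add) is charged the same as one insn unit, and a 6-byte instruction
   costs COSTS_N_BYTES (6) == COSTS_N_INSNS (3).  */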
71 #define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
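/* How to read the stringop_algs initializers in the cost tables below
   (a sketch; the layout is inferred from the initializers themselves):
   the first member is the algorithm used when the block size is unknown
   at compile time, followed by {max_size, algorithm} pairs for known
   sizes, with a -1 entry meaning "any larger size".  For example

     {libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}}

   selects a simple loop for blocks up to 6 bytes, an unrolled loop up to
   14 bytes, and a rep-prefixed move for anything bigger, while unknown
   sizes go through a library call.  Each table pairs two such entries;
   judging by where DUMMY_STRINGOP_ALGS appears, the first is used for
   32-bit and the second for 64-bit code, with the dummy filling whichever
   half is never used for that processor.  */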
74 struct processor_costs size_cost = { /* costs for tuning for size */
75 COSTS_N_BYTES (2), /* cost of an add instruction */
76 COSTS_N_BYTES (3), /* cost of a lea instruction */
77 COSTS_N_BYTES (2), /* variable shift costs */
78 COSTS_N_BYTES (3), /* constant shift costs */
79 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
80 COSTS_N_BYTES (3), /* HI */
81 COSTS_N_BYTES (3), /* SI */
82 COSTS_N_BYTES (3), /* DI */
83 COSTS_N_BYTES (5)}, /* other */
84 0, /* cost of multiply per each bit set */
85 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
86 COSTS_N_BYTES (3), /* HI */
87 COSTS_N_BYTES (3), /* SI */
88 COSTS_N_BYTES (3), /* DI */
89 COSTS_N_BYTES (5)}, /* other */
90 COSTS_N_BYTES (3), /* cost of movsx */
91 COSTS_N_BYTES (3), /* cost of movzx */
94 2, /* cost for loading QImode using movzbl */
95 {2, 2, 2}, /* cost of loading integer registers
96 in QImode, HImode and SImode.
97 Relative to reg-reg move (2). */
98 {2, 2, 2}, /* cost of storing integer registers */
99 2, /* cost of reg,reg fld/fst */
100 {2, 2, 2}, /* cost of loading fp registers
101 in SFmode, DFmode and XFmode */
102 {2, 2, 2}, /* cost of storing fp registers
103 in SFmode, DFmode and XFmode */
104 3, /* cost of moving MMX register */
105 {3, 3}, /* cost of loading MMX registers
106 in SImode and DImode */
107 {3, 3}, /* cost of storing MMX registers
108 in SImode and DImode */
109 3, /* cost of moving SSE register */
110 {3, 3, 3}, /* cost of loading SSE registers
111 in SImode, DImode and TImode */
112 {3, 3, 3}, /* cost of storing SSE registers
113 in SImode, DImode and TImode */
114 3, /* MMX or SSE register to integer */
115 0, /* size of prefetch block */
116 0, /* number of parallel prefetches */
118 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
119 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
120 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
121 COSTS_N_BYTES (2), /* cost of FABS instruction. */
122 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
123 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
124 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
125 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
126 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
127 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}}
130 /* Processor costs (relative to an add) */
132 struct processor_costs i386_cost = { /* 386 specific costs */
133 COSTS_N_INSNS (1), /* cost of an add instruction */
134 COSTS_N_INSNS (1), /* cost of a lea instruction */
135 COSTS_N_INSNS (3), /* variable shift costs */
136 COSTS_N_INSNS (2), /* constant shift costs */
137 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
138 COSTS_N_INSNS (6), /* HI */
139 COSTS_N_INSNS (6), /* SI */
140 COSTS_N_INSNS (6), /* DI */
141 COSTS_N_INSNS (6)}, /* other */
142 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
143 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
144 COSTS_N_INSNS (23), /* HI */
145 COSTS_N_INSNS (23), /* SI */
146 COSTS_N_INSNS (23), /* DI */
147 COSTS_N_INSNS (23)}, /* other */
148 COSTS_N_INSNS (3), /* cost of movsx */
149 COSTS_N_INSNS (2), /* cost of movzx */
150 15, /* "large" insn */
152 4, /* cost for loading QImode using movzbl */
153 {2, 4, 2}, /* cost of loading integer registers
154 in QImode, HImode and SImode.
155 Relative to reg-reg move (2). */
156 {2, 4, 2}, /* cost of storing integer registers */
157 2, /* cost of reg,reg fld/fst */
158 {8, 8, 8}, /* cost of loading fp registers
159 in SFmode, DFmode and XFmode */
160 {8, 8, 8}, /* cost of storing fp registers
161 in SFmode, DFmode and XFmode */
162 2, /* cost of moving MMX register */
163 {4, 8}, /* cost of loading MMX registers
164 in SImode and DImode */
165 {4, 8}, /* cost of storing MMX registers
166 in SImode and DImode */
167 2, /* cost of moving SSE register */
168 {4, 8, 16}, /* cost of loading SSE registers
169 in SImode, DImode and TImode */
170 {4, 8, 16}, /* cost of storing SSE registers
171 in SImode, DImode and TImode */
172 3, /* MMX or SSE register to integer */
173 0, /* size of prefetch block */
174 0, /* number of parallel prefetches */
176 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
177 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
178 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
179 COSTS_N_INSNS (22), /* cost of FABS instruction. */
180 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
181 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
182 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
183 DUMMY_STRINGOP_ALGS},
184 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
185 DUMMY_STRINGOP_ALGS},
189 struct processor_costs i486_cost = { /* 486 specific costs */
190 COSTS_N_INSNS (1), /* cost of an add instruction */
191 COSTS_N_INSNS (1), /* cost of a lea instruction */
192 COSTS_N_INSNS (3), /* variable shift costs */
193 COSTS_N_INSNS (2), /* constant shift costs */
194 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
195 COSTS_N_INSNS (12), /* HI */
196 COSTS_N_INSNS (12), /* SI */
197 COSTS_N_INSNS (12), /* DI */
198 COSTS_N_INSNS (12)}, /* other */
199 1, /* cost of multiply per each bit set */
200 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
201 COSTS_N_INSNS (40), /* HI */
202 COSTS_N_INSNS (40), /* SI */
203 COSTS_N_INSNS (40), /* DI */
204 COSTS_N_INSNS (40)}, /* other */
205 COSTS_N_INSNS (3), /* cost of movsx */
206 COSTS_N_INSNS (2), /* cost of movzx */
207 15, /* "large" insn */
209 4, /* cost for loading QImode using movzbl */
210 {2, 4, 2}, /* cost of loading integer registers
211 in QImode, HImode and SImode.
212 Relative to reg-reg move (2). */
213 {2, 4, 2}, /* cost of storing integer registers */
214 2, /* cost of reg,reg fld/fst */
215 {8, 8, 8}, /* cost of loading fp registers
216 in SFmode, DFmode and XFmode */
217 {8, 8, 8}, /* cost of storing fp registers
218 in SFmode, DFmode and XFmode */
219 2, /* cost of moving MMX register */
220 {4, 8}, /* cost of loading MMX registers
221 in SImode and DImode */
222 {4, 8}, /* cost of storing MMX registers
223 in SImode and DImode */
224 2, /* cost of moving SSE register */
225 {4, 8, 16}, /* cost of loading SSE registers
226 in SImode, DImode and TImode */
227 {4, 8, 16}, /* cost of storing SSE registers
228 in SImode, DImode and TImode */
229 3, /* MMX or SSE register to integer */
230 0, /* size of prefetch block */
231 0, /* number of parallel prefetches */
233 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
234 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
235 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
236 COSTS_N_INSNS (3), /* cost of FABS instruction. */
237 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
238 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
239 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
240 DUMMY_STRINGOP_ALGS},
241 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
246 struct processor_costs pentium_cost = {
247 COSTS_N_INSNS (1), /* cost of an add instruction */
248 COSTS_N_INSNS (1), /* cost of a lea instruction */
249 COSTS_N_INSNS (4), /* variable shift costs */
250 COSTS_N_INSNS (1), /* constant shift costs */
251 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
252 COSTS_N_INSNS (11), /* HI */
253 COSTS_N_INSNS (11), /* SI */
254 COSTS_N_INSNS (11), /* DI */
255 COSTS_N_INSNS (11)}, /* other */
256 0, /* cost of multiply per each bit set */
257 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
258 COSTS_N_INSNS (25), /* HI */
259 COSTS_N_INSNS (25), /* SI */
260 COSTS_N_INSNS (25), /* DI */
261 COSTS_N_INSNS (25)}, /* other */
262 COSTS_N_INSNS (3), /* cost of movsx */
263 COSTS_N_INSNS (2), /* cost of movzx */
264 8, /* "large" insn */
266 6, /* cost for loading QImode using movzbl */
267 {2, 4, 2}, /* cost of loading integer registers
268 in QImode, HImode and SImode.
269 Relative to reg-reg move (2). */
270 {2, 4, 2}, /* cost of storing integer registers */
271 2, /* cost of reg,reg fld/fst */
272 {2, 2, 6}, /* cost of loading fp registers
273 in SFmode, DFmode and XFmode */
274 {4, 4, 6}, /* cost of storing fp registers
275 in SFmode, DFmode and XFmode */
276 8, /* cost of moving MMX register */
277 {8, 8}, /* cost of loading MMX registers
278 in SImode and DImode */
279 {8, 8}, /* cost of storing MMX registers
280 in SImode and DImode */
281 2, /* cost of moving SSE register */
282 {4, 8, 16}, /* cost of loading SSE registers
283 in SImode, DImode and TImode */
284 {4, 8, 16}, /* cost of storing SSE registers
285 in SImode, DImode and TImode */
286 3, /* MMX or SSE register to integer */
287 0, /* size of prefetch block */
288 0, /* number of parallel prefetches */
290 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
291 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
292 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
293 COSTS_N_INSNS (1), /* cost of FABS instruction. */
294 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
295 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
296 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
297 DUMMY_STRINGOP_ALGS},
298 {{libcall, {{-1, rep_prefix_4_byte}}},
303 struct processor_costs pentiumpro_cost = {
304 COSTS_N_INSNS (1), /* cost of an add instruction */
305 COSTS_N_INSNS (1), /* cost of a lea instruction */
306 COSTS_N_INSNS (1), /* variable shift costs */
307 COSTS_N_INSNS (1), /* constant shift costs */
308 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
309 COSTS_N_INSNS (4), /* HI */
310 COSTS_N_INSNS (4), /* SI */
311 COSTS_N_INSNS (4), /* DI */
312 COSTS_N_INSNS (4)}, /* other */
313 0, /* cost of multiply per each bit set */
314 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
315 COSTS_N_INSNS (17), /* HI */
316 COSTS_N_INSNS (17), /* SI */
317 COSTS_N_INSNS (17), /* DI */
318 COSTS_N_INSNS (17)}, /* other */
319 COSTS_N_INSNS (1), /* cost of movsx */
320 COSTS_N_INSNS (1), /* cost of movzx */
321 8, /* "large" insn */
323 2, /* cost for loading QImode using movzbl */
324 {4, 4, 4}, /* cost of loading integer registers
325 in QImode, HImode and SImode.
326 Relative to reg-reg move (2). */
327 {2, 2, 2}, /* cost of storing integer registers */
328 2, /* cost of reg,reg fld/fst */
329 {2, 2, 6}, /* cost of loading fp registers
330 in SFmode, DFmode and XFmode */
331 {4, 4, 6}, /* cost of storing fp registers
332 in SFmode, DFmode and XFmode */
333 2, /* cost of moving MMX register */
334 {2, 2}, /* cost of loading MMX registers
335 in SImode and DImode */
336 {2, 2}, /* cost of storing MMX registers
337 in SImode and DImode */
338 2, /* cost of moving SSE register */
339 {2, 2, 8}, /* cost of loading SSE registers
340 in SImode, DImode and TImode */
341 {2, 2, 8}, /* cost of storing SSE registers
342 in SImode, DImode and TImode */
343 3, /* MMX or SSE register to integer */
344 32, /* size of prefetch block */
345 6, /* number of parallel prefetches */
347 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
348 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
349 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
350 COSTS_N_INSNS (2), /* cost of FABS instruction. */
351 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
352 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
353 /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes (we
354 ensure the alignment). For small blocks an inline loop is still a noticeable
355 win; for bigger blocks either rep movsl or rep movsb is the way to go. Rep
356 movsb apparently has a more expensive startup time in the CPU, but after 4K
357 the difference is down in the noise. */
358 {{rep_prefix_4_byte, {{128, loop}, {1024, unrolled_loop},
359 {8192, rep_prefix_4_byte}, {-1, rep_prefix_1_byte}}},
360 DUMMY_STRINGOP_ALGS},
361 {{rep_prefix_4_byte, {{1024, unrolled_loop},
362 {8192, rep_prefix_4_byte}, {-1, libcall}}},
367 struct processor_costs geode_cost = {
368 COSTS_N_INSNS (1), /* cost of an add instruction */
369 COSTS_N_INSNS (1), /* cost of a lea instruction */
370 COSTS_N_INSNS (2), /* variable shift costs */
371 COSTS_N_INSNS (1), /* constant shift costs */
372 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
373 COSTS_N_INSNS (4), /* HI */
374 COSTS_N_INSNS (7), /* SI */
375 COSTS_N_INSNS (7), /* DI */
376 COSTS_N_INSNS (7)}, /* other */
377 0, /* cost of multiply per each bit set */
378 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
379 COSTS_N_INSNS (23), /* HI */
380 COSTS_N_INSNS (39), /* SI */
381 COSTS_N_INSNS (39), /* DI */
382 COSTS_N_INSNS (39)}, /* other */
383 COSTS_N_INSNS (1), /* cost of movsx */
384 COSTS_N_INSNS (1), /* cost of movzx */
385 8, /* "large" insn */
387 1, /* cost for loading QImode using movzbl */
388 {1, 1, 1}, /* cost of loading integer registers
389 in QImode, HImode and SImode.
390 Relative to reg-reg move (2). */
391 {1, 1, 1}, /* cost of storing integer registers */
392 1, /* cost of reg,reg fld/fst */
393 {1, 1, 1}, /* cost of loading fp registers
394 in SFmode, DFmode and XFmode */
395 {4, 6, 6}, /* cost of storing fp registers
396 in SFmode, DFmode and XFmode */
398 1, /* cost of moving MMX register */
399 {1, 1}, /* cost of loading MMX registers
400 in SImode and DImode */
401 {1, 1}, /* cost of storing MMX registers
402 in SImode and DImode */
403 1, /* cost of moving SSE register */
404 {1, 1, 1}, /* cost of loading SSE registers
405 in SImode, DImode and TImode */
406 {1, 1, 1}, /* cost of storing SSE registers
407 in SImode, DImode and TImode */
408 1, /* MMX or SSE register to integer */
409 32, /* size of prefetch block */
410 1, /* number of parallel prefetches */
412 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
413 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
414 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
415 COSTS_N_INSNS (1), /* cost of FABS instruction. */
416 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
417 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
418 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
419 DUMMY_STRINGOP_ALGS},
420 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
425 struct processor_costs k6_cost = {
426 COSTS_N_INSNS (1), /* cost of an add instruction */
427 COSTS_N_INSNS (2), /* cost of a lea instruction */
428 COSTS_N_INSNS (1), /* variable shift costs */
429 COSTS_N_INSNS (1), /* constant shift costs */
430 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
431 COSTS_N_INSNS (3), /* HI */
432 COSTS_N_INSNS (3), /* SI */
433 COSTS_N_INSNS (3), /* DI */
434 COSTS_N_INSNS (3)}, /* other */
435 0, /* cost of multiply per each bit set */
436 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
437 COSTS_N_INSNS (18), /* HI */
438 COSTS_N_INSNS (18), /* SI */
439 COSTS_N_INSNS (18), /* DI */
440 COSTS_N_INSNS (18)}, /* other */
441 COSTS_N_INSNS (2), /* cost of movsx */
442 COSTS_N_INSNS (2), /* cost of movzx */
443 8, /* "large" insn */
445 3, /* cost for loading QImode using movzbl */
446 {4, 5, 4}, /* cost of loading integer registers
447 in QImode, HImode and SImode.
448 Relative to reg-reg move (2). */
449 {2, 3, 2}, /* cost of storing integer registers */
450 4, /* cost of reg,reg fld/fst */
451 {6, 6, 6}, /* cost of loading fp registers
452 in SFmode, DFmode and XFmode */
453 {4, 4, 4}, /* cost of storing fp registers
454 in SFmode, DFmode and XFmode */
455 2, /* cost of moving MMX register */
456 {2, 2}, /* cost of loading MMX registers
457 in SImode and DImode */
458 {2, 2}, /* cost of storing MMX registers
459 in SImode and DImode */
460 2, /* cost of moving SSE register */
461 {2, 2, 8}, /* cost of loading SSE registers
462 in SImode, DImode and TImode */
463 {2, 2, 8}, /* cost of storing SSE registers
464 in SImode, DImode and TImode */
465 6, /* MMX or SSE register to integer */
466 32, /* size of prefetch block */
467 1, /* number of parallel prefetches */
469 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
470 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
471 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
472 COSTS_N_INSNS (2), /* cost of FABS instruction. */
473 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
474 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
475 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
476 DUMMY_STRINGOP_ALGS},
477 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
482 struct processor_costs athlon_cost = {
483 COSTS_N_INSNS (1), /* cost of an add instruction */
484 COSTS_N_INSNS (2), /* cost of a lea instruction */
485 COSTS_N_INSNS (1), /* variable shift costs */
486 COSTS_N_INSNS (1), /* constant shift costs */
487 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
488 COSTS_N_INSNS (5), /* HI */
489 COSTS_N_INSNS (5), /* SI */
490 COSTS_N_INSNS (5), /* DI */
491 COSTS_N_INSNS (5)}, /* other */
492 0, /* cost of multiply per each bit set */
493 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
494 COSTS_N_INSNS (26), /* HI */
495 COSTS_N_INSNS (42), /* SI */
496 COSTS_N_INSNS (74), /* DI */
497 COSTS_N_INSNS (74)}, /* other */
498 COSTS_N_INSNS (1), /* cost of movsx */
499 COSTS_N_INSNS (1), /* cost of movzx */
500 8, /* "large" insn */
502 4, /* cost for loading QImode using movzbl */
503 {3, 4, 3}, /* cost of loading integer registers
504 in QImode, HImode and SImode.
505 Relative to reg-reg move (2). */
506 {3, 4, 3}, /* cost of storing integer registers */
507 4, /* cost of reg,reg fld/fst */
508 {4, 4, 12}, /* cost of loading fp registers
509 in SFmode, DFmode and XFmode */
510 {6, 6, 8}, /* cost of storing fp registers
511 in SFmode, DFmode and XFmode */
512 2, /* cost of moving MMX register */
513 {4, 4}, /* cost of loading MMX registers
514 in SImode and DImode */
515 {4, 4}, /* cost of storing MMX registers
516 in SImode and DImode */
517 2, /* cost of moving SSE register */
518 {4, 4, 6}, /* cost of loading SSE registers
519 in SImode, DImode and TImode */
520 {4, 4, 5}, /* cost of storing SSE registers
521 in SImode, DImode and TImode */
522 5, /* MMX or SSE register to integer */
523 64, /* size of prefetch block */
524 6, /* number of parallel prefetches */
526 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
527 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
528 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
529 COSTS_N_INSNS (2), /* cost of FABS instruction. */
530 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
531 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
532 /* For some reason, Athlon deals better with REP prefix (relative to loops)
533 compared to K8. Alignment becomes important after 8 bytes for memcpy and
534 128 bytes for memset. */
535 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
536 DUMMY_STRINGOP_ALGS},
537 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
542 struct processor_costs k8_cost = {
543 COSTS_N_INSNS (1), /* cost of an add instruction */
544 COSTS_N_INSNS (2), /* cost of a lea instruction */
545 COSTS_N_INSNS (1), /* variable shift costs */
546 COSTS_N_INSNS (1), /* constant shift costs */
547 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
548 COSTS_N_INSNS (4), /* HI */
549 COSTS_N_INSNS (3), /* SI */
550 COSTS_N_INSNS (4), /* DI */
551 COSTS_N_INSNS (5)}, /* other */
552 0, /* cost of multiply per each bit set */
553 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
554 COSTS_N_INSNS (26), /* HI */
555 COSTS_N_INSNS (42), /* SI */
556 COSTS_N_INSNS (74), /* DI */
557 COSTS_N_INSNS (74)}, /* other */
558 COSTS_N_INSNS (1), /* cost of movsx */
559 COSTS_N_INSNS (1), /* cost of movzx */
560 8, /* "large" insn */
562 4, /* cost for loading QImode using movzbl */
563 {3, 4, 3}, /* cost of loading integer registers
564 in QImode, HImode and SImode.
565 Relative to reg-reg move (2). */
566 {3, 4, 3}, /* cost of storing integer registers */
567 4, /* cost of reg,reg fld/fst */
568 {4, 4, 12}, /* cost of loading fp registers
569 in SFmode, DFmode and XFmode */
570 {6, 6, 8}, /* cost of storing fp registers
571 in SFmode, DFmode and XFmode */
572 2, /* cost of moving MMX register */
573 {3, 3}, /* cost of loading MMX registers
574 in SImode and DImode */
575 {4, 4}, /* cost of storing MMX registers
576 in SImode and DImode */
577 2, /* cost of moving SSE register */
578 {4, 3, 6}, /* cost of loading SSE registers
579 in SImode, DImode and TImode */
580 {4, 4, 5}, /* cost of storing SSE registers
581 in SImode, DImode and TImode */
582 5, /* MMX or SSE register to integer */
583 64, /* size of prefetch block */
584 /* New AMD processors never drop prefetches; if they cannot be performed
585 immediately, they are queued. We set the number of simultaneous prefetches
586 to a large constant to reflect this (it probably is not a good idea not
587 to limit the number of prefetches at all, as their execution also takes
588 some time). */
589 100, /* number of parallel prefetches */
591 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
592 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
593 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
594 COSTS_N_INSNS (2), /* cost of FABS instruction. */
595 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
596 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
597 /* K8 has optimized REP instruction for medium sized blocks, but for very small
598 blocks it is better to use a loop. For large blocks, libcall can do
599 nontemporal accesses and beat inline code considerably. */
600 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
601 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
602 {{libcall, {{8, loop}, {24, unrolled_loop},
603 {2048, rep_prefix_4_byte}, {-1, libcall}}},
604 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}}
607 struct processor_costs amdfam10_cost = {
608 COSTS_N_INSNS (1), /* cost of an add instruction */
609 COSTS_N_INSNS (2), /* cost of a lea instruction */
610 COSTS_N_INSNS (1), /* variable shift costs */
611 COSTS_N_INSNS (1), /* constant shift costs */
612 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
613 COSTS_N_INSNS (4), /* HI */
614 COSTS_N_INSNS (3), /* SI */
615 COSTS_N_INSNS (4), /* DI */
616 COSTS_N_INSNS (5)}, /* other */
617 0, /* cost of multiply per each bit set */
618 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
619 COSTS_N_INSNS (35), /* HI */
620 COSTS_N_INSNS (51), /* SI */
621 COSTS_N_INSNS (83), /* DI */
622 COSTS_N_INSNS (83)}, /* other */
623 COSTS_N_INSNS (1), /* cost of movsx */
624 COSTS_N_INSNS (1), /* cost of movzx */
625 8, /* "large" insn */
627 4, /* cost for loading QImode using movzbl */
628 {3, 4, 3}, /* cost of loading integer registers
629 in QImode, HImode and SImode.
630 Relative to reg-reg move (2). */
631 {3, 4, 3}, /* cost of storing integer registers */
632 4, /* cost of reg,reg fld/fst */
633 {4, 4, 12}, /* cost of loading fp registers
634 in SFmode, DFmode and XFmode */
635 {6, 6, 8}, /* cost of storing fp registers
636 in SFmode, DFmode and XFmode */
637 2, /* cost of moving MMX register */
638 {3, 3}, /* cost of loading MMX registers
639 in SImode and DImode */
640 {4, 4}, /* cost of storing MMX registers
641 in SImode and DImode */
642 2, /* cost of moving SSE register */
643 {4, 4, 3}, /* cost of loading SSE registers
644 in SImode, DImode and TImode */
645 {4, 4, 5}, /* cost of storing SSE registers
646 in SImode, DImode and TImode */
647 3, /* MMX or SSE register to integer */
648 /* On K8:
649 MOVD reg64, xmmreg Double FSTORE 4
650 MOVD reg32, xmmreg Double FSTORE 4
651 On AMDFAM10:
652 MOVD reg64, xmmreg Double FADD 3
654 MOVD reg32, xmmreg Double FADD 3 */
656 64, /* size of prefetch block */
657 /* New AMD processors never drop prefetches; if they cannot be performed
658 immediately, they are queued. We set the number of simultaneous prefetches
659 to a large constant to reflect this (it probably is not a good idea not
660 to limit the number of prefetches at all, as their execution also takes
661 some time). */
662 100, /* number of parallel prefetches */
664 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
665 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
666 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
667 COSTS_N_INSNS (2), /* cost of FABS instruction. */
668 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
669 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
671 /* AMDFAM10 has optimized REP instruction for medium sized blocks, but for
672 very small blocks it is better to use a loop. For large blocks, libcall can
673 do nontemporal accesses and beat inline code considerably. */
674 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
675 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
676 {{libcall, {{8, loop}, {24, unrolled_loop},
677 {2048, rep_prefix_4_byte}, {-1, libcall}}},
678 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}}
682 struct processor_costs pentium4_cost = {
683 COSTS_N_INSNS (1), /* cost of an add instruction */
684 COSTS_N_INSNS (3), /* cost of a lea instruction */
685 COSTS_N_INSNS (4), /* variable shift costs */
686 COSTS_N_INSNS (4), /* constant shift costs */
687 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
688 COSTS_N_INSNS (15), /* HI */
689 COSTS_N_INSNS (15), /* SI */
690 COSTS_N_INSNS (15), /* DI */
691 COSTS_N_INSNS (15)}, /* other */
692 0, /* cost of multiply per each bit set */
693 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
694 COSTS_N_INSNS (56), /* HI */
695 COSTS_N_INSNS (56), /* SI */
696 COSTS_N_INSNS (56), /* DI */
697 COSTS_N_INSNS (56)}, /* other */
698 COSTS_N_INSNS (1), /* cost of movsx */
699 COSTS_N_INSNS (1), /* cost of movzx */
700 16, /* "large" insn */
702 2, /* cost for loading QImode using movzbl */
703 {4, 5, 4}, /* cost of loading integer registers
704 in QImode, HImode and SImode.
705 Relative to reg-reg move (2). */
706 {2, 3, 2}, /* cost of storing integer registers */
707 2, /* cost of reg,reg fld/fst */
708 {2, 2, 6}, /* cost of loading fp registers
709 in SFmode, DFmode and XFmode */
710 {4, 4, 6}, /* cost of storing fp registers
711 in SFmode, DFmode and XFmode */
712 2, /* cost of moving MMX register */
713 {2, 2}, /* cost of loading MMX registers
714 in SImode and DImode */
715 {2, 2}, /* cost of storing MMX registers
716 in SImode and DImode */
717 12, /* cost of moving SSE register */
718 {12, 12, 12}, /* cost of loading SSE registers
719 in SImode, DImode and TImode */
720 {2, 2, 8}, /* cost of storing SSE registers
721 in SImode, DImode and TImode */
722 10, /* MMX or SSE register to integer */
723 64, /* size of prefetch block */
724 6, /* number of parallel prefetches */
726 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
727 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
728 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
729 COSTS_N_INSNS (2), /* cost of FABS instruction. */
730 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
731 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
732 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
733 DUMMY_STRINGOP_ALGS},
734 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
736 DUMMY_STRINGOP_ALGS},
740 struct processor_costs nocona_cost = {
741 COSTS_N_INSNS (1), /* cost of an add instruction */
742 COSTS_N_INSNS (1), /* cost of a lea instruction */
743 COSTS_N_INSNS (1), /* variable shift costs */
744 COSTS_N_INSNS (1), /* constant shift costs */
745 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
746 COSTS_N_INSNS (10), /* HI */
747 COSTS_N_INSNS (10), /* SI */
748 COSTS_N_INSNS (10), /* DI */
749 COSTS_N_INSNS (10)}, /* other */
750 0, /* cost of multiply per each bit set */
751 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
752 COSTS_N_INSNS (66), /* HI */
753 COSTS_N_INSNS (66), /* SI */
754 COSTS_N_INSNS (66), /* DI */
755 COSTS_N_INSNS (66)}, /* other */
756 COSTS_N_INSNS (1), /* cost of movsx */
757 COSTS_N_INSNS (1), /* cost of movzx */
758 16, /* "large" insn */
760 4, /* cost for loading QImode using movzbl */
761 {4, 4, 4}, /* cost of loading integer registers
762 in QImode, HImode and SImode.
763 Relative to reg-reg move (2). */
764 {4, 4, 4}, /* cost of storing integer registers */
765 3, /* cost of reg,reg fld/fst */
766 {12, 12, 12}, /* cost of loading fp registers
767 in SFmode, DFmode and XFmode */
768 {4, 4, 4}, /* cost of storing fp registers
769 in SFmode, DFmode and XFmode */
770 6, /* cost of moving MMX register */
771 {12, 12}, /* cost of loading MMX registers
772 in SImode and DImode */
773 {12, 12}, /* cost of storing MMX registers
774 in SImode and DImode */
775 6, /* cost of moving SSE register */
776 {12, 12, 12}, /* cost of loading SSE registers
777 in SImode, DImode and TImode */
778 {12, 12, 12}, /* cost of storing SSE registers
779 in SImode, DImode and TImode */
780 8, /* MMX or SSE register to integer */
781 128, /* size of prefetch block */
782 8, /* number of parallel prefetches */
784 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
785 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
786 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
787 COSTS_N_INSNS (3), /* cost of FABS instruction. */
788 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
789 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
790 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
791 {libcall, {{32, loop}, {20000, rep_prefix_8_byte},
792 {100000, unrolled_loop}, {-1, libcall}}}},
793 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
795 {libcall, {{24, loop}, {64, unrolled_loop},
796 {8192, rep_prefix_8_byte}, {-1, libcall}}}}
800 struct processor_costs core2_cost = {
801 COSTS_N_INSNS (1), /* cost of an add instruction */
802 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
803 COSTS_N_INSNS (1), /* variable shift costs */
804 COSTS_N_INSNS (1), /* constant shift costs */
805 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
806 COSTS_N_INSNS (3), /* HI */
807 COSTS_N_INSNS (3), /* SI */
808 COSTS_N_INSNS (3), /* DI */
809 COSTS_N_INSNS (3)}, /* other */
810 0, /* cost of multiply per each bit set */
811 {COSTS_N_INSNS (22), /* cost of a divide/mod for QI */
812 COSTS_N_INSNS (22), /* HI */
813 COSTS_N_INSNS (22), /* SI */
814 COSTS_N_INSNS (22), /* DI */
815 COSTS_N_INSNS (22)}, /* other */
816 COSTS_N_INSNS (1), /* cost of movsx */
817 COSTS_N_INSNS (1), /* cost of movzx */
818 8, /* "large" insn */
820 2, /* cost for loading QImode using movzbl */
821 {6, 6, 6}, /* cost of loading integer registers
822 in QImode, HImode and SImode.
823 Relative to reg-reg move (2). */
824 {4, 4, 4}, /* cost of storing integer registers */
825 2, /* cost of reg,reg fld/fst */
826 {6, 6, 6}, /* cost of loading fp registers
827 in SFmode, DFmode and XFmode */
828 {4, 4, 4}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
829 2, /* cost of moving MMX register */
830 {6, 6}, /* cost of loading MMX registers
831 in SImode and DImode */
832 {4, 4}, /* cost of storing MMX registers
833 in SImode and DImode */
834 2, /* cost of moving SSE register */
835 {6, 6, 6}, /* cost of loading SSE registers
836 in SImode, DImode and TImode */
837 {4, 4, 4}, /* cost of storing SSE registers
838 in SImode, DImode and TImode */
839 2, /* MMX or SSE register to integer */
840 128, /* size of prefetch block */
841 8, /* number of parallel prefetches */
843 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
844 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
845 COSTS_N_INSNS (32), /* cost of FDIV instruction. */
846 COSTS_N_INSNS (1), /* cost of FABS instruction. */
847 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
848 COSTS_N_INSNS (58), /* cost of FSQRT instruction. */
849 {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
850 {libcall, {{32, loop}, {64, rep_prefix_4_byte},
851 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
852 {{libcall, {{8, loop}, {15, unrolled_loop},
853 {2048, rep_prefix_4_byte}, {-1, libcall}}},
854 {libcall, {{24, loop}, {32, unrolled_loop},
855 {8192, rep_prefix_8_byte}, {-1, libcall}}}}
858 /* Generic64 should produce code tuned for Nocona and K8. */
860 struct processor_costs generic64_cost = {
861 COSTS_N_INSNS (1), /* cost of an add instruction */
862 /* On all chips taken into consideration, lea is 2 cycles or more. With
863 this cost, however, our current implementation of synth_mult results in
864 the use of unnecessary temporary registers, causing regressions on several
865 SPECfp benchmarks. */
866 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
867 COSTS_N_INSNS (1), /* variable shift costs */
868 COSTS_N_INSNS (1), /* constant shift costs */
869 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
870 COSTS_N_INSNS (4), /* HI */
871 COSTS_N_INSNS (3), /* SI */
872 COSTS_N_INSNS (4), /* DI */
873 COSTS_N_INSNS (2)}, /* other */
874 0, /* cost of multiply per each bit set */
875 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
876 COSTS_N_INSNS (26), /* HI */
877 COSTS_N_INSNS (42), /* SI */
878 COSTS_N_INSNS (74), /* DI */
879 COSTS_N_INSNS (74)}, /* other */
880 COSTS_N_INSNS (1), /* cost of movsx */
881 COSTS_N_INSNS (1), /* cost of movzx */
882 8, /* "large" insn */
884 4, /* cost for loading QImode using movzbl */
885 {4, 4, 4}, /* cost of loading integer registers
886 in QImode, HImode and SImode.
887 Relative to reg-reg move (2). */
888 {4, 4, 4}, /* cost of storing integer registers */
889 4, /* cost of reg,reg fld/fst */
890 {12, 12, 12}, /* cost of loading fp registers
891 in SFmode, DFmode and XFmode */
892 {6, 6, 8}, /* cost of storing fp registers
893 in SFmode, DFmode and XFmode */
894 2, /* cost of moving MMX register */
895 {8, 8}, /* cost of loading MMX registers
896 in SImode and DImode */
897 {8, 8}, /* cost of storing MMX registers
898 in SImode and DImode */
899 2, /* cost of moving SSE register */
900 {8, 8, 8}, /* cost of loading SSE registers
901 in SImode, DImode and TImode */
902 {8, 8, 8}, /* cost of storing SSE registers
903 in SImode, DImode and TImode */
904 5, /* MMX or SSE register to integer */
905 64, /* size of prefetch block */
906 6, /* number of parallel prefetches */
907 /* Benchmarks show large regressions on the K8 sixtrack benchmark when this
908 value is increased to the perhaps more appropriate value of 5. */
910 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
911 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
912 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
913 COSTS_N_INSNS (8), /* cost of FABS instruction. */
914 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
915 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
916 {DUMMY_STRINGOP_ALGS,
917 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
918 {DUMMY_STRINGOP_ALGS,
919 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}}
922 /* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona and K8. */
924 struct processor_costs generic32_cost = {
925 COSTS_N_INSNS (1), /* cost of an add instruction */
926 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
927 COSTS_N_INSNS (1), /* variable shift costs */
928 COSTS_N_INSNS (1), /* constant shift costs */
929 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
930 COSTS_N_INSNS (4), /* HI */
931 COSTS_N_INSNS (3), /* SI */
932 COSTS_N_INSNS (4), /* DI */
933 COSTS_N_INSNS (2)}, /* other */
934 0, /* cost of multiply per each bit set */
935 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
936 COSTS_N_INSNS (26), /* HI */
937 COSTS_N_INSNS (42), /* SI */
938 COSTS_N_INSNS (74), /* DI */
939 COSTS_N_INSNS (74)}, /* other */
940 COSTS_N_INSNS (1), /* cost of movsx */
941 COSTS_N_INSNS (1), /* cost of movzx */
942 8, /* "large" insn */
944 4, /* cost for loading QImode using movzbl */
945 {4, 4, 4}, /* cost of loading integer registers
946 in QImode, HImode and SImode.
947 Relative to reg-reg move (2). */
948 {4, 4, 4}, /* cost of storing integer registers */
949 4, /* cost of reg,reg fld/fst */
950 {12, 12, 12}, /* cost of loading fp registers
951 in SFmode, DFmode and XFmode */
952 {6, 6, 8}, /* cost of storing fp registers
953 in SFmode, DFmode and XFmode */
954 2, /* cost of moving MMX register */
955 {8, 8}, /* cost of loading MMX registers
956 in SImode and DImode */
957 {8, 8}, /* cost of storing MMX registers
958 in SImode and DImode */
959 2, /* cost of moving SSE register */
960 {8, 8, 8}, /* cost of loading SSE registers
961 in SImode, DImode and TImode */
962 {8, 8, 8}, /* cost of storing SSE registers
963 in SImode, DImode and TImode */
964 5, /* MMX or SSE register to integer */
965 64, /* size of prefetch block */
966 6, /* number of parallel prefetches */
968 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
969 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
970 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
971 COSTS_N_INSNS (8), /* cost of FABS instruction. */
972 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
973 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
974 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
975 DUMMY_STRINGOP_ALGS},
976 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
977 DUMMY_STRINGOP_ALGS},
980 const struct processor_costs *ix86_cost = &pentium_cost;
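/* A sketch of how this pointer is expected to be retargeted during option
   processing (the exact selection code lives elsewhere; the if/else below
   is illustrative only): when optimizing for size the size_cost table is
   installed, otherwise the table matching the -mtune= processor, e.g.

     if (optimize_size)
       ix86_cost = &size_cost;
     else if (ix86_tune == PROCESSOR_K8)
       ix86_cost = &k8_cost;

   All rtx-cost and move-cost queries in the backend then read through
   ix86_cost.  */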
982 /* Processor feature/optimization bitmasks. */
983 #define m_386 (1<<PROCESSOR_I386)
984 #define m_486 (1<<PROCESSOR_I486)
985 #define m_PENT (1<<PROCESSOR_PENTIUM)
986 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
987 #define m_GEODE (1<<PROCESSOR_GEODE)
988 #define m_K6_GEODE (m_K6 | m_GEODE)
989 #define m_K6 (1<<PROCESSOR_K6)
990 #define m_ATHLON (1<<PROCESSOR_ATHLON)
991 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
992 #define m_K8 (1<<PROCESSOR_K8)
993 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
994 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
995 #define m_NOCONA (1<<PROCESSOR_NOCONA)
996 #define m_CORE2 (1<<PROCESSOR_CORE2)
997 #define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
998 #define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
999 #define m_GENERIC (m_GENERIC32 | m_GENERIC64)
1000 #define m_ATHLON_K8_AMDFAM10 (m_K8 | m_ATHLON | m_AMDFAM10)
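/* Sketch of how these bitmasks are consumed (the TUNEMASK spelling is an
   assumption; the target headers test the flags through macros built on
   this pattern):

     #define TUNEMASK (1 << ix86_tune)
     if (x86_use_leave & TUNEMASK)
       ... emit "leave" in the epilogue ...

   so each const int below is simply the set of processors for which the
   corresponding optimization or instruction choice is enabled.  */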
1002 /* Generic instruction choice should be a common subset of the supported CPUs
1003 (PPro/PENT4/NOCONA/CORE2/Athlon/K8). */
1005 /* Leave does not affect Nocona SPEC2000 results negatively, so enabling it for
1006 Generic64 seems like a good code size tradeoff. We can't enable it for 32-bit
1007 generic because it does not work well with PPro-based chips. */
1008 const int x86_use_leave = m_386 | m_K6_GEODE | m_ATHLON_K8_AMDFAM10 | m_CORE2
1010 const int x86_push_memory = m_386 | m_K6_GEODE | m_ATHLON_K8_AMDFAM10 | m_PENT4
1011 | m_NOCONA | m_CORE2 | m_GENERIC;
1012 const int x86_zero_extend_with_and = m_486 | m_PENT;
1013 /* Enable to zero extend integer registers to avoid partial dependencies */
1014 const int x86_movx = m_ATHLON_K8_AMDFAM10 | m_PPRO | m_PENT4 | m_NOCONA
1015 | m_CORE2 | m_GENERIC | m_GEODE /* m_386 | m_K6 */;
1016 const int x86_double_with_add = ~m_386;
1017 const int x86_use_bit_test = m_386;
1018 const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8_AMDFAM10
1019 | m_K6 | m_CORE2 | m_GENERIC;
1020 const int x86_cmove = m_PPRO | m_GEODE | m_ATHLON_K8_AMDFAM10 | m_PENT4
1022 const int x86_3dnow_a = m_ATHLON_K8_AMDFAM10;
1023 const int x86_deep_branch = m_PPRO | m_K6_GEODE | m_ATHLON_K8_AMDFAM10
1024 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
1025 /* Branch hints were put in P4 based on simulation results. But
1026 after P4 was made, no performance benefit was observed with
1027 branch hints; they also increase the code size. As a result,
1028 icc never generates branch hints. */
1029 const int x86_branch_hints = 0;
1030 const int x86_use_sahf = m_PPRO | m_K6_GEODE | m_PENT4 | m_NOCONA | m_GENERIC32;
1031 /*m_GENERIC | m_ATHLON_K8 ? */
1032 /* We probably ought to watch for partial register stalls on the Generic32
1033 compilation setting as well. However, in the current implementation the
1034 partial register stalls are not eliminated very well - they can
1035 be introduced via subregs synthesized by combine and can happen
1036 in caller/callee saving sequences.
1037 Because this option pays back little on PPro based chips and conflicts
1038 with the partial reg. dependencies used by Athlon/P4 based chips, it is better
1039 to leave it off for generic32 for now. */
1040 const int x86_partial_reg_stall = m_PPRO;
1041 const int x86_partial_flag_reg_stall = m_CORE2 | m_GENERIC;
1042 const int x86_use_himode_fiop = m_386 | m_486 | m_K6_GEODE;
1043 const int x86_use_simode_fiop = ~(m_PPRO | m_ATHLON_K8_AMDFAM10 | m_PENT
1044 | m_CORE2 | m_GENERIC);
1045 const int x86_use_mov0 = m_K6;
1046 const int x86_use_cltd = ~(m_PENT | m_K6 | m_CORE2 | m_GENERIC);
1047 /* Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */
1048 const int x86_use_xchgb = m_PENT4;
1049 const int x86_read_modify_write = ~m_PENT;
1050 const int x86_read_modify = ~(m_PENT | m_PPRO);
1051 const int x86_split_long_moves = m_PPRO;
1052 const int x86_promote_QImode = m_K6_GEODE | m_PENT | m_386 | m_486
1053 | m_ATHLON_K8_AMDFAM10 | m_CORE2 | m_GENERIC;
1055 const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
1056 const int x86_single_stringop = m_386 | m_PENT4 | m_NOCONA;
1057 const int x86_qimode_math = ~(0);
1058 const int x86_promote_qi_regs = 0;
1059 /* On PPro this flag is meant to avoid partial register stalls. Just like
1060 x86_partial_reg_stall, this option might be considered for Generic32
1061 if our scheme for avoiding partial stalls were more effective. */
1062 const int x86_himode_math = ~(m_PPRO);
1063 const int x86_promote_hi_regs = m_PPRO;
1064 /* Enable if add/sub rsp is preferred over 1 or 2 push/pop */
1065 const int x86_sub_esp_4 = m_ATHLON_K8_AMDFAM10 | m_PPRO | m_PENT4 | m_NOCONA
1066 | m_CORE2 | m_GENERIC;
1067 const int x86_sub_esp_8 = m_ATHLON_K8_AMDFAM10 | m_PPRO | m_386 | m_486
1068 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
1069 const int x86_add_esp_4 = m_ATHLON_K8_AMDFAM10 | m_K6_GEODE | m_PENT4 | m_NOCONA
1070 | m_CORE2 | m_GENERIC;
1071 const int x86_add_esp_8 = m_ATHLON_K8_AMDFAM10 | m_PPRO | m_K6_GEODE | m_386
1072 | m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
1073 /* Enable if integer moves are preferred for DFmode copies */
1074 const int x86_integer_DFmode_moves = ~(m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA
1075 | m_PPRO | m_CORE2 | m_GENERIC | m_GEODE);
1076 const int x86_partial_reg_dependency = m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA
1077 | m_CORE2 | m_GENERIC;
1078 const int x86_memory_mismatch_stall = m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA
1079 | m_CORE2 | m_GENERIC;
1080 /* If ACCUMULATE_OUTGOING_ARGS is enabled, the maximum amount of space required
1081 for outgoing arguments will be computed and placed into the variable
1082 `current_function_outgoing_args_size'. No space will be pushed onto the stack
1083 for each call; instead, the function prologue should increase the stack frame
1084 size by this amount. Setting both PUSH_ARGS and ACCUMULATE_OUTGOING_ARGS is
1086 const int x86_accumulate_outgoing_args = m_ATHLON_K8_AMDFAM10 | m_PENT4
1087 | m_NOCONA | m_PPRO | m_CORE2
1089 const int x86_prologue_using_move = m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC;
1090 const int x86_epilogue_using_move = m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC;
1091 const int x86_shift1 = ~m_486;
1092 const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO
1093 | m_ATHLON_K8_AMDFAM10 | m_PENT4
1094 | m_NOCONA | m_CORE2 | m_GENERIC;
1095 /* In the Generic model we have a conflict here between PPro/Pentium4 based chips
1096 that treat 128bit SSE registers as single units and K8 based chips that
1097 divide SSE registers into two 64bit halves.
1098 x86_sse_partial_reg_dependency promotes all store destinations to be 128bit
1099 to allow register renaming on 128bit SSE units, but usually results in one
1100 extra microop on 64bit SSE units. Experimental results show that disabling
1101 this option on P4 brings over 20% SPECfp regression, while enabling it on
1102 K8 brings roughly a 2.4% regression that can be partly masked by careful scheduling
1103 of moves. */
1104 const int x86_sse_partial_reg_dependency = m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
1105 | m_GENERIC | m_AMDFAM10;
1106 /* Set for machines where the type and dependencies are resolved on SSE
1107 register parts instead of whole registers, so we may maintain just the
1108 lower part of scalar values in proper format, leaving the upper part
1109 undefined. */
1110 const int x86_sse_split_regs = m_ATHLON_K8;
1111 /* Code generation for scalar reg-reg moves of single and double precision data:
1112 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
movaps reg, reg
else
movss reg, reg
1116 if (x86_sse_partial_reg_dependency == true)
movapd reg, reg
else
movsd reg, reg
1121 Code generation for scalar loads of double precision data:
1122 if (x86_sse_split_regs == true)
1123 movlpd mem, reg (gas syntax)
else
movsd mem, reg
1127 Code generation for unaligned packed loads of single precision data
1128 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
1129 if (x86_sse_unaligned_move_optimal)
movups mem, reg
1132 if (x86_sse_partial_reg_dependency == true)
xorps reg, reg; movlps mem, reg; movhps mem+8, reg
else
movlps mem, reg; movhps mem+8, reg
1144 Code generation for unaligned packed loads of double precision data
1145 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
1146 if (x86_sse_unaligned_move_optimal)
movupd mem, reg
1149 if (x86_sse_split_regs == true)
movlpd mem, reg; movhpd mem+8, reg
else
movsd mem, reg; movhpd mem+8, reg */
1160 const int x86_sse_unaligned_move_optimal = m_AMDFAM10;
1161 const int x86_sse_typeless_stores = m_ATHLON_K8_AMDFAM10;
1162 const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4 | m_NOCONA;
1163 const int x86_use_ffreep = m_ATHLON_K8_AMDFAM10;
1164 const int x86_use_incdec = ~(m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC);
1166 const int x86_inter_unit_moves = ~(m_ATHLON_K8_AMDFAM10 | m_GENERIC);
1168 const int x86_ext_80387_constants = m_K6_GEODE | m_ATHLON_K8 | m_PENT4
1169 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC;
1170 /* Some CPU cores are not able to predict more than 4 branch instructions in
1171 the 16 byte window. */
1172 const int x86_four_jump_limit = m_PPRO | m_ATHLON_K8_AMDFAM10 | m_PENT4
1173 | m_NOCONA | m_CORE2 | m_GENERIC;
1174 const int x86_schedule = m_PPRO | m_ATHLON_K8_AMDFAM10 | m_K6_GEODE | m_PENT
1175 | m_CORE2 | m_GENERIC;
1176 const int x86_use_bt = m_ATHLON_K8_AMDFAM10;
1177 /* Compare and exchange was added for 80486. */
1178 const int x86_cmpxchg = ~m_386;
1179 /* Compare and exchange 8 bytes was added for pentium. */
1180 const int x86_cmpxchg8b = ~(m_386 | m_486);
1181 /* Exchange and add was added for 80486. */
1182 const int x86_xadd = ~m_386;
1183 /* Byteswap was added for 80486. */
1184 const int x86_bswap = ~m_386;
1185 const int x86_pad_returns = m_ATHLON_K8_AMDFAM10 | m_CORE2 | m_GENERIC;
1187 static enum stringop_alg stringop_alg = no_stringop;
1189 /* In case the average insn count for single function invocation is
1190 lower than this constant, emit fast (but longer) prologue and
1191 epilogue code. */
1192 #define FAST_PROLOGUE_INSN_COUNT 20
1194 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
1195 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
1196 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
1197 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
1199 /* Array of the smallest class containing reg number REGNO, indexed by
1200 REGNO. Used by REGNO_REG_CLASS in i386.h. */
1202 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
1204 /* ax, dx, cx, bx */
1205 AREG, DREG, CREG, BREG,
1206 /* si, di, bp, sp */
1207 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
1209 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
1210 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
1213 /* flags, fpsr, fpcr, frame */
1214 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
1215 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1217 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
1219 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1220 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1221 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1225 /* The "default" register map used in 32bit mode. */
1227 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
1228 {
1229 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
1230 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
1231 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1232 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
1233 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
1234 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1235 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1236 };
1238 static int const x86_64_int_parameter_registers[6] =
1239 {
1240 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
1241 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
1242 };
1244 static int const x86_64_int_return_registers[4] =
1245 {
1246 0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
1247 };
1249 /* The "default" register map used in 64bit mode. */
1250 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
1251 {
1252 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
1253 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
1254 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1255 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
1256 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
1257 8, 9, 10, 11, 12, 13, 14, 15, /* extended integer registers */
1258 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
1259 };
1261 /* Define the register numbers to be used in Dwarf debugging information.
1262 The SVR4 reference port C compiler uses the following register numbers
1263 in its Dwarf output code:
1264 0 for %eax (gcc regno = 0)
1265 1 for %ecx (gcc regno = 2)
1266 2 for %edx (gcc regno = 1)
1267 3 for %ebx (gcc regno = 3)
1268 4 for %esp (gcc regno = 7)
1269 5 for %ebp (gcc regno = 6)
1270 6 for %esi (gcc regno = 4)
1271 7 for %edi (gcc regno = 5)
1272 The following three DWARF register numbers are never generated by
1273 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
1274 believes these numbers have these meanings.
1275 8 for %eip (no gcc equivalent)
1276 9 for %eflags (gcc regno = 17)
1277 10 for %trapno (no gcc equivalent)
1278 It is not at all clear how we should number the FP stack registers
1279 for the x86 architecture. If the version of SDB on x86/svr4 were
1280 a bit less brain dead with respect to floating-point then we would
1281 have a precedent to follow with respect to DWARF register numbers
1282 for x86 FP registers, but the SDB on x86/svr4 is so completely
1283 broken with respect to FP registers that it is hardly worth thinking
1284 of it as something to strive for compatibility with.
1285 The version of x86/svr4 SDB I have at the moment does (partially)
1286 seem to believe that DWARF register number 11 is associated with
1287 the x86 register %st(0), but that's about all. Higher DWARF
1288 register numbers don't seem to be associated with anything in
1289 particular, and even for DWARF regno 11, SDB only seems to under-
1290 stand that it should say that a variable lives in %st(0) (when
1291 asked via an `=' command) if we said it was in DWARF regno 11,
1292 but SDB still prints garbage when asked for the value of the
1293 variable in question (via a `/' command).
1294 (Also note that the labels SDB prints for various FP stack regs
1295 when doing an `x' command are all wrong.)
1296 Note that these problems generally don't affect the native SVR4
1297 C compiler because it doesn't allow the use of -O with -g and
1298 because when it is *not* optimizing, it allocates a memory
1299 location for each floating-point variable, and the memory
1300 location is what gets described in the DWARF AT_location
1301 attribute for the variable in question.
1302 Regardless of the severe mental illness of the x86/svr4 SDB, we
1303 do something sensible here and we use the following DWARF
1304 register numbers. Note that these are all stack-top-relative
1305 numbers:
1306 11 for %st(0) (gcc regno = 8)
1307 12 for %st(1) (gcc regno = 9)
1308 13 for %st(2) (gcc regno = 10)
1309 14 for %st(3) (gcc regno = 11)
1310 15 for %st(4) (gcc regno = 12)
1311 16 for %st(5) (gcc regno = 13)
1312 17 for %st(6) (gcc regno = 14)
1313 18 for %st(7) (gcc regno = 15)
1315 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
1316 {
1317 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
1318 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
1319 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1320 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
1321 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
1322 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1323 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1324 };
1326 /* Test and compare insns in i386.md store the information needed to
1327 generate branch and scc insns here. */
1329 rtx ix86_compare_op0 = NULL_RTX;
1330 rtx ix86_compare_op1 = NULL_RTX;
1331 rtx ix86_compare_emitted = NULL_RTX;
1333 /* Size of the register save area. */
1334 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
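/* Worked size, assuming the usual x86-64 values REGPARM_MAX == 6,
   UNITS_PER_WORD == 8 and SSE_REGPARM_MAX == 8:

     X86_64_VARARGS_SIZE == 6 * 8 + 8 * 16 == 176 bytes,

   i.e. room for the six integer argument registers plus eight 16-byte
   SSE argument registers in the va_arg register save area.  */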
1336 /* Define the structure for the machine field in struct function. */
1338 struct stack_local_entry GTY(())
1340 unsigned short mode;
1343 struct stack_local_entry *next;
1346 /* Structure describing stack frame layout.
1347 Stack grows downward:
1353 saved frame pointer if frame_pointer_needed
1354 <- HARD_FRAME_POINTER
1359 [va_arg registers] (
1360 > to_allocate <- FRAME_POINTER
1370 HOST_WIDE_INT frame;
1372 int outgoing_arguments_size;
1375 HOST_WIDE_INT to_allocate;
1376 /* The offsets relative to ARG_POINTER. */
1377 HOST_WIDE_INT frame_pointer_offset;
1378 HOST_WIDE_INT hard_frame_pointer_offset;
1379 HOST_WIDE_INT stack_pointer_offset;
1381 /* When save_regs_using_mov is set, emit prologue using
1382 move instead of push instructions. */
1383 bool save_regs_using_mov;
1386 /* Code model option. */
1387 enum cmodel ix86_cmodel;
1389 enum asm_dialect ix86_asm_dialect = ASM_ATT;
1391 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
1393 /* Which unit we are generating floating point math for. */
1394 enum fpmath_unit ix86_fpmath;
1396 /* Which cpu are we scheduling for. */
1397 enum processor_type ix86_tune;
1398 /* Which instruction set architecture to use. */
1399 enum processor_type ix86_arch;
1401 /* true if sse prefetch instruction is not NOOP. */
1402 int x86_prefetch_sse;
1404 /* true if cmpxchg16b is supported. */
1407 /* ix86_regparm_string as a number */
1408 static int ix86_regparm;
1410 /* -mstackrealign option */
1411 extern int ix86_force_align_arg_pointer;
1412 static const char ix86_force_align_arg_pointer_string[] = "force_align_arg_pointer";
1414 /* Preferred alignment for stack boundary in bits. */
1415 unsigned int ix86_preferred_stack_boundary;
1417 /* Values 1-5: see jump.c */
1418 int ix86_branch_cost;
1420 /* Variables which are this size or smaller are put in the data/bss
1421 or ldata/lbss sections. */
1423 int ix86_section_threshold = 65536;
1425 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
1426 char internal_label_prefix[16];
1427 int internal_label_prefix_len;
1429 static bool ix86_handle_option (size_t, const char *, int);
1430 static void output_pic_addr_const (FILE *, rtx, int);
1431 static void put_condition_code (enum rtx_code, enum machine_mode,
1433 static const char *get_some_local_dynamic_name (void);
1434 static int get_some_local_dynamic_name_1 (rtx *, void *);
1435 static rtx ix86_expand_int_compare (enum rtx_code, rtx, rtx);
1436 static enum rtx_code ix86_prepare_fp_compare_args (enum rtx_code, rtx *,
1438 static bool ix86_fixed_condition_code_regs (unsigned int *, unsigned int *);
1439 static enum machine_mode ix86_cc_modes_compatible (enum machine_mode,
1441 static rtx get_thread_pointer (int);
1442 static rtx legitimize_tls_address (rtx, enum tls_model, int);
1443 static void get_pc_thunk_name (char [32], unsigned int);
1444 static rtx gen_push (rtx);
1445 static int ix86_flags_dependent (rtx, rtx, enum attr_type);
1446 static int ix86_agi_dependent (rtx, rtx, enum attr_type);
1447 static struct machine_function * ix86_init_machine_status (void);
1448 static int ix86_split_to_parts (rtx, rtx *, enum machine_mode);
1449 static int ix86_nsaved_regs (void);
1450 static void ix86_emit_save_regs (void);
1451 static void ix86_emit_save_regs_using_mov (rtx, HOST_WIDE_INT);
1452 static void ix86_emit_restore_regs_using_mov (rtx, HOST_WIDE_INT, int);
1453 static void ix86_output_function_epilogue (FILE *, HOST_WIDE_INT);
1454 static HOST_WIDE_INT ix86_GOT_alias_set (void);
1455 static void ix86_adjust_counter (rtx, HOST_WIDE_INT);
1456 static void ix86_expand_strlensi_unroll_1 (rtx, rtx, rtx);
1457 static int ix86_issue_rate (void);
1458 static int ix86_adjust_cost (rtx, rtx, rtx, int);
1459 static int ia32_multipass_dfa_lookahead (void);
1460 static void ix86_init_mmx_sse_builtins (void);
1461 static rtx x86_this_parameter (tree);
1462 static void x86_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
1463 HOST_WIDE_INT, tree);
1464 static bool x86_can_output_mi_thunk (tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
1465 static void x86_file_start (void);
1466 static void ix86_reorg (void);
1467 static bool ix86_expand_carry_flag_compare (enum rtx_code, rtx, rtx, rtx*);
1468 static tree ix86_build_builtin_va_list (void);
1469 static void ix86_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
1471 static tree ix86_gimplify_va_arg (tree, tree, tree *, tree *);
1472 static bool ix86_scalar_mode_supported_p (enum machine_mode);
1473 static bool ix86_vector_mode_supported_p (enum machine_mode);
1475 static int ix86_address_cost (rtx);
1476 static bool ix86_cannot_force_const_mem (rtx);
1477 static rtx ix86_delegitimize_address (rtx);
1479 static void i386_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
1481 struct builtin_description;
1482 static rtx ix86_expand_sse_comi (const struct builtin_description *,
1484 static rtx ix86_expand_sse_compare (const struct builtin_description *,
1486 static rtx ix86_expand_unop1_builtin (enum insn_code, tree, rtx);
1487 static rtx ix86_expand_unop_builtin (enum insn_code, tree, rtx, int);
1488 static rtx ix86_expand_binop_builtin (enum insn_code, tree, rtx);
1489 static rtx ix86_expand_store_builtin (enum insn_code, tree);
1490 static rtx safe_vector_operand (rtx, enum machine_mode);
1491 static rtx ix86_expand_fp_compare (enum rtx_code, rtx, rtx, rtx, rtx *, rtx *);
1492 static int ix86_fp_comparison_arithmetics_cost (enum rtx_code code);
1493 static int ix86_fp_comparison_fcomi_cost (enum rtx_code code);
1494 static int ix86_fp_comparison_sahf_cost (enum rtx_code code);
1495 static int ix86_fp_comparison_cost (enum rtx_code code);
1496 static unsigned int ix86_select_alt_pic_regnum (void);
1497 static int ix86_save_reg (unsigned int, int);
1498 static void ix86_compute_frame_layout (struct ix86_frame *);
1499 static int ix86_comp_type_attributes (tree, tree);
1500 static int ix86_function_regparm (tree, tree);
1501 const struct attribute_spec ix86_attribute_table[];
1502 static bool ix86_function_ok_for_sibcall (tree, tree);
1503 static tree ix86_handle_cconv_attribute (tree *, tree, tree, int, bool *);
1504 static int ix86_value_regno (enum machine_mode, tree, tree);
1505 static bool contains_128bit_aligned_vector_p (tree);
1506 static rtx ix86_struct_value_rtx (tree, int);
1507 static bool ix86_ms_bitfield_layout_p (tree);
1508 static tree ix86_handle_struct_attribute (tree *, tree, tree, int, bool *);
1509 static int extended_reg_mentioned_1 (rtx *, void *);
1510 static bool ix86_rtx_costs (rtx, int, int, int *);
1511 static int min_insn_size (rtx);
1512 static tree ix86_md_asm_clobbers (tree outputs, tree inputs, tree clobbers);
1513 static bool ix86_must_pass_in_stack (enum machine_mode mode, tree type);
1514 static bool ix86_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
1516 static void ix86_init_builtins (void);
1517 static rtx ix86_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
1518 static tree ix86_builtin_vectorized_function (enum built_in_function, tree, tree);
1519 static tree ix86_builtin_conversion (enum tree_code, tree);
1520 static const char *ix86_mangle_fundamental_type (tree);
1521 static tree ix86_stack_protect_fail (void);
1522 static rtx ix86_internal_arg_pointer (void);
1523 static void ix86_dwarf_handle_frame_unspec (const char *, rtx, int);
1524 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
1527 /* This function is only used on Solaris. */
1528 static void i386_solaris_elf_named_section (const char *, unsigned int, tree)
1531 /* Register class used for passing a given 64-bit part of an argument.
1532 These represent classes as documented by the psABI, with the exception
1533 of the SSESF and SSEDF classes, which are basically the SSE class except
1534 that GCC uses SFmode or DFmode moves instead of DImode moves to avoid
1536 reformatting penalties.  Similarly, we play games with INTEGERSI_CLASS
1537 to use cheaper SImode moves whenever possible (when the upper half is padding).
1539 enum x86_64_reg_class
1542 X86_64_INTEGER_CLASS,
1543 X86_64_INTEGERSI_CLASS,
1550 X86_64_COMPLEX_X87_CLASS,
1553 static const char * const x86_64_reg_class_name[] = {
1554 "no", "integer", "integerSI", "sse", "sseSF", "sseDF",
1555 "sseup", "x87", "x87up", "cplx87", "no"
1558 #define MAX_CLASSES 4
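/* For example, under the x86-64 ABI an argument of type

       struct { long l; double d; };

   occupies two eightbytes which classify as INTEGER and SSEDF
   respectively, so it is passed in one general and one SSE register;
   SSEDF only differs from the plain SSE class in that DFmode moves are
   used for it.  MAX_CLASSES bounds how many such per-eightbyte classes
   a single argument is ever described by here.  */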
1560 /* Table of constants used by fldpi, fldln2, etc.  */
1561 static REAL_VALUE_TYPE ext_80387_constants_table [5];
1562 static bool ext_80387_constants_init = 0;
1563 static void init_ext_80387_constants (void);
1564 static bool ix86_in_large_data_p (tree) ATTRIBUTE_UNUSED;
1565 static void ix86_encode_section_info (tree, rtx, int) ATTRIBUTE_UNUSED;
1566 static void x86_64_elf_unique_section (tree decl, int reloc) ATTRIBUTE_UNUSED;
1567 static section *x86_64_elf_select_section (tree decl, int reloc,
1568 unsigned HOST_WIDE_INT align)
1571 /* Initialize the GCC target structure. */
1572 #undef TARGET_ATTRIBUTE_TABLE
1573 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
1574 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
1575 # undef TARGET_MERGE_DECL_ATTRIBUTES
1576 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
1579 #undef TARGET_COMP_TYPE_ATTRIBUTES
1580 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
1582 #undef TARGET_INIT_BUILTINS
1583 #define TARGET_INIT_BUILTINS ix86_init_builtins
1584 #undef TARGET_EXPAND_BUILTIN
1585 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
1587 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
1588 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION ix86_builtin_vectorized_function
1589 #undef TARGET_VECTORIZE_BUILTIN_CONVERSION
1590 #define TARGET_VECTORIZE_BUILTIN_CONVERSION ix86_builtin_conversion
1592 #undef TARGET_ASM_FUNCTION_EPILOGUE
1593 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
1595 #undef TARGET_ENCODE_SECTION_INFO
1596 #ifndef SUBTARGET_ENCODE_SECTION_INFO
1597 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
1599 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
1602 #undef TARGET_ASM_OPEN_PAREN
1603 #define TARGET_ASM_OPEN_PAREN ""
1604 #undef TARGET_ASM_CLOSE_PAREN
1605 #define TARGET_ASM_CLOSE_PAREN ""
1607 #undef TARGET_ASM_ALIGNED_HI_OP
1608 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
1609 #undef TARGET_ASM_ALIGNED_SI_OP
1610 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
1612 #undef TARGET_ASM_ALIGNED_DI_OP
1613 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
1616 #undef TARGET_ASM_UNALIGNED_HI_OP
1617 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
1618 #undef TARGET_ASM_UNALIGNED_SI_OP
1619 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
1620 #undef TARGET_ASM_UNALIGNED_DI_OP
1621 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
1623 #undef TARGET_SCHED_ADJUST_COST
1624 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
1625 #undef TARGET_SCHED_ISSUE_RATE
1626 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
1627 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
1628 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
1629 ia32_multipass_dfa_lookahead
1631 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
1632 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
1635 #undef TARGET_HAVE_TLS
1636 #define TARGET_HAVE_TLS true
1638 #undef TARGET_CANNOT_FORCE_CONST_MEM
1639 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
1640 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
1641 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_rtx_true
1643 #undef TARGET_DELEGITIMIZE_ADDRESS
1644 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
1646 #undef TARGET_MS_BITFIELD_LAYOUT_P
1647 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
1650 #undef TARGET_BINDS_LOCAL_P
1651 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
1654 #undef TARGET_ASM_OUTPUT_MI_THUNK
1655 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
1656 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1657 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
1659 #undef TARGET_ASM_FILE_START
1660 #define TARGET_ASM_FILE_START x86_file_start
1662 #undef TARGET_DEFAULT_TARGET_FLAGS
1663 #define TARGET_DEFAULT_TARGET_FLAGS \
1665 | TARGET_64BIT_DEFAULT \
1666 | TARGET_SUBTARGET_DEFAULT \
1667 | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
1669 #undef TARGET_HANDLE_OPTION
1670 #define TARGET_HANDLE_OPTION ix86_handle_option
1672 #undef TARGET_RTX_COSTS
1673 #define TARGET_RTX_COSTS ix86_rtx_costs
1674 #undef TARGET_ADDRESS_COST
1675 #define TARGET_ADDRESS_COST ix86_address_cost
1677 #undef TARGET_FIXED_CONDITION_CODE_REGS
1678 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
1679 #undef TARGET_CC_MODES_COMPATIBLE
1680 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
1682 #undef TARGET_MACHINE_DEPENDENT_REORG
1683 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
1685 #undef TARGET_BUILD_BUILTIN_VA_LIST
1686 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
1688 #undef TARGET_MD_ASM_CLOBBERS
1689 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
1691 #undef TARGET_PROMOTE_PROTOTYPES
1692 #define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
1693 #undef TARGET_STRUCT_VALUE_RTX
1694 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
1695 #undef TARGET_SETUP_INCOMING_VARARGS
1696 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
1697 #undef TARGET_MUST_PASS_IN_STACK
1698 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
1699 #undef TARGET_PASS_BY_REFERENCE
1700 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
1701 #undef TARGET_INTERNAL_ARG_POINTER
1702 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
1703 #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
1704 #define TARGET_DWARF_HANDLE_FRAME_UNSPEC ix86_dwarf_handle_frame_unspec
1706 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
1707 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
1709 #undef TARGET_SCALAR_MODE_SUPPORTED_P
1710 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
1712 #undef TARGET_VECTOR_MODE_SUPPORTED_P
1713 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
1716 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
1717 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
1720 #ifdef SUBTARGET_INSERT_ATTRIBUTES
1721 #undef TARGET_INSERT_ATTRIBUTES
1722 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
1725 #undef TARGET_MANGLE_FUNDAMENTAL_TYPE
1726 #define TARGET_MANGLE_FUNDAMENTAL_TYPE ix86_mangle_fundamental_type
1728 #undef TARGET_STACK_PROTECT_FAIL
1729 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
1731 #undef TARGET_FUNCTION_VALUE
1732 #define TARGET_FUNCTION_VALUE ix86_function_value
1734 struct gcc_target targetm = TARGET_INITIALIZER;
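/* With the definitions above in place, generic code that calls a hook
   through the target vector, for instance

       cost = targetm.sched.adjust_cost (insn, link, dep_insn, cost);

   ends up in the corresponding ix86_* routine (ix86_adjust_cost here),
   because TARGET_INITIALIZER expands the TARGET_SCHED_ADJUST_COST
   macro redefined earlier in this file.  */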
1737 /* The svr4 ABI for the i386 says that records and unions are returned in memory.  */
1739 #ifndef DEFAULT_PCC_STRUCT_RETURN
1740 #define DEFAULT_PCC_STRUCT_RETURN 1
1743 /* Implement TARGET_HANDLE_OPTION. */
1746 ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
1753 target_flags &= ~MASK_3DNOW_A;
1754 target_flags_explicit |= MASK_3DNOW_A;
1761 target_flags &= ~(MASK_3DNOW | MASK_3DNOW_A);
1762 target_flags_explicit |= MASK_3DNOW | MASK_3DNOW_A;
1769 target_flags &= ~(MASK_SSE2 | MASK_SSE3 | MASK_SSE4A);
1770 target_flags_explicit |= MASK_SSE2 | MASK_SSE3 | MASK_SSE4A;
1777 target_flags &= ~(MASK_SSE3 | MASK_SSE4A);
1778 target_flags_explicit |= MASK_SSE3 | MASK_SSE4A;
1785 target_flags &= ~MASK_SSE4A;
1786 target_flags_explicit |= MASK_SSE4A;
1795 /* Sometimes certain combinations of command options do not make
1796 sense on a particular target machine. You can define a macro
1797 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1798 defined, is executed once just after all the command options have been parsed.
1801 Don't use this macro to turn on various extra optimizations for
1802 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1805 override_options (void)
1808 int ix86_tune_defaulted = 0;
1810 /* Comes from final.c -- no real reason to change it. */
1811 #define MAX_CODE_ALIGN 16
1815 const struct processor_costs *cost; /* Processor costs */
1816 const int target_enable; /* Target flags to enable. */
1817 const int target_disable; /* Target flags to disable. */
1818 const int align_loop; /* Default alignments. */
1819 const int align_loop_max_skip;
1820 const int align_jump;
1821 const int align_jump_max_skip;
1822 const int align_func;
1824 const processor_target_table[PROCESSOR_max] =
1826 {&i386_cost, 0, 0, 4, 3, 4, 3, 4},
1827 {&i486_cost, 0, 0, 16, 15, 16, 15, 16},
1828 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
1829 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
1830 {&geode_cost, 0, 0, 0, 0, 0, 0, 0},
1831 {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
1832 {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
1833 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
1834 {&k8_cost, 0, 0, 16, 7, 16, 7, 16},
1835 {&nocona_cost, 0, 0, 0, 0, 0, 0, 0},
1836 {&core2_cost, 0, 0, 16, 7, 16, 7, 16},
1837 {&generic32_cost, 0, 0, 16, 7, 16, 7, 16},
1838 {&generic64_cost, 0, 0, 16, 7, 16, 7, 16},
1839 {&amdfam10_cost, 0, 0, 32, 7, 32, 7, 32}
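/* Reading one row against the struct above: the k8 entry
   {&k8_cost, 0, 0, 16, 7, 16, 7, 16} supplies the K8 cost table,
   enables and disables no extra target flags, and defaults loop and
   jump alignment to 16 bytes (skipping at most 7 bytes of padding) and
   function alignment to 16 bytes, unless -falign-* options say
   otherwise.  */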
1842 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
1845 const char *const name; /* processor name or nickname. */
1846 const enum processor_type processor;
1847 const enum pta_flags
1853 PTA_PREFETCH_SSE = 16,
1864 const processor_alias_table[] =
1866 {"i386", PROCESSOR_I386, 0},
1867 {"i486", PROCESSOR_I486, 0},
1868 {"i586", PROCESSOR_PENTIUM, 0},
1869 {"pentium", PROCESSOR_PENTIUM, 0},
1870 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
1871 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
1872 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1873 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1874 {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_PREFETCH_SSE | PTA_SSE},
1875 {"i686", PROCESSOR_PENTIUMPRO, 0},
1876 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
1877 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
1878 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1879 {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1880 {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE | PTA_SSE2},
1881 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1882 | PTA_MMX | PTA_PREFETCH_SSE},
1883 {"pentium4m", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1884 | PTA_MMX | PTA_PREFETCH_SSE},
1885 {"prescott", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3
1886 | PTA_MMX | PTA_PREFETCH_SSE},
1887 {"nocona", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_64BIT
1888 | PTA_MMX | PTA_PREFETCH_SSE | PTA_CX16},
1889 {"core2", PROCESSOR_CORE2, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_SSSE3
1890 | PTA_64BIT | PTA_MMX
1891 | PTA_PREFETCH_SSE | PTA_CX16},
1892 {"geode", PROCESSOR_GEODE, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1894 {"k6", PROCESSOR_K6, PTA_MMX},
1895 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1896 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1897 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1899 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
1900 | PTA_3DNOW | PTA_3DNOW_A},
1901 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1902 | PTA_3DNOW_A | PTA_SSE},
1903 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1904 | PTA_3DNOW_A | PTA_SSE},
1905 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1906 | PTA_3DNOW_A | PTA_SSE},
1907 {"x86-64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_64BIT
1908 | PTA_SSE | PTA_SSE2 },
1909 {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1910 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1911 {"opteron", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1912 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1913 {"athlon64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1914 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1915 {"athlon-fx", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1916 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1917 {"amdfam10", PROCESSOR_AMDFAM10, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1918 | PTA_64BIT | PTA_3DNOW_A | PTA_SSE
1919 | PTA_SSE2 | PTA_SSE3 | PTA_POPCNT
1920 | PTA_ABM | PTA_SSE4A | PTA_CX16},
1921 {"generic32", PROCESSOR_GENERIC32, 0 /* flags are only used for -march switch. */ },
1922 {"generic64", PROCESSOR_GENERIC64, PTA_64BIT /* flags are only used for -march switch. */ },
1925 int const pta_size = ARRAY_SIZE (processor_alias_table);
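/* For instance, -march=k8 matches the "k8" entry above, so the code
   below sets ix86_arch (and, absent -mtune=, ix86_tune) to
   PROCESSOR_K8 and turns on MMX, 3DNow! (including the Athlon
   extensions), SSE and SSE2 plus the SSE prefetch flag, except for any
   ISA the user disabled explicitly.  */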
1927 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1928 SUBTARGET_OVERRIDE_OPTIONS;
1931 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
1932 SUBSUBTARGET_OVERRIDE_OPTIONS;
1935 /* -fPIC is the default for x86_64. */
1936 if (TARGET_MACHO && TARGET_64BIT)
1939 /* Set the default values for switches whose default depends on TARGET_64BIT
1940 in case they weren't overwritten by command line options. */
1943 /* Mach-O doesn't support omitting the frame pointer for now. */
1944 if (flag_omit_frame_pointer == 2)
1945 flag_omit_frame_pointer = (TARGET_MACHO ? 0 : 1);
1946 if (flag_asynchronous_unwind_tables == 2)
1947 flag_asynchronous_unwind_tables = 1;
1948 if (flag_pcc_struct_return == 2)
1949 flag_pcc_struct_return = 0;
1953 if (flag_omit_frame_pointer == 2)
1954 flag_omit_frame_pointer = 0;
1955 if (flag_asynchronous_unwind_tables == 2)
1956 flag_asynchronous_unwind_tables = 0;
1957 if (flag_pcc_struct_return == 2)
1958 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
1961 /* Need to check -mtune=generic first. */
1962 if (ix86_tune_string)
1964 if (!strcmp (ix86_tune_string, "generic")
1965 || !strcmp (ix86_tune_string, "i686")
1966 /* As special support for cross compilers we read -mtune=native
1967 as -mtune=generic.  With native compilers we won't see -mtune=native
1968 here, as the driver has already replaced it.  */
1969 || !strcmp (ix86_tune_string, "native"))
1972 ix86_tune_string = "generic64";
1974 ix86_tune_string = "generic32";
1976 else if (!strncmp (ix86_tune_string, "generic", 7))
1977 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
1981 if (ix86_arch_string)
1982 ix86_tune_string = ix86_arch_string;
1983 if (!ix86_tune_string)
1985 ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT];
1986 ix86_tune_defaulted = 1;
1989 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
1990 need to use a sensible tune option. */
1991 if (!strcmp (ix86_tune_string, "generic")
1992 || !strcmp (ix86_tune_string, "x86-64")
1993 || !strcmp (ix86_tune_string, "i686"))
1996 ix86_tune_string = "generic64";
1998 ix86_tune_string = "generic32";
2001 if (ix86_stringop_string)
2003 if (!strcmp (ix86_stringop_string, "rep_byte"))
2004 stringop_alg = rep_prefix_1_byte;
2005 else if (!strcmp (ix86_stringop_string, "libcall"))
2006 stringop_alg = libcall;
2007 else if (!strcmp (ix86_stringop_string, "rep_4byte"))
2008 stringop_alg = rep_prefix_4_byte;
2009 else if (!strcmp (ix86_stringop_string, "rep_8byte"))
2010 stringop_alg = rep_prefix_8_byte;
2011 else if (!strcmp (ix86_stringop_string, "byte_loop"))
2012 stringop_alg = loop_1_byte;
2013 else if (!strcmp (ix86_stringop_string, "loop"))
2014 stringop_alg = loop;
2015 else if (!strcmp (ix86_stringop_string, "unrolled_loop"))
2016 stringop_alg = unrolled_loop;
2018 error ("bad value (%s) for -mstringop-strategy= switch", ix86_stringop_string);
2020 if (!strcmp (ix86_tune_string, "x86-64"))
2021 warning (OPT_Wdeprecated, "-mtune=x86-64 is deprecated. Use -mtune=k8 or "
2022 "-mtune=generic instead as appropriate.");
2024 if (!ix86_arch_string)
2025 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
2026 if (!strcmp (ix86_arch_string, "generic"))
2027 error ("generic CPU can be used only for -mtune= switch");
2028 if (!strncmp (ix86_arch_string, "generic", 7))
2029 error ("bad value (%s) for -march= switch", ix86_arch_string);
2031 if (ix86_cmodel_string != 0)
2033 if (!strcmp (ix86_cmodel_string, "small"))
2034 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
2035 else if (!strcmp (ix86_cmodel_string, "medium"))
2036 ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
2038 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
2039 else if (!strcmp (ix86_cmodel_string, "32"))
2040 ix86_cmodel = CM_32;
2041 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
2042 ix86_cmodel = CM_KERNEL;
2043 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
2044 ix86_cmodel = CM_LARGE;
2046 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
2050 ix86_cmodel = CM_32;
2052 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
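/* In other words, when no -mcmodel= option is given, 32-bit
   compilations use CM_32 and 64-bit compilations default to the small
   model, CM_SMALL_PIC under -fPIC and CM_SMALL otherwise; the kernel
   and large models are only accepted without -fPIC, as checked
   above.  */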
2054 if (ix86_asm_string != 0)
2057 && !strcmp (ix86_asm_string, "intel"))
2058 ix86_asm_dialect = ASM_INTEL;
2059 else if (!strcmp (ix86_asm_string, "att"))
2060 ix86_asm_dialect = ASM_ATT;
2062 error ("bad value (%s) for -masm= switch", ix86_asm_string);
2064 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
2065 error ("code model %qs not supported in the %s bit mode",
2066 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
2067 if (ix86_cmodel == CM_LARGE)
2068 sorry ("code model %<large%> not supported yet");
2069 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
2070 sorry ("%i-bit mode not compiled in",
2071 (target_flags & MASK_64BIT) ? 64 : 32);
2073 for (i = 0; i < pta_size; i++)
2074 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
2076 ix86_arch = processor_alias_table[i].processor;
2077 /* Default cpu tuning to the architecture. */
2078 ix86_tune = ix86_arch;
2079 if (processor_alias_table[i].flags & PTA_MMX
2080 && !(target_flags_explicit & MASK_MMX))
2081 target_flags |= MASK_MMX;
2082 if (processor_alias_table[i].flags & PTA_3DNOW
2083 && !(target_flags_explicit & MASK_3DNOW))
2084 target_flags |= MASK_3DNOW;
2085 if (processor_alias_table[i].flags & PTA_3DNOW_A
2086 && !(target_flags_explicit & MASK_3DNOW_A))
2087 target_flags |= MASK_3DNOW_A;
2088 if (processor_alias_table[i].flags & PTA_SSE
2089 && !(target_flags_explicit & MASK_SSE))
2090 target_flags |= MASK_SSE;
2091 if (processor_alias_table[i].flags & PTA_SSE2
2092 && !(target_flags_explicit & MASK_SSE2))
2093 target_flags |= MASK_SSE2;
2094 if (processor_alias_table[i].flags & PTA_SSE3
2095 && !(target_flags_explicit & MASK_SSE3))
2096 target_flags |= MASK_SSE3;
2097 if (processor_alias_table[i].flags & PTA_SSSE3
2098 && !(target_flags_explicit & MASK_SSSE3))
2099 target_flags |= MASK_SSSE3;
2100 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
2101 x86_prefetch_sse = true;
2102 if (processor_alias_table[i].flags & PTA_CX16)
2103 x86_cmpxchg16b = true;
2104 if (processor_alias_table[i].flags & PTA_POPCNT
2105 && !(target_flags_explicit & MASK_POPCNT))
2106 target_flags |= MASK_POPCNT;
2107 if (processor_alias_table[i].flags & PTA_ABM
2108 && !(target_flags_explicit & MASK_ABM))
2109 target_flags |= MASK_ABM;
2110 if (processor_alias_table[i].flags & PTA_SSE4A
2111 && !(target_flags_explicit & MASK_SSE4A))
2112 target_flags |= MASK_SSE4A;
2113 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
2114 error ("CPU you selected does not support x86-64 "
2120 error ("bad value (%s) for -march= switch", ix86_arch_string);
2122 for (i = 0; i < pta_size; i++)
2123 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
2125 ix86_tune = processor_alias_table[i].processor;
2126 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
2128 if (ix86_tune_defaulted)
2130 ix86_tune_string = "x86-64";
2131 for (i = 0; i < pta_size; i++)
2132 if (! strcmp (ix86_tune_string,
2133 processor_alias_table[i].name))
2135 ix86_tune = processor_alias_table[i].processor;
2138 error ("CPU you selected does not support x86-64 "
2141 /* Intel CPUs have always interpreted SSE prefetch instructions as
2142 NOPs; so, we can enable SSE prefetch instructions even when
2143 -mtune (rather than -march) points us to a processor that has them.
2144 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
2145 higher processors. */
2146 if (TARGET_CMOVE && (processor_alias_table[i].flags & PTA_PREFETCH_SSE))
2147 x86_prefetch_sse = true;
2151 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
2154 ix86_cost = &size_cost;
2156 ix86_cost = processor_target_table[ix86_tune].cost;
2157 target_flags |= processor_target_table[ix86_tune].target_enable;
2158 target_flags &= ~processor_target_table[ix86_tune].target_disable;
2160 /* Arrange to set up i386_stack_locals for all functions. */
2161 init_machine_status = ix86_init_machine_status;
2163 /* Validate -mregparm= value. */
2164 if (ix86_regparm_string)
2166 i = atoi (ix86_regparm_string);
2167 if (i < 0 || i > REGPARM_MAX)
2168 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
2174 ix86_regparm = REGPARM_MAX;
2176 /* If the user has provided any of the -malign-* options,
2177 warn and use that value only if -falign-* is not set.
2178 Remove this code in GCC 3.2 or later. */
2179 if (ix86_align_loops_string)
2181 warning (0, "-malign-loops is obsolete, use -falign-loops");
2182 if (align_loops == 0)
2184 i = atoi (ix86_align_loops_string);
2185 if (i < 0 || i > MAX_CODE_ALIGN)
2186 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2188 align_loops = 1 << i;
2192 if (ix86_align_jumps_string)
2194 warning (0, "-malign-jumps is obsolete, use -falign-jumps");
2195 if (align_jumps == 0)
2197 i = atoi (ix86_align_jumps_string);
2198 if (i < 0 || i > MAX_CODE_ALIGN)
2199 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2201 align_jumps = 1 << i;
2205 if (ix86_align_funcs_string)
2207 warning (0, "-malign-functions is obsolete, use -falign-functions");
2208 if (align_functions == 0)
2210 i = atoi (ix86_align_funcs_string);
2211 if (i < 0 || i > MAX_CODE_ALIGN)
2212 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2214 align_functions = 1 << i;
2218 /* Default align_* from the processor table. */
2219 if (align_loops == 0)
2221 align_loops = processor_target_table[ix86_tune].align_loop;
2222 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
2224 if (align_jumps == 0)
2226 align_jumps = processor_target_table[ix86_tune].align_jump;
2227 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
2229 if (align_functions == 0)
2231 align_functions = processor_target_table[ix86_tune].align_func;
2234 /* Validate -mbranch-cost= value, or provide default. */
2235 ix86_branch_cost = ix86_cost->branch_cost;
2236 if (ix86_branch_cost_string)
2238 i = atoi (ix86_branch_cost_string);
2240 error ("-mbranch-cost=%d is not between 0 and 5", i);
2242 ix86_branch_cost = i;
2244 if (ix86_section_threshold_string)
2246 i = atoi (ix86_section_threshold_string);
2248 error ("-mlarge-data-threshold=%d is negative", i);
2250 ix86_section_threshold = i;
2253 if (ix86_tls_dialect_string)
2255 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
2256 ix86_tls_dialect = TLS_DIALECT_GNU;
2257 else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
2258 ix86_tls_dialect = TLS_DIALECT_GNU2;
2259 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
2260 ix86_tls_dialect = TLS_DIALECT_SUN;
2262 error ("bad value (%s) for -mtls-dialect= switch",
2263 ix86_tls_dialect_string);
2266 /* Keep nonleaf frame pointers. */
2267 if (flag_omit_frame_pointer)
2268 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
2269 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
2270 flag_omit_frame_pointer = 1;
2272 /* If we're doing fast math, we don't care about comparison order
2273 wrt NaNs. This lets us use a shorter comparison sequence. */
2274 if (flag_finite_math_only)
2275 target_flags &= ~MASK_IEEE_FP;
2277 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
2278 since the insns won't need emulation. */
2279 if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
2280 target_flags &= ~MASK_NO_FANCY_MATH_387;
2282 /* Likewise, if the target doesn't have a 387, or we've specified
2283 software floating point, don't use 387 inline intrinsics. */
2285 target_flags |= MASK_NO_FANCY_MATH_387;
2287 /* Turn on SSE3 builtins for -mssse3. */
2289 target_flags |= MASK_SSE3;
2291 /* Turn on SSE3 builtins for -msse4a. */
2293 target_flags |= MASK_SSE3;
2295 /* Turn on SSE2 builtins for -msse3. */
2297 target_flags |= MASK_SSE2;
2299 /* Turn on SSE builtins for -msse2. */
2301 target_flags |= MASK_SSE;
2303 /* Turn on MMX builtins for -msse. */
2306 target_flags |= MASK_MMX & ~target_flags_explicit;
2307 x86_prefetch_sse = true;
2310 /* Turn on MMX builtins for 3Dnow. */
2312 target_flags |= MASK_MMX;
2314 /* Turn on POPCNT builtins for -mabm. */
2316 target_flags |= MASK_POPCNT;
2320 if (TARGET_ALIGN_DOUBLE)
2321 error ("-malign-double makes no sense in the 64bit mode");
2323 error ("-mrtd calling convention not supported in the 64bit mode");
2325 /* Enable by default the SSE and MMX builtins. Do allow the user to
2326 explicitly disable any of these. In particular, disabling SSE and
2327 MMX for kernel code is extremely useful. */
2329 |= ((MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE)
2330 & ~target_flags_explicit);
2334 /* The i386 ABI does not specify a red zone.  It still makes sense to use one
2335 when the programmer takes care to keep the stack from being destroyed.  */
2336 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
2337 target_flags |= MASK_NO_RED_ZONE;
2340 /* Validate -mpreferred-stack-boundary= value, or provide default.
2341 The default of 128 bits is for Pentium III's SSE __m128. We can't
2342 change it because of optimize_size. Otherwise, we can't mix object
2343 files compiled with -Os and -On. */
2344 ix86_preferred_stack_boundary = 128;
2345 if (ix86_preferred_stack_boundary_string)
2347 i = atoi (ix86_preferred_stack_boundary_string);
2348 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
2349 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
2350 TARGET_64BIT ? 4 : 2);
2352 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
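/* Worked example: -mpreferred-stack-boundary=4 yields
   (1 << 4) * BITS_PER_UNIT = 16 * 8 = 128 bits, i.e. 16-byte stack
   alignment; values outside the checked range (at least 4 on 64-bit,
   2 on 32-bit, and at most 12) are diagnosed above.  */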
2355 /* Accept -msseregparm only if at least SSE support is enabled. */
2356 if (TARGET_SSEREGPARM
2358 error ("-msseregparm used without SSE enabled");
2360 ix86_fpmath = TARGET_FPMATH_DEFAULT;
2362 if (ix86_fpmath_string != 0)
2364 if (! strcmp (ix86_fpmath_string, "387"))
2365 ix86_fpmath = FPMATH_387;
2366 else if (! strcmp (ix86_fpmath_string, "sse"))
2370 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2371 ix86_fpmath = FPMATH_387;
2374 ix86_fpmath = FPMATH_SSE;
2376 else if (! strcmp (ix86_fpmath_string, "387,sse")
2377 || ! strcmp (ix86_fpmath_string, "sse,387"))
2381 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2382 ix86_fpmath = FPMATH_387;
2384 else if (!TARGET_80387)
2386 warning (0, "387 instruction set disabled, using SSE arithmetics");
2387 ix86_fpmath = FPMATH_SSE;
2390 ix86_fpmath = FPMATH_SSE | FPMATH_387;
2393 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
2396 /* If the i387 is disabled, then do not return values in it. */
2398 target_flags &= ~MASK_FLOAT_RETURNS;
2400 if ((x86_accumulate_outgoing_args & TUNEMASK)
2401 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
2403 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
2405 /* ??? Unwind info is not correct around the CFG unless either a frame
2406 pointer is present or M_A_O_A is set. Fixing this requires rewriting
2407 unwind info generation to be aware of the CFG and propagating states
2409 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
2410 || flag_exceptions || flag_non_call_exceptions)
2411 && flag_omit_frame_pointer
2412 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
2414 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
2415 warning (0, "unwind tables currently require either a frame pointer "
2416 "or -maccumulate-outgoing-args for correctness");
2417 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
2420 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
2423 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
2424 p = strchr (internal_label_prefix, 'X');
2425 internal_label_prefix_len = p - internal_label_prefix;
2429 /* When no scheduling description is available, disable the scheduler passes
2430 so they won't slow down the compilation and make x87 code slower.  */
2431 if (!TARGET_SCHEDULE)
2432 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
2434 if (!PARAM_SET_P (PARAM_SIMULTANEOUS_PREFETCHES))
2435 set_param_value ("simultaneous-prefetches",
2436 ix86_cost->simultaneous_prefetches);
2437 if (!PARAM_SET_P (PARAM_L1_CACHE_LINE_SIZE))
2438 set_param_value ("l1-cache-line-size", ix86_cost->prefetch_block);
2441 /* Switch to the appropriate section for output of DECL.
2442 DECL is either a `VAR_DECL' node or a constant of some sort.
2443 RELOC indicates whether forming the initial value of DECL requires
2444 link-time relocations. */
2447 x86_64_elf_select_section (tree decl, int reloc,
2448 unsigned HOST_WIDE_INT align)
2450 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2451 && ix86_in_large_data_p (decl))
2453 const char *sname = NULL;
2454 unsigned int flags = SECTION_WRITE;
2455 switch (categorize_decl_for_section (decl, reloc, flag_pic))
2460 case SECCAT_DATA_REL:
2461 sname = ".ldata.rel";
2463 case SECCAT_DATA_REL_LOCAL:
2464 sname = ".ldata.rel.local";
2466 case SECCAT_DATA_REL_RO:
2467 sname = ".ldata.rel.ro";
2469 case SECCAT_DATA_REL_RO_LOCAL:
2470 sname = ".ldata.rel.ro.local";
2474 flags |= SECTION_BSS;
2477 case SECCAT_RODATA_MERGE_STR:
2478 case SECCAT_RODATA_MERGE_STR_INIT:
2479 case SECCAT_RODATA_MERGE_CONST:
2483 case SECCAT_SRODATA:
2490 /* We don't split these for the medium model.  Place them into
2491 default sections and hope for the best.  */
2496 /* We might get called with string constants, but get_named_section
2497 doesn't like them as they are not DECLs. Also, we need to set
2498 flags in that case. */
2500 return get_section (sname, flags, NULL);
2501 return get_named_section (decl, sname, reloc);
2504 return default_elf_select_section (decl, reloc, align);
2507 /* Build up a unique section name, expressed as a
2508 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
2509 RELOC indicates whether the initial value of EXP requires
2510 link-time relocations. */
2513 x86_64_elf_unique_section (tree decl, int reloc)
2515 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2516 && ix86_in_large_data_p (decl))
2518 const char *prefix = NULL;
2519 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
2520 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
2522 switch (categorize_decl_for_section (decl, reloc, flag_pic))
2525 case SECCAT_DATA_REL:
2526 case SECCAT_DATA_REL_LOCAL:
2527 case SECCAT_DATA_REL_RO:
2528 case SECCAT_DATA_REL_RO_LOCAL:
2529 prefix = one_only ? ".gnu.linkonce.ld." : ".ldata.";
2532 prefix = one_only ? ".gnu.linkonce.lb." : ".lbss.";
2535 case SECCAT_RODATA_MERGE_STR:
2536 case SECCAT_RODATA_MERGE_STR_INIT:
2537 case SECCAT_RODATA_MERGE_CONST:
2538 prefix = one_only ? ".gnu.linkonce.lr." : ".lrodata.";
2540 case SECCAT_SRODATA:
2547 /* We don't split these for the medium model.  Place them into
2548 default sections and hope for the best.  */
2556 plen = strlen (prefix);
2558 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
2559 name = targetm.strip_name_encoding (name);
2560 nlen = strlen (name);
2562 string = alloca (nlen + plen + 1);
2563 memcpy (string, prefix, plen);
2564 memcpy (string + plen, name, nlen + 1);
2566 DECL_SECTION_NAME (decl) = build_string (nlen + plen, string);
2570 default_unique_section (decl, reloc);
2573 #ifdef COMMON_ASM_OP
2574 /* This says how to output assembler code to declare an
2575 uninitialized external linkage data object.
2577 For medium-model x86-64 we need to use the .largecomm directive for large objects.  */
2580 x86_elf_aligned_common (FILE *file,
2581 const char *name, unsigned HOST_WIDE_INT size,
2584 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2585 && size > (unsigned int)ix86_section_threshold)
2586 fprintf (file, ".largecomm\t");
2588 fprintf (file, "%s", COMMON_ASM_OP);
2589 assemble_name (file, name);
2590 fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
2591 size, align / BITS_PER_UNIT);
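/* So a 100000-byte common object with 256-bit alignment comes out
   roughly as

       .largecomm      buf,100000,32

   under the medium code model (size above ix86_section_threshold),
   and with the plain COMMON_ASM_OP spelling (".comm" on most ELF
   targets) otherwise; note that the alignment operand is in bytes.  */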
2594 /* Utility function for targets to use in implementing
2595 ASM_OUTPUT_ALIGNED_BSS. */
2598 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
2599 const char *name, unsigned HOST_WIDE_INT size,
2602 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2603 && size > (unsigned int)ix86_section_threshold)
2604 switch_to_section (get_named_section (decl, ".lbss", 0));
2606 switch_to_section (bss_section);
2607 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
2608 #ifdef ASM_DECLARE_OBJECT_NAME
2609 last_assemble_variable_decl = decl;
2610 ASM_DECLARE_OBJECT_NAME (file, name, decl);
2612 /* The standard thing is to just output a label for the object.  */
2613 ASM_OUTPUT_LABEL (file, name);
2614 #endif /* ASM_DECLARE_OBJECT_NAME */
2615 ASM_OUTPUT_SKIP (file, size ? size : 1);
2619 optimization_options (int level, int size ATTRIBUTE_UNUSED)
2621 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
2622 make the problem with not enough registers even worse. */
2623 #ifdef INSN_SCHEDULING
2625 flag_schedule_insns = 0;
2629 /* The Darwin libraries never set errno, so we might as well
2630 avoid calling them when that's the only reason we would. */
2631 flag_errno_math = 0;
2633 /* The default values of these switches depend on TARGET_64BIT, which is
2634 not known at this moment.  Mark these values with 2 and let the user
2635 override them.  If there is no command-line option specifying them,
2636 we will set the defaults in override_options.  */
2638 flag_omit_frame_pointer = 2;
2639 flag_pcc_struct_return = 2;
2640 flag_asynchronous_unwind_tables = 2;
2641 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
2642 SUBTARGET_OPTIMIZATION_OPTIONS;
2646 /* Table of valid machine attributes. */
2647 const struct attribute_spec ix86_attribute_table[] =
2649 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
2650 /* Stdcall attribute says callee is responsible for popping arguments
2651 if they are not variable. */
2652 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2653 /* Fastcall attribute says callee is responsible for popping arguments
2654 if they are not variable. */
2655 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2656 /* Cdecl attribute says the callee is a normal C declaration */
2657 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2658 /* Regparm attribute specifies how many integer arguments are to be
2659 passed in registers. */
2660 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute },
2661 /* Sseregparm attribute says we are using x86_64 calling conventions
2662 for FP arguments. */
2663 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2664 /* force_align_arg_pointer says this function realigns the stack at entry. */
2665 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
2666 false, true, true, ix86_handle_cconv_attribute },
2667 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
2668 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
2669 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
2670 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
2672 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
2673 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
2674 #ifdef SUBTARGET_ATTRIBUTE_TABLE
2675 SUBTARGET_ATTRIBUTE_TABLE,
2677 { NULL, 0, 0, false, false, false, NULL }
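/* In user code these attributes look like, for instance,

       int __attribute__ ((stdcall))     wnd_proc (int, int);
       int __attribute__ ((fastcall))    f (int a, int b);
       int __attribute__ ((regparm (2))) g (int a, int b);
       struct __attribute__ ((ms_struct)) s { char c; int i; };

   fastcall passes the first two integer arguments in %ecx and %edx,
   while regparm (2) uses %eax and %edx; the compatibility rules
   between these conventions are enforced in
   ix86_handle_cconv_attribute below.  */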
2680 /* Decide whether we can make a sibling call to a function. DECL is the
2681 declaration of the function being targeted by the call and EXP is the
2682 CALL_EXPR representing the call. */
2685 ix86_function_ok_for_sibcall (tree decl, tree exp)
2690 /* If we are generating position-independent code, we cannot sibcall
2691 optimize any indirect call, or a direct call to a global function,
2692 as the PLT requires %ebx be live. */
2693 if (!TARGET_64BIT && flag_pic && (!decl || !targetm.binds_local_p (decl)))
2700 func = TREE_TYPE (CALL_EXPR_FN (exp));
2701 if (POINTER_TYPE_P (func))
2702 func = TREE_TYPE (func);
2705 /* Check that the return value locations are the same.  For example,
2706 if we are returning floats on the 80387 register stack, we cannot
2707 make a sibcall from a function that doesn't return a float to a
2708 function that does or, conversely, from a function that does return
2709 a float to a function that doesn't; the necessary stack adjustment
2710 would not be executed. This is also the place we notice
2711 differences in the return value ABI. Note that it is ok for one
2712 of the functions to have void return type as long as the return
2713 value of the other is passed in a register. */
2714 a = ix86_function_value (TREE_TYPE (exp), func, false);
2715 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
2717 if (STACK_REG_P (a) || STACK_REG_P (b))
2719 if (!rtx_equal_p (a, b))
2722 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
2724 else if (!rtx_equal_p (a, b))
2727 /* If this call is indirect, we'll need to be able to use a call-clobbered
2728 register for the address of the target function. Make sure that all
2729 such registers are not used for passing parameters. */
2730 if (!decl && !TARGET_64BIT)
2734 /* We're looking at the CALL_EXPR, we need the type of the function. */
2735 type = CALL_EXPR_FN (exp); /* pointer expression */
2736 type = TREE_TYPE (type); /* pointer type */
2737 type = TREE_TYPE (type); /* function type */
2739 if (ix86_function_regparm (type, NULL) >= 3)
2741 /* ??? Need to count the actual number of registers to be used,
2742 not the possible number of registers. Fix later. */
2747 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
2748 /* Dllimport'd functions are also called indirectly. */
2749 if (decl && DECL_DLLIMPORT_P (decl)
2750 && ix86_function_regparm (TREE_TYPE (decl), NULL) >= 3)
2754 /* If we force-aligned the stack, then sibcalling would unalign the
2755 stack, which may break the called function. */
2756 if (cfun->machine->force_align_arg_pointer)
2759 /* Otherwise okay. That also includes certain types of indirect calls. */
2763 /* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm"
2764 calling convention attributes;
2765 arguments as in struct attribute_spec.handler. */
2768 ix86_handle_cconv_attribute (tree *node, tree name,
2770 int flags ATTRIBUTE_UNUSED,
2773 if (TREE_CODE (*node) != FUNCTION_TYPE
2774 && TREE_CODE (*node) != METHOD_TYPE
2775 && TREE_CODE (*node) != FIELD_DECL
2776 && TREE_CODE (*node) != TYPE_DECL)
2778 warning (OPT_Wattributes, "%qs attribute only applies to functions",
2779 IDENTIFIER_POINTER (name));
2780 *no_add_attrs = true;
2784 /* Can combine regparm with all attributes but fastcall. */
2785 if (is_attribute_p ("regparm", name))
2789 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2791 error ("fastcall and regparm attributes are not compatible");
2794 cst = TREE_VALUE (args);
2795 if (TREE_CODE (cst) != INTEGER_CST)
2797 warning (OPT_Wattributes,
2798 "%qs attribute requires an integer constant argument",
2799 IDENTIFIER_POINTER (name));
2800 *no_add_attrs = true;
2802 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
2804 warning (OPT_Wattributes, "argument to %qs attribute larger than %d",
2805 IDENTIFIER_POINTER (name), REGPARM_MAX);
2806 *no_add_attrs = true;
2810 && lookup_attribute (ix86_force_align_arg_pointer_string,
2811 TYPE_ATTRIBUTES (*node))
2812 && compare_tree_int (cst, REGPARM_MAX-1))
2814 error ("%s functions limited to %d register parameters",
2815 ix86_force_align_arg_pointer_string, REGPARM_MAX-1);
2823 warning (OPT_Wattributes, "%qs attribute ignored",
2824 IDENTIFIER_POINTER (name));
2825 *no_add_attrs = true;
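/* Examples of the checks above: combining the two conventions, as in

       int __attribute__ ((fastcall, regparm (3))) f (int, int, int);

   is rejected with "fastcall and regparm attributes are not
   compatible", and a count above REGPARM_MAX (3 on ia32), such as
   regparm (4), only draws a warning and the attribute is dropped.  */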
2829 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
2830 if (is_attribute_p ("fastcall", name))
2832 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
2834 error ("fastcall and cdecl attributes are not compatible");
2836 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
2838 error ("fastcall and stdcall attributes are not compatible");
2840 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
2842 error ("fastcall and regparm attributes are not compatible");
2846 /* Can combine stdcall with fastcall (redundant), regparm and
2848 else if (is_attribute_p ("stdcall", name))
2850 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
2852 error ("stdcall and cdecl attributes are not compatible");
2854 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2856 error ("stdcall and fastcall attributes are not compatible");
2860 /* Can combine cdecl with regparm and sseregparm. */
2861 else if (is_attribute_p ("cdecl", name))
2863 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
2865 error ("stdcall and cdecl attributes are not compatible");
2867 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2869 error ("fastcall and cdecl attributes are not compatible");
2873 /* Can combine sseregparm with all attributes. */
2878 /* Return 0 if the attributes for two types are incompatible, 1 if they
2879 are compatible, and 2 if they are nearly compatible (which causes a
2880 warning to be generated). */
2883 ix86_comp_type_attributes (tree type1, tree type2)
2885 /* Check for mismatch of non-default calling convention. */
2886 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
2888 if (TREE_CODE (type1) != FUNCTION_TYPE)
2891 /* Check for mismatched fastcall/regparm types. */
2892 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
2893 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
2894 || (ix86_function_regparm (type1, NULL)
2895 != ix86_function_regparm (type2, NULL)))
2898 /* Check for mismatched sseregparm types. */
2899 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
2900 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
2903 /* Check for mismatched return types (cdecl vs stdcall). */
2904 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
2905 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
2911 /* Return the regparm value for a function with the indicated TYPE and DECL.
2912 DECL may be NULL when calling a function indirectly
2913 or considering a libcall. */
2916 ix86_function_regparm (tree type, tree decl)
2919 int regparm = ix86_regparm;
2920 bool user_convention = false;
2924 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
2927 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
2928 user_convention = true;