/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
   2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009
   Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
24 #include "coretypes.h"
30 #include "hard-reg-set.h"
32 #include "insn-config.h"
33 #include "conditions.h"
35 #include "insn-codes.h"
36 #include "insn-attr.h"
44 #include "basic-block.h"
47 #include "target-def.h"
48 #include "langhooks.h"
53 #include "tm-constrs.h"
57 static int x86_builtin_vectorization_cost (bool);
58 static rtx legitimize_dllimport_symbol (rtx, bool);
/* Default stack-probe limit when the target headers do not provide one;
   -1 means "no limit".  (NOTE(review): the closing #endif was missing
   from this copy and has been restored.)  */
#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif
/* Return index of given mode in mult and division cost tables.
   Indices 0..3 select QI/HI/SI/DImode; anything else maps to the
   catch-all "other" slot 4.  (NOTE(review): the final ": 4)" arm was
   missing from this copy and has been restored.)  */
#define MODE_INDEX(mode) \
  ((mode) == QImode ? 0 \
   : (mode) == HImode ? 1 \
   : (mode) == SImode ? 2 \
   : (mode) == DImode ? 3 \
   : 4)
/* Processor costs (relative to an add) */
/* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes,
   so a size-tuning cost of N "bytes" is encoded as 2*N.  */
#define COSTS_N_BYTES(N) ((N) * 2)

/* Placeholder stringop descriptor for entries a cost table does not
   specialize: always fall back to a library call.  */
#define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
79 struct processor_costs ix86_size_cost = {/* costs for tuning for size */
80 COSTS_N_BYTES (2), /* cost of an add instruction */
81 COSTS_N_BYTES (3), /* cost of a lea instruction */
82 COSTS_N_BYTES (2), /* variable shift costs */
83 COSTS_N_BYTES (3), /* constant shift costs */
84 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
85 COSTS_N_BYTES (3), /* HI */
86 COSTS_N_BYTES (3), /* SI */
87 COSTS_N_BYTES (3), /* DI */
88 COSTS_N_BYTES (5)}, /* other */
89 0, /* cost of multiply per each bit set */
90 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
91 COSTS_N_BYTES (3), /* HI */
92 COSTS_N_BYTES (3), /* SI */
93 COSTS_N_BYTES (3), /* DI */
94 COSTS_N_BYTES (5)}, /* other */
95 COSTS_N_BYTES (3), /* cost of movsx */
96 COSTS_N_BYTES (3), /* cost of movzx */
99 2, /* cost for loading QImode using movzbl */
100 {2, 2, 2}, /* cost of loading integer registers
101 in QImode, HImode and SImode.
102 Relative to reg-reg move (2). */
103 {2, 2, 2}, /* cost of storing integer registers */
104 2, /* cost of reg,reg fld/fst */
105 {2, 2, 2}, /* cost of loading fp registers
106 in SFmode, DFmode and XFmode */
107 {2, 2, 2}, /* cost of storing fp registers
108 in SFmode, DFmode and XFmode */
109 3, /* cost of moving MMX register */
110 {3, 3}, /* cost of loading MMX registers
111 in SImode and DImode */
112 {3, 3}, /* cost of storing MMX registers
113 in SImode and DImode */
114 3, /* cost of moving SSE register */
115 {3, 3, 3}, /* cost of loading SSE registers
116 in SImode, DImode and TImode */
117 {3, 3, 3}, /* cost of storing SSE registers
118 in SImode, DImode and TImode */
119 3, /* MMX or SSE register to integer */
120 0, /* size of l1 cache */
121 0, /* size of l2 cache */
122 0, /* size of prefetch block */
123 0, /* number of parallel prefetches */
125 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
126 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
127 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
128 COSTS_N_BYTES (2), /* cost of FABS instruction. */
129 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
130 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
131 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
132 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
133 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
134 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
135 1, /* scalar_stmt_cost. */
136 1, /* scalar load_cost. */
137 1, /* scalar_store_cost. */
138 1, /* vec_stmt_cost. */
139 1, /* vec_to_scalar_cost. */
140 1, /* scalar_to_vec_cost. */
141 1, /* vec_align_load_cost. */
142 1, /* vec_unalign_load_cost. */
143 1, /* vec_store_cost. */
144 1, /* cond_taken_branch_cost. */
145 1, /* cond_not_taken_branch_cost. */
148 /* Processor costs (relative to an add) */
150 struct processor_costs i386_cost = { /* 386 specific costs */
151 COSTS_N_INSNS (1), /* cost of an add instruction */
152 COSTS_N_INSNS (1), /* cost of a lea instruction */
153 COSTS_N_INSNS (3), /* variable shift costs */
154 COSTS_N_INSNS (2), /* constant shift costs */
155 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
156 COSTS_N_INSNS (6), /* HI */
157 COSTS_N_INSNS (6), /* SI */
158 COSTS_N_INSNS (6), /* DI */
159 COSTS_N_INSNS (6)}, /* other */
160 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
161 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
162 COSTS_N_INSNS (23), /* HI */
163 COSTS_N_INSNS (23), /* SI */
164 COSTS_N_INSNS (23), /* DI */
165 COSTS_N_INSNS (23)}, /* other */
166 COSTS_N_INSNS (3), /* cost of movsx */
167 COSTS_N_INSNS (2), /* cost of movzx */
168 15, /* "large" insn */
170 4, /* cost for loading QImode using movzbl */
171 {2, 4, 2}, /* cost of loading integer registers
172 in QImode, HImode and SImode.
173 Relative to reg-reg move (2). */
174 {2, 4, 2}, /* cost of storing integer registers */
175 2, /* cost of reg,reg fld/fst */
176 {8, 8, 8}, /* cost of loading fp registers
177 in SFmode, DFmode and XFmode */
178 {8, 8, 8}, /* cost of storing fp registers
179 in SFmode, DFmode and XFmode */
180 2, /* cost of moving MMX register */
181 {4, 8}, /* cost of loading MMX registers
182 in SImode and DImode */
183 {4, 8}, /* cost of storing MMX registers
184 in SImode and DImode */
185 2, /* cost of moving SSE register */
186 {4, 8, 16}, /* cost of loading SSE registers
187 in SImode, DImode and TImode */
188 {4, 8, 16}, /* cost of storing SSE registers
189 in SImode, DImode and TImode */
190 3, /* MMX or SSE register to integer */
191 0, /* size of l1 cache */
192 0, /* size of l2 cache */
193 0, /* size of prefetch block */
194 0, /* number of parallel prefetches */
196 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
197 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
198 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
199 COSTS_N_INSNS (22), /* cost of FABS instruction. */
200 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
201 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
202 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
203 DUMMY_STRINGOP_ALGS},
204 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
205 DUMMY_STRINGOP_ALGS},
206 1, /* scalar_stmt_cost. */
207 1, /* scalar load_cost. */
208 1, /* scalar_store_cost. */
209 1, /* vec_stmt_cost. */
210 1, /* vec_to_scalar_cost. */
211 1, /* scalar_to_vec_cost. */
212 1, /* vec_align_load_cost. */
213 2, /* vec_unalign_load_cost. */
214 1, /* vec_store_cost. */
215 3, /* cond_taken_branch_cost. */
216 1, /* cond_not_taken_branch_cost. */
220 struct processor_costs i486_cost = { /* 486 specific costs */
221 COSTS_N_INSNS (1), /* cost of an add instruction */
222 COSTS_N_INSNS (1), /* cost of a lea instruction */
223 COSTS_N_INSNS (3), /* variable shift costs */
224 COSTS_N_INSNS (2), /* constant shift costs */
225 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
226 COSTS_N_INSNS (12), /* HI */
227 COSTS_N_INSNS (12), /* SI */
228 COSTS_N_INSNS (12), /* DI */
229 COSTS_N_INSNS (12)}, /* other */
230 1, /* cost of multiply per each bit set */
231 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
232 COSTS_N_INSNS (40), /* HI */
233 COSTS_N_INSNS (40), /* SI */
234 COSTS_N_INSNS (40), /* DI */
235 COSTS_N_INSNS (40)}, /* other */
236 COSTS_N_INSNS (3), /* cost of movsx */
237 COSTS_N_INSNS (2), /* cost of movzx */
238 15, /* "large" insn */
240 4, /* cost for loading QImode using movzbl */
241 {2, 4, 2}, /* cost of loading integer registers
242 in QImode, HImode and SImode.
243 Relative to reg-reg move (2). */
244 {2, 4, 2}, /* cost of storing integer registers */
245 2, /* cost of reg,reg fld/fst */
246 {8, 8, 8}, /* cost of loading fp registers
247 in SFmode, DFmode and XFmode */
248 {8, 8, 8}, /* cost of storing fp registers
249 in SFmode, DFmode and XFmode */
250 2, /* cost of moving MMX register */
251 {4, 8}, /* cost of loading MMX registers
252 in SImode and DImode */
253 {4, 8}, /* cost of storing MMX registers
254 in SImode and DImode */
255 2, /* cost of moving SSE register */
256 {4, 8, 16}, /* cost of loading SSE registers
257 in SImode, DImode and TImode */
258 {4, 8, 16}, /* cost of storing SSE registers
259 in SImode, DImode and TImode */
260 3, /* MMX or SSE register to integer */
261 4, /* size of l1 cache. 486 has 8kB cache
262 shared for code and data, so 4kB is
263 not really precise. */
264 4, /* size of l2 cache */
265 0, /* size of prefetch block */
266 0, /* number of parallel prefetches */
268 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
269 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
270 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
271 COSTS_N_INSNS (3), /* cost of FABS instruction. */
272 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
273 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
274 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
275 DUMMY_STRINGOP_ALGS},
276 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
277 DUMMY_STRINGOP_ALGS},
278 1, /* scalar_stmt_cost. */
279 1, /* scalar load_cost. */
280 1, /* scalar_store_cost. */
281 1, /* vec_stmt_cost. */
282 1, /* vec_to_scalar_cost. */
283 1, /* scalar_to_vec_cost. */
284 1, /* vec_align_load_cost. */
285 2, /* vec_unalign_load_cost. */
286 1, /* vec_store_cost. */
287 3, /* cond_taken_branch_cost. */
288 1, /* cond_not_taken_branch_cost. */
292 struct processor_costs pentium_cost = {
293 COSTS_N_INSNS (1), /* cost of an add instruction */
294 COSTS_N_INSNS (1), /* cost of a lea instruction */
295 COSTS_N_INSNS (4), /* variable shift costs */
296 COSTS_N_INSNS (1), /* constant shift costs */
297 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
298 COSTS_N_INSNS (11), /* HI */
299 COSTS_N_INSNS (11), /* SI */
300 COSTS_N_INSNS (11), /* DI */
301 COSTS_N_INSNS (11)}, /* other */
302 0, /* cost of multiply per each bit set */
303 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
304 COSTS_N_INSNS (25), /* HI */
305 COSTS_N_INSNS (25), /* SI */
306 COSTS_N_INSNS (25), /* DI */
307 COSTS_N_INSNS (25)}, /* other */
308 COSTS_N_INSNS (3), /* cost of movsx */
309 COSTS_N_INSNS (2), /* cost of movzx */
310 8, /* "large" insn */
312 6, /* cost for loading QImode using movzbl */
313 {2, 4, 2}, /* cost of loading integer registers
314 in QImode, HImode and SImode.
315 Relative to reg-reg move (2). */
316 {2, 4, 2}, /* cost of storing integer registers */
317 2, /* cost of reg,reg fld/fst */
318 {2, 2, 6}, /* cost of loading fp registers
319 in SFmode, DFmode and XFmode */
320 {4, 4, 6}, /* cost of storing fp registers
321 in SFmode, DFmode and XFmode */
322 8, /* cost of moving MMX register */
323 {8, 8}, /* cost of loading MMX registers
324 in SImode and DImode */
325 {8, 8}, /* cost of storing MMX registers
326 in SImode and DImode */
327 2, /* cost of moving SSE register */
328 {4, 8, 16}, /* cost of loading SSE registers
329 in SImode, DImode and TImode */
330 {4, 8, 16}, /* cost of storing SSE registers
331 in SImode, DImode and TImode */
332 3, /* MMX or SSE register to integer */
333 8, /* size of l1 cache. */
334 8, /* size of l2 cache */
335 0, /* size of prefetch block */
336 0, /* number of parallel prefetches */
338 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
339 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
340 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
341 COSTS_N_INSNS (1), /* cost of FABS instruction. */
342 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
343 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
344 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
345 DUMMY_STRINGOP_ALGS},
346 {{libcall, {{-1, rep_prefix_4_byte}}},
347 DUMMY_STRINGOP_ALGS},
348 1, /* scalar_stmt_cost. */
349 1, /* scalar load_cost. */
350 1, /* scalar_store_cost. */
351 1, /* vec_stmt_cost. */
352 1, /* vec_to_scalar_cost. */
353 1, /* scalar_to_vec_cost. */
354 1, /* vec_align_load_cost. */
355 2, /* vec_unalign_load_cost. */
356 1, /* vec_store_cost. */
357 3, /* cond_taken_branch_cost. */
358 1, /* cond_not_taken_branch_cost. */
362 struct processor_costs pentiumpro_cost = {
363 COSTS_N_INSNS (1), /* cost of an add instruction */
364 COSTS_N_INSNS (1), /* cost of a lea instruction */
365 COSTS_N_INSNS (1), /* variable shift costs */
366 COSTS_N_INSNS (1), /* constant shift costs */
367 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
368 COSTS_N_INSNS (4), /* HI */
369 COSTS_N_INSNS (4), /* SI */
370 COSTS_N_INSNS (4), /* DI */
371 COSTS_N_INSNS (4)}, /* other */
372 0, /* cost of multiply per each bit set */
373 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
374 COSTS_N_INSNS (17), /* HI */
375 COSTS_N_INSNS (17), /* SI */
376 COSTS_N_INSNS (17), /* DI */
377 COSTS_N_INSNS (17)}, /* other */
378 COSTS_N_INSNS (1), /* cost of movsx */
379 COSTS_N_INSNS (1), /* cost of movzx */
380 8, /* "large" insn */
382 2, /* cost for loading QImode using movzbl */
383 {4, 4, 4}, /* cost of loading integer registers
384 in QImode, HImode and SImode.
385 Relative to reg-reg move (2). */
386 {2, 2, 2}, /* cost of storing integer registers */
387 2, /* cost of reg,reg fld/fst */
388 {2, 2, 6}, /* cost of loading fp registers
389 in SFmode, DFmode and XFmode */
390 {4, 4, 6}, /* cost of storing fp registers
391 in SFmode, DFmode and XFmode */
392 2, /* cost of moving MMX register */
393 {2, 2}, /* cost of loading MMX registers
394 in SImode and DImode */
395 {2, 2}, /* cost of storing MMX registers
396 in SImode and DImode */
397 2, /* cost of moving SSE register */
398 {2, 2, 8}, /* cost of loading SSE registers
399 in SImode, DImode and TImode */
400 {2, 2, 8}, /* cost of storing SSE registers
401 in SImode, DImode and TImode */
402 3, /* MMX or SSE register to integer */
403 8, /* size of l1 cache. */
404 256, /* size of l2 cache */
405 32, /* size of prefetch block */
406 6, /* number of parallel prefetches */
408 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
409 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
410 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
411 COSTS_N_INSNS (2), /* cost of FABS instruction. */
412 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
413 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
414 /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes (we ensure
415 the alignment). For small blocks inline loop is still a noticeable win, for bigger
416 blocks either rep movsl or rep movsb is way to go. Rep movsb has apparently
417 more expensive startup time in CPU, but after 4K the difference is down in the noise.
419 {{rep_prefix_4_byte, {{128, loop}, {1024, unrolled_loop},
420 {8192, rep_prefix_4_byte}, {-1, rep_prefix_1_byte}}},
421 DUMMY_STRINGOP_ALGS},
422 {{rep_prefix_4_byte, {{1024, unrolled_loop},
423 {8192, rep_prefix_4_byte}, {-1, libcall}}},
424 DUMMY_STRINGOP_ALGS},
425 1, /* scalar_stmt_cost. */
426 1, /* scalar load_cost. */
427 1, /* scalar_store_cost. */
428 1, /* vec_stmt_cost. */
429 1, /* vec_to_scalar_cost. */
430 1, /* scalar_to_vec_cost. */
431 1, /* vec_align_load_cost. */
432 2, /* vec_unalign_load_cost. */
433 1, /* vec_store_cost. */
434 3, /* cond_taken_branch_cost. */
435 1, /* cond_not_taken_branch_cost. */
439 struct processor_costs geode_cost = {
440 COSTS_N_INSNS (1), /* cost of an add instruction */
441 COSTS_N_INSNS (1), /* cost of a lea instruction */
442 COSTS_N_INSNS (2), /* variable shift costs */
443 COSTS_N_INSNS (1), /* constant shift costs */
444 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
445 COSTS_N_INSNS (4), /* HI */
446 COSTS_N_INSNS (7), /* SI */
447 COSTS_N_INSNS (7), /* DI */
448 COSTS_N_INSNS (7)}, /* other */
449 0, /* cost of multiply per each bit set */
450 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
451 COSTS_N_INSNS (23), /* HI */
452 COSTS_N_INSNS (39), /* SI */
453 COSTS_N_INSNS (39), /* DI */
454 COSTS_N_INSNS (39)}, /* other */
455 COSTS_N_INSNS (1), /* cost of movsx */
456 COSTS_N_INSNS (1), /* cost of movzx */
457 8, /* "large" insn */
459 1, /* cost for loading QImode using movzbl */
460 {1, 1, 1}, /* cost of loading integer registers
461 in QImode, HImode and SImode.
462 Relative to reg-reg move (2). */
463 {1, 1, 1}, /* cost of storing integer registers */
464 1, /* cost of reg,reg fld/fst */
465 {1, 1, 1}, /* cost of loading fp registers
466 in SFmode, DFmode and XFmode */
467 {4, 6, 6}, /* cost of storing fp registers
468 in SFmode, DFmode and XFmode */
470 1, /* cost of moving MMX register */
471 {1, 1}, /* cost of loading MMX registers
472 in SImode and DImode */
473 {1, 1}, /* cost of storing MMX registers
474 in SImode and DImode */
475 1, /* cost of moving SSE register */
476 {1, 1, 1}, /* cost of loading SSE registers
477 in SImode, DImode and TImode */
478 {1, 1, 1}, /* cost of storing SSE registers
479 in SImode, DImode and TImode */
480 1, /* MMX or SSE register to integer */
481 64, /* size of l1 cache. */
482 128, /* size of l2 cache. */
483 32, /* size of prefetch block */
484 1, /* number of parallel prefetches */
486 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
487 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
488 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
489 COSTS_N_INSNS (1), /* cost of FABS instruction. */
490 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
491 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
492 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
493 DUMMY_STRINGOP_ALGS},
494 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
495 DUMMY_STRINGOP_ALGS},
496 1, /* scalar_stmt_cost. */
497 1, /* scalar load_cost. */
498 1, /* scalar_store_cost. */
499 1, /* vec_stmt_cost. */
500 1, /* vec_to_scalar_cost. */
501 1, /* scalar_to_vec_cost. */
502 1, /* vec_align_load_cost. */
503 2, /* vec_unalign_load_cost. */
504 1, /* vec_store_cost. */
505 3, /* cond_taken_branch_cost. */
506 1, /* cond_not_taken_branch_cost. */
510 struct processor_costs k6_cost = {
511 COSTS_N_INSNS (1), /* cost of an add instruction */
512 COSTS_N_INSNS (2), /* cost of a lea instruction */
513 COSTS_N_INSNS (1), /* variable shift costs */
514 COSTS_N_INSNS (1), /* constant shift costs */
515 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
516 COSTS_N_INSNS (3), /* HI */
517 COSTS_N_INSNS (3), /* SI */
518 COSTS_N_INSNS (3), /* DI */
519 COSTS_N_INSNS (3)}, /* other */
520 0, /* cost of multiply per each bit set */
521 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
522 COSTS_N_INSNS (18), /* HI */
523 COSTS_N_INSNS (18), /* SI */
524 COSTS_N_INSNS (18), /* DI */
525 COSTS_N_INSNS (18)}, /* other */
526 COSTS_N_INSNS (2), /* cost of movsx */
527 COSTS_N_INSNS (2), /* cost of movzx */
528 8, /* "large" insn */
530 3, /* cost for loading QImode using movzbl */
531 {4, 5, 4}, /* cost of loading integer registers
532 in QImode, HImode and SImode.
533 Relative to reg-reg move (2). */
534 {2, 3, 2}, /* cost of storing integer registers */
535 4, /* cost of reg,reg fld/fst */
536 {6, 6, 6}, /* cost of loading fp registers
537 in SFmode, DFmode and XFmode */
538 {4, 4, 4}, /* cost of storing fp registers
539 in SFmode, DFmode and XFmode */
540 2, /* cost of moving MMX register */
541 {2, 2}, /* cost of loading MMX registers
542 in SImode and DImode */
543 {2, 2}, /* cost of storing MMX registers
544 in SImode and DImode */
545 2, /* cost of moving SSE register */
546 {2, 2, 8}, /* cost of loading SSE registers
547 in SImode, DImode and TImode */
548 {2, 2, 8}, /* cost of storing SSE registers
549 in SImode, DImode and TImode */
550 6, /* MMX or SSE register to integer */
551 32, /* size of l1 cache. */
552 32, /* size of l2 cache. Some models
553 have integrated l2 cache, but
554 optimizing for k6 is not important
555 enough to worry about that. */
556 32, /* size of prefetch block */
557 1, /* number of parallel prefetches */
559 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
560 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
561 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
562 COSTS_N_INSNS (2), /* cost of FABS instruction. */
563 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
564 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
565 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
566 DUMMY_STRINGOP_ALGS},
567 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
568 DUMMY_STRINGOP_ALGS},
569 1, /* scalar_stmt_cost. */
570 1, /* scalar load_cost. */
571 1, /* scalar_store_cost. */
572 1, /* vec_stmt_cost. */
573 1, /* vec_to_scalar_cost. */
574 1, /* scalar_to_vec_cost. */
575 1, /* vec_align_load_cost. */
576 2, /* vec_unalign_load_cost. */
577 1, /* vec_store_cost. */
578 3, /* cond_taken_branch_cost. */
579 1, /* cond_not_taken_branch_cost. */
583 struct processor_costs athlon_cost = {
584 COSTS_N_INSNS (1), /* cost of an add instruction */
585 COSTS_N_INSNS (2), /* cost of a lea instruction */
586 COSTS_N_INSNS (1), /* variable shift costs */
587 COSTS_N_INSNS (1), /* constant shift costs */
588 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
589 COSTS_N_INSNS (5), /* HI */
590 COSTS_N_INSNS (5), /* SI */
591 COSTS_N_INSNS (5), /* DI */
592 COSTS_N_INSNS (5)}, /* other */
593 0, /* cost of multiply per each bit set */
594 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
595 COSTS_N_INSNS (26), /* HI */
596 COSTS_N_INSNS (42), /* SI */
597 COSTS_N_INSNS (74), /* DI */
598 COSTS_N_INSNS (74)}, /* other */
599 COSTS_N_INSNS (1), /* cost of movsx */
600 COSTS_N_INSNS (1), /* cost of movzx */
601 8, /* "large" insn */
603 4, /* cost for loading QImode using movzbl */
604 {3, 4, 3}, /* cost of loading integer registers
605 in QImode, HImode and SImode.
606 Relative to reg-reg move (2). */
607 {3, 4, 3}, /* cost of storing integer registers */
608 4, /* cost of reg,reg fld/fst */
609 {4, 4, 12}, /* cost of loading fp registers
610 in SFmode, DFmode and XFmode */
611 {6, 6, 8}, /* cost of storing fp registers
612 in SFmode, DFmode and XFmode */
613 2, /* cost of moving MMX register */
614 {4, 4}, /* cost of loading MMX registers
615 in SImode and DImode */
616 {4, 4}, /* cost of storing MMX registers
617 in SImode and DImode */
618 2, /* cost of moving SSE register */
619 {4, 4, 6}, /* cost of loading SSE registers
620 in SImode, DImode and TImode */
621 {4, 4, 5}, /* cost of storing SSE registers
622 in SImode, DImode and TImode */
623 5, /* MMX or SSE register to integer */
624 64, /* size of l1 cache. */
625 256, /* size of l2 cache. */
626 64, /* size of prefetch block */
627 6, /* number of parallel prefetches */
629 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
630 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
631 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
632 COSTS_N_INSNS (2), /* cost of FABS instruction. */
633 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
634 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
635 /* For some reason, Athlon deals better with REP prefix (relative to loops)
636 compared to K8. Alignment becomes important after 8 bytes for memcpy and
637 128 bytes for memset. */
638 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
639 DUMMY_STRINGOP_ALGS},
640 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
641 DUMMY_STRINGOP_ALGS},
642 1, /* scalar_stmt_cost. */
643 1, /* scalar load_cost. */
644 1, /* scalar_store_cost. */
645 1, /* vec_stmt_cost. */
646 1, /* vec_to_scalar_cost. */
647 1, /* scalar_to_vec_cost. */
648 1, /* vec_align_load_cost. */
649 2, /* vec_unalign_load_cost. */
650 1, /* vec_store_cost. */
651 3, /* cond_taken_branch_cost. */
652 1, /* cond_not_taken_branch_cost. */
656 struct processor_costs k8_cost = {
657 COSTS_N_INSNS (1), /* cost of an add instruction */
658 COSTS_N_INSNS (2), /* cost of a lea instruction */
659 COSTS_N_INSNS (1), /* variable shift costs */
660 COSTS_N_INSNS (1), /* constant shift costs */
661 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
662 COSTS_N_INSNS (4), /* HI */
663 COSTS_N_INSNS (3), /* SI */
664 COSTS_N_INSNS (4), /* DI */
665 COSTS_N_INSNS (5)}, /* other */
666 0, /* cost of multiply per each bit set */
667 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
668 COSTS_N_INSNS (26), /* HI */
669 COSTS_N_INSNS (42), /* SI */
670 COSTS_N_INSNS (74), /* DI */
671 COSTS_N_INSNS (74)}, /* other */
672 COSTS_N_INSNS (1), /* cost of movsx */
673 COSTS_N_INSNS (1), /* cost of movzx */
674 8, /* "large" insn */
676 4, /* cost for loading QImode using movzbl */
677 {3, 4, 3}, /* cost of loading integer registers
678 in QImode, HImode and SImode.
679 Relative to reg-reg move (2). */
680 {3, 4, 3}, /* cost of storing integer registers */
681 4, /* cost of reg,reg fld/fst */
682 {4, 4, 12}, /* cost of loading fp registers
683 in SFmode, DFmode and XFmode */
684 {6, 6, 8}, /* cost of storing fp registers
685 in SFmode, DFmode and XFmode */
686 2, /* cost of moving MMX register */
687 {3, 3}, /* cost of loading MMX registers
688 in SImode and DImode */
689 {4, 4}, /* cost of storing MMX registers
690 in SImode and DImode */
691 2, /* cost of moving SSE register */
692 {4, 3, 6}, /* cost of loading SSE registers
693 in SImode, DImode and TImode */
694 {4, 4, 5}, /* cost of storing SSE registers
695 in SImode, DImode and TImode */
696 5, /* MMX or SSE register to integer */
697 64, /* size of l1 cache. */
698 512, /* size of l2 cache. */
699 64, /* size of prefetch block */
700 /* New AMD processors never drop prefetches; if they cannot be performed
701 immediately, they are queued. We set number of simultaneous prefetches
702 to a large constant to reflect this (it probably is not a good idea not
703 to limit number of prefetches at all, as their execution also takes some
705 100, /* number of parallel prefetches */
707 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
708 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
709 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
710 COSTS_N_INSNS (2), /* cost of FABS instruction. */
711 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
712 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
713 /* K8 has optimized REP instruction for medium sized blocks, but for very small
714 blocks it is better to use loop. For large blocks, libcall can do
715 nontemporary accesses and beat inline considerably. */
716 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
717 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
718 {{libcall, {{8, loop}, {24, unrolled_loop},
719 {2048, rep_prefix_4_byte}, {-1, libcall}}},
720 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
721 4, /* scalar_stmt_cost. */
722 2, /* scalar load_cost. */
723 2, /* scalar_store_cost. */
724 5, /* vec_stmt_cost. */
725 0, /* vec_to_scalar_cost. */
726 2, /* scalar_to_vec_cost. */
727 2, /* vec_align_load_cost. */
728 3, /* vec_unalign_load_cost. */
729 3, /* vec_store_cost. */
730 3, /* cond_taken_branch_cost. */
731 2, /* cond_not_taken_branch_cost. */
734 struct processor_costs amdfam10_cost = {
735 COSTS_N_INSNS (1), /* cost of an add instruction */
736 COSTS_N_INSNS (2), /* cost of a lea instruction */
737 COSTS_N_INSNS (1), /* variable shift costs */
738 COSTS_N_INSNS (1), /* constant shift costs */
739 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
740 COSTS_N_INSNS (4), /* HI */
741 COSTS_N_INSNS (3), /* SI */
742 COSTS_N_INSNS (4), /* DI */
743 COSTS_N_INSNS (5)}, /* other */
744 0, /* cost of multiply per each bit set */
745 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
746 COSTS_N_INSNS (35), /* HI */
747 COSTS_N_INSNS (51), /* SI */
748 COSTS_N_INSNS (83), /* DI */
749 COSTS_N_INSNS (83)}, /* other */
750 COSTS_N_INSNS (1), /* cost of movsx */
751 COSTS_N_INSNS (1), /* cost of movzx */
752 8, /* "large" insn */
754 4, /* cost for loading QImode using movzbl */
755 {3, 4, 3}, /* cost of loading integer registers
756 in QImode, HImode and SImode.
757 Relative to reg-reg move (2). */
758 {3, 4, 3}, /* cost of storing integer registers */
759 4, /* cost of reg,reg fld/fst */
760 {4, 4, 12}, /* cost of loading fp registers
761 in SFmode, DFmode and XFmode */
762 {6, 6, 8}, /* cost of storing fp registers
763 in SFmode, DFmode and XFmode */
764 2, /* cost of moving MMX register */
765 {3, 3}, /* cost of loading MMX registers
766 in SImode and DImode */
767 {4, 4}, /* cost of storing MMX registers
768 in SImode and DImode */
769 2, /* cost of moving SSE register */
770 {4, 4, 3}, /* cost of loading SSE registers
771 in SImode, DImode and TImode */
772 {4, 4, 5}, /* cost of storing SSE registers
773 in SImode, DImode and TImode */
774 3, /* MMX or SSE register to integer */
776 MOVD reg64, xmmreg Double FSTORE 4
777 MOVD reg32, xmmreg Double FSTORE 4
779 MOVD reg64, xmmreg Double FADD 3
781 MOVD reg32, xmmreg Double FADD 3
783 64, /* size of l1 cache. */
784 512, /* size of l2 cache. */
785 64, /* size of prefetch block */
786 /* New AMD processors never drop prefetches; if they cannot be performed
787 immediately, they are queued. We set number of simultaneous prefetches
788 to a large constant to reflect this (it probably is not a good idea not
789 to limit number of prefetches at all, as their execution also takes some
791 100, /* number of parallel prefetches */
793 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
794 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
795 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
796 COSTS_N_INSNS (2), /* cost of FABS instruction. */
797 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
798 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
800 /* AMDFAM10 has optimized REP instruction for medium sized blocks, but for
801 very small blocks it is better to use loop. For large blocks, libcall can
802 do nontemporary accesses and beat inline considerably. */
803 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
804 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
805 {{libcall, {{8, loop}, {24, unrolled_loop},
806 {2048, rep_prefix_4_byte}, {-1, libcall}}},
807 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
808 4, /* scalar_stmt_cost. */
809 2, /* scalar load_cost. */
810 2, /* scalar_store_cost. */
811 6, /* vec_stmt_cost. */
812 0, /* vec_to_scalar_cost. */
813 2, /* scalar_to_vec_cost. */
814 2, /* vec_align_load_cost. */
815 2, /* vec_unalign_load_cost. */
816 2, /* vec_store_cost. */
817 2, /* cond_taken_branch_cost. */
818 1, /* cond_not_taken_branch_cost. */
822 struct processor_costs pentium4_cost = {
823 COSTS_N_INSNS (1), /* cost of an add instruction */
824 COSTS_N_INSNS (3), /* cost of a lea instruction */
825 COSTS_N_INSNS (4), /* variable shift costs */
826 COSTS_N_INSNS (4), /* constant shift costs */
827 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
828 COSTS_N_INSNS (15), /* HI */
829 COSTS_N_INSNS (15), /* SI */
830 COSTS_N_INSNS (15), /* DI */
831 COSTS_N_INSNS (15)}, /* other */
832 0, /* cost of multiply per each bit set */
833 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
834 COSTS_N_INSNS (56), /* HI */
835 COSTS_N_INSNS (56), /* SI */
836 COSTS_N_INSNS (56), /* DI */
837 COSTS_N_INSNS (56)}, /* other */
838 COSTS_N_INSNS (1), /* cost of movsx */
839 COSTS_N_INSNS (1), /* cost of movzx */
840 16, /* "large" insn */
842 2, /* cost for loading QImode using movzbl */
843 {4, 5, 4}, /* cost of loading integer registers
844 in QImode, HImode and SImode.
845 Relative to reg-reg move (2). */
846 {2, 3, 2}, /* cost of storing integer registers */
847 2, /* cost of reg,reg fld/fst */
848 {2, 2, 6}, /* cost of loading fp registers
849 in SFmode, DFmode and XFmode */
850 {4, 4, 6}, /* cost of storing fp registers
851 in SFmode, DFmode and XFmode */
852 2, /* cost of moving MMX register */
853 {2, 2}, /* cost of loading MMX registers
854 in SImode and DImode */
855 {2, 2}, /* cost of storing MMX registers
856 in SImode and DImode */
857 12, /* cost of moving SSE register */
858 {12, 12, 12}, /* cost of loading SSE registers
859 in SImode, DImode and TImode */
860 {2, 2, 8}, /* cost of storing SSE registers
861 in SImode, DImode and TImode */
862 10, /* MMX or SSE register to integer */
863 8, /* size of l1 cache. */
864 256, /* size of l2 cache. */
865 64, /* size of prefetch block */
866 6, /* number of parallel prefetches */
868 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
869 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
870 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
871 COSTS_N_INSNS (2), /* cost of FABS instruction. */
872 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
873 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
874 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
875 DUMMY_STRINGOP_ALGS},
876 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
878 DUMMY_STRINGOP_ALGS},
879 1, /* scalar_stmt_cost. */
880 1, /* scalar load_cost. */
881 1, /* scalar_store_cost. */
882 1, /* vec_stmt_cost. */
883 1, /* vec_to_scalar_cost. */
884 1, /* scalar_to_vec_cost. */
885 1, /* vec_align_load_cost. */
886 2, /* vec_unalign_load_cost. */
887 1, /* vec_store_cost. */
888 3, /* cond_taken_branch_cost. */
889 1, /* cond_not_taken_branch_cost. */
893 struct processor_costs nocona_cost = {
894 COSTS_N_INSNS (1), /* cost of an add instruction */
895 COSTS_N_INSNS (1), /* cost of a lea instruction */
896 COSTS_N_INSNS (1), /* variable shift costs */
897 COSTS_N_INSNS (1), /* constant shift costs */
898 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
899 COSTS_N_INSNS (10), /* HI */
900 COSTS_N_INSNS (10), /* SI */
901 COSTS_N_INSNS (10), /* DI */
902 COSTS_N_INSNS (10)}, /* other */
903 0, /* cost of multiply per each bit set */
904 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
905 COSTS_N_INSNS (66), /* HI */
906 COSTS_N_INSNS (66), /* SI */
907 COSTS_N_INSNS (66), /* DI */
908 COSTS_N_INSNS (66)}, /* other */
909 COSTS_N_INSNS (1), /* cost of movsx */
910 COSTS_N_INSNS (1), /* cost of movzx */
911 16, /* "large" insn */
913 4, /* cost for loading QImode using movzbl */
914 {4, 4, 4}, /* cost of loading integer registers
915 in QImode, HImode and SImode.
916 Relative to reg-reg move (2). */
917 {4, 4, 4}, /* cost of storing integer registers */
918 3, /* cost of reg,reg fld/fst */
919 {12, 12, 12}, /* cost of loading fp registers
920 in SFmode, DFmode and XFmode */
921 {4, 4, 4}, /* cost of storing fp registers
922 in SFmode, DFmode and XFmode */
923 6, /* cost of moving MMX register */
924 {12, 12}, /* cost of loading MMX registers
925 in SImode and DImode */
926 {12, 12}, /* cost of storing MMX registers
927 in SImode and DImode */
928 6, /* cost of moving SSE register */
929 {12, 12, 12}, /* cost of loading SSE registers
930 in SImode, DImode and TImode */
931 {12, 12, 12}, /* cost of storing SSE registers
932 in SImode, DImode and TImode */
933 8, /* MMX or SSE register to integer */
934 8, /* size of l1 cache. */
935 1024, /* size of l2 cache. */
936 128, /* size of prefetch block */
937 8, /* number of parallel prefetches */
939 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
940 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
941 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
942 COSTS_N_INSNS (3), /* cost of FABS instruction. */
943 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
944 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
945 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
946 {libcall, {{32, loop}, {20000, rep_prefix_8_byte},
947 {100000, unrolled_loop}, {-1, libcall}}}},
948 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
950 {libcall, {{24, loop}, {64, unrolled_loop},
951 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
952 1, /* scalar_stmt_cost. */
953 1, /* scalar load_cost. */
954 1, /* scalar_store_cost. */
955 1, /* vec_stmt_cost. */
956 1, /* vec_to_scalar_cost. */
957 1, /* scalar_to_vec_cost. */
958 1, /* vec_align_load_cost. */
959 2, /* vec_unalign_load_cost. */
960 1, /* vec_store_cost. */
961 3, /* cond_taken_branch_cost. */
962 1, /* cond_not_taken_branch_cost. */
966 struct processor_costs core2_cost = {
967 COSTS_N_INSNS (1), /* cost of an add instruction */
968 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
969 COSTS_N_INSNS (1), /* variable shift costs */
970 COSTS_N_INSNS (1), /* constant shift costs */
971 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
972 COSTS_N_INSNS (3), /* HI */
973 COSTS_N_INSNS (3), /* SI */
974 COSTS_N_INSNS (3), /* DI */
975 COSTS_N_INSNS (3)}, /* other */
976 0, /* cost of multiply per each bit set */
977 {COSTS_N_INSNS (22), /* cost of a divide/mod for QI */
978 COSTS_N_INSNS (22), /* HI */
979 COSTS_N_INSNS (22), /* SI */
980 COSTS_N_INSNS (22), /* DI */
981 COSTS_N_INSNS (22)}, /* other */
982 COSTS_N_INSNS (1), /* cost of movsx */
983 COSTS_N_INSNS (1), /* cost of movzx */
984 8, /* "large" insn */
986 2, /* cost for loading QImode using movzbl */
987 {6, 6, 6}, /* cost of loading integer registers
988 in QImode, HImode and SImode.
989 Relative to reg-reg move (2). */
990 {4, 4, 4}, /* cost of storing integer registers */
991 2, /* cost of reg,reg fld/fst */
992 {6, 6, 6}, /* cost of loading fp registers
993 in SFmode, DFmode and XFmode */
994 {4, 4, 4}, /* cost of storing fp registers
995 in SFmode, DFmode and XFmode */
996 2, /* cost of moving MMX register */
997 {6, 6}, /* cost of loading MMX registers
998 in SImode and DImode */
999 {4, 4}, /* cost of storing MMX registers
1000 in SImode and DImode */
1001 2, /* cost of moving SSE register */
1002 {6, 6, 6}, /* cost of loading SSE registers
1003 in SImode, DImode and TImode */
1004 {4, 4, 4}, /* cost of storing SSE registers
1005 in SImode, DImode and TImode */
1006 2, /* MMX or SSE register to integer */
1007 32, /* size of l1 cache. */
1008 2048, /* size of l2 cache. */
1009 128, /* size of prefetch block */
1010 8, /* number of parallel prefetches */
1011 3, /* Branch cost */
1012 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
1013 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
1014 COSTS_N_INSNS (32), /* cost of FDIV instruction. */
1015 COSTS_N_INSNS (1), /* cost of FABS instruction. */
1016 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
1017 COSTS_N_INSNS (58), /* cost of FSQRT instruction. */
1018 {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
1019 {libcall, {{32, loop}, {64, rep_prefix_4_byte},
1020 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1021 {{libcall, {{8, loop}, {15, unrolled_loop},
1022 {2048, rep_prefix_4_byte}, {-1, libcall}}},
1023 {libcall, {{24, loop}, {32, unrolled_loop},
1024 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1025 1, /* scalar_stmt_cost. */
1026 1, /* scalar load_cost. */
1027 1, /* scalar_store_cost. */
1028 1, /* vec_stmt_cost. */
1029 1, /* vec_to_scalar_cost. */
1030 1, /* scalar_to_vec_cost. */
1031 1, /* vec_align_load_cost. */
1032 2, /* vec_unalign_load_cost. */
1033 1, /* vec_store_cost. */
1034 3, /* cond_taken_branch_cost. */
1035 1, /* cond_not_taken_branch_cost. */
1039 struct processor_costs atom_cost = {
1040 COSTS_N_INSNS (1), /* cost of an add instruction */
1041 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1042 COSTS_N_INSNS (1), /* variable shift costs */
1043 COSTS_N_INSNS (1), /* constant shift costs */
1044 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1045 COSTS_N_INSNS (4), /* HI */
1046 COSTS_N_INSNS (3), /* SI */
1047 COSTS_N_INSNS (4), /* DI */
1048 COSTS_N_INSNS (2)}, /* other */
1049 0, /* cost of multiply per each bit set */
1050 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1051 COSTS_N_INSNS (26), /* HI */
1052 COSTS_N_INSNS (42), /* SI */
1053 COSTS_N_INSNS (74), /* DI */
1054 COSTS_N_INSNS (74)}, /* other */
1055 COSTS_N_INSNS (1), /* cost of movsx */
1056 COSTS_N_INSNS (1), /* cost of movzx */
1057 8, /* "large" insn */
1058 17, /* MOVE_RATIO */
1059 2, /* cost for loading QImode using movzbl */
1060 {4, 4, 4}, /* cost of loading integer registers
1061 in QImode, HImode and SImode.
1062 Relative to reg-reg move (2). */
1063 {4, 4, 4}, /* cost of storing integer registers */
1064 4, /* cost of reg,reg fld/fst */
1065 {12, 12, 12}, /* cost of loading fp registers
1066 in SFmode, DFmode and XFmode */
1067 {6, 6, 8}, /* cost of storing fp registers
1068 in SFmode, DFmode and XFmode */
1069 2, /* cost of moving MMX register */
1070 {8, 8}, /* cost of loading MMX registers
1071 in SImode and DImode */
1072 {8, 8}, /* cost of storing MMX registers
1073 in SImode and DImode */
1074 2, /* cost of moving SSE register */
1075 {8, 8, 8}, /* cost of loading SSE registers
1076 in SImode, DImode and TImode */
1077 {8, 8, 8}, /* cost of storing SSE registers
1078 in SImode, DImode and TImode */
1079 5, /* MMX or SSE register to integer */
1080 32, /* size of l1 cache. */
1081 256, /* size of l2 cache. */
1082 64, /* size of prefetch block */
1083 6, /* number of parallel prefetches */
1084 3, /* Branch cost */
1085 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1086 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1087 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1088 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1089 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1090 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1091 {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
1092 {libcall, {{32, loop}, {64, rep_prefix_4_byte},
1093 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1094 {{libcall, {{8, loop}, {15, unrolled_loop},
1095 {2048, rep_prefix_4_byte}, {-1, libcall}}},
1096 {libcall, {{24, loop}, {32, unrolled_loop},
1097 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1098 1, /* scalar_stmt_cost. */
1099 1, /* scalar load_cost. */
1100 1, /* scalar_store_cost. */
1101 1, /* vec_stmt_cost. */
1102 1, /* vec_to_scalar_cost. */
1103 1, /* scalar_to_vec_cost. */
1104 1, /* vec_align_load_cost. */
1105 2, /* vec_unalign_load_cost. */
1106 1, /* vec_store_cost. */
1107 3, /* cond_taken_branch_cost. */
1108 1, /* cond_not_taken_branch_cost. */
1111 /* Generic64 should produce code tuned for Nocona and K8. */
1113 struct processor_costs generic64_cost = {
1114 COSTS_N_INSNS (1), /* cost of an add instruction */
1115 /* On all chips taken into consideration lea is 2 cycles and more. With
1116 this cost however our current implementation of synth_mult results in
1117 use of unnecessary temporary registers causing regression on several
1118 SPECfp benchmarks. */
1119 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1120 COSTS_N_INSNS (1), /* variable shift costs */
1121 COSTS_N_INSNS (1), /* constant shift costs */
1122 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1123 COSTS_N_INSNS (4), /* HI */
1124 COSTS_N_INSNS (3), /* SI */
1125 COSTS_N_INSNS (4), /* DI */
1126 COSTS_N_INSNS (2)}, /* other */
1127 0, /* cost of multiply per each bit set */
1128 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1129 COSTS_N_INSNS (26), /* HI */
1130 COSTS_N_INSNS (42), /* SI */
1131 COSTS_N_INSNS (74), /* DI */
1132 COSTS_N_INSNS (74)}, /* other */
1133 COSTS_N_INSNS (1), /* cost of movsx */
1134 COSTS_N_INSNS (1), /* cost of movzx */
1135 8, /* "large" insn */
1136 17, /* MOVE_RATIO */
1137 4, /* cost for loading QImode using movzbl */
1138 {4, 4, 4}, /* cost of loading integer registers
1139 in QImode, HImode and SImode.
1140 Relative to reg-reg move (2). */
1141 {4, 4, 4}, /* cost of storing integer registers */
1142 4, /* cost of reg,reg fld/fst */
1143 {12, 12, 12}, /* cost of loading fp registers
1144 in SFmode, DFmode and XFmode */
1145 {6, 6, 8}, /* cost of storing fp registers
1146 in SFmode, DFmode and XFmode */
1147 2, /* cost of moving MMX register */
1148 {8, 8}, /* cost of loading MMX registers
1149 in SImode and DImode */
1150 {8, 8}, /* cost of storing MMX registers
1151 in SImode and DImode */
1152 2, /* cost of moving SSE register */
1153 {8, 8, 8}, /* cost of loading SSE registers
1154 in SImode, DImode and TImode */
1155 {8, 8, 8}, /* cost of storing SSE registers
1156 in SImode, DImode and TImode */
1157 5, /* MMX or SSE register to integer */
1158 32, /* size of l1 cache. */
1159 512, /* size of l2 cache. */
1160 64, /* size of prefetch block */
1161 6, /* number of parallel prefetches */
1162 /* Benchmarks shows large regressions on K8 sixtrack benchmark when this value
1163 is increased to perhaps more appropriate value of 5. */
1164 3, /* Branch cost */
1165 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1166 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1167 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1168 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1169 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1170 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1171 {DUMMY_STRINGOP_ALGS,
1172 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1173 {DUMMY_STRINGOP_ALGS,
1174 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1175 1, /* scalar_stmt_cost. */
1176 1, /* scalar load_cost. */
1177 1, /* scalar_store_cost. */
1178 1, /* vec_stmt_cost. */
1179 1, /* vec_to_scalar_cost. */
1180 1, /* scalar_to_vec_cost. */
1181 1, /* vec_align_load_cost. */
1182 2, /* vec_unalign_load_cost. */
1183 1, /* vec_store_cost. */
1184 3, /* cond_taken_branch_cost. */
1185 1, /* cond_not_taken_branch_cost. */
1188 /* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona and K8. */
1190 struct processor_costs generic32_cost = {
1191 COSTS_N_INSNS (1), /* cost of an add instruction */
1192 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1193 COSTS_N_INSNS (1), /* variable shift costs */
1194 COSTS_N_INSNS (1), /* constant shift costs */
1195 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1196 COSTS_N_INSNS (4), /* HI */
1197 COSTS_N_INSNS (3), /* SI */
1198 COSTS_N_INSNS (4), /* DI */
1199 COSTS_N_INSNS (2)}, /* other */
1200 0, /* cost of multiply per each bit set */
1201 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1202 COSTS_N_INSNS (26), /* HI */
1203 COSTS_N_INSNS (42), /* SI */
1204 COSTS_N_INSNS (74), /* DI */
1205 COSTS_N_INSNS (74)}, /* other */
1206 COSTS_N_INSNS (1), /* cost of movsx */
1207 COSTS_N_INSNS (1), /* cost of movzx */
1208 8, /* "large" insn */
1209 17, /* MOVE_RATIO */
1210 4, /* cost for loading QImode using movzbl */
1211 {4, 4, 4}, /* cost of loading integer registers
1212 in QImode, HImode and SImode.
1213 Relative to reg-reg move (2). */
1214 {4, 4, 4}, /* cost of storing integer registers */
1215 4, /* cost of reg,reg fld/fst */
1216 {12, 12, 12}, /* cost of loading fp registers
1217 in SFmode, DFmode and XFmode */
1218 {6, 6, 8}, /* cost of storing fp registers
1219 in SFmode, DFmode and XFmode */
1220 2, /* cost of moving MMX register */
1221 {8, 8}, /* cost of loading MMX registers
1222 in SImode and DImode */
1223 {8, 8}, /* cost of storing MMX registers
1224 in SImode and DImode */
1225 2, /* cost of moving SSE register */
1226 {8, 8, 8}, /* cost of loading SSE registers
1227 in SImode, DImode and TImode */
1228 {8, 8, 8}, /* cost of storing SSE registers
1229 in SImode, DImode and TImode */
1230 5, /* MMX or SSE register to integer */
1231 32, /* size of l1 cache. */
1232 256, /* size of l2 cache. */
1233 64, /* size of prefetch block */
1234 6, /* number of parallel prefetches */
1235 3, /* Branch cost */
1236 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1237 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1238 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1239 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1240 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1241 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1242 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1243 DUMMY_STRINGOP_ALGS},
1244 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1245 DUMMY_STRINGOP_ALGS},
1246 1, /* scalar_stmt_cost. */
1247 1, /* scalar load_cost. */
1248 1, /* scalar_store_cost. */
1249 1, /* vec_stmt_cost. */
1250 1, /* vec_to_scalar_cost. */
1251 1, /* scalar_to_vec_cost. */
1252 1, /* vec_align_load_cost. */
1253 2, /* vec_unalign_load_cost. */
1254 1, /* vec_store_cost. */
1255 3, /* cond_taken_branch_cost. */
1256 1, /* cond_not_taken_branch_cost. */
/* Active cost table consulted by the rtx-cost machinery; starts out
   pointing at the Pentium table.  NOTE(review): presumably re-pointed
   at the table matching -mtune during option processing -- confirm.  */
1259 const struct processor_costs *ix86_cost = &pentium_cost;
1261 /* Processor feature/optimization bitmasks. */
/* One bit per PROCESSOR_* enumerator: Intel processors first.  */
1262 #define m_386 (1<<PROCESSOR_I386)
1263 #define m_486 (1<<PROCESSOR_I486)
1264 #define m_PENT (1<<PROCESSOR_PENTIUM)
1265 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
1266 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
1267 #define m_NOCONA (1<<PROCESSOR_NOCONA)
1268 #define m_CORE2 (1<<PROCESSOR_CORE2)
1269 #define m_ATOM (1<<PROCESSOR_ATOM)
/* AMD (and Geode) processors, plus convenience unions of them.  */
1271 #define m_GEODE (1<<PROCESSOR_GEODE)
1272 #define m_K6 (1<<PROCESSOR_K6)
1273 #define m_K6_GEODE (m_K6 | m_GEODE)
1274 #define m_K8 (1<<PROCESSOR_K8)
1275 #define m_ATHLON (1<<PROCESSOR_ATHLON)
1276 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
1277 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
1278 #define m_AMD_MULTIPLE (m_K8 | m_ATHLON | m_AMDFAM10)
/* The two "generic" tunings (32-bit and 64-bit) and their union.  */
1280 #define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
1281 #define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
1283 /* Generic instruction choice should be common subset of supported CPUs
1284 (PPro/PENT4/NOCONA/CORE2/Athlon/K8). */
1285 #define m_GENERIC (m_GENERIC32 | m_GENERIC64)
1287 /* Feature tests against the various tunings. */
/* One flag per X86_TUNE_* entry; populated from
   initial_ix86_tune_features according to the selected processor mask.  */
1288 unsigned char ix86_tune_features[X86_TUNE_LAST];
1290 /* Feature tests against the various tunings used to create ix86_tune_features
1291 based on the processor mask. */
1292 static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
1293 /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
1294 negatively, so enabling for Generic64 seems like good code size
1295 tradeoff. We can't enable it for 32bit generic because it does not
1296 work well with PPro base chips. */
1297 m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_CORE2 | m_GENERIC64,
1299 /* X86_TUNE_PUSH_MEMORY */
1300 m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4
1301 | m_NOCONA | m_CORE2 | m_GENERIC,
1303 /* X86_TUNE_ZERO_EXTEND_WITH_AND */
1306 /* X86_TUNE_UNROLL_STRLEN */
1307 m_486 | m_PENT | m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_K6
1308 | m_CORE2 | m_GENERIC,
1310 /* X86_TUNE_DEEP_BRANCH_PREDICTION */
1311 m_ATOM | m_PPRO | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4 | m_GENERIC,
1313 /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
1314 on simulation result. But after P4 was made, no performance benefit
1315 was observed with branch hints. It also increases the code size.
1316 As a result, icc never generates branch hints. */
1319 /* X86_TUNE_DOUBLE_WITH_ADD */
1322 /* X86_TUNE_USE_SAHF */
1323 m_ATOM | m_PPRO | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_PENT4
1324 | m_NOCONA | m_CORE2 | m_GENERIC,
1326 /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
1327 partial dependencies. */
1328 m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_PENT4 | m_NOCONA
1329 | m_CORE2 | m_GENERIC | m_GEODE /* m_386 | m_K6 */,
1331 /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
1332 register stalls on Generic32 compilation setting as well. However
1333 in current implementation the partial register stalls are not eliminated
1334 very well - they can be introduced via subregs synthesized by combine
1335 and can happen in caller/callee saving sequences. Because this option
1336 pays back little on PPro based chips and is in conflict with partial reg
1337 dependencies used by Athlon/P4 based chips, it is better to leave it off
1338 for generic32 for now. */
1341 /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
1342 m_CORE2 | m_GENERIC,
1344 /* X86_TUNE_USE_HIMODE_FIOP */
1345 m_386 | m_486 | m_K6_GEODE,
1347 /* X86_TUNE_USE_SIMODE_FIOP */
1348 ~(m_PPRO | m_AMD_MULTIPLE | m_PENT | m_ATOM | m_CORE2 | m_GENERIC),
1350 /* X86_TUNE_USE_MOV0 */
1353 /* X86_TUNE_USE_CLTD */
1354 ~(m_PENT | m_ATOM | m_K6 | m_CORE2 | m_GENERIC),
1356 /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */
1359 /* X86_TUNE_SPLIT_LONG_MOVES */
1362 /* X86_TUNE_READ_MODIFY_WRITE */
1365 /* X86_TUNE_READ_MODIFY */
1368 /* X86_TUNE_PROMOTE_QIMODE */
1369 m_K6_GEODE | m_PENT | m_ATOM | m_386 | m_486 | m_AMD_MULTIPLE
1370 | m_CORE2 | m_GENERIC /* | m_PENT4 ? */,
1372 /* X86_TUNE_FAST_PREFIX */
1373 ~(m_PENT | m_486 | m_386),
1375 /* X86_TUNE_SINGLE_STRINGOP */
1376 m_386 | m_PENT4 | m_NOCONA,
1378 /* X86_TUNE_QIMODE_MATH */
1381 /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
1382 register stalls. Just like X86_TUNE_PARTIAL_REG_STALL this option
1383 might be considered for Generic32 if our scheme for avoiding partial
1384 stalls was more effective. */
1387 /* X86_TUNE_PROMOTE_QI_REGS */
1390 /* X86_TUNE_PROMOTE_HI_REGS */
1393 /* X86_TUNE_ADD_ESP_4: Enable if add/sub is preferred over 1/2 push/pop. */
1394 m_ATOM | m_AMD_MULTIPLE | m_K6_GEODE | m_PENT4 | m_NOCONA
1395 | m_CORE2 | m_GENERIC,
1397 /* X86_TUNE_ADD_ESP_8 */
1398 m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_K6_GEODE | m_386
1399 | m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1401 /* X86_TUNE_SUB_ESP_4 */
1402 m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_PENT4 | m_NOCONA | m_CORE2
1405 /* X86_TUNE_SUB_ESP_8 */
1406 m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_386 | m_486
1407 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1409 /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
1410 for DFmode copies */
1411 ~(m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
1412 | m_GENERIC | m_GEODE),
1414 /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
1415 m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1417 /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
1418 conflict here in between PPro/Pentium4 based chips that thread 128bit
1419 SSE registers as single units versus K8 based chips that divide SSE
1420 registers to two 64bit halves. This knob promotes all store destinations
1421 to be 128bit to allow register renaming on 128bit SSE units, but usually
1422 results in one extra microop on 64bit SSE units. Experimental results
1423 shows that disabling this option on P4 brings over 20% SPECfp regression,
1424 while enabling it on K8 brings roughly 2.4% regression that can be partly
1425 masked by careful scheduling of moves. */
1426 m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC
1429 /* X86_TUNE_SSE_UNALIGNED_MOVE_OPTIMAL */
1432 /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
1433 are resolved on SSE register parts instead of whole registers, so we may
1434 maintain just lower part of scalar values in proper format leaving the
1435 upper part undefined. */
1438 /* X86_TUNE_SSE_TYPELESS_STORES */
1441 /* X86_TUNE_SSE_LOAD0_BY_PXOR */
1442 m_PPRO | m_PENT4 | m_NOCONA,
1444 /* X86_TUNE_MEMORY_MISMATCH_STALL */
1445 m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1447 /* X86_TUNE_PROLOGUE_USING_MOVE */
1448 m_ATHLON_K8 | m_ATOM | m_PPRO | m_CORE2 | m_GENERIC,
1450 /* X86_TUNE_EPILOGUE_USING_MOVE */
1451 m_ATHLON_K8 | m_ATOM | m_PPRO | m_CORE2 | m_GENERIC,
1453 /* X86_TUNE_SHIFT1 */
1456 /* X86_TUNE_USE_FFREEP */
1459 /* X86_TUNE_INTER_UNIT_MOVES */
1460 ~(m_AMD_MULTIPLE | m_ATOM | m_GENERIC),
1462 /* X86_TUNE_INTER_UNIT_CONVERSIONS */
1465 /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
1466 than 4 branch instructions in the 16 byte window. */
1467 m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2
1470 /* X86_TUNE_SCHEDULE */
1471 m_PPRO | m_AMD_MULTIPLE | m_K6_GEODE | m_PENT | m_ATOM | m_CORE2
1474 /* X86_TUNE_USE_BT */
1475 m_AMD_MULTIPLE | m_ATOM | m_CORE2 | m_GENERIC,
1477 /* X86_TUNE_USE_INCDEC */
1478 ~(m_PENT4 | m_NOCONA | m_GENERIC | m_ATOM),
1480 /* X86_TUNE_PAD_RETURNS */
1481 m_AMD_MULTIPLE | m_CORE2 | m_GENERIC,
1483 /* X86_TUNE_EXT_80387_CONSTANTS */
1484 m_K6_GEODE | m_ATHLON_K8 | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO
1485 | m_CORE2 | m_GENERIC,
1487 /* X86_TUNE_SHORTEN_X87_SSE */
1490 /* X86_TUNE_AVOID_VECTOR_DECODE */
1493 /* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have same latency for HImode
1494 and SImode multiply, but 386 and 486 do HImode multiply faster. */
1497 /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of 32-bit constant and memory is
1498 vector path on AMD machines. */
1499 m_K8 | m_GENERIC64 | m_AMDFAM10,
1501 /* X86_TUNE_SLOW_IMUL_IMM8: Imul of 8-bit constant is vector path on AMD
1503 m_K8 | m_GENERIC64 | m_AMDFAM10,
1505 /* X86_TUNE_MOVE_M1_VIA_OR: On pentiums, it is faster to load -1 via OR
1509 /* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is,
1510 but one byte longer. */
1513 /* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector decoded with memory
1514 operand that cannot be represented using a modRM byte. The XOR
1515 replacement is long decoded, so this split helps here as well. */
1518 /* X86_TUNE_USE_VECTOR_FP_CONVERTS: Prefer vector packed SSE conversion
1520 m_AMDFAM10 | m_GENERIC,
1522 /* X86_TUNE_USE_VECTOR_CONVERTS: Prefer vector packed SSE conversion
1523 from integer to FP. */
1526 /* X86_TUNE_FUSE_CMP_AND_BRANCH: Fuse a compare or test instruction
1527 with a subsequent conditional jump instruction into a single
1528 compare-and-branch uop. */
1531 /* X86_TUNE_OPT_AGU: Optimize for Address Generation Unit. This flag
1532 will impact LEA instruction selection. */
1536 /* Feature tests against the various architecture variations. */
/* One flag per X86_ARCH_* entry; populated from
   initial_ix86_arch_features according to the selected processor mask.  */
1537 unsigned char ix86_arch_features[X86_ARCH_LAST];
1539 /* Feature tests against the various architecture variations, used to create
1540 ix86_arch_features based on the processor mask. */
1541 static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
1542 /* X86_ARCH_CMOVE: Conditional move was added for pentiumpro. */
1543 ~(m_386 | m_486 | m_PENT | m_K6),
1545 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
1548 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
1551 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
1554 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
1557 /* X86_ARCH_CALL_ESP: P6 processors will jump to the address after
1558 the decrement (so they will execute return address as code). See
1559 Pentium Pro errata 70, Pentium 2 errata A33, Pentium 3 errata E17. */
1560 ~(m_386 | m_486 | m_PENT | m_PPRO),
1563 static const unsigned int x86_accumulate_outgoing_args
1564 = m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
/* Mask of processors for which the "always fancy math 387" behavior is
   enabled.  NOTE(review): exact semantics (direct use of x87
   transcendental insns?) inferred from the name -- confirm at the use
   site.  */
1567 static const unsigned int x86_arch_always_fancy_math_387
1568 = m_PENT | m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_PENT4
1569 | m_NOCONA | m_CORE2 | m_GENERIC;
/* String-operation expansion algorithm currently in force; no_stringop
   means no override is active.  NOTE(review): presumably set from a
   command-line option -- confirm where it is assigned.  */
1571 static enum stringop_alg stringop_alg = no_stringop;
1573 /* In case the average insn count for single function invocation is
1574 lower than this constant, emit fast (but longer) prologue and
   epilogue.  */
1576 #define FAST_PROLOGUE_INSN_COUNT 20
1578 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
/* NOTE(review): the *_REGISTER_NAMES initializer macros come from the
   target headers; arrays appear to be indexed by gcc register number --
   confirm in i386.h.  */
1579 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
1580 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
1581 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
1583 /* Array of the smallest class containing reg number REGNO, indexed by
1584 REGNO. Used by REGNO_REG_CLASS in i386.h. */
1586 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
1588 /* ax, dx, cx, bx */
1589 AREG, DREG, CREG, BREG,
1590 /* si, di, bp, sp */
1591 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
1593 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
1594 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
1597 /* flags, fpsr, fpcr, frame */
1598 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
1600 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1603 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
1606 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1607 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1608 /* SSE REX registers */
1609 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1613 /* The "default" register map used in 32bit mode. */
1615 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
1617 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
1618 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
1619 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1620 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
1621 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
1622 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1623 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1626 /* The "default" register map used in 64bit mode. */
1628 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
1630 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
1631 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
1632 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1633 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
1634 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
1635 8,9,10,11,12,13,14,15, /* extended integer registers */
1636 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
1639 /* Define the register numbers to be used in Dwarf debugging information.
1640 The SVR4 reference port C compiler uses the following register numbers
1641 in its Dwarf output code:
1642 0 for %eax (gcc regno = 0)
1643 1 for %ecx (gcc regno = 2)
1644 2 for %edx (gcc regno = 1)
1645 3 for %ebx (gcc regno = 3)
1646 4 for %esp (gcc regno = 7)
1647 5 for %ebp (gcc regno = 6)
1648 6 for %esi (gcc regno = 4)
1649 7 for %edi (gcc regno = 5)
1650 The following three DWARF register numbers are never generated by
1651 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
1652 believes these numbers have these meanings.
1653 8 for %eip (no gcc equivalent)
1654 9 for %eflags (gcc regno = 17)
1655 10 for %trapno (no gcc equivalent)
1656 It is not at all clear how we should number the FP stack registers
1657 for the x86 architecture. If the version of SDB on x86/svr4 were
1658 a bit less brain dead with respect to floating-point then we would
1659 have a precedent to follow with respect to DWARF register numbers
1660 for x86 FP registers, but the SDB on x86/svr4 is so completely
1661 broken with respect to FP registers that it is hardly worth thinking
1662 of it as something to strive for compatibility with.
1663 The version of x86/svr4 SDB I have at the moment does (partially)
1664 seem to believe that DWARF register number 11 is associated with
1665 the x86 register %st(0), but that's about all. Higher DWARF
1666 register numbers don't seem to be associated with anything in
1667 particular, and even for DWARF regno 11, SDB only seems to under-
1668 stand that it should say that a variable lives in %st(0) (when
1669 asked via an `=' command) if we said it was in DWARF regno 11,
1670 but SDB still prints garbage when asked for the value of the
1671 variable in question (via a `/' command).
1672 (Also note that the labels SDB prints for various FP stack regs
1673 when doing an `x' command are all wrong.)
1674 Note that these problems generally don't affect the native SVR4
1675 C compiler because it doesn't allow the use of -O with -g and
1676 because when it is *not* optimizing, it allocates a memory
1677 location for each floating-point variable, and the memory
1678 location is what gets described in the DWARF AT_location
1679 attribute for the variable in question.
1680 Regardless of the severe mental illness of the x86/svr4 SDB, we
1681 do something sensible here and we use the following DWARF
1682 register numbers. Note that these are all stack-top-relative
1684 11 for %st(0) (gcc regno = 8)
1685 12 for %st(1) (gcc regno = 9)
1686 13 for %st(2) (gcc regno = 10)
1687 14 for %st(3) (gcc regno = 11)
1688 15 for %st(4) (gcc regno = 12)
1689 16 for %st(5) (gcc regno = 13)
1690 17 for %st(6) (gcc regno = 14)
1691 18 for %st(7) (gcc regno = 15)
1693 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
1695 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
1696 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
1697 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1698 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
1699 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
1700 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1701 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1704 /* Test and compare insns in i386.md store the information needed to
1705 generate branch and scc insns here. */
1707 rtx ix86_compare_op0 = NULL_RTX;
1708 rtx ix86_compare_op1 = NULL_RTX;
1710 /* Define parameter passing and return registers. */
1712 static int const x86_64_int_parameter_registers[6] =
1714 DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
1717 static int const x86_64_ms_abi_int_parameter_registers[4] =
1719 CX_REG, DX_REG, R8_REG, R9_REG
1722 static int const x86_64_int_return_registers[4] =
1724 AX_REG, DX_REG, DI_REG, SI_REG
1727 /* Define the structure for the machine field in struct function. */
1729 struct GTY(()) stack_local_entry {
1730 unsigned short mode;
1733 struct stack_local_entry *next;
1736 /* Structure describing stack frame layout.
1737 Stack grows downward:
1743 saved frame pointer if frame_pointer_needed
1744 <- HARD_FRAME_POINTER
1753 [va_arg registers] (
1754 > to_allocate <- FRAME_POINTER
1766 HOST_WIDE_INT frame;
1768 int outgoing_arguments_size;
1771 HOST_WIDE_INT to_allocate;
1772 /* The offsets relative to ARG_POINTER. */
1773 HOST_WIDE_INT frame_pointer_offset;
1774 HOST_WIDE_INT hard_frame_pointer_offset;
1775 HOST_WIDE_INT stack_pointer_offset;
1777 /* When save_regs_using_mov is set, emit prologue using
1778 move instead of push instructions. */
1779 bool save_regs_using_mov;
1782 /* Code model option. */
1783 enum cmodel ix86_cmodel;
1785 enum asm_dialect ix86_asm_dialect = ASM_ATT;
1787 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
1789 /* Which unit we are generating floating point math for. */
1790 enum fpmath_unit ix86_fpmath;
1792 /* Which cpu are we scheduling for. */
1793 enum attr_cpu ix86_schedule;
1795 /* Which cpu are we optimizing for. */
1796 enum processor_type ix86_tune;
1798 /* Which instruction set architecture to use. */
1799 enum processor_type ix86_arch;
1801 /* true if sse prefetch instruction is not NOOP. */
1802 int x86_prefetch_sse;
1804 /* ix86_regparm_string as a number */
1805 static int ix86_regparm;
1807 /* -mstackrealign option */
1808 extern int ix86_force_align_arg_pointer;
1809 static const char ix86_force_align_arg_pointer_string[]
1810 = "force_align_arg_pointer";
1812 static rtx (*ix86_gen_leave) (void);
1813 static rtx (*ix86_gen_pop1) (rtx);
1814 static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
1815 static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
1816 static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx);
1817 static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
1818 static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
1819 static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
1821 /* Preferred alignment for stack boundary in bits. */
1822 unsigned int ix86_preferred_stack_boundary;
1824 /* Alignment for incoming stack boundary in bits specified at
1826 static unsigned int ix86_user_incoming_stack_boundary;
1828 /* Default alignment for incoming stack boundary in bits. */
1829 static unsigned int ix86_default_incoming_stack_boundary;
1831 /* Alignment for incoming stack boundary in bits. */
1832 unsigned int ix86_incoming_stack_boundary;
1834 /* The abi used by target. */
1835 enum calling_abi ix86_abi;
1837 /* Values 1-5: see jump.c */
1838 int ix86_branch_cost;
1840 /* Calling abi specific va_list type nodes. */
1841 static GTY(()) tree sysv_va_list_type_node;
1842 static GTY(()) tree ms_va_list_type_node;
1844 /* Variables which are this size or smaller are put in the data/bss
1845 or ldata/lbss sections. */
1847 int ix86_section_threshold = 65536;
1849 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
1850 char internal_label_prefix[16];
1851 int internal_label_prefix_len;
1853 /* Fence to use after loop using movnt. */
1856 /* Register class used for passing given 64bit part of the argument.
1857 These represent classes as documented by the PS ABI, with the exception
1858 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
1859 use SF or DFmode move instead of DImode to avoid reformatting penalties.
1861 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
1862 whenever possible (upper half does contain padding). */
1863 enum x86_64_reg_class
1866 X86_64_INTEGER_CLASS,
1867 X86_64_INTEGERSI_CLASS,
1874 X86_64_COMPLEX_X87_CLASS,
1878 #define MAX_CLASSES 4
1880 /* Table of constants used by fldpi, fldln2, etc.... */
1881 static REAL_VALUE_TYPE ext_80387_constants_table [5];
1882 static bool ext_80387_constants_init = 0;
1885 static struct machine_function * ix86_init_machine_status (void);
1886 static rtx ix86_function_value (const_tree, const_tree, bool);
1887 static rtx ix86_static_chain (const_tree, bool);
1888 static int ix86_function_regparm (const_tree, const_tree);
1889 static void ix86_compute_frame_layout (struct ix86_frame *);
1890 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
1892 static void ix86_add_new_builtins (int);
1894 enum ix86_function_specific_strings
1896 IX86_FUNCTION_SPECIFIC_ARCH,
1897 IX86_FUNCTION_SPECIFIC_TUNE,
1898 IX86_FUNCTION_SPECIFIC_FPMATH,
1899 IX86_FUNCTION_SPECIFIC_MAX
1902 static char *ix86_target_string (int, int, const char *, const char *,
1903 const char *, bool);
1904 static void ix86_debug_options (void) ATTRIBUTE_UNUSED;
1905 static void ix86_function_specific_save (struct cl_target_option *);
1906 static void ix86_function_specific_restore (struct cl_target_option *);
1907 static void ix86_function_specific_print (FILE *, int,
1908 struct cl_target_option *);
1909 static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
1910 static bool ix86_valid_target_attribute_inner_p (tree, char *[]);
1911 static bool ix86_can_inline_p (tree, tree);
1912 static void ix86_set_current_function (tree);
1913 static unsigned int ix86_minimum_incoming_stack_boundary (bool);
1915 static enum calling_abi ix86_function_abi (const_tree);
1918 /* The svr4 ABI for the i386 says that records and unions are returned
1920 #ifndef DEFAULT_PCC_STRUCT_RETURN
1921 #define DEFAULT_PCC_STRUCT_RETURN 1
1924 /* Whether -mtune= or -march= were specified */
1925 static int ix86_tune_defaulted;
1926 static int ix86_arch_specified;
1928 /* Bit flags that specify the ISA we are compiling for. */
1929 int ix86_isa_flags = TARGET_64BIT_DEFAULT | TARGET_SUBTARGET_ISA_DEFAULT;
1931 /* A mask of ix86_isa_flags that includes bit X if X
1932 was set or cleared on the command line. */
1933 static int ix86_isa_flags_explicit;
1935 /* Define a set of ISAs which are available when a given ISA is
1936 enabled. MMX and SSE ISAs are handled separately. */
1938 #define OPTION_MASK_ISA_MMX_SET OPTION_MASK_ISA_MMX
1939 #define OPTION_MASK_ISA_3DNOW_SET \
1940 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_MMX_SET)
1942 #define OPTION_MASK_ISA_SSE_SET OPTION_MASK_ISA_SSE
1943 #define OPTION_MASK_ISA_SSE2_SET \
1944 (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE_SET)
1945 #define OPTION_MASK_ISA_SSE3_SET \
1946 (OPTION_MASK_ISA_SSE3 | OPTION_MASK_ISA_SSE2_SET)
1947 #define OPTION_MASK_ISA_SSSE3_SET \
1948 (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE3_SET)
1949 #define OPTION_MASK_ISA_SSE4_1_SET \
1950 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSSE3_SET)
1951 #define OPTION_MASK_ISA_SSE4_2_SET \
1952 (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_SSE4_1_SET)
1953 #define OPTION_MASK_ISA_AVX_SET \
1954 (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_SSE4_2_SET)
1955 #define OPTION_MASK_ISA_FMA_SET \
1956 (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_AVX_SET)
1958 /* SSE4 includes both SSE4.1 and SSE4.2. -msse4 should be the same
1960 #define OPTION_MASK_ISA_SSE4_SET OPTION_MASK_ISA_SSE4_2_SET
1962 #define OPTION_MASK_ISA_SSE4A_SET \
1963 (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_SSE3_SET)
1964 #define OPTION_MASK_ISA_FMA4_SET \
1965 (OPTION_MASK_ISA_FMA4 | OPTION_MASK_ISA_SSE4A_SET \
1966 | OPTION_MASK_ISA_AVX_SET)
1967 #define OPTION_MASK_ISA_XOP_SET \
1968 (OPTION_MASK_ISA_XOP | OPTION_MASK_ISA_FMA4_SET)
1969 #define OPTION_MASK_ISA_LWP_SET \
1972 /* AES and PCLMUL need SSE2 because they use xmm registers */
1973 #define OPTION_MASK_ISA_AES_SET \
1974 (OPTION_MASK_ISA_AES | OPTION_MASK_ISA_SSE2_SET)
1975 #define OPTION_MASK_ISA_PCLMUL_SET \
1976 (OPTION_MASK_ISA_PCLMUL | OPTION_MASK_ISA_SSE2_SET)
1978 #define OPTION_MASK_ISA_ABM_SET \
1979 (OPTION_MASK_ISA_ABM | OPTION_MASK_ISA_POPCNT)
1981 #define OPTION_MASK_ISA_POPCNT_SET OPTION_MASK_ISA_POPCNT
1982 #define OPTION_MASK_ISA_CX16_SET OPTION_MASK_ISA_CX16
1983 #define OPTION_MASK_ISA_SAHF_SET OPTION_MASK_ISA_SAHF
1984 #define OPTION_MASK_ISA_MOVBE_SET OPTION_MASK_ISA_MOVBE
1985 #define OPTION_MASK_ISA_CRC32_SET OPTION_MASK_ISA_CRC32
1987 /* Define a set of ISAs which aren't available when a given ISA is
1988 disabled. MMX and SSE ISAs are handled separately. */
1990 #define OPTION_MASK_ISA_MMX_UNSET \
1991 (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_3DNOW_UNSET)
1992 #define OPTION_MASK_ISA_3DNOW_UNSET \
1993 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_3DNOW_A_UNSET)
1994 #define OPTION_MASK_ISA_3DNOW_A_UNSET OPTION_MASK_ISA_3DNOW_A
1996 #define OPTION_MASK_ISA_SSE_UNSET \
1997 (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_SSE2_UNSET)
1998 #define OPTION_MASK_ISA_SSE2_UNSET \
1999 (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE3_UNSET)
2000 #define OPTION_MASK_ISA_SSE3_UNSET \
2001 (OPTION_MASK_ISA_SSE3 \
2002 | OPTION_MASK_ISA_SSSE3_UNSET \
2003 | OPTION_MASK_ISA_SSE4A_UNSET )
2004 #define OPTION_MASK_ISA_SSSE3_UNSET \
2005 (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE4_1_UNSET)
2006 #define OPTION_MASK_ISA_SSE4_1_UNSET \
2007 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4_2_UNSET)
2008 #define OPTION_MASK_ISA_SSE4_2_UNSET \
2009 (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_AVX_UNSET )
2010 #define OPTION_MASK_ISA_AVX_UNSET \
2011 (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_FMA_UNSET \
2012 | OPTION_MASK_ISA_FMA4_UNSET)
2013 #define OPTION_MASK_ISA_FMA_UNSET OPTION_MASK_ISA_FMA
2015 /* SSE4 includes both SSE4.1 and SSE4.2.  -mno-sse4 should be the same
2017 #define OPTION_MASK_ISA_SSE4_UNSET OPTION_MASK_ISA_SSE4_1_UNSET
2019 #define OPTION_MASK_ISA_SSE4A_UNSET \
2020 (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_FMA4_UNSET)
2022 #define OPTION_MASK_ISA_FMA4_UNSET \
2023 (OPTION_MASK_ISA_FMA4 | OPTION_MASK_ISA_XOP_UNSET)
2024 #define OPTION_MASK_ISA_XOP_UNSET OPTION_MASK_ISA_XOP
2025 #define OPTION_MASK_ISA_LWP_UNSET OPTION_MASK_ISA_LWP
2027 #define OPTION_MASK_ISA_AES_UNSET OPTION_MASK_ISA_AES
2028 #define OPTION_MASK_ISA_PCLMUL_UNSET OPTION_MASK_ISA_PCLMUL
2029 #define OPTION_MASK_ISA_ABM_UNSET OPTION_MASK_ISA_ABM
2030 #define OPTION_MASK_ISA_POPCNT_UNSET OPTION_MASK_ISA_POPCNT
2031 #define OPTION_MASK_ISA_CX16_UNSET OPTION_MASK_ISA_CX16
2032 #define OPTION_MASK_ISA_SAHF_UNSET OPTION_MASK_ISA_SAHF
2033 #define OPTION_MASK_ISA_MOVBE_UNSET OPTION_MASK_ISA_MOVBE
2034 #define OPTION_MASK_ISA_CRC32_UNSET OPTION_MASK_ISA_CRC32
2036 /* Vectorization library interface and handlers. */
2037 tree (*ix86_veclib_handler)(enum built_in_function, tree, tree) = NULL;
2038 static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
2039 static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
2041 /* Processor target table, indexed by processor number */
2044 const struct processor_costs *cost; /* Processor costs */
2045 const int align_loop; /* Default alignments. */
2046 const int align_loop_max_skip;
2047 const int align_jump;
2048 const int align_jump_max_skip;
2049 const int align_func;
2052 static const struct ptt processor_target_table[PROCESSOR_max] =
2054 {&i386_cost, 4, 3, 4, 3, 4},
2055 {&i486_cost, 16, 15, 16, 15, 16},
2056 {&pentium_cost, 16, 7, 16, 7, 16},
2057 {&pentiumpro_cost, 16, 15, 16, 10, 16},
2058 {&geode_cost, 0, 0, 0, 0, 0},
2059 {&k6_cost, 32, 7, 32, 7, 32},
2060 {&athlon_cost, 16, 7, 16, 7, 16},
2061 {&pentium4_cost, 0, 0, 0, 0, 0},
2062 {&k8_cost, 16, 7, 16, 7, 16},
2063 {&nocona_cost, 0, 0, 0, 0, 0},
2064 {&core2_cost, 16, 10, 16, 10, 16},
2065 {&generic32_cost, 16, 7, 16, 7, 16},
2066 {&generic64_cost, 16, 10, 16, 10, 16},
2067 {&amdfam10_cost, 32, 24, 32, 7, 32},
2068 {&atom_cost, 16, 7, 16, 7, 16}
2071 static const char *const cpu_names[TARGET_CPU_DEFAULT_max] =
2097 /* Implement TARGET_HANDLE_OPTION. */
2100 ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
2107 ix86_isa_flags |= OPTION_MASK_ISA_MMX_SET;
2108 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_SET;
2112 ix86_isa_flags &= ~OPTION_MASK_ISA_MMX_UNSET;
2113 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_UNSET;
2120 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_SET;
2121 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_SET;
2125 ix86_isa_flags &= ~OPTION_MASK_ISA_3DNOW_UNSET;
2126 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_UNSET;
2136 ix86_isa_flags |= OPTION_MASK_ISA_SSE_SET;
2137 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_SET;
2141 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE_UNSET;
2142 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_UNSET;
2149 ix86_isa_flags |= OPTION_MASK_ISA_SSE2_SET;
2150 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_SET;
2154 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE2_UNSET;
2155 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_UNSET;
2162 ix86_isa_flags |= OPTION_MASK_ISA_SSE3_SET;
2163 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_SET;
2167 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE3_UNSET;
2168 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_UNSET;
2175 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3_SET;
2176 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_SET;
2180 ix86_isa_flags &= ~OPTION_MASK_ISA_SSSE3_UNSET;
2181 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_UNSET;
2188 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1_SET;
2189 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_SET;
2193 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_1_UNSET;
2194 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_UNSET;
2201 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2_SET;
2202 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_SET;
2206 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_2_UNSET;
2207 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_UNSET;
2214 ix86_isa_flags |= OPTION_MASK_ISA_AVX_SET;
2215 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX_SET;
2219 ix86_isa_flags &= ~OPTION_MASK_ISA_AVX_UNSET;
2220 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX_UNSET;
2227 ix86_isa_flags |= OPTION_MASK_ISA_FMA_SET;
2228 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA_SET;
2232 ix86_isa_flags &= ~OPTION_MASK_ISA_FMA_UNSET;
2233 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA_UNSET;
2238 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_SET;
2239 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_SET;
2243 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_UNSET;
2244 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_UNSET;
2250 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A_SET;
2251 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_SET;
2255 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4A_UNSET;
2256 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_UNSET;
2263 ix86_isa_flags |= OPTION_MASK_ISA_FMA4_SET;
2264 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA4_SET;
2268 ix86_isa_flags &= ~OPTION_MASK_ISA_FMA4_UNSET;
2269 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA4_UNSET;
2276 ix86_isa_flags |= OPTION_MASK_ISA_XOP_SET;
2277 ix86_isa_flags_explicit |= OPTION_MASK_ISA_XOP_SET;
2281 ix86_isa_flags &= ~OPTION_MASK_ISA_XOP_UNSET;
2282 ix86_isa_flags_explicit |= OPTION_MASK_ISA_XOP_UNSET;
2289 ix86_isa_flags |= OPTION_MASK_ISA_LWP_SET;
2290 ix86_isa_flags_explicit |= OPTION_MASK_ISA_LWP_SET;
2294 ix86_isa_flags &= ~OPTION_MASK_ISA_LWP_UNSET;
2295 ix86_isa_flags_explicit |= OPTION_MASK_ISA_LWP_UNSET;
2302 ix86_isa_flags |= OPTION_MASK_ISA_ABM_SET;
2303 ix86_isa_flags_explicit |= OPTION_MASK_ISA_ABM_SET;
2307 ix86_isa_flags &= ~OPTION_MASK_ISA_ABM_UNSET;
2308 ix86_isa_flags_explicit |= OPTION_MASK_ISA_ABM_UNSET;
2315 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT_SET;
2316 ix86_isa_flags_explicit |= OPTION_MASK_ISA_POPCNT_SET;
2320 ix86_isa_flags &= ~OPTION_MASK_ISA_POPCNT_UNSET;
2321 ix86_isa_flags_explicit |= OPTION_MASK_ISA_POPCNT_UNSET;
2328 ix86_isa_flags |= OPTION_MASK_ISA_SAHF_SET;
2329 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SAHF_SET;
2333 ix86_isa_flags &= ~OPTION_MASK_ISA_SAHF_UNSET;
2334 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SAHF_UNSET;
2341 ix86_isa_flags |= OPTION_MASK_ISA_CX16_SET;
2342 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CX16_SET;
2346 ix86_isa_flags &= ~OPTION_MASK_ISA_CX16_UNSET;
2347 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CX16_UNSET;
2354 ix86_isa_flags |= OPTION_MASK_ISA_MOVBE_SET;
2355 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MOVBE_SET;
2359 ix86_isa_flags &= ~OPTION_MASK_ISA_MOVBE_UNSET;
2360 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MOVBE_UNSET;
2367 ix86_isa_flags |= OPTION_MASK_ISA_CRC32_SET;
2368 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CRC32_SET;
2372 ix86_isa_flags &= ~OPTION_MASK_ISA_CRC32_UNSET;
2373 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CRC32_UNSET;
2380 ix86_isa_flags |= OPTION_MASK_ISA_AES_SET;
2381 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AES_SET;
2385 ix86_isa_flags &= ~OPTION_MASK_ISA_AES_UNSET;
2386 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AES_UNSET;
2393 ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL_SET;
2394 ix86_isa_flags_explicit |= OPTION_MASK_ISA_PCLMUL_SET;
2398 ix86_isa_flags &= ~OPTION_MASK_ISA_PCLMUL_UNSET;
2399 ix86_isa_flags_explicit |= OPTION_MASK_ISA_PCLMUL_UNSET;
2408 /* Return a string the documents the current -m options. The caller is
2409 responsible for freeing the string. */
2412 ix86_target_string (int isa, int flags, const char *arch, const char *tune,
2413 const char *fpmath, bool add_nl_p)
2415 struct ix86_target_opts
2417 const char *option; /* option string */
2418 int mask; /* isa mask options */
2421   /* This table is ordered so that options like -msse4.2, which imply
2422      preceding options, are matched first.  */
2423 static struct ix86_target_opts isa_opts[] =
2425 { "-m64", OPTION_MASK_ISA_64BIT },
2426 { "-mfma4", OPTION_MASK_ISA_FMA4 },
2427 { "-mxop", OPTION_MASK_ISA_XOP },
2428 { "-mlwp", OPTION_MASK_ISA_LWP },
2429 { "-msse4a", OPTION_MASK_ISA_SSE4A },
2430 { "-msse4.2", OPTION_MASK_ISA_SSE4_2 },
2431 { "-msse4.1", OPTION_MASK_ISA_SSE4_1 },
2432 { "-mssse3", OPTION_MASK_ISA_SSSE3 },
2433 { "-msse3", OPTION_MASK_ISA_SSE3 },
2434 { "-msse2", OPTION_MASK_ISA_SSE2 },
2435 { "-msse", OPTION_MASK_ISA_SSE },
2436 { "-m3dnow", OPTION_MASK_ISA_3DNOW },
2437 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A },
2438 { "-mmmx", OPTION_MASK_ISA_MMX },
2439 { "-mabm", OPTION_MASK_ISA_ABM },
2440 { "-mpopcnt", OPTION_MASK_ISA_POPCNT },
2441 { "-mmovbe", OPTION_MASK_ISA_MOVBE },
2442 { "-mcrc32", OPTION_MASK_ISA_CRC32 },
2443 { "-maes", OPTION_MASK_ISA_AES },
2444 { "-mpclmul", OPTION_MASK_ISA_PCLMUL },
2448 static struct ix86_target_opts flag_opts[] =
2450 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE },
2451 { "-m80387", MASK_80387 },
2452 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS },
2453 { "-malign-double", MASK_ALIGN_DOUBLE },
2454 { "-mcld", MASK_CLD },
2455 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS },
2456 { "-mieee-fp", MASK_IEEE_FP },
2457 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS },
2458 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY },
2459 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT },
2460 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS },
2461 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 },
2462 { "-mno-push-args", MASK_NO_PUSH_ARGS },
2463 { "-mno-red-zone", MASK_NO_RED_ZONE },
2464 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER },
2465 { "-mrecip", MASK_RECIP },
2466 { "-mrtd", MASK_RTD },
2467 { "-msseregparm", MASK_SSEREGPARM },
2468 { "-mstack-arg-probe", MASK_STACK_PROBE },
2469 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS },
2472 const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];
2475 char target_other[40];
2484 memset (opts, '\0', sizeof (opts));
2486 /* Add -march= option. */
2489 opts[num][0] = "-march=";
2490 opts[num++][1] = arch;
2493 /* Add -mtune= option. */
2496 opts[num][0] = "-mtune=";
2497 opts[num++][1] = tune;
2500 /* Pick out the options in isa options. */
2501 for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
2503 if ((isa & isa_opts[i].mask) != 0)
2505 opts[num++][0] = isa_opts[i].option;
2506 isa &= ~ isa_opts[i].mask;
2510 if (isa && add_nl_p)
2512 opts[num++][0] = isa_other;
2513 sprintf (isa_other, "(other isa: 0x%x)", isa);
2516 /* Add flag options. */
2517 for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
2519 if ((flags & flag_opts[i].mask) != 0)
2521 opts[num++][0] = flag_opts[i].option;
2522 flags &= ~ flag_opts[i].mask;
2526   if (flags && add_nl_p)
2528       opts[num++][0] = target_other;
       /* Report the residual target FLAGS here, not the ISA bits: this is
          the "(other flags: ...)" diagnostic, and the loop above masked
          bits out of `flags'.  The original printed `isa' (copy/paste from
          the isa_other branch).  */
2529       sprintf (target_other, "(other flags: 0x%x)", flags);
2532 /* Add -fpmath= option. */
2535 opts[num][0] = "-mfpmath=";
2536 opts[num++][1] = fpmath;
2543 gcc_assert (num < ARRAY_SIZE (opts));
2545 /* Size the string. */
2547 sep_len = (add_nl_p) ? 3 : 1;
2548 for (i = 0; i < num; i++)
2551 for (j = 0; j < 2; j++)
2553 len += strlen (opts[i][j]);
2556 /* Build the string. */
2557 ret = ptr = (char *) xmalloc (len);
2560 for (i = 0; i < num; i++)
2564 for (j = 0; j < 2; j++)
2565 len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;
2572 if (add_nl_p && line_len + len2[0] + len2[1] > 70)
2580 for (j = 0; j < 2; j++)
2583 memcpy (ptr, opts[i][j], len2[j]);
2585 line_len += len2[j];
2590 gcc_assert (ret + len >= ptr);
2595 /* Function that is callable from the debugger to print the current
2598 ix86_debug_options (void)
2600 char *opts = ix86_target_string (ix86_isa_flags, target_flags,
2601 ix86_arch_string, ix86_tune_string,
2602 ix86_fpmath_string, true);
2606 fprintf (stderr, "%s\n\n", opts);
2610 fputs ("<no options>\n\n", stderr);
2615 /* Sometimes certain combinations of command options do not make
2616 sense on a particular target machine. You can define a macro
2617 `OVERRIDE_OPTIONS' to take account of this. This macro, if
2618 defined, is executed once just after all the command options have
2621 Don't use this macro to turn on various extra optimizations for
2622 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
2625 override_options (bool main_args_p)
2628 unsigned int ix86_arch_mask, ix86_tune_mask;
2633 /* Comes from final.c -- no real reason to change it. */
2634 #define MAX_CODE_ALIGN 16
2642 PTA_PREFETCH_SSE = 1 << 4,
2644 PTA_3DNOW_A = 1 << 6,
2648 PTA_POPCNT = 1 << 10,
2650 PTA_SSE4A = 1 << 12,
2651 PTA_NO_SAHF = 1 << 13,
2652 PTA_SSE4_1 = 1 << 14,
2653 PTA_SSE4_2 = 1 << 15,
2655 PTA_PCLMUL = 1 << 17,
2658 PTA_MOVBE = 1 << 20,
2666 const char *const name; /* processor name or nickname. */
2667 const enum processor_type processor;
2668 const enum attr_cpu schedule;
2669 const unsigned /*enum pta_flags*/ flags;
2671 const processor_alias_table[] =
2673 {"i386", PROCESSOR_I386, CPU_NONE, 0},
2674 {"i486", PROCESSOR_I486, CPU_NONE, 0},
2675 {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
2676 {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
2677 {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX},
2678 {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX},
2679 {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
2680 {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
2681 {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_SSE},
2682 {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
2683 {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
2684 {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX},
2685 {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2687 {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2689 {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2690 PTA_MMX | PTA_SSE | PTA_SSE2},
2691 {"pentium4", PROCESSOR_PENTIUM4, CPU_NONE,
2692 PTA_MMX |PTA_SSE | PTA_SSE2},
2693 {"pentium4m", PROCESSOR_PENTIUM4, CPU_NONE,
2694 PTA_MMX | PTA_SSE | PTA_SSE2},
2695 {"prescott", PROCESSOR_NOCONA, CPU_NONE,
2696 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3},
2697 {"nocona", PROCESSOR_NOCONA, CPU_NONE,
2698 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2699 | PTA_CX16 | PTA_NO_SAHF},
2700 {"core2", PROCESSOR_CORE2, CPU_CORE2,
2701 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2702 | PTA_SSSE3 | PTA_CX16},
2703 {"atom", PROCESSOR_ATOM, CPU_ATOM,
2704 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2705 | PTA_SSSE3 | PTA_CX16 | PTA_MOVBE},
2706 {"geode", PROCESSOR_GEODE, CPU_GEODE,
2707 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A |PTA_PREFETCH_SSE},
2708 {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
2709 {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
2710 {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
2711 {"athlon", PROCESSOR_ATHLON, CPU_ATHLON,
2712 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
2713 {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON,
2714 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
2715 {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON,
2716 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2717 {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON,
2718 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2719 {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON,
2720 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2721 {"x86-64", PROCESSOR_K8, CPU_K8,
2722 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF},
2723 {"k8", PROCESSOR_K8, CPU_K8,
2724 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2725 | PTA_SSE2 | PTA_NO_SAHF},
2726 {"k8-sse3", PROCESSOR_K8, CPU_K8,
2727 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2728 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
2729 {"opteron", PROCESSOR_K8, CPU_K8,
2730 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2731 | PTA_SSE2 | PTA_NO_SAHF},
2732 {"opteron-sse3", PROCESSOR_K8, CPU_K8,
2733 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2734 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
2735 {"athlon64", PROCESSOR_K8, CPU_K8,
2736 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2737 | PTA_SSE2 | PTA_NO_SAHF},
2738 {"athlon64-sse3", PROCESSOR_K8, CPU_K8,
2739 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2740 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
2741 {"athlon-fx", PROCESSOR_K8, CPU_K8,
2742 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2743 | PTA_SSE2 | PTA_NO_SAHF},
2744 {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
2745 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2746 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
2747 {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
2748 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2749 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
2750 {"generic32", PROCESSOR_GENERIC32, CPU_PENTIUMPRO,
2751 0 /* flags are only used for -march switch. */ },
2752 {"generic64", PROCESSOR_GENERIC64, CPU_GENERIC64,
2753 PTA_64BIT /* flags are only used for -march switch. */ },
2756 int const pta_size = ARRAY_SIZE (processor_alias_table);
2758 /* Set up prefix/suffix so the error messages refer to either the command
2759 line argument, or the attribute(target). */
2768 prefix = "option(\"";
2773 #ifdef SUBTARGET_OVERRIDE_OPTIONS
2774 SUBTARGET_OVERRIDE_OPTIONS;
2777 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
2778 SUBSUBTARGET_OVERRIDE_OPTIONS;
2781 /* -fPIC is the default for x86_64. */
2782 if (TARGET_MACHO && TARGET_64BIT)
2785 /* Set the default values for switches whose default depends on TARGET_64BIT
2786 in case they weren't overwritten by command line options. */
2789 /* Mach-O doesn't support omitting the frame pointer for now. */
2790 if (flag_omit_frame_pointer == 2)
2791 flag_omit_frame_pointer = (TARGET_MACHO ? 0 : 1);
2792 if (flag_asynchronous_unwind_tables == 2)
2793 flag_asynchronous_unwind_tables = 1;
2794 if (flag_pcc_struct_return == 2)
2795 flag_pcc_struct_return = 0;
2799 if (flag_omit_frame_pointer == 2)
2800 flag_omit_frame_pointer = 0;
2801 if (flag_asynchronous_unwind_tables == 2)
2802 flag_asynchronous_unwind_tables = 0;
2803 if (flag_pcc_struct_return == 2)
2804 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
2807 /* Need to check -mtune=generic first. */
2808 if (ix86_tune_string)
2810 if (!strcmp (ix86_tune_string, "generic")
2811 || !strcmp (ix86_tune_string, "i686")
2812 /* As special support for cross compilers we read -mtune=native
2813 as -mtune=generic. With native compilers we won't see the
2814 -mtune=native, as it was changed by the driver. */
2815 || !strcmp (ix86_tune_string, "native"))
2818 ix86_tune_string = "generic64";
2820 ix86_tune_string = "generic32";
2822 /* If this call is for setting the option attribute, allow the
2823 generic32/generic64 that was previously set. */
2824 else if (!main_args_p
2825 && (!strcmp (ix86_tune_string, "generic32")
2826 || !strcmp (ix86_tune_string, "generic64")))
2828 else if (!strncmp (ix86_tune_string, "generic", 7))
2829 error ("bad value (%s) for %stune=%s %s",
2830 ix86_tune_string, prefix, suffix, sw);
2834 if (ix86_arch_string)
2835 ix86_tune_string = ix86_arch_string;
2836 if (!ix86_tune_string)
2838 ix86_tune_string = cpu_names[TARGET_CPU_DEFAULT];
2839 ix86_tune_defaulted = 1;
2842 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
2843 need to use a sensible tune option. */
2844 if (!strcmp (ix86_tune_string, "generic")
2845 || !strcmp (ix86_tune_string, "x86-64")
2846 || !strcmp (ix86_tune_string, "i686"))
2849 ix86_tune_string = "generic64";
2851 ix86_tune_string = "generic32";
2854 if (ix86_stringop_string)
2856 if (!strcmp (ix86_stringop_string, "rep_byte"))
2857 stringop_alg = rep_prefix_1_byte;
2858 else if (!strcmp (ix86_stringop_string, "libcall"))
2859 stringop_alg = libcall;
2860 else if (!strcmp (ix86_stringop_string, "rep_4byte"))
2861 stringop_alg = rep_prefix_4_byte;
2862 else if (!strcmp (ix86_stringop_string, "rep_8byte")
2864 /* rep; movq isn't available in 32-bit code. */
2865 stringop_alg = rep_prefix_8_byte;
2866 else if (!strcmp (ix86_stringop_string, "byte_loop"))
2867 stringop_alg = loop_1_byte;
2868 else if (!strcmp (ix86_stringop_string, "loop"))
2869 stringop_alg = loop;
2870 else if (!strcmp (ix86_stringop_string, "unrolled_loop"))
2871 stringop_alg = unrolled_loop;
2873 error ("bad value (%s) for %sstringop-strategy=%s %s",
2874 ix86_stringop_string, prefix, suffix, sw);
2876 if (!strcmp (ix86_tune_string, "x86-64"))
2877 warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated. Use "
2878 "%stune=k8%s or %stune=generic%s instead as appropriate.",
2879 prefix, suffix, prefix, suffix, prefix, suffix);
2881 if (!ix86_arch_string)
2882 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
2884 ix86_arch_specified = 1;
2886 if (!strcmp (ix86_arch_string, "generic"))
2887 error ("generic CPU can be used only for %stune=%s %s",
2888 prefix, suffix, sw);
2889 if (!strncmp (ix86_arch_string, "generic", 7))
2890 error ("bad value (%s) for %sarch=%s %s",
2891 ix86_arch_string, prefix, suffix, sw);
2893 /* Validate -mabi= value. */
2894 if (ix86_abi_string)
2896 if (strcmp (ix86_abi_string, "sysv") == 0)
2897 ix86_abi = SYSV_ABI;
2898 else if (strcmp (ix86_abi_string, "ms") == 0)
2901 error ("unknown ABI (%s) for %sabi=%s %s",
2902 ix86_abi_string, prefix, suffix, sw);
2905 ix86_abi = DEFAULT_ABI;
2907 if (ix86_cmodel_string != 0)
2909 if (!strcmp (ix86_cmodel_string, "small"))
2910 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
2911 else if (!strcmp (ix86_cmodel_string, "medium"))
2912 ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
2913 else if (!strcmp (ix86_cmodel_string, "large"))
2914 ix86_cmodel = flag_pic ? CM_LARGE_PIC : CM_LARGE;
2916 error ("code model %s does not support PIC mode", ix86_cmodel_string);
2917 else if (!strcmp (ix86_cmodel_string, "32"))
2918 ix86_cmodel = CM_32;
2919 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
2920 ix86_cmodel = CM_KERNEL;
2922 error ("bad value (%s) for %scmodel=%s %s",
2923 ix86_cmodel_string, prefix, suffix, sw);
2927 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
2928 use of rip-relative addressing. This eliminates fixups that
2929 would otherwise be needed if this object is to be placed in a
2930 DLL, and is essentially just as efficient as direct addressing. */
2931 if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
2932 ix86_cmodel = CM_SMALL_PIC, flag_pic = 1;
2933 else if (TARGET_64BIT)
2934 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
2936 ix86_cmodel = CM_32;
2938 if (ix86_asm_string != 0)
2941 && !strcmp (ix86_asm_string, "intel"))
2942 ix86_asm_dialect = ASM_INTEL;
2943 else if (!strcmp (ix86_asm_string, "att"))
2944 ix86_asm_dialect = ASM_ATT;
2946 error ("bad value (%s) for %sasm=%s %s",
2947 ix86_asm_string, prefix, suffix, sw);
2949 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
2950 error ("code model %qs not supported in the %s bit mode",
2951 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
2952 if ((TARGET_64BIT != 0) != ((ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
2953 sorry ("%i-bit mode not compiled in",
2954 (ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
2956 for (i = 0; i < pta_size; i++)
2957 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
2959 ix86_schedule = processor_alias_table[i].schedule;
2960 ix86_arch = processor_alias_table[i].processor;
2961 /* Default cpu tuning to the architecture. */
2962 ix86_tune = ix86_arch;
2964 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
2965 error ("CPU you selected does not support x86-64 "
2968 if (processor_alias_table[i].flags & PTA_MMX
2969 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
2970 ix86_isa_flags |= OPTION_MASK_ISA_MMX;
2971 if (processor_alias_table[i].flags & PTA_3DNOW
2972 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
2973 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
2974 if (processor_alias_table[i].flags & PTA_3DNOW_A
2975 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
2976 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
2977 if (processor_alias_table[i].flags & PTA_SSE
2978 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
2979 ix86_isa_flags |= OPTION_MASK_ISA_SSE;
2980 if (processor_alias_table[i].flags & PTA_SSE2
2981 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
2982 ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
2983 if (processor_alias_table[i].flags & PTA_SSE3
2984 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
2985 ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
2986 if (processor_alias_table[i].flags & PTA_SSSE3
2987 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
2988 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
2989 if (processor_alias_table[i].flags & PTA_SSE4_1
2990 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
2991 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
2992 if (processor_alias_table[i].flags & PTA_SSE4_2
2993 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
2994 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
2995 if (processor_alias_table[i].flags & PTA_AVX
2996 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
2997 ix86_isa_flags |= OPTION_MASK_ISA_AVX;
2998 if (processor_alias_table[i].flags & PTA_FMA
2999 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
3000 ix86_isa_flags |= OPTION_MASK_ISA_FMA;
3001 if (processor_alias_table[i].flags & PTA_SSE4A
3002 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
3003 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
3004 if (processor_alias_table[i].flags & PTA_FMA4
3005 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA4))
3006 ix86_isa_flags |= OPTION_MASK_ISA_FMA4;
3007 if (processor_alias_table[i].flags & PTA_XOP
3008 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_XOP))
3009 ix86_isa_flags |= OPTION_MASK_ISA_XOP;
3010 if (processor_alias_table[i].flags & PTA_LWP
3011 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_LWP))
3012 ix86_isa_flags |= OPTION_MASK_ISA_LWP;
3013 if (processor_alias_table[i].flags & PTA_ABM
3014 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
3015 ix86_isa_flags |= OPTION_MASK_ISA_ABM;
3016 if (processor_alias_table[i].flags & PTA_CX16
3017 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
3018 ix86_isa_flags |= OPTION_MASK_ISA_CX16;
3019 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
3020 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
3021 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
3022 if (!(TARGET_64BIT && (processor_alias_table[i].flags & PTA_NO_SAHF))
3023 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
3024 ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
3025 if (processor_alias_table[i].flags & PTA_MOVBE
3026 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MOVBE))
3027 ix86_isa_flags |= OPTION_MASK_ISA_MOVBE;
3028 if (processor_alias_table[i].flags & PTA_AES
3029 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
3030 ix86_isa_flags |= OPTION_MASK_ISA_AES;
3031 if (processor_alias_table[i].flags & PTA_PCLMUL
3032 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
3033 ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
3034 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
3035 x86_prefetch_sse = true;
3041 error ("bad value (%s) for %sarch=%s %s",
3042 ix86_arch_string, prefix, suffix, sw);
3044 ix86_arch_mask = 1u << ix86_arch;
3045 for (i = 0; i < X86_ARCH_LAST; ++i)
3046 ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
3048 for (i = 0; i < pta_size; i++)
3049 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
3051 ix86_schedule = processor_alias_table[i].schedule;
3052 ix86_tune = processor_alias_table[i].processor;
3053 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
3055 if (ix86_tune_defaulted)
3057 ix86_tune_string = "x86-64";
3058 for (i = 0; i < pta_size; i++)
3059 if (! strcmp (ix86_tune_string,
3060 processor_alias_table[i].name))
3062 ix86_schedule = processor_alias_table[i].schedule;
3063 ix86_tune = processor_alias_table[i].processor;
3066 error ("CPU you selected does not support x86-64 "
3069 /* Intel CPUs have always interpreted SSE prefetch instructions as
3070 NOPs; so, we can enable SSE prefetch instructions even when
3071 -mtune (rather than -march) points us to a processor that has them.
3072 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
3073 higher processors. */
3075 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
3076 x86_prefetch_sse = true;
3080 error ("bad value (%s) for %stune=%s %s",
3081 ix86_tune_string, prefix, suffix, sw);
3083 ix86_tune_mask = 1u << ix86_tune;
3084 for (i = 0; i < X86_TUNE_LAST; ++i)
3085 ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3088 ix86_cost = &ix86_size_cost;
3090 ix86_cost = processor_target_table[ix86_tune].cost;
3092 /* Arrange to set up i386_stack_locals for all functions. */
3093 init_machine_status = ix86_init_machine_status;
3095 /* Validate -mregparm= value. */
3096 if (ix86_regparm_string)
3099 warning (0, "%sregparm%s is ignored in 64-bit mode", prefix, suffix);
3100 i = atoi (ix86_regparm_string);
3101 if (i < 0 || i > REGPARM_MAX)
3102 error ("%sregparm=%d%s is not between 0 and %d",
3103 prefix, i, suffix, REGPARM_MAX);
3108 ix86_regparm = REGPARM_MAX;
3110 /* If the user has provided any of the -malign-* options,
3111 warn and use that value only if -falign-* is not set.
3112 Remove this code in GCC 3.2 or later. */
3113 if (ix86_align_loops_string)
3115 warning (0, "%salign-loops%s is obsolete, use -falign-loops%s",
3116 prefix, suffix, suffix);
3117 if (align_loops == 0)
3119 i = atoi (ix86_align_loops_string);
3120 if (i < 0 || i > MAX_CODE_ALIGN)
3121 error ("%salign-loops=%d%s is not between 0 and %d",
3122 prefix, i, suffix, MAX_CODE_ALIGN);
3124 align_loops = 1 << i;
3128 if (ix86_align_jumps_string)
3130 warning (0, "%salign-jumps%s is obsolete, use -falign-jumps%s",
3131 prefix, suffix, suffix);
3132 if (align_jumps == 0)
3134 i = atoi (ix86_align_jumps_string);
3135 if (i < 0 || i > MAX_CODE_ALIGN)
3136 error ("%salign-loops=%d%s is not between 0 and %d",
3137 prefix, i, suffix, MAX_CODE_ALIGN);
3139 align_jumps = 1 << i;
3143 if (ix86_align_funcs_string)
3145 warning (0, "%salign-functions%s is obsolete, use -falign-functions%s",
3146 prefix, suffix, suffix);
3147 if (align_functions == 0)
3149 i = atoi (ix86_align_funcs_string);
3150 if (i < 0 || i > MAX_CODE_ALIGN)
3151 error ("%salign-loops=%d%s is not between 0 and %d",
3152 prefix, i, suffix, MAX_CODE_ALIGN);
3154 align_functions = 1 << i;
3158 /* Default align_* from the processor table. */
3159 if (align_loops == 0)
3161 align_loops = processor_target_table[ix86_tune].align_loop;
3162 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
3164 if (align_jumps == 0)
3166 align_jumps = processor_target_table[ix86_tune].align_jump;
3167 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
3169 if (align_functions == 0)
3171 align_functions = processor_target_table[ix86_tune].align_func;
3174 /* Validate -mbranch-cost= value, or provide default. */
3175 ix86_branch_cost = ix86_cost->branch_cost;
3176 if (ix86_branch_cost_string)
3178 i = atoi (ix86_branch_cost_string);
3180 error ("%sbranch-cost=%d%s is not between 0 and 5", prefix, i, suffix);
3182 ix86_branch_cost = i;
3184 if (ix86_section_threshold_string)
3186 i = atoi (ix86_section_threshold_string);
3188 error ("%slarge-data-threshold=%d%s is negative", prefix, i, suffix);
3190 ix86_section_threshold = i;
3193 if (ix86_tls_dialect_string)
3195 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
3196 ix86_tls_dialect = TLS_DIALECT_GNU;
3197 else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
3198 ix86_tls_dialect = TLS_DIALECT_GNU2;
3199 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
3200 ix86_tls_dialect = TLS_DIALECT_SUN;
3202 error ("bad value (%s) for %stls-dialect=%s %s",
3203 ix86_tls_dialect_string, prefix, suffix, sw);
3206 if (ix87_precision_string)
3208 i = atoi (ix87_precision_string);
3209 if (i != 32 && i != 64 && i != 80)
3210 error ("pc%d is not valid precision setting (32, 64 or 80)", i);
3215 target_flags |= TARGET_SUBTARGET64_DEFAULT & ~target_flags_explicit;
3217 /* Enable by default the SSE and MMX builtins. Do allow the user to
3218 explicitly disable any of these. In particular, disabling SSE and
3219 MMX for kernel code is extremely useful. */
3220 if (!ix86_arch_specified)
3222 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
3223 | TARGET_SUBTARGET64_ISA_DEFAULT) & ~ix86_isa_flags_explicit);
3226 warning (0, "%srtd%s is ignored in 64bit mode", prefix, suffix);
3230 target_flags |= TARGET_SUBTARGET32_DEFAULT & ~target_flags_explicit;
3232 if (!ix86_arch_specified)
3234 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~ix86_isa_flags_explicit;
3236 /* i386 ABI does not specify red zone. It still makes sense to use it
3237    when the programmer takes care to keep the stack from being destroyed.  */
3238 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
3239 target_flags |= MASK_NO_RED_ZONE;
3242 /* Keep nonleaf frame pointers. */
3243 if (flag_omit_frame_pointer)
3244 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
3245 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
3246 flag_omit_frame_pointer = 1;
3248 /* If we're doing fast math, we don't care about comparison order
3249 wrt NaNs. This lets us use a shorter comparison sequence. */
3250 if (flag_finite_math_only)
3251 target_flags &= ~MASK_IEEE_FP;
3253 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
3254 since the insns won't need emulation. */
3255 if (x86_arch_always_fancy_math_387 & ix86_arch_mask)
3256 target_flags &= ~MASK_NO_FANCY_MATH_387;
3258 /* Likewise, if the target doesn't have a 387, or we've specified
3259 software floating point, don't use 387 inline intrinsics. */
3261 target_flags |= MASK_NO_FANCY_MATH_387;
3263 /* Turn on MMX builtins for -msse. */
3266 ix86_isa_flags |= OPTION_MASK_ISA_MMX & ~ix86_isa_flags_explicit;
3267 x86_prefetch_sse = true;
3270 /* Turn on popcnt instruction for -msse4.2 or -mabm. */
3271 if (TARGET_SSE4_2 || TARGET_ABM)
3272 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT & ~ix86_isa_flags_explicit;
3274 /* Validate -mpreferred-stack-boundary= value or default it to
3275 PREFERRED_STACK_BOUNDARY_DEFAULT. */
3276 ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
3277 if (ix86_preferred_stack_boundary_string)
3279 i = atoi (ix86_preferred_stack_boundary_string);
3280 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
3281 error ("%spreferred-stack-boundary=%d%s is not between %d and 12",
3282 prefix, i, suffix, TARGET_64BIT ? 4 : 2);
3284 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
3287 /* Set the default value for -mstackrealign. */
3288 if (ix86_force_align_arg_pointer == -1)
3289 ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;
3291 ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;
3293 /* Validate -mincoming-stack-boundary= value or default it to
3294 MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
3295 ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
3296 if (ix86_incoming_stack_boundary_string)
3298 i = atoi (ix86_incoming_stack_boundary_string);