1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
3 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
4 Free Software Foundation, Inc.
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3, or (at your option)
any later version.
13 GCC is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
24 #include "coretypes.h"
30 #include "hard-reg-set.h"
32 #include "insn-config.h"
33 #include "conditions.h"
35 #include "insn-codes.h"
36 #include "insn-attr.h"
44 #include "basic-block.h"
47 #include "target-def.h"
48 #include "langhooks.h"
53 #include "tm-constrs.h"
57 #include "dwarf2out.h"
/* Forward declaration; the definition is not visible in this chunk.
   NOTE(review): presumably rewrites a SYMBOL_REF so that references to
   dllimport'ed symbols are legitimate RTL -- confirm against the
   definition later in the file.  */
59 static rtx legitimize_dllimport_symbol (rtx, bool);
/* Default stack-limit check value when the target headers do not
   provide one; -1 disables the limit.  NOTE(review): the matching
   #endif is not visible in this listing -- verify it was not lost in
   extraction.  */
61 #ifndef CHECK_STACK_LIMIT
62 #define CHECK_STACK_LIMIT (-1)
/* Return index of given mode in mult and division cost tables.
   QImode -> 0, HImode -> 1, SImode -> 2, DImode -> 3, anything
   else (e.g. TImode) -> 4, matching the five-entry mult/divide
   arrays in the processor cost tables below.
   NOTE(review): the final ": 4)" alternative was truncated in the
   original listing; without it the conditional expression is
   unterminated and the macro cannot compile.  */
#define MODE_INDEX(mode) \
  ((mode) == QImode ? 0 \
   : (mode) == HImode ? 1 \
   : (mode) == SImode ? 2 \
   : (mode) == DImode ? 3 \
   : 4)
73 /* Processor costs (relative to an add) */
74 /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
75 #define COSTS_N_BYTES(N) ((N) * 2)
/* I.e. when tuning for size, costs are expressed in bytes on the same
   numeric scale that COSTS_N_INSNS uses for cycles, so the two kinds of
   cost tables remain comparable.  */
/* Placeholder stringop strategy: always use a libcall regardless of
   block size (-1 = no upper bound).  Used below as the second element
   of memcpy/memset algorithm pairs when a table has no separate data
   for that variant.  */
77 #define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
/* NOTE(review): positional initializer for `struct processor_costs'
   (declaration not visible in this chunk); field order must match the
   struct declaration exactly.  This listing shows no closing "};"
   before the next definition -- verify nothing was lost in
   extraction.  */
80 struct processor_costs ix86_size_cost = {/* costs for tuning for size */
81 COSTS_N_BYTES (2), /* cost of an add instruction */
82 COSTS_N_BYTES (3), /* cost of a lea instruction */
83 COSTS_N_BYTES (2), /* variable shift costs */
84 COSTS_N_BYTES (3), /* constant shift costs */
85 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
86 COSTS_N_BYTES (3), /* HI */
87 COSTS_N_BYTES (3), /* SI */
88 COSTS_N_BYTES (3), /* DI */
89 COSTS_N_BYTES (5)}, /* other */
90 0, /* cost of multiply per each bit set */
91 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
92 COSTS_N_BYTES (3), /* HI */
93 COSTS_N_BYTES (3), /* SI */
94 COSTS_N_BYTES (3), /* DI */
95 COSTS_N_BYTES (5)}, /* other */
96 COSTS_N_BYTES (3), /* cost of movsx */
97 COSTS_N_BYTES (3), /* cost of movzx */
/* NOTE(review): the sibling tables below carry a `"large" insn' entry
   immediately after movzx; that entry (and possibly others) appears to
   be elided here -- confirm against the original source.  */
100 2, /* cost for loading QImode using movzbl */
101 {2, 2, 2}, /* cost of loading integer registers
102 in QImode, HImode and SImode.
103 Relative to reg-reg move (2). */
104 {2, 2, 2}, /* cost of storing integer registers */
105 2, /* cost of reg,reg fld/fst */
106 {2, 2, 2}, /* cost of loading fp registers
107 in SFmode, DFmode and XFmode */
108 {2, 2, 2}, /* cost of storing fp registers
109 in SFmode, DFmode and XFmode */
110 3, /* cost of moving MMX register */
111 {3, 3}, /* cost of loading MMX registers
112 in SImode and DImode */
113 {3, 3}, /* cost of storing MMX registers
114 in SImode and DImode */
115 3, /* cost of moving SSE register */
116 {3, 3, 3}, /* cost of loading SSE registers
117 in SImode, DImode and TImode */
118 {3, 3, 3}, /* cost of storing SSE registers
119 in SImode, DImode and TImode */
120 3, /* MMX or SSE register to integer */
121 0, /* size of l1 cache */
122 0, /* size of l2 cache */
123 0, /* size of prefetch block */
124 0, /* number of parallel prefetches */
126 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
127 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
128 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
129 COSTS_N_BYTES (2), /* cost of FABS instruction. */
130 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
131 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
132 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
133 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
134 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
135 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
136 1, /* scalar_stmt_cost. */
137 1, /* scalar load_cost. */
138 1, /* scalar_store_cost. */
139 1, /* vec_stmt_cost. */
140 1, /* vec_to_scalar_cost. */
141 1, /* scalar_to_vec_cost. */
142 1, /* vec_align_load_cost. */
143 1, /* vec_unalign_load_cost. */
144 1, /* vec_store_cost. */
145 1, /* cond_taken_branch_cost. */
146 1, /* cond_not_taken_branch_cost. */
149 /* Processor costs (relative to an add) */
/* NOTE(review): positional initializer for `struct processor_costs'
   (declaration not visible in this chunk); field order must match the
   struct declaration exactly.  Line-number gaps and the missing
   closing "};" suggest entries were lost in extraction -- verify
   against the original source.  */
151 struct processor_costs i386_cost = { /* 386 specific costs */
152 COSTS_N_INSNS (1), /* cost of an add instruction */
153 COSTS_N_INSNS (1), /* cost of a lea instruction */
154 COSTS_N_INSNS (3), /* variable shift costs */
155 COSTS_N_INSNS (2), /* constant shift costs */
156 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
157 COSTS_N_INSNS (6), /* HI */
158 COSTS_N_INSNS (6), /* SI */
159 COSTS_N_INSNS (6), /* DI */
160 COSTS_N_INSNS (6)}, /* other */
161 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
162 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
163 COSTS_N_INSNS (23), /* HI */
164 COSTS_N_INSNS (23), /* SI */
165 COSTS_N_INSNS (23), /* DI */
166 COSTS_N_INSNS (23)}, /* other */
167 COSTS_N_INSNS (3), /* cost of movsx */
168 COSTS_N_INSNS (2), /* cost of movzx */
169 15, /* "large" insn */
171 4, /* cost for loading QImode using movzbl */
172 {2, 4, 2}, /* cost of loading integer registers
173 in QImode, HImode and SImode.
174 Relative to reg-reg move (2). */
175 {2, 4, 2}, /* cost of storing integer registers */
176 2, /* cost of reg,reg fld/fst */
177 {8, 8, 8}, /* cost of loading fp registers
178 in SFmode, DFmode and XFmode */
179 {8, 8, 8}, /* cost of storing fp registers
180 in SFmode, DFmode and XFmode */
181 2, /* cost of moving MMX register */
182 {4, 8}, /* cost of loading MMX registers
183 in SImode and DImode */
184 {4, 8}, /* cost of storing MMX registers
185 in SImode and DImode */
186 2, /* cost of moving SSE register */
187 {4, 8, 16}, /* cost of loading SSE registers
188 in SImode, DImode and TImode */
189 {4, 8, 16}, /* cost of storing SSE registers
190 in SImode, DImode and TImode */
191 3, /* MMX or SSE register to integer */
192 0, /* size of l1 cache */
193 0, /* size of l2 cache */
194 0, /* size of prefetch block */
195 0, /* number of parallel prefetches */
197 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
198 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
199 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
200 COSTS_N_INSNS (22), /* cost of FABS instruction. */
201 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
202 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
203 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
204 DUMMY_STRINGOP_ALGS},
205 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
206 DUMMY_STRINGOP_ALGS},
207 1, /* scalar_stmt_cost. */
208 1, /* scalar load_cost. */
209 1, /* scalar_store_cost. */
210 1, /* vec_stmt_cost. */
211 1, /* vec_to_scalar_cost. */
212 1, /* scalar_to_vec_cost. */
213 1, /* vec_align_load_cost. */
214 2, /* vec_unalign_load_cost. */
215 1, /* vec_store_cost. */
216 3, /* cond_taken_branch_cost. */
217 1, /* cond_not_taken_branch_cost. */
/* NOTE(review): positional initializer for `struct processor_costs'
   (declaration not visible in this chunk); field order must match the
   struct declaration exactly.  Line-number gaps and the missing
   closing "};" suggest entries were lost in extraction -- verify
   against the original source.  */
221 struct processor_costs i486_cost = { /* 486 specific costs */
222 COSTS_N_INSNS (1), /* cost of an add instruction */
223 COSTS_N_INSNS (1), /* cost of a lea instruction */
224 COSTS_N_INSNS (3), /* variable shift costs */
225 COSTS_N_INSNS (2), /* constant shift costs */
226 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
227 COSTS_N_INSNS (12), /* HI */
228 COSTS_N_INSNS (12), /* SI */
229 COSTS_N_INSNS (12), /* DI */
230 COSTS_N_INSNS (12)}, /* other */
231 1, /* cost of multiply per each bit set */
232 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
233 COSTS_N_INSNS (40), /* HI */
234 COSTS_N_INSNS (40), /* SI */
235 COSTS_N_INSNS (40), /* DI */
236 COSTS_N_INSNS (40)}, /* other */
237 COSTS_N_INSNS (3), /* cost of movsx */
238 COSTS_N_INSNS (2), /* cost of movzx */
239 15, /* "large" insn */
241 4, /* cost for loading QImode using movzbl */
242 {2, 4, 2}, /* cost of loading integer registers
243 in QImode, HImode and SImode.
244 Relative to reg-reg move (2). */
245 {2, 4, 2}, /* cost of storing integer registers */
246 2, /* cost of reg,reg fld/fst */
247 {8, 8, 8}, /* cost of loading fp registers
248 in SFmode, DFmode and XFmode */
249 {8, 8, 8}, /* cost of storing fp registers
250 in SFmode, DFmode and XFmode */
251 2, /* cost of moving MMX register */
252 {4, 8}, /* cost of loading MMX registers
253 in SImode and DImode */
254 {4, 8}, /* cost of storing MMX registers
255 in SImode and DImode */
256 2, /* cost of moving SSE register */
257 {4, 8, 16}, /* cost of loading SSE registers
258 in SImode, DImode and TImode */
259 {4, 8, 16}, /* cost of storing SSE registers
260 in SImode, DImode and TImode */
261 3, /* MMX or SSE register to integer */
262 4, /* size of l1 cache. 486 has 8kB cache
263 shared for code and data, so 4kB is
264 not really precise. */
265 4, /* size of l2 cache */
266 0, /* size of prefetch block */
267 0, /* number of parallel prefetches */
269 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
270 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
271 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
272 COSTS_N_INSNS (3), /* cost of FABS instruction. */
273 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
274 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
275 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
276 DUMMY_STRINGOP_ALGS},
277 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
278 DUMMY_STRINGOP_ALGS},
279 1, /* scalar_stmt_cost. */
280 1, /* scalar load_cost. */
281 1, /* scalar_store_cost. */
282 1, /* vec_stmt_cost. */
283 1, /* vec_to_scalar_cost. */
284 1, /* scalar_to_vec_cost. */
285 1, /* vec_align_load_cost. */
286 2, /* vec_unalign_load_cost. */
287 1, /* vec_store_cost. */
288 3, /* cond_taken_branch_cost. */
289 1, /* cond_not_taken_branch_cost. */
/* NOTE(review): positional initializer for `struct processor_costs'
   (declaration not visible in this chunk); field order must match the
   struct declaration exactly.  Line-number gaps and the missing
   closing "};" suggest entries were lost in extraction -- verify
   against the original source.  */
293 struct processor_costs pentium_cost = {
294 COSTS_N_INSNS (1), /* cost of an add instruction */
295 COSTS_N_INSNS (1), /* cost of a lea instruction */
296 COSTS_N_INSNS (4), /* variable shift costs */
297 COSTS_N_INSNS (1), /* constant shift costs */
298 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
299 COSTS_N_INSNS (11), /* HI */
300 COSTS_N_INSNS (11), /* SI */
301 COSTS_N_INSNS (11), /* DI */
302 COSTS_N_INSNS (11)}, /* other */
303 0, /* cost of multiply per each bit set */
304 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
305 COSTS_N_INSNS (25), /* HI */
306 COSTS_N_INSNS (25), /* SI */
307 COSTS_N_INSNS (25), /* DI */
308 COSTS_N_INSNS (25)}, /* other */
309 COSTS_N_INSNS (3), /* cost of movsx */
310 COSTS_N_INSNS (2), /* cost of movzx */
311 8, /* "large" insn */
313 6, /* cost for loading QImode using movzbl */
314 {2, 4, 2}, /* cost of loading integer registers
315 in QImode, HImode and SImode.
316 Relative to reg-reg move (2). */
317 {2, 4, 2}, /* cost of storing integer registers */
318 2, /* cost of reg,reg fld/fst */
319 {2, 2, 6}, /* cost of loading fp registers
320 in SFmode, DFmode and XFmode */
321 {4, 4, 6}, /* cost of storing fp registers
322 in SFmode, DFmode and XFmode */
323 8, /* cost of moving MMX register */
324 {8, 8}, /* cost of loading MMX registers
325 in SImode and DImode */
326 {8, 8}, /* cost of storing MMX registers
327 in SImode and DImode */
328 2, /* cost of moving SSE register */
329 {4, 8, 16}, /* cost of loading SSE registers
330 in SImode, DImode and TImode */
331 {4, 8, 16}, /* cost of storing SSE registers
332 in SImode, DImode and TImode */
333 3, /* MMX or SSE register to integer */
334 8, /* size of l1 cache. */
335 8, /* size of l2 cache */
336 0, /* size of prefetch block */
337 0, /* number of parallel prefetches */
339 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
340 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
341 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
342 COSTS_N_INSNS (1), /* cost of FABS instruction. */
343 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
344 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
345 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
346 DUMMY_STRINGOP_ALGS},
347 {{libcall, {{-1, rep_prefix_4_byte}}},
348 DUMMY_STRINGOP_ALGS},
349 1, /* scalar_stmt_cost. */
350 1, /* scalar load_cost. */
351 1, /* scalar_store_cost. */
352 1, /* vec_stmt_cost. */
353 1, /* vec_to_scalar_cost. */
354 1, /* scalar_to_vec_cost. */
355 1, /* vec_align_load_cost. */
356 2, /* vec_unalign_load_cost. */
357 1, /* vec_store_cost. */
358 3, /* cond_taken_branch_cost. */
359 1, /* cond_not_taken_branch_cost. */
/* NOTE(review): positional initializer for `struct processor_costs'
   (declaration not visible in this chunk); field order must match the
   struct declaration exactly.  Line-number gaps and the missing
   closing "};" suggest entries were lost in extraction -- verify
   against the original source.  */
363 struct processor_costs pentiumpro_cost = {
364 COSTS_N_INSNS (1), /* cost of an add instruction */
365 COSTS_N_INSNS (1), /* cost of a lea instruction */
366 COSTS_N_INSNS (1), /* variable shift costs */
367 COSTS_N_INSNS (1), /* constant shift costs */
368 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
369 COSTS_N_INSNS (4), /* HI */
370 COSTS_N_INSNS (4), /* SI */
371 COSTS_N_INSNS (4), /* DI */
372 COSTS_N_INSNS (4)}, /* other */
373 0, /* cost of multiply per each bit set */
374 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
375 COSTS_N_INSNS (17), /* HI */
376 COSTS_N_INSNS (17), /* SI */
377 COSTS_N_INSNS (17), /* DI */
378 COSTS_N_INSNS (17)}, /* other */
379 COSTS_N_INSNS (1), /* cost of movsx */
380 COSTS_N_INSNS (1), /* cost of movzx */
381 8, /* "large" insn */
383 2, /* cost for loading QImode using movzbl */
384 {4, 4, 4}, /* cost of loading integer registers
385 in QImode, HImode and SImode.
386 Relative to reg-reg move (2). */
387 {2, 2, 2}, /* cost of storing integer registers */
388 2, /* cost of reg,reg fld/fst */
389 {2, 2, 6}, /* cost of loading fp registers
390 in SFmode, DFmode and XFmode */
391 {4, 4, 6}, /* cost of storing fp registers
392 in SFmode, DFmode and XFmode */
393 2, /* cost of moving MMX register */
394 {2, 2}, /* cost of loading MMX registers
395 in SImode and DImode */
396 {2, 2}, /* cost of storing MMX registers
397 in SImode and DImode */
398 2, /* cost of moving SSE register */
399 {2, 2, 8}, /* cost of loading SSE registers
400 in SImode, DImode and TImode */
401 {2, 2, 8}, /* cost of storing SSE registers
402 in SImode, DImode and TImode */
403 3, /* MMX or SSE register to integer */
404 8, /* size of l1 cache. */
405 256, /* size of l2 cache */
406 32, /* size of prefetch block */
407 6, /* number of parallel prefetches */
409 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
410 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
411 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
412 COSTS_N_INSNS (2), /* cost of FABS instruction. */
413 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
414 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
415 /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes (we ensure
416 the alignment). For small blocks inline loop is still a noticeable win, for bigger
417 blocks either rep movsl or rep movsb is way to go. Rep movsb has apparently
418 more expensive startup time in CPU, but after 4K the difference is down in the noise.
420 {{rep_prefix_4_byte, {{128, loop}, {1024, unrolled_loop},
421 {8192, rep_prefix_4_byte}, {-1, rep_prefix_1_byte}}},
422 DUMMY_STRINGOP_ALGS},
423 {{rep_prefix_4_byte, {{1024, unrolled_loop},
424 {8192, rep_prefix_4_byte}, {-1, libcall}}},
425 DUMMY_STRINGOP_ALGS},
426 1, /* scalar_stmt_cost. */
427 1, /* scalar load_cost. */
428 1, /* scalar_store_cost. */
429 1, /* vec_stmt_cost. */
430 1, /* vec_to_scalar_cost. */
431 1, /* scalar_to_vec_cost. */
432 1, /* vec_align_load_cost. */
433 2, /* vec_unalign_load_cost. */
434 1, /* vec_store_cost. */
435 3, /* cond_taken_branch_cost. */
436 1, /* cond_not_taken_branch_cost. */
/* NOTE(review): positional initializer for `struct processor_costs'
   (declaration not visible in this chunk); field order must match the
   struct declaration exactly.  Line-number gaps and the missing
   closing "};" suggest entries were lost in extraction -- verify
   against the original source.  */
440 struct processor_costs geode_cost = {
441 COSTS_N_INSNS (1), /* cost of an add instruction */
442 COSTS_N_INSNS (1), /* cost of a lea instruction */
443 COSTS_N_INSNS (2), /* variable shift costs */
444 COSTS_N_INSNS (1), /* constant shift costs */
445 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
446 COSTS_N_INSNS (4), /* HI */
447 COSTS_N_INSNS (7), /* SI */
448 COSTS_N_INSNS (7), /* DI */
449 COSTS_N_INSNS (7)}, /* other */
450 0, /* cost of multiply per each bit set */
451 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
452 COSTS_N_INSNS (23), /* HI */
453 COSTS_N_INSNS (39), /* SI */
454 COSTS_N_INSNS (39), /* DI */
455 COSTS_N_INSNS (39)}, /* other */
456 COSTS_N_INSNS (1), /* cost of movsx */
457 COSTS_N_INSNS (1), /* cost of movzx */
458 8, /* "large" insn */
460 1, /* cost for loading QImode using movzbl */
461 {1, 1, 1}, /* cost of loading integer registers
462 in QImode, HImode and SImode.
463 Relative to reg-reg move (2). */
464 {1, 1, 1}, /* cost of storing integer registers */
465 1, /* cost of reg,reg fld/fst */
466 {1, 1, 1}, /* cost of loading fp registers
467 in SFmode, DFmode and XFmode */
468 {4, 6, 6}, /* cost of storing fp registers
469 in SFmode, DFmode and XFmode */
471 1, /* cost of moving MMX register */
472 {1, 1}, /* cost of loading MMX registers
473 in SImode and DImode */
474 {1, 1}, /* cost of storing MMX registers
475 in SImode and DImode */
476 1, /* cost of moving SSE register */
477 {1, 1, 1}, /* cost of loading SSE registers
478 in SImode, DImode and TImode */
479 {1, 1, 1}, /* cost of storing SSE registers
480 in SImode, DImode and TImode */
481 1, /* MMX or SSE register to integer */
482 64, /* size of l1 cache. */
483 128, /* size of l2 cache. */
484 32, /* size of prefetch block */
485 1, /* number of parallel prefetches */
487 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
488 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
489 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
490 COSTS_N_INSNS (1), /* cost of FABS instruction. */
491 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
492 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
493 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
494 DUMMY_STRINGOP_ALGS},
495 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
496 DUMMY_STRINGOP_ALGS},
497 1, /* scalar_stmt_cost. */
498 1, /* scalar load_cost. */
499 1, /* scalar_store_cost. */
500 1, /* vec_stmt_cost. */
501 1, /* vec_to_scalar_cost. */
502 1, /* scalar_to_vec_cost. */
503 1, /* vec_align_load_cost. */
504 2, /* vec_unalign_load_cost. */
505 1, /* vec_store_cost. */
506 3, /* cond_taken_branch_cost. */
507 1, /* cond_not_taken_branch_cost. */
/* NOTE(review): positional initializer for `struct processor_costs'
   (declaration not visible in this chunk); field order must match the
   struct declaration exactly.  Line-number gaps and the missing
   closing "};" suggest entries were lost in extraction -- verify
   against the original source.  */
511 struct processor_costs k6_cost = {
512 COSTS_N_INSNS (1), /* cost of an add instruction */
513 COSTS_N_INSNS (2), /* cost of a lea instruction */
514 COSTS_N_INSNS (1), /* variable shift costs */
515 COSTS_N_INSNS (1), /* constant shift costs */
516 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
517 COSTS_N_INSNS (3), /* HI */
518 COSTS_N_INSNS (3), /* SI */
519 COSTS_N_INSNS (3), /* DI */
520 COSTS_N_INSNS (3)}, /* other */
521 0, /* cost of multiply per each bit set */
522 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
523 COSTS_N_INSNS (18), /* HI */
524 COSTS_N_INSNS (18), /* SI */
525 COSTS_N_INSNS (18), /* DI */
526 COSTS_N_INSNS (18)}, /* other */
527 COSTS_N_INSNS (2), /* cost of movsx */
528 COSTS_N_INSNS (2), /* cost of movzx */
529 8, /* "large" insn */
531 3, /* cost for loading QImode using movzbl */
532 {4, 5, 4}, /* cost of loading integer registers
533 in QImode, HImode and SImode.
534 Relative to reg-reg move (2). */
535 {2, 3, 2}, /* cost of storing integer registers */
536 4, /* cost of reg,reg fld/fst */
537 {6, 6, 6}, /* cost of loading fp registers
538 in SFmode, DFmode and XFmode */
539 {4, 4, 4}, /* cost of storing fp registers
540 in SFmode, DFmode and XFmode */
541 2, /* cost of moving MMX register */
542 {2, 2}, /* cost of loading MMX registers
543 in SImode and DImode */
544 {2, 2}, /* cost of storing MMX registers
545 in SImode and DImode */
546 2, /* cost of moving SSE register */
547 {2, 2, 8}, /* cost of loading SSE registers
548 in SImode, DImode and TImode */
549 {2, 2, 8}, /* cost of storing SSE registers
550 in SImode, DImode and TImode */
551 6, /* MMX or SSE register to integer */
552 32, /* size of l1 cache. */
553 32, /* size of l2 cache. Some models
554 have integrated l2 cache, but
555 optimizing for k6 is not important
556 enough to worry about that. */
557 32, /* size of prefetch block */
558 1, /* number of parallel prefetches */
560 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
561 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
562 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
563 COSTS_N_INSNS (2), /* cost of FABS instruction. */
564 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
565 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
566 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
567 DUMMY_STRINGOP_ALGS},
568 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
569 DUMMY_STRINGOP_ALGS},
570 1, /* scalar_stmt_cost. */
571 1, /* scalar load_cost. */
572 1, /* scalar_store_cost. */
573 1, /* vec_stmt_cost. */
574 1, /* vec_to_scalar_cost. */
575 1, /* scalar_to_vec_cost. */
576 1, /* vec_align_load_cost. */
577 2, /* vec_unalign_load_cost. */
578 1, /* vec_store_cost. */
579 3, /* cond_taken_branch_cost. */
580 1, /* cond_not_taken_branch_cost. */
/* NOTE(review): positional initializer for `struct processor_costs'
   (declaration not visible in this chunk); field order must match the
   struct declaration exactly.  Line-number gaps and the missing
   closing "};" suggest entries were lost in extraction -- verify
   against the original source.  */
584 struct processor_costs athlon_cost = {
585 COSTS_N_INSNS (1), /* cost of an add instruction */
586 COSTS_N_INSNS (2), /* cost of a lea instruction */
587 COSTS_N_INSNS (1), /* variable shift costs */
588 COSTS_N_INSNS (1), /* constant shift costs */
589 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
590 COSTS_N_INSNS (5), /* HI */
591 COSTS_N_INSNS (5), /* SI */
592 COSTS_N_INSNS (5), /* DI */
593 COSTS_N_INSNS (5)}, /* other */
594 0, /* cost of multiply per each bit set */
595 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
596 COSTS_N_INSNS (26), /* HI */
597 COSTS_N_INSNS (42), /* SI */
598 COSTS_N_INSNS (74), /* DI */
599 COSTS_N_INSNS (74)}, /* other */
600 COSTS_N_INSNS (1), /* cost of movsx */
601 COSTS_N_INSNS (1), /* cost of movzx */
602 8, /* "large" insn */
604 4, /* cost for loading QImode using movzbl */
605 {3, 4, 3}, /* cost of loading integer registers
606 in QImode, HImode and SImode.
607 Relative to reg-reg move (2). */
608 {3, 4, 3}, /* cost of storing integer registers */
609 4, /* cost of reg,reg fld/fst */
610 {4, 4, 12}, /* cost of loading fp registers
611 in SFmode, DFmode and XFmode */
612 {6, 6, 8}, /* cost of storing fp registers
613 in SFmode, DFmode and XFmode */
614 2, /* cost of moving MMX register */
615 {4, 4}, /* cost of loading MMX registers
616 in SImode and DImode */
617 {4, 4}, /* cost of storing MMX registers
618 in SImode and DImode */
619 2, /* cost of moving SSE register */
620 {4, 4, 6}, /* cost of loading SSE registers
621 in SImode, DImode and TImode */
622 {4, 4, 5}, /* cost of storing SSE registers
623 in SImode, DImode and TImode */
624 5, /* MMX or SSE register to integer */
625 64, /* size of l1 cache. */
626 256, /* size of l2 cache. */
627 64, /* size of prefetch block */
628 6, /* number of parallel prefetches */
630 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
631 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
632 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
633 COSTS_N_INSNS (2), /* cost of FABS instruction. */
634 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
635 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
636 /* For some reason, Athlon deals better with REP prefix (relative to loops)
637 compared to K8. Alignment becomes important after 8 bytes for memcpy and
638 128 bytes for memset. */
639 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
640 DUMMY_STRINGOP_ALGS},
641 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
642 DUMMY_STRINGOP_ALGS},
643 1, /* scalar_stmt_cost. */
644 1, /* scalar load_cost. */
645 1, /* scalar_store_cost. */
646 1, /* vec_stmt_cost. */
647 1, /* vec_to_scalar_cost. */
648 1, /* scalar_to_vec_cost. */
649 1, /* vec_align_load_cost. */
650 2, /* vec_unalign_load_cost. */
651 1, /* vec_store_cost. */
652 3, /* cond_taken_branch_cost. */
653 1, /* cond_not_taken_branch_cost. */
/* NOTE(review): positional initializer for `struct processor_costs'
   (declaration not visible in this chunk); field order must match the
   struct declaration exactly.  Line-number gaps, a mid-sentence break
   in the prefetch comment, and the missing closing "};" suggest lines
   were lost in extraction -- verify against the original source.  */
657 struct processor_costs k8_cost = {
658 COSTS_N_INSNS (1), /* cost of an add instruction */
659 COSTS_N_INSNS (2), /* cost of a lea instruction */
660 COSTS_N_INSNS (1), /* variable shift costs */
661 COSTS_N_INSNS (1), /* constant shift costs */
662 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
663 COSTS_N_INSNS (4), /* HI */
664 COSTS_N_INSNS (3), /* SI */
665 COSTS_N_INSNS (4), /* DI */
666 COSTS_N_INSNS (5)}, /* other */
667 0, /* cost of multiply per each bit set */
668 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
669 COSTS_N_INSNS (26), /* HI */
670 COSTS_N_INSNS (42), /* SI */
671 COSTS_N_INSNS (74), /* DI */
672 COSTS_N_INSNS (74)}, /* other */
673 COSTS_N_INSNS (1), /* cost of movsx */
674 COSTS_N_INSNS (1), /* cost of movzx */
675 8, /* "large" insn */
677 4, /* cost for loading QImode using movzbl */
678 {3, 4, 3}, /* cost of loading integer registers
679 in QImode, HImode and SImode.
680 Relative to reg-reg move (2). */
681 {3, 4, 3}, /* cost of storing integer registers */
682 4, /* cost of reg,reg fld/fst */
683 {4, 4, 12}, /* cost of loading fp registers
684 in SFmode, DFmode and XFmode */
685 {6, 6, 8}, /* cost of storing fp registers
686 in SFmode, DFmode and XFmode */
687 2, /* cost of moving MMX register */
688 {3, 3}, /* cost of loading MMX registers
689 in SImode and DImode */
690 {4, 4}, /* cost of storing MMX registers
691 in SImode and DImode */
692 2, /* cost of moving SSE register */
693 {4, 3, 6}, /* cost of loading SSE registers
694 in SImode, DImode and TImode */
695 {4, 4, 5}, /* cost of storing SSE registers
696 in SImode, DImode and TImode */
697 5, /* MMX or SSE register to integer */
698 64, /* size of l1 cache. */
699 512, /* size of l2 cache. */
700 64, /* size of prefetch block */
701 /* New AMD processors never drop prefetches; if they cannot be performed
702 immediately, they are queued. We set number of simultaneous prefetches
703 to a large constant to reflect this (it probably is not a good idea not
704 to limit number of prefetches at all, as their execution also takes some
706 100, /* number of parallel prefetches */
708 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
709 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
710 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
711 COSTS_N_INSNS (2), /* cost of FABS instruction. */
712 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
713 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
714 /* K8 has optimized REP instruction for medium sized blocks, but for very small
715 blocks it is better to use loop. For large blocks, libcall can do
716 nontemporary accesses and beat inline considerably. */
717 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
718 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
719 {{libcall, {{8, loop}, {24, unrolled_loop},
720 {2048, rep_prefix_4_byte}, {-1, libcall}}},
721 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
722 4, /* scalar_stmt_cost. */
723 2, /* scalar load_cost. */
724 2, /* scalar_store_cost. */
725 5, /* vec_stmt_cost. */
726 0, /* vec_to_scalar_cost. */
727 2, /* scalar_to_vec_cost. */
728 2, /* vec_align_load_cost. */
729 3, /* vec_unalign_load_cost. */
730 3, /* vec_store_cost. */
731 3, /* cond_taken_branch_cost. */
732 2, /* cond_not_taken_branch_cost. */
/* NOTE(review): positional initializer for `struct processor_costs'
   (declaration not visible in this chunk); field order must match the
   struct declaration exactly.  Line-number gaps, the orphaned
   "MOVD reg..., xmmreg" lines (apparently the interior of a truncated
   comment about xmm-to-integer move latencies), and the missing
   closing "};" suggest lines were lost in extraction -- verify
   against the original source.  */
735 struct processor_costs amdfam10_cost = {
736 COSTS_N_INSNS (1), /* cost of an add instruction */
737 COSTS_N_INSNS (2), /* cost of a lea instruction */
738 COSTS_N_INSNS (1), /* variable shift costs */
739 COSTS_N_INSNS (1), /* constant shift costs */
740 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
741 COSTS_N_INSNS (4), /* HI */
742 COSTS_N_INSNS (3), /* SI */
743 COSTS_N_INSNS (4), /* DI */
744 COSTS_N_INSNS (5)}, /* other */
745 0, /* cost of multiply per each bit set */
746 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
747 COSTS_N_INSNS (35), /* HI */
748 COSTS_N_INSNS (51), /* SI */
749 COSTS_N_INSNS (83), /* DI */
750 COSTS_N_INSNS (83)}, /* other */
751 COSTS_N_INSNS (1), /* cost of movsx */
752 COSTS_N_INSNS (1), /* cost of movzx */
753 8, /* "large" insn */
755 4, /* cost for loading QImode using movzbl */
756 {3, 4, 3}, /* cost of loading integer registers
757 in QImode, HImode and SImode.
758 Relative to reg-reg move (2). */
759 {3, 4, 3}, /* cost of storing integer registers */
760 4, /* cost of reg,reg fld/fst */
761 {4, 4, 12}, /* cost of loading fp registers
762 in SFmode, DFmode and XFmode */
763 {6, 6, 8}, /* cost of storing fp registers
764 in SFmode, DFmode and XFmode */
765 2, /* cost of moving MMX register */
766 {3, 3}, /* cost of loading MMX registers
767 in SImode and DImode */
768 {4, 4}, /* cost of storing MMX registers
769 in SImode and DImode */
770 2, /* cost of moving SSE register */
771 {4, 4, 3}, /* cost of loading SSE registers
772 in SImode, DImode and TImode */
773 {4, 4, 5}, /* cost of storing SSE registers
774 in SImode, DImode and TImode */
775 3, /* MMX or SSE register to integer */
777 MOVD reg64, xmmreg Double FSTORE 4
778 MOVD reg32, xmmreg Double FSTORE 4
780 MOVD reg64, xmmreg Double FADD 3
782 MOVD reg32, xmmreg Double FADD 3
784 64, /* size of l1 cache. */
785 512, /* size of l2 cache. */
786 64, /* size of prefetch block */
787 /* New AMD processors never drop prefetches; if they cannot be performed
788 immediately, they are queued. We set number of simultaneous prefetches
789 to a large constant to reflect this (it probably is not a good idea not
790 to limit number of prefetches at all, as their execution also takes some
792 100, /* number of parallel prefetches */
794 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
795 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
796 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
797 COSTS_N_INSNS (2), /* cost of FABS instruction. */
798 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
799 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
801 /* AMDFAM10 has optimized REP instruction for medium sized blocks, but for
802 very small blocks it is better to use loop. For large blocks, libcall can
803 do nontemporary accesses and beat inline considerably. */
804 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
805 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
806 {{libcall, {{8, loop}, {24, unrolled_loop},
807 {2048, rep_prefix_4_byte}, {-1, libcall}}},
808 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
809 4, /* scalar_stmt_cost. */
810 2, /* scalar load_cost. */
811 2, /* scalar_store_cost. */
812 6, /* vec_stmt_cost. */
813 0, /* vec_to_scalar_cost. */
814 2, /* scalar_to_vec_cost. */
815 2, /* vec_align_load_cost. */
816 2, /* vec_unalign_load_cost. */
817 2, /* vec_store_cost. */
818 2, /* cond_taken_branch_cost. */
819 1, /* cond_not_taken_branch_cost. */
/* NOTE(review): positional initializer for `struct processor_costs'
   (declaration not visible in this chunk); field order must match the
   struct declaration exactly.  Line-number gaps (including one inside
   the memset stringop table, which is left without its terminating
   {-1, ...} entry here) and the missing closing "};" suggest lines
   were lost in extraction -- verify against the original source.  */
823 struct processor_costs pentium4_cost = {
824 COSTS_N_INSNS (1), /* cost of an add instruction */
825 COSTS_N_INSNS (3), /* cost of a lea instruction */
826 COSTS_N_INSNS (4), /* variable shift costs */
827 COSTS_N_INSNS (4), /* constant shift costs */
828 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
829 COSTS_N_INSNS (15), /* HI */
830 COSTS_N_INSNS (15), /* SI */
831 COSTS_N_INSNS (15), /* DI */
832 COSTS_N_INSNS (15)}, /* other */
833 0, /* cost of multiply per each bit set */
834 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
835 COSTS_N_INSNS (56), /* HI */
836 COSTS_N_INSNS (56), /* SI */
837 COSTS_N_INSNS (56), /* DI */
838 COSTS_N_INSNS (56)}, /* other */
839 COSTS_N_INSNS (1), /* cost of movsx */
840 COSTS_N_INSNS (1), /* cost of movzx */
841 16, /* "large" insn */
843 2, /* cost for loading QImode using movzbl */
844 {4, 5, 4}, /* cost of loading integer registers
845 in QImode, HImode and SImode.
846 Relative to reg-reg move (2). */
847 {2, 3, 2}, /* cost of storing integer registers */
848 2, /* cost of reg,reg fld/fst */
849 {2, 2, 6}, /* cost of loading fp registers
850 in SFmode, DFmode and XFmode */
851 {4, 4, 6}, /* cost of storing fp registers
852 in SFmode, DFmode and XFmode */
853 2, /* cost of moving MMX register */
854 {2, 2}, /* cost of loading MMX registers
855 in SImode and DImode */
856 {2, 2}, /* cost of storing MMX registers
857 in SImode and DImode */
858 12, /* cost of moving SSE register */
859 {12, 12, 12}, /* cost of loading SSE registers
860 in SImode, DImode and TImode */
861 {2, 2, 8}, /* cost of storing SSE registers
862 in SImode, DImode and TImode */
863 10, /* MMX or SSE register to integer */
864 8, /* size of l1 cache. */
865 256, /* size of l2 cache. */
866 64, /* size of prefetch block */
867 6, /* number of parallel prefetches */
869 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
870 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
871 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
872 COSTS_N_INSNS (2), /* cost of FABS instruction. */
873 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
874 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
875 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
876 DUMMY_STRINGOP_ALGS},
877 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
879 DUMMY_STRINGOP_ALGS},
880 1, /* scalar_stmt_cost. */
881 1, /* scalar load_cost. */
882 1, /* scalar_store_cost. */
883 1, /* vec_stmt_cost. */
884 1, /* vec_to_scalar_cost. */
885 1, /* scalar_to_vec_cost. */
886 1, /* vec_align_load_cost. */
887 2, /* vec_unalign_load_cost. */
888 1, /* vec_store_cost. */
889 3, /* cond_taken_branch_cost. */
890 1, /* cond_not_taken_branch_cost. */
894 struct processor_costs nocona_cost = {
895 COSTS_N_INSNS (1), /* cost of an add instruction */
896 COSTS_N_INSNS (1), /* cost of a lea instruction */
897 COSTS_N_INSNS (1), /* variable shift costs */
898 COSTS_N_INSNS (1), /* constant shift costs */
899 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
900 COSTS_N_INSNS (10), /* HI */
901 COSTS_N_INSNS (10), /* SI */
902 COSTS_N_INSNS (10), /* DI */
903 COSTS_N_INSNS (10)}, /* other */
904 0, /* cost of multiply per each bit set */
905 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
906 COSTS_N_INSNS (66), /* HI */
907 COSTS_N_INSNS (66), /* SI */
908 COSTS_N_INSNS (66), /* DI */
909 COSTS_N_INSNS (66)}, /* other */
910 COSTS_N_INSNS (1), /* cost of movsx */
911 COSTS_N_INSNS (1), /* cost of movzx */
912 16, /* "large" insn */
914 4, /* cost for loading QImode using movzbl */
915 {4, 4, 4}, /* cost of loading integer registers
916 in QImode, HImode and SImode.
917 Relative to reg-reg move (2). */
918 {4, 4, 4}, /* cost of storing integer registers */
919 3, /* cost of reg,reg fld/fst */
920 {12, 12, 12}, /* cost of loading fp registers
921 in SFmode, DFmode and XFmode */
922 {4, 4, 4}, /* cost of storing fp registers
923 in SFmode, DFmode and XFmode */
924 6, /* cost of moving MMX register */
925 {12, 12}, /* cost of loading MMX registers
926 in SImode and DImode */
927 {12, 12}, /* cost of storing MMX registers
928 in SImode and DImode */
929 6, /* cost of moving SSE register */
930 {12, 12, 12}, /* cost of loading SSE registers
931 in SImode, DImode and TImode */
932 {12, 12, 12}, /* cost of storing SSE registers
933 in SImode, DImode and TImode */
934 8, /* MMX or SSE register to integer */
935 8, /* size of l1 cache. */
936 1024, /* size of l2 cache. */
937 128, /* size of prefetch block */
938 8, /* number of parallel prefetches */
940 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
941 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
942 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
943 COSTS_N_INSNS (3), /* cost of FABS instruction. */
944 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
945 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
946 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
947 {libcall, {{32, loop}, {20000, rep_prefix_8_byte},
948 {100000, unrolled_loop}, {-1, libcall}}}},
949 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
951 {libcall, {{24, loop}, {64, unrolled_loop},
952 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
953 1, /* scalar_stmt_cost. */
954 1, /* scalar load_cost. */
955 1, /* scalar_store_cost. */
956 1, /* vec_stmt_cost. */
957 1, /* vec_to_scalar_cost. */
958 1, /* scalar_to_vec_cost. */
959 1, /* vec_align_load_cost. */
960 2, /* vec_unalign_load_cost. */
961 1, /* vec_store_cost. */
962 3, /* cond_taken_branch_cost. */
963 1, /* cond_not_taken_branch_cost. */
967 struct processor_costs core2_cost = {
968 COSTS_N_INSNS (1), /* cost of an add instruction */
969 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
970 COSTS_N_INSNS (1), /* variable shift costs */
971 COSTS_N_INSNS (1), /* constant shift costs */
972 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
973 COSTS_N_INSNS (3), /* HI */
974 COSTS_N_INSNS (3), /* SI */
975 COSTS_N_INSNS (3), /* DI */
976 COSTS_N_INSNS (3)}, /* other */
977 0, /* cost of multiply per each bit set */
978 {COSTS_N_INSNS (22), /* cost of a divide/mod for QI */
979 COSTS_N_INSNS (22), /* HI */
980 COSTS_N_INSNS (22), /* SI */
981 COSTS_N_INSNS (22), /* DI */
982 COSTS_N_INSNS (22)}, /* other */
983 COSTS_N_INSNS (1), /* cost of movsx */
984 COSTS_N_INSNS (1), /* cost of movzx */
985 8, /* "large" insn */
987 2, /* cost for loading QImode using movzbl */
988 {6, 6, 6}, /* cost of loading integer registers
989 in QImode, HImode and SImode.
990 Relative to reg-reg move (2). */
991 {4, 4, 4}, /* cost of storing integer registers */
992 2, /* cost of reg,reg fld/fst */
993 {6, 6, 6}, /* cost of loading fp registers
994 in SFmode, DFmode and XFmode */
995 {4, 4, 4}, /* cost of storing fp registers
996 in SFmode, DFmode and XFmode */
997 2, /* cost of moving MMX register */
998 {6, 6}, /* cost of loading MMX registers
999 in SImode and DImode */
1000 {4, 4}, /* cost of storing MMX registers
1001 in SImode and DImode */
1002 2, /* cost of moving SSE register */
1003 {6, 6, 6}, /* cost of loading SSE registers
1004 in SImode, DImode and TImode */
1005 {4, 4, 4}, /* cost of storing SSE registers
1006 in SImode, DImode and TImode */
1007 2, /* MMX or SSE register to integer */
1008 32, /* size of l1 cache. */
1009 2048, /* size of l2 cache. */
1010 128, /* size of prefetch block */
1011 8, /* number of parallel prefetches */
1012 3, /* Branch cost */
1013 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
1014 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
1015 COSTS_N_INSNS (32), /* cost of FDIV instruction. */
1016 COSTS_N_INSNS (1), /* cost of FABS instruction. */
1017 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
1018 COSTS_N_INSNS (58), /* cost of FSQRT instruction. */
1019 {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
1020 {libcall, {{32, loop}, {64, rep_prefix_4_byte},
1021 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1022 {{libcall, {{8, loop}, {15, unrolled_loop},
1023 {2048, rep_prefix_4_byte}, {-1, libcall}}},
1024 {libcall, {{24, loop}, {32, unrolled_loop},
1025 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1026 1, /* scalar_stmt_cost. */
1027 1, /* scalar load_cost. */
1028 1, /* scalar_store_cost. */
1029 1, /* vec_stmt_cost. */
1030 1, /* vec_to_scalar_cost. */
1031 1, /* scalar_to_vec_cost. */
1032 1, /* vec_align_load_cost. */
1033 2, /* vec_unalign_load_cost. */
1034 1, /* vec_store_cost. */
1035 3, /* cond_taken_branch_cost. */
1036 1, /* cond_not_taken_branch_cost. */
1040 struct processor_costs atom_cost = {
1041 COSTS_N_INSNS (1), /* cost of an add instruction */
1042 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1043 COSTS_N_INSNS (1), /* variable shift costs */
1044 COSTS_N_INSNS (1), /* constant shift costs */
1045 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1046 COSTS_N_INSNS (4), /* HI */
1047 COSTS_N_INSNS (3), /* SI */
1048 COSTS_N_INSNS (4), /* DI */
1049 COSTS_N_INSNS (2)}, /* other */
1050 0, /* cost of multiply per each bit set */
1051 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1052 COSTS_N_INSNS (26), /* HI */
1053 COSTS_N_INSNS (42), /* SI */
1054 COSTS_N_INSNS (74), /* DI */
1055 COSTS_N_INSNS (74)}, /* other */
1056 COSTS_N_INSNS (1), /* cost of movsx */
1057 COSTS_N_INSNS (1), /* cost of movzx */
1058 8, /* "large" insn */
1059 17, /* MOVE_RATIO */
1060 2, /* cost for loading QImode using movzbl */
1061 {4, 4, 4}, /* cost of loading integer registers
1062 in QImode, HImode and SImode.
1063 Relative to reg-reg move (2). */
1064 {4, 4, 4}, /* cost of storing integer registers */
1065 4, /* cost of reg,reg fld/fst */
1066 {12, 12, 12}, /* cost of loading fp registers
1067 in SFmode, DFmode and XFmode */
1068 {6, 6, 8}, /* cost of storing fp registers
1069 in SFmode, DFmode and XFmode */
1070 2, /* cost of moving MMX register */
1071 {8, 8}, /* cost of loading MMX registers
1072 in SImode and DImode */
1073 {8, 8}, /* cost of storing MMX registers
1074 in SImode and DImode */
1075 2, /* cost of moving SSE register */
1076 {8, 8, 8}, /* cost of loading SSE registers
1077 in SImode, DImode and TImode */
1078 {8, 8, 8}, /* cost of storing SSE registers
1079 in SImode, DImode and TImode */
1080 5, /* MMX or SSE register to integer */
1081 32, /* size of l1 cache. */
1082 256, /* size of l2 cache. */
1083 64, /* size of prefetch block */
1084 6, /* number of parallel prefetches */
1085 3, /* Branch cost */
1086 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1087 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1088 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1089 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1090 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1091 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1092 {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
1093 {libcall, {{32, loop}, {64, rep_prefix_4_byte},
1094 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1095 {{libcall, {{8, loop}, {15, unrolled_loop},
1096 {2048, rep_prefix_4_byte}, {-1, libcall}}},
1097 {libcall, {{24, loop}, {32, unrolled_loop},
1098 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1099 1, /* scalar_stmt_cost. */
1100 1, /* scalar load_cost. */
1101 1, /* scalar_store_cost. */
1102 1, /* vec_stmt_cost. */
1103 1, /* vec_to_scalar_cost. */
1104 1, /* scalar_to_vec_cost. */
1105 1, /* vec_align_load_cost. */
1106 2, /* vec_unalign_load_cost. */
1107 1, /* vec_store_cost. */
1108 3, /* cond_taken_branch_cost. */
1109 1, /* cond_not_taken_branch_cost. */
1112 /* Generic64 should produce code tuned for Nocona and K8. */
1114 struct processor_costs generic64_cost = {
1115 COSTS_N_INSNS (1), /* cost of an add instruction */
1116 /* On all chips taken into consideration lea is 2 cycles and more. With
1117 this cost however our current implementation of synth_mult results in
1118 use of unnecessary temporary registers causing regression on several
1119 SPECfp benchmarks. */
1120 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1121 COSTS_N_INSNS (1), /* variable shift costs */
1122 COSTS_N_INSNS (1), /* constant shift costs */
1123 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1124 COSTS_N_INSNS (4), /* HI */
1125 COSTS_N_INSNS (3), /* SI */
1126 COSTS_N_INSNS (4), /* DI */
1127 COSTS_N_INSNS (2)}, /* other */
1128 0, /* cost of multiply per each bit set */
1129 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1130 COSTS_N_INSNS (26), /* HI */
1131 COSTS_N_INSNS (42), /* SI */
1132 COSTS_N_INSNS (74), /* DI */
1133 COSTS_N_INSNS (74)}, /* other */
1134 COSTS_N_INSNS (1), /* cost of movsx */
1135 COSTS_N_INSNS (1), /* cost of movzx */
1136 8, /* "large" insn */
1137 17, /* MOVE_RATIO */
1138 4, /* cost for loading QImode using movzbl */
1139 {4, 4, 4}, /* cost of loading integer registers
1140 in QImode, HImode and SImode.
1141 Relative to reg-reg move (2). */
1142 {4, 4, 4}, /* cost of storing integer registers */
1143 4, /* cost of reg,reg fld/fst */
1144 {12, 12, 12}, /* cost of loading fp registers
1145 in SFmode, DFmode and XFmode */
1146 {6, 6, 8}, /* cost of storing fp registers
1147 in SFmode, DFmode and XFmode */
1148 2, /* cost of moving MMX register */
1149 {8, 8}, /* cost of loading MMX registers
1150 in SImode and DImode */
1151 {8, 8}, /* cost of storing MMX registers
1152 in SImode and DImode */
1153 2, /* cost of moving SSE register */
1154 {8, 8, 8}, /* cost of loading SSE registers
1155 in SImode, DImode and TImode */
1156 {8, 8, 8}, /* cost of storing SSE registers
1157 in SImode, DImode and TImode */
1158 5, /* MMX or SSE register to integer */
1159 32, /* size of l1 cache. */
1160 512, /* size of l2 cache. */
1161 64, /* size of prefetch block */
1162 6, /* number of parallel prefetches */
1163 /* Benchmarks shows large regressions on K8 sixtrack benchmark when this value
1164 is increased to perhaps more appropriate value of 5. */
1165 3, /* Branch cost */
1166 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1167 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1168 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1169 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1170 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1171 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1172 {DUMMY_STRINGOP_ALGS,
1173 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1174 {DUMMY_STRINGOP_ALGS,
1175 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1176 1, /* scalar_stmt_cost. */
1177 1, /* scalar load_cost. */
1178 1, /* scalar_store_cost. */
1179 1, /* vec_stmt_cost. */
1180 1, /* vec_to_scalar_cost. */
1181 1, /* scalar_to_vec_cost. */
1182 1, /* vec_align_load_cost. */
1183 2, /* vec_unalign_load_cost. */
1184 1, /* vec_store_cost. */
1185 3, /* cond_taken_branch_cost. */
1186 1, /* cond_not_taken_branch_cost. */
1189 /* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona and K8. */
1191 struct processor_costs generic32_cost = {
1192 COSTS_N_INSNS (1), /* cost of an add instruction */
1193 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1194 COSTS_N_INSNS (1), /* variable shift costs */
1195 COSTS_N_INSNS (1), /* constant shift costs */
1196 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1197 COSTS_N_INSNS (4), /* HI */
1198 COSTS_N_INSNS (3), /* SI */
1199 COSTS_N_INSNS (4), /* DI */
1200 COSTS_N_INSNS (2)}, /* other */
1201 0, /* cost of multiply per each bit set */
1202 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1203 COSTS_N_INSNS (26), /* HI */
1204 COSTS_N_INSNS (42), /* SI */
1205 COSTS_N_INSNS (74), /* DI */
1206 COSTS_N_INSNS (74)}, /* other */
1207 COSTS_N_INSNS (1), /* cost of movsx */
1208 COSTS_N_INSNS (1), /* cost of movzx */
1209 8, /* "large" insn */
1210 17, /* MOVE_RATIO */
1211 4, /* cost for loading QImode using movzbl */
1212 {4, 4, 4}, /* cost of loading integer registers
1213 in QImode, HImode and SImode.
1214 Relative to reg-reg move (2). */
1215 {4, 4, 4}, /* cost of storing integer registers */
1216 4, /* cost of reg,reg fld/fst */
1217 {12, 12, 12}, /* cost of loading fp registers
1218 in SFmode, DFmode and XFmode */
1219 {6, 6, 8}, /* cost of storing fp registers
1220 in SFmode, DFmode and XFmode */
1221 2, /* cost of moving MMX register */
1222 {8, 8}, /* cost of loading MMX registers
1223 in SImode and DImode */
1224 {8, 8}, /* cost of storing MMX registers
1225 in SImode and DImode */
1226 2, /* cost of moving SSE register */
1227 {8, 8, 8}, /* cost of loading SSE registers
1228 in SImode, DImode and TImode */
1229 {8, 8, 8}, /* cost of storing SSE registers
1230 in SImode, DImode and TImode */
1231 5, /* MMX or SSE register to integer */
1232 32, /* size of l1 cache. */
1233 256, /* size of l2 cache. */
1234 64, /* size of prefetch block */
1235 6, /* number of parallel prefetches */
1236 3, /* Branch cost */
1237 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1238 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1239 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1240 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1241 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1242 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1243 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1244 DUMMY_STRINGOP_ALGS},
1245 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1246 DUMMY_STRINGOP_ALGS},
1247 1, /* scalar_stmt_cost. */
1248 1, /* scalar load_cost. */
1249 1, /* scalar_store_cost. */
1250 1, /* vec_stmt_cost. */
1251 1, /* vec_to_scalar_cost. */
1252 1, /* scalar_to_vec_cost. */
1253 1, /* vec_align_load_cost. */
1254 2, /* vec_unalign_load_cost. */
1255 1, /* vec_store_cost. */
1256 3, /* cond_taken_branch_cost. */
1257 1, /* cond_not_taken_branch_cost. */
1260 const struct processor_costs *ix86_cost = &pentium_cost;
/* Processor feature/optimization bitmasks.  Each m_* is a one-bit mask
   keyed by the PROCESSOR_* enumeration; the compound masks group related
   CPU families for use in the tuning tables below.  */
#define m_386 (1<<PROCESSOR_I386)
#define m_486 (1<<PROCESSOR_I486)
#define m_PENT (1<<PROCESSOR_PENTIUM)
#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
#define m_PENT4 (1<<PROCESSOR_PENTIUM4)
#define m_NOCONA (1<<PROCESSOR_NOCONA)
#define m_CORE2 (1<<PROCESSOR_CORE2)
#define m_ATOM (1<<PROCESSOR_ATOM)

#define m_GEODE (1<<PROCESSOR_GEODE)
#define m_K6 (1<<PROCESSOR_K6)
#define m_K6_GEODE (m_K6 | m_GEODE)
#define m_K8 (1<<PROCESSOR_K8)
#define m_ATHLON (1<<PROCESSOR_ATHLON)
#define m_ATHLON_K8 (m_K8 | m_ATHLON)
#define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
#define m_AMD_MULTIPLE (m_K8 | m_ATHLON | m_AMDFAM10)

#define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
#define m_GENERIC64 (1<<PROCESSOR_GENERIC64)

/* Generic instruction choice should be common subset of supported CPUs
   (PPro/PENT4/NOCONA/CORE2/Athlon/K8).  */
#define m_GENERIC (m_GENERIC32 | m_GENERIC64)
1288 /* Feature tests against the various tunings. */
1289 unsigned char ix86_tune_features[X86_TUNE_LAST];
1291 /* Feature tests against the various tunings used to create ix86_tune_features
1292 based on the processor mask. */
1293 static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
1294 /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
1295 negatively, so enabling for Generic64 seems like good code size
1296 tradeoff. We can't enable it for 32bit generic because it does not
1297 work well with PPro base chips. */
1298 m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_CORE2 | m_GENERIC64,
1300 /* X86_TUNE_PUSH_MEMORY */
1301 m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4
1302 | m_NOCONA | m_CORE2 | m_GENERIC,
1304 /* X86_TUNE_ZERO_EXTEND_WITH_AND */
1307 /* X86_TUNE_UNROLL_STRLEN */
1308 m_486 | m_PENT | m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_K6
1309 | m_CORE2 | m_GENERIC,
1311 /* X86_TUNE_DEEP_BRANCH_PREDICTION */
1312 m_ATOM | m_PPRO | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4 | m_GENERIC,
1314 /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
1315 on simulation result. But after P4 was made, no performance benefit
1316 was observed with branch hints. It also increases the code size.
1317 As a result, icc never generates branch hints. */
1320 /* X86_TUNE_DOUBLE_WITH_ADD */
1323 /* X86_TUNE_USE_SAHF */
1324 m_ATOM | m_PPRO | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_PENT4
1325 | m_NOCONA | m_CORE2 | m_GENERIC,
1327 /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
1328 partial dependencies. */
1329 m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_PENT4 | m_NOCONA
1330 | m_CORE2 | m_GENERIC | m_GEODE /* m_386 | m_K6 */,
1332 /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
1333 register stalls on Generic32 compilation setting as well. However
1334 in current implementation the partial register stalls are not eliminated
1335 very well - they can be introduced via subregs synthesized by combine
1336 and can happen in caller/callee saving sequences. Because this option
1337 pays back little on PPro based chips and is in conflict with partial reg
1338 dependencies used by Athlon/P4 based chips, it is better to leave it off
1339 for generic32 for now. */
1342 /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
1343 m_CORE2 | m_GENERIC,
1345 /* X86_TUNE_USE_HIMODE_FIOP */
1346 m_386 | m_486 | m_K6_GEODE,
1348 /* X86_TUNE_USE_SIMODE_FIOP */
1349 ~(m_PPRO | m_AMD_MULTIPLE | m_PENT | m_ATOM | m_CORE2 | m_GENERIC),
1351 /* X86_TUNE_USE_MOV0 */
1354 /* X86_TUNE_USE_CLTD */
1355 ~(m_PENT | m_ATOM | m_K6 | m_CORE2 | m_GENERIC),
1357 /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */
1360 /* X86_TUNE_SPLIT_LONG_MOVES */
1363 /* X86_TUNE_READ_MODIFY_WRITE */
1366 /* X86_TUNE_READ_MODIFY */
1369 /* X86_TUNE_PROMOTE_QIMODE */
1370 m_K6_GEODE | m_PENT | m_ATOM | m_386 | m_486 | m_AMD_MULTIPLE
1371 | m_CORE2 | m_GENERIC /* | m_PENT4 ? */,
1373 /* X86_TUNE_FAST_PREFIX */
1374 ~(m_PENT | m_486 | m_386),
1376 /* X86_TUNE_SINGLE_STRINGOP */
1377 m_386 | m_PENT4 | m_NOCONA,
1379 /* X86_TUNE_QIMODE_MATH */
1382 /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
1383 register stalls. Just like X86_TUNE_PARTIAL_REG_STALL this option
1384 might be considered for Generic32 if our scheme for avoiding partial
1385 stalls was more effective. */
1388 /* X86_TUNE_PROMOTE_QI_REGS */
1391 /* X86_TUNE_PROMOTE_HI_REGS */
1394 /* X86_TUNE_ADD_ESP_4: Enable if add/sub is preferred over 1/2 push/pop. */
1395 m_ATOM | m_AMD_MULTIPLE | m_K6_GEODE | m_PENT4 | m_NOCONA
1396 | m_CORE2 | m_GENERIC,
1398 /* X86_TUNE_ADD_ESP_8 */
1399 m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_K6_GEODE | m_386
1400 | m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1402 /* X86_TUNE_SUB_ESP_4 */
1403 m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_PENT4 | m_NOCONA | m_CORE2
1406 /* X86_TUNE_SUB_ESP_8 */
1407 m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_386 | m_486
1408 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1410 /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
1411 for DFmode copies */
1412 ~(m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
1413 | m_GENERIC | m_GEODE),
1415 /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
1416 m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1418 /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
1419 conflict here in between PPro/Pentium4 based chips that thread 128bit
1420 SSE registers as single units versus K8 based chips that divide SSE
1421 registers to two 64bit halves. This knob promotes all store destinations
1422 to be 128bit to allow register renaming on 128bit SSE units, but usually
1423 results in one extra microop on 64bit SSE units. Experimental results
1424 shows that disabling this option on P4 brings over 20% SPECfp regression,
1425 while enabling it on K8 brings roughly 2.4% regression that can be partly
1426 masked by careful scheduling of moves. */
1427 m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC
1430 /* X86_TUNE_SSE_UNALIGNED_MOVE_OPTIMAL */
1433 /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
1434 are resolved on SSE register parts instead of whole registers, so we may
1435 maintain just lower part of scalar values in proper format leaving the
1436 upper part undefined. */
1439 /* X86_TUNE_SSE_TYPELESS_STORES */
1442 /* X86_TUNE_SSE_LOAD0_BY_PXOR */
1443 m_PPRO | m_PENT4 | m_NOCONA,
1445 /* X86_TUNE_MEMORY_MISMATCH_STALL */
1446 m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1448 /* X86_TUNE_PROLOGUE_USING_MOVE */
1449 m_ATHLON_K8 | m_ATOM | m_PPRO | m_CORE2 | m_GENERIC,
1451 /* X86_TUNE_EPILOGUE_USING_MOVE */
1452 m_ATHLON_K8 | m_ATOM | m_PPRO | m_CORE2 | m_GENERIC,
1454 /* X86_TUNE_SHIFT1 */
1457 /* X86_TUNE_USE_FFREEP */
1460 /* X86_TUNE_INTER_UNIT_MOVES */
1461 ~(m_AMD_MULTIPLE | m_GENERIC),
1463 /* X86_TUNE_INTER_UNIT_CONVERSIONS */
1466 /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
1467 than 4 branch instructions in the 16 byte window. */
1468 m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2
1471 /* X86_TUNE_SCHEDULE */
1472 m_PPRO | m_AMD_MULTIPLE | m_K6_GEODE | m_PENT | m_ATOM | m_CORE2
1475 /* X86_TUNE_USE_BT */
1476 m_AMD_MULTIPLE | m_ATOM | m_CORE2 | m_GENERIC,
1478 /* X86_TUNE_USE_INCDEC */
1479 ~(m_PENT4 | m_NOCONA | m_GENERIC | m_ATOM),
1481 /* X86_TUNE_PAD_RETURNS */
1482 m_AMD_MULTIPLE | m_CORE2 | m_GENERIC,
1484 /* X86_TUNE_EXT_80387_CONSTANTS */
1485 m_K6_GEODE | m_ATHLON_K8 | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO
1486 | m_CORE2 | m_GENERIC,
1488 /* X86_TUNE_SHORTEN_X87_SSE */
1491 /* X86_TUNE_AVOID_VECTOR_DECODE */
1494 /* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have same latency for HImode
1495 and SImode multiply, but 386 and 486 do HImode multiply faster. */
1498 /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of 32-bit constant and memory is
1499 vector path on AMD machines. */
1500 m_K8 | m_GENERIC64 | m_AMDFAM10,
1502 /* X86_TUNE_SLOW_IMUL_IMM8: Imul of 8-bit constant is vector path on AMD
1504 m_K8 | m_GENERIC64 | m_AMDFAM10,
1506 /* X86_TUNE_MOVE_M1_VIA_OR: On pentiums, it is faster to load -1 via OR
1510 /* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is,
1511 but one byte longer. */
1514 /* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector decoded with memory
1515 operand that cannot be represented using a modRM byte. The XOR
1516 replacement is long decoded, so this split helps here as well. */
1519 /* X86_TUNE_USE_VECTOR_FP_CONVERTS: Prefer vector packed SSE conversion
1521 m_AMDFAM10 | m_GENERIC,
1523 /* X86_TUNE_USE_VECTOR_CONVERTS: Prefer vector packed SSE conversion
1524 from integer to FP. */
1527 /* X86_TUNE_FUSE_CMP_AND_BRANCH: Fuse a compare or test instruction
1528 with a subsequent conditional jump instruction into a single
1529 compare-and-branch uop. */
1532 /* X86_TUNE_OPT_AGU: Optimize for Address Generation Unit. This flag
1533 will impact LEA instruction selection. */
1537 /* Feature tests against the various architecture variations. */
1538 unsigned char ix86_arch_features[X86_ARCH_LAST];
1540 /* Feature tests against the various architecture variations, used to create
1541 ix86_arch_features based on the processor mask. */
1542 static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
1543 /* X86_ARCH_CMOVE: Conditional move was added for pentiumpro. */
1544 ~(m_386 | m_486 | m_PENT | m_K6),
1546 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
1549 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
1552 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
1555 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
1559 static const unsigned int x86_accumulate_outgoing_args
1560 = m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
1563 static const unsigned int x86_arch_always_fancy_math_387
1564 = m_PENT | m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_PENT4
1565 | m_NOCONA | m_CORE2 | m_GENERIC;
1567 static enum stringop_alg stringop_alg = no_stringop;
/* In case the average insn count for single function invocation is
   lower than this constant, emit fast (but longer) prologue and
   epilogue code.  */
#define FAST_PROLOGUE_INSN_COUNT 20
1574 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
1575 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
1576 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
1577 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
1579 /* Array of the smallest class containing reg number REGNO, indexed by
1580 REGNO. Used by REGNO_REG_CLASS in i386.h. */
1582 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
1584 /* ax, dx, cx, bx */
1585 AREG, DREG, CREG, BREG,
1586 /* si, di, bp, sp */
1587 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
1589 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
1590 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
1593 /* flags, fpsr, fpcr, frame */
1594 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
1596 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1599 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
1602 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1603 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1604 /* SSE REX registers */
1605 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1609 /* The "default" register map used in 32bit mode. */
1611 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
1613 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
1614 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
1615 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1616 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
1617 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
1618 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1619 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1622 /* The "default" register map used in 64bit mode. */
1624 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
1626 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
1627 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
1628 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1629 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
1630 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
1631 8,9,10,11,12,13,14,15, /* extended integer registers */
1632 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
1635 /* Define the register numbers to be used in Dwarf debugging information.
1636 The SVR4 reference port C compiler uses the following register numbers
1637 in its Dwarf output code:
1638 0 for %eax (gcc regno = 0)
1639 1 for %ecx (gcc regno = 2)
1640 2 for %edx (gcc regno = 1)
1641 3 for %ebx (gcc regno = 3)
1642 4 for %esp (gcc regno = 7)
1643 5 for %ebp (gcc regno = 6)
1644 6 for %esi (gcc regno = 4)
1645 7 for %edi (gcc regno = 5)
1646 The following three DWARF register numbers are never generated by
1647 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
1648 believes these numbers have these meanings.
1649 8 for %eip (no gcc equivalent)
1650 9 for %eflags (gcc regno = 17)
1651 10 for %trapno (no gcc equivalent)
1652 It is not at all clear how we should number the FP stack registers
1653 for the x86 architecture. If the version of SDB on x86/svr4 were
1654 a bit less brain dead with respect to floating-point then we would
1655 have a precedent to follow with respect to DWARF register numbers
1656 for x86 FP registers, but the SDB on x86/svr4 is so completely
1657 broken with respect to FP registers that it is hardly worth thinking
1658 of it as something to strive for compatibility with.
1659 The version of x86/svr4 SDB I have at the moment does (partially)
1660 seem to believe that DWARF register number 11 is associated with
1661 the x86 register %st(0), but that's about all. Higher DWARF
1662 register numbers don't seem to be associated with anything in
1663 particular, and even for DWARF regno 11, SDB only seems to under-
1664 stand that it should say that a variable lives in %st(0) (when
1665 asked via an `=' command) if we said it was in DWARF regno 11,
1666 but SDB still prints garbage when asked for the value of the
1667 variable in question (via a `/' command).
1668 (Also note that the labels SDB prints for various FP stack regs
1669 when doing an `x' command are all wrong.)
1670 Note that these problems generally don't affect the native SVR4
1671 C compiler because it doesn't allow the use of -O with -g and
1672 because when it is *not* optimizing, it allocates a memory
1673 location for each floating-point variable, and the memory
1674 location is what gets described in the DWARF AT_location
1675 attribute for the variable in question.
1676 Regardless of the severe mental illness of the x86/svr4 SDB, we
1677 do something sensible here and we use the following DWARF
1678 register numbers. Note that these are all stack-top-relative
1680 11 for %st(0) (gcc regno = 8)
1681 12 for %st(1) (gcc regno = 9)
1682 13 for %st(2) (gcc regno = 10)
1683 14 for %st(3) (gcc regno = 11)
1684 15 for %st(4) (gcc regno = 12)
1685 16 for %st(5) (gcc regno = 13)
1686 17 for %st(6) (gcc regno = 14)
1687 18 for %st(7) (gcc regno = 15)
1689 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
1691 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
1692 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
1693 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1694 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
1695 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
1696 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1697 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1700 /* Test and compare insns in i386.md store the information needed to
1701 generate branch and scc insns here. */
/* The two operands of the most recently expanded compare/test; both
   start out as NULL_RTX, i.e. no comparison has been recorded yet.  */
1703 rtx ix86_compare_op0 = NULL_RTX;
1704 rtx ix86_compare_op1 = NULL_RTX;
1706 /* Define parameter passing and return registers. */
1708 static int const x86_64_int_parameter_registers[6] =
1710 DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
1713 static int const x86_64_ms_abi_int_parameter_registers[4] =
1715 CX_REG, DX_REG, R8_REG, R9_REG
1718 static int const x86_64_int_return_registers[4] =
1720 AX_REG, DX_REG, DI_REG, SI_REG
1723 /* Define the structure for the machine field in struct function. */
1725 struct GTY(()) stack_local_entry {
1726 unsigned short mode;
1729 struct stack_local_entry *next;
1732 /* Structure describing stack frame layout.
1733 Stack grows downward:
1739 saved frame pointer if frame_pointer_needed
1740 <- HARD_FRAME_POINTER
1749 [va_arg registers] (
1750 > to_allocate <- FRAME_POINTER
1762 HOST_WIDE_INT frame;
1764 int outgoing_arguments_size;
1767 HOST_WIDE_INT to_allocate;
1768 /* The offsets relative to ARG_POINTER. */
1769 HOST_WIDE_INT frame_pointer_offset;
1770 HOST_WIDE_INT hard_frame_pointer_offset;
1771 HOST_WIDE_INT stack_pointer_offset;
1773 /* When save_regs_using_mov is set, emit prologue using
1774 move instead of push instructions. */
1775 bool save_regs_using_mov;
1778 /* Code model option (-mcmodel=).  */
1779 enum cmodel ix86_cmodel;
/* Assembler dialect to emit; defaults to AT&T syntax.  */
1781 enum asm_dialect ix86_asm_dialect = ASM_ATT;
/* TLS access dialect; defaults to the GNU dialect.  */
1783 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
1785 /* Which unit we are generating floating point math for. */
1786 enum fpmath_unit ix86_fpmath;
1788 /* Which cpu are we scheduling for. */
1789 enum attr_cpu ix86_schedule;
1791 /* Which cpu are we optimizing for. */
1792 enum processor_type ix86_tune;
1794 /* Which instruction set architecture to use. */
1795 enum processor_type ix86_arch;
1797 /* true if sse prefetch instruction is not NOOP. */
1798 int x86_prefetch_sse;
1800 /* ix86_regparm_string as a number */
1801 static int ix86_regparm;
1803 /* -mstackrealign option */
1804 extern int ix86_force_align_arg_pointer;
/* Literal name string corresponding to the flag above; how it is
   consumed (attribute lookup, presumably) is not visible here.  */
1805 static const char ix86_force_align_arg_pointer_string[]
1806 = "force_align_arg_pointer";
/* Indirect pointers to insn-pattern generator functions.
   NOTE(review): presumably these are pointed at the SImode or DImode
   pattern variants depending on word size during option processing;
   the assignments are not visible in this chunk -- confirm there.  */
1808 static rtx (*ix86_gen_leave) (void);
1809 static rtx (*ix86_gen_pop1) (rtx);
1810 static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
1811 static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
1812 static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx, rtx);
1813 static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
1814 static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
1815 static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
1817 /* Preferred alignment for stack boundary in bits. */
1818 unsigned int ix86_preferred_stack_boundary;
1820 /* Alignment for incoming stack boundary in bits specified at
   the command line.  */
1822 static unsigned int ix86_user_incoming_stack_boundary;
1824 /* Default alignment for incoming stack boundary in bits. */
1825 static unsigned int ix86_default_incoming_stack_boundary;
1827 /* Alignment for incoming stack boundary in bits. */
1828 unsigned int ix86_incoming_stack_boundary;
1830 /* The abi used by target. */
1831 enum calling_abi ix86_abi;
1833 /* Values 1-5: see jump.c */
1834 int ix86_branch_cost;
1836 /* Calling abi specific va_list type nodes. */
1837 static GTY(()) tree sysv_va_list_type_node;
1838 static GTY(()) tree ms_va_list_type_node;
1840 /* Variables which are this size or smaller are put in the data/bss
1841 or ldata/lbss sections. */
1843 int ix86_section_threshold = 65536;
1845 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
1846 char internal_label_prefix[16];
/* Length of internal_label_prefix, cached so it need not be
   recomputed with strlen.  */
1847 int internal_label_prefix_len;
1849 /* Fence to use after loop using movnt. */
1852 /* Register class used for passing given 64bit part of the argument.
1853 These represent classes as documented by the PS ABI, with the exception
1854 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
1855 use SF or DFmode move instead of DImode to avoid reformatting penalties.
1857 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
1858 whenever possible (upper half does contain padding). */
1859 enum x86_64_reg_class
1862 X86_64_INTEGER_CLASS,
1863 X86_64_INTEGERSI_CLASS,
1870 X86_64_COMPLEX_X87_CLASS,
1874 #define MAX_CLASSES 4
1876 /* Table of constants used by fldpi, fldln2, etc....  (5 entries,
   one per special 80387 load-constant instruction). */
1877 static REAL_VALUE_TYPE ext_80387_constants_table [5];
/* Lazy-initialization flag for the table above; starts out false.  */
1878 static bool ext_80387_constants_init = 0;
1881 static struct machine_function * ix86_init_machine_status (void);
1882 static rtx ix86_function_value (const_tree, const_tree, bool);
1883 static bool ix86_function_value_regno_p (const unsigned int);
1884 static rtx ix86_static_chain (const_tree, bool);
1885 static int ix86_function_regparm (const_tree, const_tree);
1886 static void ix86_compute_frame_layout (struct ix86_frame *);
1887 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
1889 static void ix86_add_new_builtins (int);
1890 static rtx ix86_expand_vec_perm_builtin (tree);
1892 enum ix86_function_specific_strings
1894 IX86_FUNCTION_SPECIFIC_ARCH,
1895 IX86_FUNCTION_SPECIFIC_TUNE,
1896 IX86_FUNCTION_SPECIFIC_FPMATH,
1897 IX86_FUNCTION_SPECIFIC_MAX
1900 static char *ix86_target_string (int, int, const char *, const char *,
1901 const char *, bool);
1902 static void ix86_debug_options (void) ATTRIBUTE_UNUSED;
1903 static void ix86_function_specific_save (struct cl_target_option *);
1904 static void ix86_function_specific_restore (struct cl_target_option *);
1905 static void ix86_function_specific_print (FILE *, int,
1906 struct cl_target_option *);
1907 static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
1908 static bool ix86_valid_target_attribute_inner_p (tree, char *[]);
1909 static bool ix86_can_inline_p (tree, tree);
1910 static void ix86_set_current_function (tree);
1911 static unsigned int ix86_minimum_incoming_stack_boundary (bool);
1913 static enum calling_abi ix86_function_abi (const_tree);
1916 #ifndef SUBTARGET32_DEFAULT_CPU
1917 #define SUBTARGET32_DEFAULT_CPU "i386"
1920 /* The svr4 ABI for the i386 says that records and unions are returned
   in memory.  */
1922 #ifndef DEFAULT_PCC_STRUCT_RETURN
1923 #define DEFAULT_PCC_STRUCT_RETURN 1
1926 /* Whether -mtune= or -march= were specified on the command line.  */
1927 static int ix86_tune_defaulted;
1928 static int ix86_arch_specified;
1930 /* Bit flags that specify the ISA we are compiling for. */
1931 int ix86_isa_flags = TARGET_64BIT_DEFAULT | TARGET_SUBTARGET_ISA_DEFAULT;
1933 /* A mask of ix86_isa_flags that includes bit X if X
1934 was set or cleared on the command line (as opposed to being
   implied by -march= defaults).  */
1935 static int ix86_isa_flags_explicit;
1937 /* Define a set of ISAs which are available when a given ISA is
1938 enabled. MMX and SSE ISAs are handled separately. */
1940 #define OPTION_MASK_ISA_MMX_SET OPTION_MASK_ISA_MMX
1941 #define OPTION_MASK_ISA_3DNOW_SET \
1942 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_MMX_SET)
1944 #define OPTION_MASK_ISA_SSE_SET OPTION_MASK_ISA_SSE
1945 #define OPTION_MASK_ISA_SSE2_SET \
1946 (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE_SET)
1947 #define OPTION_MASK_ISA_SSE3_SET \
1948 (OPTION_MASK_ISA_SSE3 | OPTION_MASK_ISA_SSE2_SET)
1949 #define OPTION_MASK_ISA_SSSE3_SET \
1950 (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE3_SET)
1951 #define OPTION_MASK_ISA_SSE4_1_SET \
1952 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSSE3_SET)
1953 #define OPTION_MASK_ISA_SSE4_2_SET \
1954 (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_SSE4_1_SET)
1955 #define OPTION_MASK_ISA_AVX_SET \
1956 (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_SSE4_2_SET)
1957 #define OPTION_MASK_ISA_FMA_SET \
1958 (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_AVX_SET)
1960 /* SSE4 includes both SSE4.1 and SSE4.2. -msse4 should be the same
   as -msse4.2.  */
1962 #define OPTION_MASK_ISA_SSE4_SET OPTION_MASK_ISA_SSE4_2_SET
1964 #define OPTION_MASK_ISA_SSE4A_SET \
1965 (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_SSE3_SET)
1966 #define OPTION_MASK_ISA_FMA4_SET \
1967 (OPTION_MASK_ISA_FMA4 | OPTION_MASK_ISA_SSE4A_SET \
1968 | OPTION_MASK_ISA_AVX_SET)
1969 #define OPTION_MASK_ISA_XOP_SET \
1970 (OPTION_MASK_ISA_XOP | OPTION_MASK_ISA_FMA4_SET)
1971 #define OPTION_MASK_ISA_LWP_SET \
1974 /* AES and PCLMUL need SSE2 because they use xmm registers */
1975 #define OPTION_MASK_ISA_AES_SET \
1976 (OPTION_MASK_ISA_AES | OPTION_MASK_ISA_SSE2_SET)
1977 #define OPTION_MASK_ISA_PCLMUL_SET \
1978 (OPTION_MASK_ISA_PCLMUL | OPTION_MASK_ISA_SSE2_SET)
1980 #define OPTION_MASK_ISA_ABM_SET \
1981 (OPTION_MASK_ISA_ABM | OPTION_MASK_ISA_POPCNT)
1983 #define OPTION_MASK_ISA_POPCNT_SET OPTION_MASK_ISA_POPCNT
1984 #define OPTION_MASK_ISA_CX16_SET OPTION_MASK_ISA_CX16
1985 #define OPTION_MASK_ISA_SAHF_SET OPTION_MASK_ISA_SAHF
1986 #define OPTION_MASK_ISA_MOVBE_SET OPTION_MASK_ISA_MOVBE
1987 #define OPTION_MASK_ISA_CRC32_SET OPTION_MASK_ISA_CRC32
1989 /* Define a set of ISAs which aren't available when a given ISA is
1990 disabled. MMX and SSE ISAs are handled separately. */
1992 #define OPTION_MASK_ISA_MMX_UNSET \
1993 (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_3DNOW_UNSET)
1994 #define OPTION_MASK_ISA_3DNOW_UNSET \
1995 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_3DNOW_A_UNSET)
1996 #define OPTION_MASK_ISA_3DNOW_A_UNSET OPTION_MASK_ISA_3DNOW_A
1998 #define OPTION_MASK_ISA_SSE_UNSET \
1999 (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_SSE2_UNSET)
2000 #define OPTION_MASK_ISA_SSE2_UNSET \
2001 (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE3_UNSET)
2002 #define OPTION_MASK_ISA_SSE3_UNSET \
2003 (OPTION_MASK_ISA_SSE3 \
2004 | OPTION_MASK_ISA_SSSE3_UNSET \
2005 | OPTION_MASK_ISA_SSE4A_UNSET )
2006 #define OPTION_MASK_ISA_SSSE3_UNSET \
2007 (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE4_1_UNSET)
2008 #define OPTION_MASK_ISA_SSE4_1_UNSET \
2009 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4_2_UNSET)
2010 #define OPTION_MASK_ISA_SSE4_2_UNSET \
2011 (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_AVX_UNSET )
2012 #define OPTION_MASK_ISA_AVX_UNSET \
2013 (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_FMA_UNSET \
2014 | OPTION_MASK_ISA_FMA4_UNSET)
2015 #define OPTION_MASK_ISA_FMA_UNSET OPTION_MASK_ISA_FMA
2017 /* SSE4 includes both SSE4.1 and SSE4.2. -mno-sse4 should be the same
   as -mno-sse4.1.  */
2019 #define OPTION_MASK_ISA_SSE4_UNSET OPTION_MASK_ISA_SSE4_1_UNSET
2021 #define OPTION_MASK_ISA_SSE4A_UNSET \
2022 (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_FMA4_UNSET)
2024 #define OPTION_MASK_ISA_FMA4_UNSET \
2025 (OPTION_MASK_ISA_FMA4 | OPTION_MASK_ISA_XOP_UNSET)
2026 #define OPTION_MASK_ISA_XOP_UNSET OPTION_MASK_ISA_XOP
2027 #define OPTION_MASK_ISA_LWP_UNSET OPTION_MASK_ISA_LWP
2029 #define OPTION_MASK_ISA_AES_UNSET OPTION_MASK_ISA_AES
2030 #define OPTION_MASK_ISA_PCLMUL_UNSET OPTION_MASK_ISA_PCLMUL
2031 #define OPTION_MASK_ISA_ABM_UNSET OPTION_MASK_ISA_ABM
2032 #define OPTION_MASK_ISA_POPCNT_UNSET OPTION_MASK_ISA_POPCNT
2033 #define OPTION_MASK_ISA_CX16_UNSET OPTION_MASK_ISA_CX16
2034 #define OPTION_MASK_ISA_SAHF_UNSET OPTION_MASK_ISA_SAHF
2035 #define OPTION_MASK_ISA_MOVBE_UNSET OPTION_MASK_ISA_MOVBE
2036 #define OPTION_MASK_ISA_CRC32_UNSET OPTION_MASK_ISA_CRC32
2038 /* Vectorization library interface and handlers. */
2039 tree (*ix86_veclib_handler)(enum built_in_function, tree, tree) = NULL;
2040 static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
2041 static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
2043 /* Processor target table, indexed by processor number */
2046 const struct processor_costs *cost; /* Processor costs */
2047 const int align_loop; /* Default alignments. */
2048 const int align_loop_max_skip;
2049 const int align_jump;
2050 const int align_jump_max_skip;
2051 const int align_func;
2054 static const struct ptt processor_target_table[PROCESSOR_max] =
2056 {&i386_cost, 4, 3, 4, 3, 4},
2057 {&i486_cost, 16, 15, 16, 15, 16},
2058 {&pentium_cost, 16, 7, 16, 7, 16},
2059 {&pentiumpro_cost, 16, 15, 16, 10, 16},
2060 {&geode_cost, 0, 0, 0, 0, 0},
2061 {&k6_cost, 32, 7, 32, 7, 32},
2062 {&athlon_cost, 16, 7, 16, 7, 16},
2063 {&pentium4_cost, 0, 0, 0, 0, 0},
2064 {&k8_cost, 16, 7, 16, 7, 16},
2065 {&nocona_cost, 0, 0, 0, 0, 0},
2066 {&core2_cost, 16, 10, 16, 10, 16},
2067 {&generic32_cost, 16, 7, 16, 7, 16},
2068 {&generic64_cost, 16, 10, 16, 10, 16},
2069 {&amdfam10_cost, 32, 24, 32, 7, 32},
2070 {&atom_cost, 16, 7, 16, 7, 16}
2073 static const char *const cpu_names[TARGET_CPU_DEFAULT_max] =
2099 /* Implement TARGET_HANDLE_OPTION. */
2102 ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
2109 ix86_isa_flags |= OPTION_MASK_ISA_MMX_SET;
2110 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_SET;
2114 ix86_isa_flags &= ~OPTION_MASK_ISA_MMX_UNSET;
2115 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_UNSET;
2122 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_SET;
2123 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_SET;
2127 ix86_isa_flags &= ~OPTION_MASK_ISA_3DNOW_UNSET;
2128 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_UNSET;
2138 ix86_isa_flags |= OPTION_MASK_ISA_SSE_SET;
2139 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_SET;
2143 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE_UNSET;
2144 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_UNSET;
2151 ix86_isa_flags |= OPTION_MASK_ISA_SSE2_SET;
2152 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_SET;
2156 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE2_UNSET;
2157 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_UNSET;
2164 ix86_isa_flags |= OPTION_MASK_ISA_SSE3_SET;
2165 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_SET;
2169 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE3_UNSET;
2170 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_UNSET;
2177 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3_SET;
2178 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_SET;
2182 ix86_isa_flags &= ~OPTION_MASK_ISA_SSSE3_UNSET;
2183 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_UNSET;
2190 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1_SET;
2191 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_SET;
2195 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_1_UNSET;
2196 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_UNSET;
2203 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2_SET;
2204 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_SET;
2208 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_2_UNSET;
2209 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_UNSET;
2216 ix86_isa_flags |= OPTION_MASK_ISA_AVX_SET;
2217 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX_SET;
2221 ix86_isa_flags &= ~OPTION_MASK_ISA_AVX_UNSET;
2222 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX_UNSET;
2229 ix86_isa_flags |= OPTION_MASK_ISA_FMA_SET;
2230 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA_SET;
2234 ix86_isa_flags &= ~OPTION_MASK_ISA_FMA_UNSET;
2235 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA_UNSET;
2240 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_SET;
2241 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_SET;
2245 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_UNSET;
2246 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_UNSET;
2252 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A_SET;
2253 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_SET;
2257 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4A_UNSET;
2258 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_UNSET;
2265 ix86_isa_flags |= OPTION_MASK_ISA_FMA4_SET;
2266 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA4_SET;
2270 ix86_isa_flags &= ~OPTION_MASK_ISA_FMA4_UNSET;
2271 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA4_UNSET;
2278 ix86_isa_flags |= OPTION_MASK_ISA_XOP_SET;
2279 ix86_isa_flags_explicit |= OPTION_MASK_ISA_XOP_SET;
2283 ix86_isa_flags &= ~OPTION_MASK_ISA_XOP_UNSET;
2284 ix86_isa_flags_explicit |= OPTION_MASK_ISA_XOP_UNSET;
2291 ix86_isa_flags |= OPTION_MASK_ISA_LWP_SET;
2292 ix86_isa_flags_explicit |= OPTION_MASK_ISA_LWP_SET;
2296 ix86_isa_flags &= ~OPTION_MASK_ISA_LWP_UNSET;
2297 ix86_isa_flags_explicit |= OPTION_MASK_ISA_LWP_UNSET;
2304 ix86_isa_flags |= OPTION_MASK_ISA_ABM_SET;
2305 ix86_isa_flags_explicit |= OPTION_MASK_ISA_ABM_SET;
2309 ix86_isa_flags &= ~OPTION_MASK_ISA_ABM_UNSET;
2310 ix86_isa_flags_explicit |= OPTION_MASK_ISA_ABM_UNSET;
2317 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT_SET;
2318 ix86_isa_flags_explicit |= OPTION_MASK_ISA_POPCNT_SET;
2322 ix86_isa_flags &= ~OPTION_MASK_ISA_POPCNT_UNSET;
2323 ix86_isa_flags_explicit |= OPTION_MASK_ISA_POPCNT_UNSET;
2330 ix86_isa_flags |= OPTION_MASK_ISA_SAHF_SET;
2331 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SAHF_SET;
2335 ix86_isa_flags &= ~OPTION_MASK_ISA_SAHF_UNSET;
2336 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SAHF_UNSET;
2343 ix86_isa_flags |= OPTION_MASK_ISA_CX16_SET;
2344 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CX16_SET;
2348 ix86_isa_flags &= ~OPTION_MASK_ISA_CX16_UNSET;
2349 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CX16_UNSET;
2356 ix86_isa_flags |= OPTION_MASK_ISA_MOVBE_SET;
2357 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MOVBE_SET;
2361 ix86_isa_flags &= ~OPTION_MASK_ISA_MOVBE_UNSET;
2362 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MOVBE_UNSET;
2369 ix86_isa_flags |= OPTION_MASK_ISA_CRC32_SET;
2370 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CRC32_SET;
2374 ix86_isa_flags &= ~OPTION_MASK_ISA_CRC32_UNSET;
2375 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CRC32_UNSET;
2382 ix86_isa_flags |= OPTION_MASK_ISA_AES_SET;
2383 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AES_SET;
2387 ix86_isa_flags &= ~OPTION_MASK_ISA_AES_UNSET;
2388 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AES_UNSET;
2395 ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL_SET;
2396 ix86_isa_flags_explicit |= OPTION_MASK_ISA_PCLMUL_SET;
2400 ix86_isa_flags &= ~OPTION_MASK_ISA_PCLMUL_UNSET;
2401 ix86_isa_flags_explicit |= OPTION_MASK_ISA_PCLMUL_UNSET;
2410 /* Return a string that documents the current -m options. The caller is
2411 responsible for freeing the string. */
2414 ix86_target_string (int isa, int flags, const char *arch, const char *tune,
2415 const char *fpmath, bool add_nl_p)
2417 struct ix86_target_opts
2419 const char *option; /* option string */
2420 int mask; /* isa mask options */
2423 /* This table is ordered so that options like -msse4.2 that imply
2424 preceding options are matched (and thus masked out) first. */
2425 static struct ix86_target_opts isa_opts[] =
2427 { "-m64", OPTION_MASK_ISA_64BIT },
2428 { "-mfma4", OPTION_MASK_ISA_FMA4 },
2429 { "-mfma", OPTION_MASK_ISA_FMA },
2430 { "-mxop", OPTION_MASK_ISA_XOP },
2431 { "-mlwp", OPTION_MASK_ISA_LWP },
2432 { "-msse4a", OPTION_MASK_ISA_SSE4A },
2433 { "-msse4.2", OPTION_MASK_ISA_SSE4_2 },
2434 { "-msse4.1", OPTION_MASK_ISA_SSE4_1 },
2435 { "-mssse3", OPTION_MASK_ISA_SSSE3 },
2436 { "-msse3", OPTION_MASK_ISA_SSE3 },
2437 { "-msse2", OPTION_MASK_ISA_SSE2 },
2438 { "-msse", OPTION_MASK_ISA_SSE },
2439 { "-m3dnow", OPTION_MASK_ISA_3DNOW },
2440 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A },
2441 { "-mmmx", OPTION_MASK_ISA_MMX },
2442 { "-mabm", OPTION_MASK_ISA_ABM },
2443 { "-mpopcnt", OPTION_MASK_ISA_POPCNT },
2444 { "-mmovbe", OPTION_MASK_ISA_MOVBE },
2445 { "-mcrc32", OPTION_MASK_ISA_CRC32 },
2446 { "-maes", OPTION_MASK_ISA_AES },
2447 { "-mpclmul", OPTION_MASK_ISA_PCLMUL },
2451 static struct ix86_target_opts flag_opts[] =
2453 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE },
2454 { "-m80387", MASK_80387 },
2455 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS },
2456 { "-malign-double", MASK_ALIGN_DOUBLE },
2457 { "-mcld", MASK_CLD },
2458 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS },
2459 { "-mieee-fp", MASK_IEEE_FP },
2460 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS },
2461 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY },
2462 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT },
2463 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS },
2464 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 },
2465 { "-mno-push-args", MASK_NO_PUSH_ARGS },
2466 { "-mno-red-zone", MASK_NO_RED_ZONE },
2467 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER },
2468 { "-mrecip", MASK_RECIP },
2469 { "-mrtd", MASK_RTD },
2470 { "-msseregparm", MASK_SSEREGPARM },
2471 { "-mstack-arg-probe", MASK_STACK_PROBE },
2472 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS },
2475 const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];
2478 char target_other[40];
2487 memset (opts, '\0', sizeof (opts));
2489 /* Add -march= option. */
2492 opts[num][0] = "-march=";
2493 opts[num++][1] = arch;
2496 /* Add -mtune= option. */
2499 opts[num][0] = "-mtune=";
2500 opts[num++][1] = tune;
2503 /* Pick out the options in isa options. */
2504 for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
2506 if ((isa & isa_opts[i].mask) != 0)
2508 opts[num++][0] = isa_opts[i].option;
2509 isa &= ~ isa_opts[i].mask;
2513 if (isa && add_nl_p)
2515 opts[num++][0] = isa_other;
2516 sprintf (isa_other, "(other isa: %#x)", isa);
2519 /* Add flag options. */
2520 for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
2522 if ((flags & flag_opts[i].mask) != 0)
2524 opts[num++][0] = flag_opts[i].option;
2525 flags &= ~ flag_opts[i].mask;
2529 if (flags && add_nl_p)
2531 opts[num++][0] = target_other;
2532 sprintf (target_other, "(other flags: %#x)", flags);
2535 /* Add -fpmath= option. */
2538 opts[num][0] = "-mfpmath=";
2539 opts[num++][1] = fpmath;
2546 gcc_assert (num < ARRAY_SIZE (opts));
2548 /* Size the string. */
2550 sep_len = (add_nl_p) ? 3 : 1;
2551 for (i = 0; i < num; i++)
2554 for (j = 0; j < 2; j++)
2556 len += strlen (opts[i][j]);
2559 /* Build the string. */
2560 ret = ptr = (char *) xmalloc (len);
2563 for (i = 0; i < num; i++)
2567 for (j = 0; j < 2; j++)
2568 len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;
2575 if (add_nl_p && line_len + len2[0] + len2[1] > 70)
2583 for (j = 0; j < 2; j++)
2586 memcpy (ptr, opts[i][j], len2[j]);
2588 line_len += len2[j];
2593 gcc_assert (ret + len >= ptr);
2598 /* Function that is callable from the debugger to print the current
2601 ix86_debug_options (void)
2603 char *opts = ix86_target_string (ix86_isa_flags, target_flags,
2604 ix86_arch_string, ix86_tune_string,
2605 ix86_fpmath_string, true);
2609 fprintf (stderr, "%s\n\n", opts);
2613 fputs ("<no options>\n\n", stderr);
2618 /* Sometimes certain combinations of command options do not make
2619 sense on a particular target machine. You can define a macro
2620 `OVERRIDE_OPTIONS' to take account of this. This macro, if
2621 defined, is executed once just after all the command options have
2624 Don't use this macro to turn on various extra optimizations for
2625 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
2628 override_options (bool main_args_p)
2631 unsigned int ix86_arch_mask, ix86_tune_mask;
2632 const bool ix86_tune_specified = (ix86_tune_string != NULL);
2637 /* Comes from final.c -- no real reason to change it. */
2638 #define MAX_CODE_ALIGN 16
2646 PTA_PREFETCH_SSE = 1 << 4,
2648 PTA_3DNOW_A = 1 << 6,
2652 PTA_POPCNT = 1 << 10,
2654 PTA_SSE4A = 1 << 12,
2655 PTA_NO_SAHF = 1 << 13,
2656 PTA_SSE4_1 = 1 << 14,
2657 PTA_SSE4_2 = 1 << 15,
2659 PTA_PCLMUL = 1 << 17,
2662 PTA_MOVBE = 1 << 20,
2670 const char *const name; /* processor name or nickname. */
2671 const enum processor_type processor;
2672 const enum attr_cpu schedule;
2673 const unsigned /*enum pta_flags*/ flags;
2675 const processor_alias_table[] =
2677 {"i386", PROCESSOR_I386, CPU_NONE, 0},
2678 {"i486", PROCESSOR_I486, CPU_NONE, 0},
2679 {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
2680 {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
2681 {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX},
2682 {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX},
2683 {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
2684 {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
2685 {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_SSE},
2686 {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
2687 {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
2688 {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX},
2689 {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2691 {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2693 {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2694 PTA_MMX | PTA_SSE | PTA_SSE2},
2695 {"pentium4", PROCESSOR_PENTIUM4, CPU_NONE,
2696 PTA_MMX |PTA_SSE | PTA_SSE2},
2697 {"pentium4m", PROCESSOR_PENTIUM4, CPU_NONE,
2698 PTA_MMX | PTA_SSE | PTA_SSE2},
2699 {"prescott", PROCESSOR_NOCONA, CPU_NONE,
2700 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3},
2701 {"nocona", PROCESSOR_NOCONA, CPU_NONE,
2702 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2703 | PTA_CX16 | PTA_NO_SAHF},
2704 {"core2", PROCESSOR_CORE2, CPU_CORE2,
2705 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2706 | PTA_SSSE3 | PTA_CX16},
2707 {"atom", PROCESSOR_ATOM, CPU_ATOM,
2708 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2709 | PTA_SSSE3 | PTA_CX16 | PTA_MOVBE},
2710 {"geode", PROCESSOR_GEODE, CPU_GEODE,
2711 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A |PTA_PREFETCH_SSE},
2712 {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
2713 {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
2714 {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
2715 {"athlon", PROCESSOR_ATHLON, CPU_ATHLON,
2716 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
2717 {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON,
2718 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
2719 {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON,
2720 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2721 {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON,
2722 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2723 {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON,
2724 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2725 {"x86-64", PROCESSOR_K8, CPU_K8,
2726 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF},
2727 {"k8", PROCESSOR_K8, CPU_K8,
2728 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2729 | PTA_SSE2 | PTA_NO_SAHF},
2730 {"k8-sse3", PROCESSOR_K8, CPU_K8,
2731 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2732 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
2733 {"opteron", PROCESSOR_K8, CPU_K8,
2734 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2735 | PTA_SSE2 | PTA_NO_SAHF},
2736 {"opteron-sse3", PROCESSOR_K8, CPU_K8,
2737 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2738 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
2739 {"athlon64", PROCESSOR_K8, CPU_K8,
2740 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2741 | PTA_SSE2 | PTA_NO_SAHF},
2742 {"athlon64-sse3", PROCESSOR_K8, CPU_K8,
2743 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2744 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
2745 {"athlon-fx", PROCESSOR_K8, CPU_K8,
2746 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2747 | PTA_SSE2 | PTA_NO_SAHF},
2748 {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
2749 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2750 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
2751 {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
2752 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2753 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
2754 {"generic32", PROCESSOR_GENERIC32, CPU_PENTIUMPRO,
2755 0 /* flags are only used for -march switch. */ },
2756 {"generic64", PROCESSOR_GENERIC64, CPU_GENERIC64,
2757 PTA_64BIT /* flags are only used for -march switch. */ },
2760 int const pta_size = ARRAY_SIZE (processor_alias_table);
2762 /* Set up prefix/suffix so the error messages refer to either the command
2763 line argument, or the attribute(target). */
2772 prefix = "option(\"";
2777 #ifdef SUBTARGET_OVERRIDE_OPTIONS
2778 SUBTARGET_OVERRIDE_OPTIONS;
2781 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
2782 SUBSUBTARGET_OVERRIDE_OPTIONS;
2785 /* -fPIC is the default for x86_64. */
2786 if (TARGET_MACHO && TARGET_64BIT)
2789 /* Set the default values for switches whose default depends on TARGET_64BIT
2790 in case they weren't overwritten by command line options. */
2793 /* Mach-O doesn't support omitting the frame pointer for now. */
2794 if (flag_omit_frame_pointer == 2)
2795 flag_omit_frame_pointer = (TARGET_MACHO ? 0 : 1);
2796 if (flag_asynchronous_unwind_tables == 2)
2797 flag_asynchronous_unwind_tables = 1;
2798 if (flag_pcc_struct_return == 2)
2799 flag_pcc_struct_return = 0;
2803 if (flag_omit_frame_pointer == 2)
2804 flag_omit_frame_pointer = 0;
2805 if (flag_asynchronous_unwind_tables == 2)
2806 flag_asynchronous_unwind_tables = 0;
2807 if (flag_pcc_struct_return == 2)
2808 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
2811 /* Need to check -mtune=generic first. */
2812 if (ix86_tune_string)
2814 if (!strcmp (ix86_tune_string, "generic")
2815 || !strcmp (ix86_tune_string, "i686")
2816 /* As special support for cross compilers we read -mtune=native
2817 as -mtune=generic. With native compilers we won't see the
2818 -mtune=native, as it was changed by the driver. */
2819 || !strcmp (ix86_tune_string, "native"))
2822 ix86_tune_string = "generic64";
2824 ix86_tune_string = "generic32";
2826 /* If this call is for setting the option attribute, allow the
2827 generic32/generic64 that was previously set. */
2828 else if (!main_args_p
2829 && (!strcmp (ix86_tune_string, "generic32")
2830 || !strcmp (ix86_tune_string, "generic64")))
2832 else if (!strncmp (ix86_tune_string, "generic", 7))
2833 error ("bad value (%s) for %stune=%s %s",
2834 ix86_tune_string, prefix, suffix, sw);
2835 else if (!strcmp (ix86_tune_string, "x86-64"))
2836 warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated. Use "
2837 "%stune=k8%s or %stune=generic%s instead as appropriate.",
2838 prefix, suffix, prefix, suffix, prefix, suffix);
2842 if (ix86_arch_string)
2843 ix86_tune_string = ix86_arch_string;
2844 if (!ix86_tune_string)
2846 ix86_tune_string = cpu_names[TARGET_CPU_DEFAULT];
2847 ix86_tune_defaulted = 1;
2850 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
2851 need to use a sensible tune option. */
2852 if (!strcmp (ix86_tune_string, "generic")
2853 || !strcmp (ix86_tune_string, "x86-64")
2854 || !strcmp (ix86_tune_string, "i686"))
2857 ix86_tune_string = "generic64";
2859 ix86_tune_string = "generic32";
2863 if (ix86_stringop_string)
2865 if (!strcmp (ix86_stringop_string, "rep_byte"))
2866 stringop_alg = rep_prefix_1_byte;
2867 else if (!strcmp (ix86_stringop_string, "libcall"))
2868 stringop_alg = libcall;
2869 else if (!strcmp (ix86_stringop_string, "rep_4byte"))
2870 stringop_alg = rep_prefix_4_byte;
2871 else if (!strcmp (ix86_stringop_string, "rep_8byte")
2873 /* rep; movq isn't available in 32-bit code. */
2874 stringop_alg = rep_prefix_8_byte;
2875 else if (!strcmp (ix86_stringop_string, "byte_loop"))
2876 stringop_alg = loop_1_byte;
2877 else if (!strcmp (ix86_stringop_string, "loop"))
2878 stringop_alg = loop;
2879 else if (!strcmp (ix86_stringop_string, "unrolled_loop"))
2880 stringop_alg = unrolled_loop;
2882 error ("bad value (%s) for %sstringop-strategy=%s %s",
2883 ix86_stringop_string, prefix, suffix, sw);
2886 if (!ix86_arch_string)
2887 ix86_arch_string = TARGET_64BIT ? "x86-64" : SUBTARGET32_DEFAULT_CPU;
2889 ix86_arch_specified = 1;
2891 /* Validate -mabi= value. */
2892 if (ix86_abi_string)
2894 if (strcmp (ix86_abi_string, "sysv") == 0)
2895 ix86_abi = SYSV_ABI;
2896 else if (strcmp (ix86_abi_string, "ms") == 0)
2899 error ("unknown ABI (%s) for %sabi=%s %s",
2900 ix86_abi_string, prefix, suffix, sw);
2903 ix86_abi = DEFAULT_ABI;
2905 if (ix86_cmodel_string != 0)
2907 if (!strcmp (ix86_cmodel_string, "small"))
2908 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
2909 else if (!strcmp (ix86_cmodel_string, "medium"))
2910 ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
2911 else if (!strcmp (ix86_cmodel_string, "large"))
2912 ix86_cmodel = flag_pic ? CM_LARGE_PIC : CM_LARGE;
2914 error ("code model %s does not support PIC mode", ix86_cmodel_string);
2915 else if (!strcmp (ix86_cmodel_string, "32"))
2916 ix86_cmodel = CM_32;
2917 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
2918 ix86_cmodel = CM_KERNEL;
2920 error ("bad value (%s) for %scmodel=%s %s",
2921 ix86_cmodel_string, prefix, suffix, sw);
2925 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
2926 use of rip-relative addressing. This eliminates fixups that
2927 would otherwise be needed if this object is to be placed in a
2928 DLL, and is essentially just as efficient as direct addressing. */
2929 if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
2930 ix86_cmodel = CM_SMALL_PIC, flag_pic = 1;
2931 else if (TARGET_64BIT)
2932 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
2934 ix86_cmodel = CM_32;
2936 if (ix86_asm_string != 0)
2939 && !strcmp (ix86_asm_string, "intel"))
2940 ix86_asm_dialect = ASM_INTEL;
2941 else if (!strcmp (ix86_asm_string, "att"))
2942 ix86_asm_dialect = ASM_ATT;
2944 error ("bad value (%s) for %sasm=%s %s",
2945 ix86_asm_string, prefix, suffix, sw);
2947 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
2948 error ("code model %qs not supported in the %s bit mode",
2949 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
2950 if ((TARGET_64BIT != 0) != ((ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
2951 sorry ("%i-bit mode not compiled in",
2952 (ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
2954 for (i = 0; i < pta_size; i++)
2955 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
2957 ix86_schedule = processor_alias_table[i].schedule;
2958 ix86_arch = processor_alias_table[i].processor;
2959 /* Default cpu tuning to the architecture. */
2960 ix86_tune = ix86_arch;
2962 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
2963 error ("CPU you selected does not support x86-64 "
2966 if (processor_alias_table[i].flags & PTA_MMX
2967 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
2968 ix86_isa_flags |= OPTION_MASK_ISA_MMX;
2969 if (processor_alias_table[i].flags & PTA_3DNOW
2970 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
2971 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
2972 if (processor_alias_table[i].flags & PTA_3DNOW_A
2973 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
2974 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
2975 if (processor_alias_table[i].flags & PTA_SSE
2976 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
2977 ix86_isa_flags |= OPTION_MASK_ISA_SSE;
2978 if (processor_alias_table[i].flags & PTA_SSE2
2979 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
2980 ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
2981 if (processor_alias_table[i].flags & PTA_SSE3
2982 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
2983 ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
2984 if (processor_alias_table[i].flags & PTA_SSSE3
2985 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
2986 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
2987 if (processor_alias_table[i].flags & PTA_SSE4_1
2988 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
2989 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
2990 if (processor_alias_table[i].flags & PTA_SSE4_2
2991 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
2992 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
2993 if (processor_alias_table[i].flags & PTA_AVX
2994 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
2995 ix86_isa_flags |= OPTION_MASK_ISA_AVX;
2996 if (processor_alias_table[i].flags & PTA_FMA
2997 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
2998 ix86_isa_flags |= OPTION_MASK_ISA_FMA;
2999 if (processor_alias_table[i].flags & PTA_SSE4A
3000 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
3001 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
3002 if (processor_alias_table[i].flags & PTA_FMA4
3003 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA4))
3004 ix86_isa_flags |= OPTION_MASK_ISA_FMA4;
3005 if (processor_alias_table[i].flags & PTA_XOP
3006 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_XOP))
3007 ix86_isa_flags |= OPTION_MASK_ISA_XOP;
3008 if (processor_alias_table[i].flags & PTA_LWP
3009 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_LWP))
3010 ix86_isa_flags |= OPTION_MASK_ISA_LWP;
3011 if (processor_alias_table[i].flags & PTA_ABM
3012 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
3013 ix86_isa_flags |= OPTION_MASK_ISA_ABM;
3014 if (processor_alias_table[i].flags & PTA_CX16
3015 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
3016 ix86_isa_flags |= OPTION_MASK_ISA_CX16;
3017 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
3018 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
3019 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
3020 if (!(TARGET_64BIT && (processor_alias_table[i].flags & PTA_NO_SAHF))
3021 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
3022 ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
3023 if (processor_alias_table[i].flags & PTA_MOVBE
3024 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MOVBE))
3025 ix86_isa_flags |= OPTION_MASK_ISA_MOVBE;
3026 if (processor_alias_table[i].flags & PTA_AES
3027 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
3028 ix86_isa_flags |= OPTION_MASK_ISA_AES;
3029 if (processor_alias_table[i].flags & PTA_PCLMUL
3030 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
3031 ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
3032 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
3033 x86_prefetch_sse = true;
3038 if (!strcmp (ix86_arch_string, "generic"))
3039 error ("generic CPU can be used only for %stune=%s %s",
3040 prefix, suffix, sw);
3041 else if (!strncmp (ix86_arch_string, "generic", 7) || i == pta_size)
3042 error ("bad value (%s) for %sarch=%s %s",
3043 ix86_arch_string, prefix, suffix, sw);
3045 ix86_arch_mask = 1u << ix86_arch;
3046 for (i = 0; i < X86_ARCH_LAST; ++i)
3047 ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
3049 for (i = 0; i < pta_size; i++)
3050 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
3052 ix86_schedule = processor_alias_table[i].schedule;
3053 ix86_tune = processor_alias_table[i].processor;
3054 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
3056 if (ix86_tune_defaulted)
3058 ix86_tune_string = "x86-64";
3059 for (i = 0; i < pta_size; i++)
3060 if (! strcmp (ix86_tune_string,
3061 processor_alias_table[i].name))
3063 ix86_schedule = processor_alias_table[i].schedule;
3064 ix86_tune = processor_alias_table[i].processor;
3067 error ("CPU you selected does not support x86-64 "
3070 /* Intel CPUs have always interpreted SSE prefetch instructions as
3071 NOPs; so, we can enable SSE prefetch instructions even when
3072 -mtune (rather than -march) points us to a processor that has them.
3073 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
3074 higher processors. */
3076 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
3077 x86_prefetch_sse = true;
3081 if (ix86_tune_specified && i == pta_size)
3082 error ("bad value (%s) for %stune=%s %s",
3083 ix86_tune_string, prefix, suffix, sw);
3085 ix86_tune_mask = 1u << ix86_tune;
3086 for (i = 0; i < X86_TUNE_LAST; ++i)
3087 ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3090 ix86_cost = &ix86_size_cost;
3092 ix86_cost = processor_target_table[ix86_tune].cost;
3094 /* Arrange to set up i386_stack_locals for all functions. */
3095 init_machine_status = ix86_init_machine_status;
3097 /* Validate -mregparm= value. */
3098 if (ix86_regparm_string)
3101 warning (0, "%sregparm%s is ignored in 64-bit mode", prefix, suffix);
3102 i = atoi (ix86_regparm_string);
3103 if (i < 0 || i > REGPARM_MAX)
3104 error ("%sregparm=%d%s is not between 0 and %d",
3105 prefix, i, suffix, REGPARM_MAX);
3110 ix86_regparm = REGPARM_MAX;
3112 /* If the user has provided any of the -malign-* options,
3113 warn and use that value only if -falign-* is not set.
3114 Remove this code in GCC 3.2 or later. */
3115 if (ix86_align_loops_string)
3117 warning (0, "%salign-loops%s is obsolete, use -falign-loops%s",
3118 prefix, suffix, suffix);
3119 if (align_loops == 0)
3121 i = atoi (ix86_align_loops_string);
3122 if (i < 0 || i > MAX_CODE_ALIGN)
3123 error ("%salign-loops=%d%s is not between 0 and %d",
3124 prefix, i, suffix, MAX_CODE_ALIGN);
3126 align_loops = 1 << i;
3130 if (ix86_align_jumps_string)
3132 warning (0, "%salign-jumps%s is obsolete, use -falign-jumps%s",
3133 prefix, suffix, suffix);
3134 if (align_jumps == 0)
3136 i = atoi (ix86_align_jumps_string);
3137 if (i < 0 || i > MAX_CODE_ALIGN)
3138 error ("%salign-loops=%d%s is not between 0 and %d",
3139 prefix, i, suffix, MAX_CODE_ALIGN);
3141 align_jumps = 1 << i;
3145 if (ix86_align_funcs_string)
3147 warning (0, "%salign-functions%s is obsolete, use -falign-functions%s",
3148 prefix, suffix, suffix);
3149 if (align_functions == 0)
3151 i = atoi (ix86_align_funcs_string);
3152 if (i < 0 || i > MAX_CODE_ALIGN)
3153 error ("%salign-loops=%d%s is not between 0 and %d",
3154 prefix, i, suffix, MAX_CODE_ALIGN);
3156 align_functions = 1 << i;
3160 /* Default align_* from the processor table. */
3161 if (align_loops == 0)
3163 align_loops = processor_target_table[ix86_tune].align_loop;
3164 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
3166 if (align_jumps == 0)
3168 align_jumps = processor_target_table[ix86_tune].align_jump;
3169 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
3171 if (align_functions == 0)
3173 align_functions = processor_target_table[ix86_tune].align_func;
3176 /* Validate -mbranch-cost= value, or provide default. */
3177 ix86_branch_cost = ix86_cost->branch_cost;
3178 if (ix86_branch_cost_string)
3180 i = atoi (ix86_branch_cost_string);
3182 error ("%sbranch-cost=%d%s is not between 0 and 5", prefix, i, suffix);
3184 ix86_branch_cost = i;
3186 if (ix86_section_threshold_string)
3188 i = atoi (ix86_section_threshold_string);
3190 error ("%slarge-data-threshold=%d%s is negative", prefix, i, suffix);
3192 ix86_section_threshold = i;
3195 if (ix86_tls_dialect_string)
3197 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
3198 ix86_tls_dialect = TLS_DIALECT_GNU;
3199 else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
3200 ix86_tls_dialect = TLS_DIALECT_GNU2;
3202 error ("bad value (%s) for %stls-dialect=%s %s",
3203 ix86_tls_dialect_string, prefix, suffix, sw);
3206 if (ix87_precision_string)
3208 i = atoi (ix87_precision_string);
3209 if (i != 32 && i != 64 && i != 80)
3210 error ("pc%d is not valid precision setting (32, 64 or 80)", i);
3215 target_flags |= TARGET_SUBTARGET64_DEFAULT & ~target_flags_explicit;
3217 /* Enable by default the SSE and MMX builtins. Do allow the user to
3218 explicitly disable any of these. In particular, disabling SSE and
3219 MMX for kernel code is extremely useful. */
3220 if (!ix86_arch_specified)
3222 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
3223 | TARGET_SUBTARGET64_ISA_DEFAULT) & ~ix86_isa_flags_explicit);
3226 warning (0, "%srtd%s is ignored in 64bit mode", prefix, suffix);
3230 target_flags |= TARGET_SUBTARGET32_DEFAULT & ~target_flags_explicit;
3232 if (!ix86_arch_specified)
3234 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~ix86_isa_flags_explicit;
3236 /* i386 ABI does not specify red zone. It still makes sense to use it
3237 when programmer takes care to stack from being destroyed. */
3238 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
3239 target_flags |= MASK_NO_RED_ZONE;
3242 /* Keep nonleaf frame pointers. */
3243 if (flag_omit_frame_pointer)
3244 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
3245 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
3246 flag_omit_frame_pointer = 1;
3248 /* If we're doing fast math, we don't care about comparison order
3249 wrt NaNs. This lets us use a shorter comparison sequence. */
3250 if (flag_finite_math_only)
3251 target_flags &= ~MASK_IEEE_FP;
3253 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
3254 since the insns won't need emulation. */
3255 if (x86_arch_always_fancy_math_387 & ix86_arch_mask)
3256 target_flags &= ~MASK_NO_FANCY_MATH_387;
3258 /* Likewise, if the target doesn't have a 387, or we've specified
3259 software floating point, don't use 387 inline intrinsics. */
3261 target_flags |= MASK_NO_FANCY_MATH_387;
3263 /* Turn on MMX builtins for -msse. */
3266 ix86_isa_flags |= OPTION_MASK_ISA_MMX & ~ix86_isa_flags_explicit;
3267 x86_prefetch_sse = true;
3270 /* Turn on popcnt instruction for -msse4.2 or -mabm. */
3271 if (TARGET_SSE4_2 || TARGET_ABM)
3272 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT & ~ix86_isa_flags_explicit;
3274 /* Validate -mpreferred-stack-boundary= value or default it to
3275 PREFERRED_STACK_BOUNDARY_DEFAULT. */
3276 ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
3277 if (ix86_preferred_stack_boundary_string)
3279 i = atoi (ix86_preferred_stack_boundary_string);
3280 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
3281 error ("%spreferred-stack-boundary=%d%s is not between %d and 12",
3282 prefix, i, suffix, TARGET_64BIT ? 4 : 2);
3284 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
3287 /* Set the default value for -mstackrealign. */
3288 if (ix86_force_align_arg_pointer == -1)
3289 ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;
3291 ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;
3293 /* Validate -mincoming-stack-boundary= value or default it to
3294 MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
3295 ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
3296 if (ix86_incoming_stack_boundary_string)
3298 i = atoi (ix86_incoming_stack_boundary_string);
3299 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
3300 error ("-mincoming-stack-boundary=%d is not between %d and 12",
3301 i, TARGET_64BIT ? 4 : 2);
3304 ix86_user_incoming_stack_boundary = (1 << i) * BITS_PER_UNIT;
3305 ix86_incoming_stack_boundary
3306 = ix86_user_incoming_stack_boundary;
3310 /* Accept -msseregparm only if at least SSE support is enabled. */
3311 if (TARGET_SSEREGPARM
3313 error ("%ssseregparm%s used without SSE enabled", prefix, suffix);
3315 ix86_fpmath = TARGET_FPMATH_DEFAULT;
3316 if (ix86_fpmath_string != 0)
3318 if (! strcmp (ix86_fpmath_string, "387"))
3319 ix86_fpmath = FPMATH_387;
3320 else if (! strcmp (ix86_fpmath_string, "sse"))
3324 warning (0, "SSE instruction set disabled, using 387 arithmetics");
3325 ix86_fpmath = FPMATH_387;
3328 ix86_fpmath = FPMATH_SSE;
3330 else if (! strcmp (ix86_fpmath_string, "387,sse")
3331 || ! strcmp (ix86_fpmath_string, "387+sse")
3332 || ! strcmp (ix86_fpmath_string, "sse,387")
3333 || ! strcmp (ix86_fpmath_string, "sse+387")
3334 || ! strcmp (ix86_fpmath_string, "both"))
3338 warning (0, "SSE instruction set disabled, using 387 arithmetics");
3339 ix86_fpmath = FPMATH_387;
3341 else if (!TARGET_80387)
3343 warning (0, "387 instruction set disabled, using SSE arithmetics");
3344 ix86_fpmath = FPMATH_SSE;
3347 ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
3350 error ("bad value (%s) for %sfpmath=%s %s",
3351 ix86_fpmath_string, prefix, suffix, sw);
3354 /* If the i387 is disabled, then do not return values in it. */
3356 target_flags &= ~MASK_FLOAT_RETURNS;
3358 /* Use external vectorized library in vectorizing intrinsics. */
3359 if (ix86_veclibabi_string)
3361 if (strcmp (ix86_veclibabi_string, "svml") == 0)
3362 ix86_veclib_handler = ix86_veclibabi_svml;
3363 else if (strcmp (ix86_veclibabi_string, "acml") == 0)
3364 ix86_veclib_handler = ix86_veclibabi_acml;
3366 error ("unknown vectorization library ABI type (%s) for "
3367 "%sveclibabi=%s %s", ix86_veclibabi_string,
3368 prefix, suffix, sw);
3371 if ((x86_accumulate_outgoing_args & ix86_tune_mask)
3372 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3374 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3376 /* ??? Unwind info is not correct around the CFG unless either a frame
3377 pointer is present or M_A_O_A is set. Fixing this requires rewriting
3378 unwind info generation to be aware of the CFG and propagating states
3380 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
3381 || flag_exceptions || flag_non_call_exceptions)
3382 && flag_omit_frame_pointer
3383 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
3385 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3386 warning (0, "unwind tables currently require either a frame pointer "
3387 "or %saccumulate-outgoing-args%s for correctness",
3389 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3392 /* If stack probes are required, the space used for large function
3393 arguments on the stack must also be probed, so enable
3394 -maccumulate-outgoing-args so this happens in the prologue. */
3395 if (TARGET_STACK_PROBE
3396 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
3398 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3399 warning (0, "stack probing requires %saccumulate-outgoing-args%s "
3400 "for correctness", prefix, suffix);
3401 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3404 /* For sane SSE instruction set generation we need fcomi instruction.
3405 It is safe to enable all CMOVE instructions. */
3409 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
3412 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
3413 p = strchr (internal_label_prefix, 'X');
3414 internal_label_prefix_len = p - internal_label_prefix;
3418 /* When scheduling description is not available, disable scheduler pass
3419 so it won't slow down the compilation and make x87 code slower. */
3420 if (!TARGET_SCHEDULE)
3421 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
3423 if (!PARAM_SET_P (PARAM_SIMULTANEOUS_PREFETCHES))
3424 set_param_value ("simultaneous-prefetches",
3425 ix86_cost->simultaneous_prefetches);
3426 if (!PARAM_SET_P (PARAM_L1_CACHE_LINE_SIZE))
3427 set_param_value ("l1-cache-line-size", ix86_cost->prefetch_block);
3428 if (!PARAM_SET_P (PARAM_L1_CACHE_SIZE))
3429 set_param_value ("l1-cache-size", ix86_cost->l1_cache_size);
3430 if (!PARAM_SET_P (PARAM_L2_CACHE_SIZE))
3431 set_param_value ("l2-cache-size", ix86_cost->l2_cache_size);
3433 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
3434 can be optimized to ap = __builtin_next_arg (0). */
3436 targetm.expand_builtin_va_start = NULL;
3440 ix86_gen_leave = gen_leave_rex64;
3441 ix86_gen_pop1 = gen_popdi1;
3442 ix86_gen_add3 = gen_adddi3;
3443 ix86_gen_sub3 = gen_subdi3;
3444 ix86_gen_sub3_carry = gen_subdi3_carry;
3445 ix86_gen_one_cmpl2 = gen_one_cmpldi2;
3446 ix86_gen_monitor = gen_sse3_monitor64;
3447 ix86_gen_andsp = gen_anddi3;
3451 ix86_gen_leave = gen_leave;
3452 ix86_gen_pop1 = gen_popsi1;
3453 ix86_gen_add3 = gen_addsi3;
3454 ix86_gen_sub3 = gen_subsi3;
3455 ix86_gen_sub3_carry = gen_subsi3_carry;
3456 ix86_gen_one_cmpl2 = gen_one_cmplsi2;
3457 ix86_gen_monitor = gen_sse3_monitor;
3458 ix86_gen_andsp = gen_andsi3;
3462 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
3464 target_flags |= MASK_CLD & ~target_flags_explicit;
3467 /* Save the initial options in case the user does function specific options */
3469 target_option_default_node = target_option_current_node
3470 = build_target_option_node ();
3473 /* Update register usage after having seen the compiler flags. */
/* Implementation of the conditional-register-usage target hook: adjust
   fixed_regs[], call_used_regs[] and reg_names[] to match the selected
   target flags (64-bit mode, ABI, MMX/SSE/x87 availability).
   NOTE(review): this excerpt is elided — the `static void` declarator,
   braces and a few guard lines fall outside the visible lines.  */
3476 ix86_conditional_register_usage (void)
/* First pass: entries > 1 in the initial tables encode "fixed/clobbered
   only in 32-bit (value 2) or only in 64-bit (value 3) mode"; collapse
   them to plain 0/1 for the current mode.  */
3481 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3483 if (fixed_regs[i] > 1)
3484 fixed_regs[i] = (fixed_regs[i] == (TARGET_64BIT ? 3 : 2));
3485 if (call_used_regs[i] > 1)
3486 call_used_regs[i] = (call_used_regs[i] == (TARGET_64BIT ? 3 : 2));
3489 /* The PIC register, if it exists, is fixed. */
3490 j = PIC_OFFSET_TABLE_REGNUM;
3491 if (j != INVALID_REGNUM)
3492 fixed_regs[j] = call_used_regs[j] = 1;
3494 /* The MS_ABI changes the set of call-used registers. */
/* Under the 64-bit Microsoft ABI, rsi/rdi and xmm6-xmm15 are
   callee-saved, so clear their call_used bits.  */
3495 if (TARGET_64BIT && ix86_cfun_abi () == MS_ABI)
3497 call_used_regs[SI_REG] = 0;
3498 call_used_regs[DI_REG] = 0;
3499 call_used_regs[XMM6_REG] = 0;
3500 call_used_regs[XMM7_REG] = 0;
3501 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
3502 call_used_regs[i] = 0;
3505 /* The default setting of CLOBBERED_REGS is for 32-bit; add in the
3506 other call-clobbered regs for 64-bit. */
/* Rebuild CLOBBERED_REGS from scratch as the set of general regs that
   are call-used under the now-final tables.
   NOTE(review): the enclosing TARGET_64BIT guard for this rebuild is on
   an elided line — confirm against the full source.  */
3509 CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);
3511 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3512 if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
3513 && call_used_regs[i])
3514 SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
3517 /* If MMX is disabled, squash the registers. */
/* "Squash" = mark fixed and call-used and erase the printable name so
   the register allocator and asm output never touch the register.
   NOTE(review): the `if (! TARGET_MMX)` style guards for the next three
   squash loops are on elided lines.  */
3519 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3520 if (TEST_HARD_REG_BIT (reg_class_contents[(int)MMX_REGS], i))
3521 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
3523 /* If SSE is disabled, squash the registers. */
3525 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3526 if (TEST_HARD_REG_BIT (reg_class_contents[(int)SSE_REGS], i))
3527 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
3529 /* If the FPU is disabled, squash the registers. */
3530 if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
3531 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3532 if (TEST_HARD_REG_BIT (reg_class_contents[(int)FLOAT_REGS], i))
3533 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
3535 /* If 32-bit, squash the 64-bit registers. */
/* r8-r15 and xmm8-xmm15 do not exist in 32-bit mode.
   NOTE(review): the loop bodies (and the `if (! TARGET_64BIT)` guard)
   are on elided lines; only the loop headers are visible here.  */
3538 for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
3540 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
3546 /* Save the current options */
/* Implementation of the target-option "save" hook: snapshot the current
   i386 option globals into PTR so that attribute((target(...))) /
   #pragma GCC target can later restore them via the matching restore
   hook.  NOTE(review): the `static void` declarator and the surrounding
   braces fall outside this excerpt.  */
3549 ix86_function_specific_save (struct cl_target_option *ptr)
3551 ptr->arch = ix86_arch;
3552 ptr->schedule = ix86_schedule;
3553 ptr->tune = ix86_tune;
3554 ptr->fpmath = ix86_fpmath;
3555 ptr->branch_cost = ix86_branch_cost;
3556 ptr->tune_defaulted = ix86_tune_defaulted;
3557 ptr->arch_specified = ix86_arch_specified;
3558 ptr->ix86_isa_flags_explicit = ix86_isa_flags_explicit;
3559 ptr->target_flags_explicit = target_flags_explicit;
3561 /* The fields are char but the variables are not; make sure the
3562 values fit in the fields. */
/* If any of these fire, a field in cl_target_option silently truncated
   the wider global during the assignments above.  */
3563 gcc_assert (ptr->arch == ix86_arch);
3564 gcc_assert (ptr->schedule == ix86_schedule);
3565 gcc_assert (ptr->tune == ix86_tune);
3566 gcc_assert (ptr->fpmath == ix86_fpmath);
3567 gcc_assert (ptr->branch_cost == ix86_branch_cost);
3570 /* Restore the current options */
/* Implementation of the target-option "restore" hook: copy a previously
   saved snapshot (see ix86_function_specific_save) back into the i386
   option globals, then recompute the derived per-arch / per-tune
   feature bitmaps only when the arch or tune actually changed.
   NOTE(review): the declarator, the `int i;` declaration and the braces
   around the two `if` bodies are on elided lines.  */
3573 ix86_function_specific_restore (struct cl_target_option *ptr)
/* Remember the outgoing values so we can skip the (re)derivation work
   below when nothing changed.  */
3575 enum processor_type old_tune = ix86_tune;
3576 enum processor_type old_arch = ix86_arch;
3577 unsigned int ix86_arch_mask, ix86_tune_mask;
/* The struct fields are narrow (char-sized); cast back to the proper
   enum types when restoring the globals.  */
3580 ix86_arch = (enum processor_type) ptr->arch;
3581 ix86_schedule = (enum attr_cpu) ptr->schedule;
3582 ix86_tune = (enum processor_type) ptr->tune;
3583 ix86_fpmath = (enum fpmath_unit) ptr->fpmath;
3584 ix86_branch_cost = ptr->branch_cost;
3585 ix86_tune_defaulted = ptr->tune_defaulted;
3586 ix86_arch_specified = ptr->arch_specified;
3587 ix86_isa_flags_explicit = ptr->ix86_isa_flags_explicit;
3588 target_flags_explicit = ptr->target_flags_explicit;
3590 /* Recreate the arch feature tests if the arch changed */
3591 if (old_arch != ix86_arch)
3593 ix86_arch_mask = 1u << ix86_arch;
3594 for (i = 0; i < X86_ARCH_LAST; ++i)
3595 ix86_arch_features[i]
3596 = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
3599 /* Recreate the tune optimization tests */
3600 if (old_tune != ix86_tune)
3602 ix86_tune_mask = 1u << ix86_tune;
3603 for (i = 0; i < X86_TUNE_LAST; ++i)
3604 ix86_tune_features[i]
3605 = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3609 /* Print the current options */
/* Implementation of the target-option "print" hook: dump the saved
   option snapshot PTR to FILE for debugging, each line left-padded by
   INDENT spaces via the "%*s" idiom.  NOTE(review): the declarator, the
   `char *target_string` declaration and some fprintf argument lines are
   elided from this excerpt.  */
3612 ix86_function_specific_print (FILE *file, int indent,
3613 struct cl_target_option *ptr)
/* Build a human-readable "-march=... -mtune=..." style string from the
   saved ISA and target flags; it is heap-allocated and freed below.  */
3616 = ix86_target_string (ptr->ix86_isa_flags, ptr->target_flags,
3617 NULL, NULL, NULL, false);
/* Print arch/tune both numerically and, when the index is in range of
   the cpu_names[] table, symbolically.  */
3619 fprintf (file, "%*sarch = %d (%s)\n",
3622 ((ptr->arch < TARGET_CPU_DEFAULT_max)
3623 ? cpu_names[ptr->arch]
3626 fprintf (file, "%*stune = %d (%s)\n",
3629 ((ptr->tune < TARGET_CPU_DEFAULT_max)
3630 ? cpu_names[ptr->tune]
/* fpmath is a bitmask: 387 and/or sse may both be set.  */
3633 fprintf (file, "%*sfpmath = %d%s%s\n", indent, "", ptr->fpmath,
3634 (ptr->fpmath & FPMATH_387) ? ", 387" : "",
3635 (ptr->fpmath & FPMATH_SSE) ? ", sse" : "");
3636 fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);
3640 fprintf (file, "%*s%s\n", indent, "", target_string);
3641 free (target_string);
3646 /* Inner function to process the attribute((target(...))), take an argument and
3647 set the current options from the argument. If we have a list, recursively go
3651 ix86_valid_target_attribute_inner_p (tree args, char *p_strings[])
3656 #define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
3657 #define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 }
3658 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
3659 #define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M }
3674 enum ix86_opt_type type;
3679 IX86_ATTR_ISA ("3dnow", OPT_m3dnow),
3680 IX86_ATTR_ISA ("abm", OPT_mabm),
3681 IX86_ATTR_ISA ("aes", OPT_maes),
3682 IX86_ATTR_ISA ("avx", OPT_mavx),
3683 IX86_ATTR_ISA ("mmx", OPT_mmmx),
3684 IX86_ATTR_ISA ("pclmul", OPT_mpclmul),
3685 IX86_ATTR_ISA ("popcnt", OPT_mpopcnt),
3686 IX86_ATTR_ISA ("sse", OPT_msse),
3687 IX86_ATTR_ISA ("sse2", OPT_msse2),
3688 IX86_ATTR_ISA ("sse3", OPT_msse3),
3689 IX86_ATTR_ISA ("sse4", OPT_msse4),
3690 IX86_ATTR_ISA ("sse4.1", OPT_msse4_1),
3691 IX86_ATTR_ISA ("sse4.2", OPT_msse4_2),
3692 IX86_ATTR_ISA ("sse4a", OPT_msse4a),
3693 IX86_ATTR_ISA ("ssse3", OPT_mssse3),
3694 IX86_ATTR_ISA ("fma4", OPT_mfma4),
3695 IX86_ATTR_ISA ("xop", OPT_mxop),
3696 IX86_ATTR_ISA ("lwp", OPT_mlwp),
3698 /* string options */
3699 IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH),
3700 IX86_ATTR_STR ("fpmath=", IX86_FUNCTION_SPECIFIC_FPMATH),
3701 IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE),
3704 IX86_ATTR_YES ("cld",
3708 IX86_ATTR_NO ("fancy-math-387",
3709 OPT_mfancy_math_387,
3710 MASK_NO_FANCY_MATH_387),
3712 IX86_ATTR_YES ("ieee-fp",
3716 IX86_ATTR_YES ("inline-all-stringops",
3717 OPT_minline_all_stringops,
3718 MASK_INLINE_ALL_STRINGOPS),
3720 IX86_ATTR_YES ("inline-stringops-dynamically",
3721 OPT_minline_stringops_dynamically,
3722 MASK_INLINE_STRINGOPS_DYNAMICALLY),
3724 IX86_ATTR_NO ("align-stringops",
3725 OPT_mno_align_stringops,
3726 MASK_NO_ALIGN_STRINGOPS),
3728 IX86_ATTR_YES ("recip",
3734 /* If this is a list, recurse to get the options. */
3735 if (TREE_CODE (args) == TREE_LIST)
3739 for (; args; args = TREE_CHAIN (args))
3740 if (TREE_VALUE (args)
3741 && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args), p_strings))
3747 else if (TREE_CODE (args) != STRING_CST)
3750 /* Handle multiple arguments separated by commas. */
3751 next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
3753 while (next_optstr && *next_optstr != '\0')
3755 char *p = next_optstr;
3757 char *comma = strchr (next_optstr, ',');
3758 const char *opt_string;
3759 size_t len, opt_len;
3764 enum ix86_opt_type type = ix86_opt_unknown;
3770 len = comma - next_optstr;
3771 next_optstr = comma + 1;
3779 /* Recognize no-xxx. */
3780 if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
3789 /* Find the option. */
3792 for (i = 0; i < ARRAY_SIZE (attrs); i++)
3794 type = attrs[i].type;
3795 opt_len = attrs[i].len;
3796 if (ch == attrs[i].string[0]
3797 && ((type != ix86_opt_str) ? len == opt_len : len > opt_len)
3798 && memcmp (p, attrs[i].string, opt_len) == 0)
3801 mask = attrs[i].mask;
3802 opt_string = attrs[i].string;
3807 /* Process the option. */
3810 error ("attribute(target(\"%s\")) is unknown", orig_p);
3814 else if (type == ix86_opt_isa)
3815 ix86_handle_option (opt, p, opt_set_p);