/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
   2002, 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to
the Free Software Foundation, 51 Franklin Street, Fifth Floor,
Boston, MA 02110-1301, USA.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "tree.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "conditions.h"
#include "insn-codes.h"
#include "insn-attr.h"
#include "basic-block.h"
#include "target.h"
#include "target-def.h"
#include "langhooks.h"
#include "tree-gimple.h"
#include "tm-constrs.h"
#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif
/* Return index of given mode in mult and division cost tables.  */
#define MODE_INDEX(mode)			\
  ((mode) == QImode ? 0				\
   : (mode) == HImode ? 1			\
   : (mode) == SImode ? 2			\
   : (mode) == DImode ? 3			\
   : 4)
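/* Example use: the per-mode multiply and divide entries in the cost
   tables below are indexed as, e.g.,
   ix86_cost->mult_init[MODE_INDEX (SImode)]
   (field names per struct processor_costs in i386.h).  */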
/* Processor costs (relative to an add) */
/* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes.  */
#define COSTS_N_BYTES(N) ((N) * 2)
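/* Under that assumption COSTS_N_BYTES (2) == 4 == COSTS_N_INSNS (1): a
   two-byte add "costs" exactly one instruction's worth of units, which is
   what makes the byte-based entries of size_cost directly comparable with
   the COSTS_N_INSNS-based entries of the speed-tuned tables below.  */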
#define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
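/* How to read the stringop strategy initializers below (following the
   stringop_algs layout in i386.h): the first member is the algorithm used
   when the block size is unknown at compile time; it is followed by
   {max, alg} pairs meaning "use ALG for blocks of at most MAX bytes",
   where max == -1 terminates the list and covers all larger sizes.  Each
   cost table carries one such strategy pair for memcpy and one for memset
   (the first entry of each pair for 32-bit code, the second for 64-bit;
   DUMMY_STRINGOP_ALGS fills the slot that does not apply).  */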
static const
struct processor_costs size_cost = {	/* costs for tuning for size */
  COSTS_N_BYTES (2),			/* cost of an add instruction */
  COSTS_N_BYTES (3),			/* cost of a lea instruction */
  COSTS_N_BYTES (2),			/* variable shift costs */
  COSTS_N_BYTES (3),			/* constant shift costs */
  {COSTS_N_BYTES (3),			/* cost of starting multiply for QI */
   COSTS_N_BYTES (3),			/* HI */
   COSTS_N_BYTES (3),			/* SI */
   COSTS_N_BYTES (3),			/* DI */
   COSTS_N_BYTES (5)},			/* other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_BYTES (3),			/* cost of a divide/mod for QI */
   COSTS_N_BYTES (3),			/* HI */
   COSTS_N_BYTES (3),			/* SI */
   COSTS_N_BYTES (3),			/* DI */
   COSTS_N_BYTES (5)},			/* other */
  COSTS_N_BYTES (3),			/* cost of movsx */
  COSTS_N_BYTES (3),			/* cost of movzx */
  2,					/* cost for loading QImode using movzbl */
  {2, 2, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 2, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {2, 2, 2},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {2, 2, 2},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  3,					/* cost of moving MMX register */
  {3, 3},				/* cost of loading MMX registers
					   in SImode and DImode */
  {3, 3},				/* cost of storing MMX registers
					   in SImode and DImode */
  3,					/* cost of moving SSE register */
  {3, 3, 3},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {3, 3, 3},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  0,					/* size of prefetch block */
  0,					/* number of parallel prefetches */
  COSTS_N_BYTES (2),			/* cost of FADD and FSUB insns.  */
  COSTS_N_BYTES (2),			/* cost of FMUL instruction.  */
  COSTS_N_BYTES (2),			/* cost of FDIV instruction.  */
  COSTS_N_BYTES (2),			/* cost of FABS instruction.  */
  COSTS_N_BYTES (2),			/* cost of FCHS instruction.  */
  COSTS_N_BYTES (2),			/* cost of FSQRT instruction.  */
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}}
};
/* Processor costs (relative to an add) */
static const
struct processor_costs i386_cost = {	/* 386 specific costs */
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1),			/* cost of a lea instruction */
  COSTS_N_INSNS (3),			/* variable shift costs */
  COSTS_N_INSNS (2),			/* constant shift costs */
  {COSTS_N_INSNS (6),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (6),			/* HI */
   COSTS_N_INSNS (6),			/* SI */
   COSTS_N_INSNS (6),			/* DI */
   COSTS_N_INSNS (6)},			/* other */
  COSTS_N_INSNS (1),			/* cost of multiply per each bit set */
  {COSTS_N_INSNS (23),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (23),			/* HI */
   COSTS_N_INSNS (23),			/* SI */
   COSTS_N_INSNS (23),			/* DI */
   COSTS_N_INSNS (23)},			/* other */
  COSTS_N_INSNS (3),			/* cost of movsx */
  COSTS_N_INSNS (2),			/* cost of movzx */
  15,					/* "large" insn */
  4,					/* cost for loading QImode using movzbl */
  {2, 4, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 4, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {8, 8, 8},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {8, 8, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {4, 8},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 8},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 8, 16},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 8, 16},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  0,					/* size of prefetch block */
  0,					/* number of parallel prefetches */
  COSTS_N_INSNS (23),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (27),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (88),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (22),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (24),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (122),			/* cost of FSQRT instruction.  */
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS}
};
static const
struct processor_costs i486_cost = {	/* 486 specific costs */
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1),			/* cost of a lea instruction */
  COSTS_N_INSNS (3),			/* variable shift costs */
  COSTS_N_INSNS (2),			/* constant shift costs */
  {COSTS_N_INSNS (12),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (12),			/* HI */
   COSTS_N_INSNS (12),			/* SI */
   COSTS_N_INSNS (12),			/* DI */
   COSTS_N_INSNS (12)},			/* other */
  1,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (40),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (40),			/* HI */
   COSTS_N_INSNS (40),			/* SI */
   COSTS_N_INSNS (40),			/* DI */
   COSTS_N_INSNS (40)},			/* other */
  COSTS_N_INSNS (3),			/* cost of movsx */
  COSTS_N_INSNS (2),			/* cost of movzx */
  15,					/* "large" insn */
  4,					/* cost for loading QImode using movzbl */
  {2, 4, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 4, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {8, 8, 8},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {8, 8, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {4, 8},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 8},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 8, 16},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 8, 16},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  0,					/* size of prefetch block */
  0,					/* number of parallel prefetches */
  COSTS_N_INSNS (8),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (16),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (73),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (3),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (3),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (83),			/* cost of FSQRT instruction.  */
  {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS}
};
static const
struct processor_costs pentium_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1),			/* cost of a lea instruction */
  COSTS_N_INSNS (4),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (11),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (11),			/* HI */
   COSTS_N_INSNS (11),			/* SI */
   COSTS_N_INSNS (11),			/* DI */
   COSTS_N_INSNS (11)},			/* other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (25),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (25),			/* HI */
   COSTS_N_INSNS (25),			/* SI */
   COSTS_N_INSNS (25),			/* DI */
   COSTS_N_INSNS (25)},			/* other */
  COSTS_N_INSNS (3),			/* cost of movsx */
  COSTS_N_INSNS (2),			/* cost of movzx */
  8,					/* "large" insn */
  6,					/* cost for loading QImode using movzbl */
  {2, 4, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 4, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {2, 2, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 6},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  8,					/* cost of moving MMX register */
  {8, 8},				/* cost of loading MMX registers
					   in SImode and DImode */
  {8, 8},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 8, 16},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 8, 16},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  0,					/* size of prefetch block */
  0,					/* number of parallel prefetches */
  COSTS_N_INSNS (3),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (3),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (39),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (1),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (70),			/* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS}
};
static const
struct processor_costs pentiumpro_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1),			/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (4),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),			/* HI */
   COSTS_N_INSNS (4),			/* SI */
   COSTS_N_INSNS (4),			/* DI */
   COSTS_N_INSNS (4)},			/* other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (17),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (17),			/* HI */
   COSTS_N_INSNS (17),			/* SI */
   COSTS_N_INSNS (17),			/* DI */
   COSTS_N_INSNS (17)},			/* other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  2,					/* cost for loading QImode using movzbl */
  {4, 4, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 2, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {2, 2, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 6},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {2, 2},				/* cost of loading MMX registers
					   in SImode and DImode */
  {2, 2},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {2, 2, 8},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {2, 2, 8},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  32,					/* size of prefetch block */
  6,					/* number of parallel prefetches */
  COSTS_N_INSNS (3),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (5),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (56),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (56),			/* cost of FSQRT instruction.  */
  /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes
     (we ensure the alignment).  For small blocks an inline loop is still a
     noticeable win; for bigger blocks either rep movsl or rep movsb is the
     way to go.  Rep movsb apparently has more expensive startup time in the
     CPU, but after 4K the difference is down in the noise.  */
  {{rep_prefix_4_byte, {{128, loop}, {1024, unrolled_loop},
			{8192, rep_prefix_4_byte}, {-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_4_byte, {{1024, unrolled_loop},
			{8192, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS}
};
static const
struct processor_costs geode_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1),			/* cost of a lea instruction */
  COSTS_N_INSNS (2),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),			/* HI */
   COSTS_N_INSNS (7),			/* SI */
   COSTS_N_INSNS (7),			/* DI */
   COSTS_N_INSNS (7)},			/* other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (15),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (23),			/* HI */
   COSTS_N_INSNS (39),			/* SI */
   COSTS_N_INSNS (39),			/* DI */
   COSTS_N_INSNS (39)},			/* other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  1,					/* cost for loading QImode using movzbl */
  {1, 1, 1},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {1, 1, 1},				/* cost of storing integer registers */
  1,					/* cost of reg,reg fld/fst */
  {1, 1, 1},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 6, 6},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */

  1,					/* cost of moving MMX register */
  {1, 1},				/* cost of loading MMX registers
					   in SImode and DImode */
  {1, 1},				/* cost of storing MMX registers
					   in SImode and DImode */
  1,					/* cost of moving SSE register */
  {1, 1, 1},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {1, 1, 1},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  1,					/* MMX or SSE register to integer */
  32,					/* size of prefetch block */
  1,					/* number of parallel prefetches */
  COSTS_N_INSNS (6),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (11),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (47),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (1),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (54),			/* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS}
};
static const
struct processor_costs k6_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (2),			/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (3),			/* HI */
   COSTS_N_INSNS (3),			/* SI */
   COSTS_N_INSNS (3),			/* DI */
   COSTS_N_INSNS (3)},			/* other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (18),			/* HI */
   COSTS_N_INSNS (18),			/* SI */
   COSTS_N_INSNS (18),			/* DI */
   COSTS_N_INSNS (18)},			/* other */
  COSTS_N_INSNS (2),			/* cost of movsx */
  COSTS_N_INSNS (2),			/* cost of movzx */
  8,					/* "large" insn */
  3,					/* cost for loading QImode using movzbl */
  {4, 5, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 3, 2},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {6, 6, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 4},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {2, 2},				/* cost of loading MMX registers
					   in SImode and DImode */
  {2, 2},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {2, 2, 8},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {2, 2, 8},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  6,					/* MMX or SSE register to integer */
  32,					/* size of prefetch block */
  1,					/* number of parallel prefetches */
  COSTS_N_INSNS (2),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (2),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (56),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (56),			/* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS}
};
static const
struct processor_costs athlon_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (2),			/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (5),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (5),			/* HI */
   COSTS_N_INSNS (5),			/* SI */
   COSTS_N_INSNS (5),			/* DI */
   COSTS_N_INSNS (5)},			/* other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),			/* HI */
   COSTS_N_INSNS (42),			/* SI */
   COSTS_N_INSNS (74),			/* DI */
   COSTS_N_INSNS (74)},			/* other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  4,					/* cost for loading QImode using movzbl */
  {3, 4, 3},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {3, 4, 3},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {4, 4, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {6, 6, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {4, 4},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 4},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 4, 6},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 4, 5},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  5,					/* MMX or SSE register to integer */
  64,					/* size of prefetch block */
  6,					/* number of parallel prefetches */
  COSTS_N_INSNS (4),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (24),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),			/* cost of FSQRT instruction.  */
  /* For some reason, Athlon deals better with the REP prefix (relative to
     loops) than K8 does.  Alignment becomes important after 8 bytes for
     memcpy and 128 bytes for memset.  */
  {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS}
};
static const
struct processor_costs k8_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (2),			/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),			/* HI */
   COSTS_N_INSNS (3),			/* SI */
   COSTS_N_INSNS (4),			/* DI */
   COSTS_N_INSNS (5)},			/* other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),			/* HI */
   COSTS_N_INSNS (42),			/* SI */
   COSTS_N_INSNS (74),			/* DI */
   COSTS_N_INSNS (74)},			/* other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  4,					/* cost for loading QImode using movzbl */
  {3, 4, 3},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {3, 4, 3},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {4, 4, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {6, 6, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {3, 3},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 4},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 3, 6},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 4, 5},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  5,					/* MMX or SSE register to integer */
  64,					/* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set the number of simultaneous
     prefetches to a large constant to reflect this (it is probably not a
     good idea to leave the number of prefetches entirely unlimited, as
     their execution also takes some time).  */
  100,					/* number of parallel prefetches */
  COSTS_N_INSNS (4),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),			/* cost of FSQRT instruction.  */
  /* K8 has optimized REP instruction for medium sized blocks, but for very
     small blocks it is better to use loop.  For large blocks, libcall can
     do nontemporal accesses and beat inline considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
	      {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}}
};
static const
struct processor_costs amdfam10_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (2),			/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),			/* HI */
   COSTS_N_INSNS (3),			/* SI */
   COSTS_N_INSNS (4),			/* DI */
   COSTS_N_INSNS (5)},			/* other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),			/* HI */
   COSTS_N_INSNS (51),			/* SI */
   COSTS_N_INSNS (83),			/* DI */
   COSTS_N_INSNS (83)},			/* other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  4,					/* cost for loading QImode using movzbl */
  {3, 4, 3},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {3, 4, 3},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {4, 4, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {6, 6, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {3, 3},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 4},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 4, 3},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 4, 5},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
					/* On K8:
					     MOVD reg64, xmmreg  Double  FSTORE 4
					     MOVD reg32, xmmreg  Double  FSTORE 4
					   On AMDFAM10:
					     MOVD reg64, xmmreg  Double  FADD 3
					     MOVD reg32, xmmreg  Double  FADD 3  */
  64,					/* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set the number of simultaneous
     prefetches to a large constant to reflect this (it is probably not a
     good idea to leave the number of prefetches entirely unlimited, as
     their execution also takes some time).  */
  100,					/* number of parallel prefetches */
  COSTS_N_INSNS (4),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),			/* cost of FSQRT instruction.  */

  /* AMDFAM10 has optimized REP instruction for medium sized blocks, but for
     very small blocks it is better to use loop.  For large blocks, libcall
     can do nontemporal accesses and beat inline considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
	      {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}}
};
static const
struct processor_costs pentium4_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (3),			/* cost of a lea instruction */
  COSTS_N_INSNS (4),			/* variable shift costs */
  COSTS_N_INSNS (4),			/* constant shift costs */
  {COSTS_N_INSNS (15),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (15),			/* HI */
   COSTS_N_INSNS (15),			/* SI */
   COSTS_N_INSNS (15),			/* DI */
   COSTS_N_INSNS (15)},			/* other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (56),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (56),			/* HI */
   COSTS_N_INSNS (56),			/* SI */
   COSTS_N_INSNS (56),			/* DI */
   COSTS_N_INSNS (56)},			/* other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  16,					/* "large" insn */
  2,					/* cost for loading QImode using movzbl */
  {4, 5, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 3, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {2, 2, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 6},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {2, 2},				/* cost of loading MMX registers
					   in SImode and DImode */
  {2, 2},				/* cost of storing MMX registers
					   in SImode and DImode */
  12,					/* cost of moving SSE register */
  {12, 12, 12},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {2, 2, 8},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  10,					/* MMX or SSE register to integer */
  64,					/* size of prefetch block */
  6,					/* number of parallel prefetches */
  COSTS_N_INSNS (5),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (7),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (43),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (43),			/* cost of FSQRT instruction.  */
  {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
	      {-1, libcall}}},
   DUMMY_STRINGOP_ALGS}
};
static const
struct processor_costs nocona_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1),			/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (10),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (10),			/* HI */
   COSTS_N_INSNS (10),			/* SI */
   COSTS_N_INSNS (10),			/* DI */
   COSTS_N_INSNS (10)},			/* other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (66),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (66),			/* HI */
   COSTS_N_INSNS (66),			/* SI */
   COSTS_N_INSNS (66),			/* DI */
   COSTS_N_INSNS (66)},			/* other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  16,					/* "large" insn */
  4,					/* cost for loading QImode using movzbl */
  {4, 4, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {4, 4, 4},				/* cost of storing integer registers */
  3,					/* cost of reg,reg fld/fst */
  {12, 12, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 4},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  6,					/* cost of moving MMX register */
  {12, 12},				/* cost of loading MMX registers
					   in SImode and DImode */
  {12, 12},				/* cost of storing MMX registers
					   in SImode and DImode */
  6,					/* cost of moving SSE register */
  {12, 12, 12},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {12, 12, 12},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  8,					/* MMX or SSE register to integer */
  128,					/* size of prefetch block */
  8,					/* number of parallel prefetches */
  COSTS_N_INSNS (6),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (40),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (3),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (3),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (44),			/* cost of FSQRT instruction.  */
  {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
   {libcall, {{32, loop}, {20000, rep_prefix_8_byte},
	      {100000, unrolled_loop}, {-1, libcall}}}},
  {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
	      {-1, libcall}}},
   {libcall, {{24, loop}, {64, unrolled_loop},
	      {8192, rep_prefix_8_byte}, {-1, libcall}}}}
};
static const
struct processor_costs core2_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1) + 1,		/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (3),			/* HI */
   COSTS_N_INSNS (3),			/* SI */
   COSTS_N_INSNS (3),			/* DI */
   COSTS_N_INSNS (3)},			/* other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (22),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (22),			/* HI */
   COSTS_N_INSNS (22),			/* SI */
   COSTS_N_INSNS (22),			/* DI */
   COSTS_N_INSNS (22)},			/* other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  2,					/* cost for loading QImode using movzbl */
  {6, 6, 6},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {4, 4, 4},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {6, 6, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 4},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {6, 6},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 4},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {6, 6, 6},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 4, 4},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  2,					/* MMX or SSE register to integer */
  128,					/* size of prefetch block */
  8,					/* number of parallel prefetches */
  COSTS_N_INSNS (3),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (5),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (32),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (1),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (58),			/* cost of FSQRT instruction.  */
  {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{32, loop}, {64, rep_prefix_4_byte},
	      {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {15, unrolled_loop},
	      {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{24, loop}, {32, unrolled_loop},
	      {8192, rep_prefix_8_byte}, {-1, libcall}}}}
};
/* Generic64 should produce code tuned for Nocona and K8.  */
static const
struct processor_costs generic64_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  /* On all chips considered, lea is 2 cycles or more.  With this cost,
     however, our current implementation of synth_mult uses unnecessary
     temporary registers, causing a regression on several SPECfp
     benchmarks.  */
  COSTS_N_INSNS (1) + 1,		/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),			/* HI */
   COSTS_N_INSNS (3),			/* SI */
   COSTS_N_INSNS (4),			/* DI */
   COSTS_N_INSNS (2)},			/* other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),			/* HI */
   COSTS_N_INSNS (42),			/* SI */
   COSTS_N_INSNS (74),			/* DI */
   COSTS_N_INSNS (74)},			/* other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  4,					/* cost for loading QImode using movzbl */
  {4, 4, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {4, 4, 4},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {12, 12, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {6, 6, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {8, 8},				/* cost of loading MMX registers
					   in SImode and DImode */
  {8, 8},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {8, 8, 8},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {8, 8, 8},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  5,					/* MMX or SSE register to integer */
  64,					/* size of prefetch block */
  6,					/* number of parallel prefetches */
  /* Benchmarks show large regressions on the K8 sixtrack benchmark when
     this value is increased to the perhaps more appropriate value of 5.  */
  COSTS_N_INSNS (8),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (20),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (8),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (8),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (40),			/* cost of FSQRT instruction.  */
  {DUMMY_STRINGOP_ALGS,
   {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {DUMMY_STRINGOP_ALGS,
   {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}}
};
/* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona
   and K8.  */
static const
struct processor_costs generic32_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1) + 1,		/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),			/* HI */
   COSTS_N_INSNS (3),			/* SI */
   COSTS_N_INSNS (4),			/* DI */
   COSTS_N_INSNS (2)},			/* other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),			/* HI */
   COSTS_N_INSNS (42),			/* SI */
   COSTS_N_INSNS (74),			/* DI */
   COSTS_N_INSNS (74)},			/* other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  4,					/* cost for loading QImode using movzbl */
  {4, 4, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {4, 4, 4},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {12, 12, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {6, 6, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {8, 8},				/* cost of loading MMX registers
					   in SImode and DImode */
  {8, 8},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {8, 8, 8},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {8, 8, 8},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  5,					/* MMX or SSE register to integer */
  64,					/* size of prefetch block */
  6,					/* number of parallel prefetches */
  COSTS_N_INSNS (8),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (20),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (8),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (8),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (40),			/* cost of FSQRT instruction.  */
  {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS}
};
const struct processor_costs *ix86_cost = &pentium_cost;
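/* Note: &pentium_cost here is only the compile-time default; the expectation
   (matching the processor_target_table scheme below) is that override_options
   re-points ix86_cost at the cost table of the selected -mtune CPU, or at
   size_cost when optimizing for size.  */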
/* Processor feature/optimization bitmasks.  */
#define m_386 (1<<PROCESSOR_I386)
#define m_486 (1<<PROCESSOR_I486)
#define m_PENT (1<<PROCESSOR_PENTIUM)
#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
#define m_PENT4 (1<<PROCESSOR_PENTIUM4)
#define m_NOCONA (1<<PROCESSOR_NOCONA)
#define m_CORE2 (1<<PROCESSOR_CORE2)

#define m_GEODE (1<<PROCESSOR_GEODE)
#define m_K6 (1<<PROCESSOR_K6)
#define m_K6_GEODE (m_K6 | m_GEODE)
#define m_K8 (1<<PROCESSOR_K8)
#define m_ATHLON (1<<PROCESSOR_ATHLON)
#define m_ATHLON_K8 (m_K8 | m_ATHLON)
#define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
#define m_ATHLON_K8_AMDFAM10 (m_K8 | m_ATHLON | m_AMDFAM10)

#define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
#define m_GENERIC64 (1<<PROCESSOR_GENERIC64)

/* Generic instruction choice should be the common subset of supported CPUs
   (PPro/PENT4/NOCONA/CORE2/Athlon/K8).  */
#define m_GENERIC (m_GENERIC32 | m_GENERIC64)
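/* A minimal sketch of how these masks are meant to be consumed, assuming
   the usual scheme where the active tuning is a single processor index:

     unsigned int ix86_tune_mask = 1u << ix86_tune;
     if (ix86_tune_features[X86_TUNE_USE_LEAVE] & ix86_tune_mask)
       ... use leave in the epilogue ...

   i.e. each table entry below is just the OR of the m_* bits of the
   processors for which the tuning applies.  */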
/* Feature tests against the various tunings.  */
unsigned int ix86_tune_features[X86_TUNE_LAST] = {
  /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
     negatively, so enabling it for Generic64 seems like a good code-size
     tradeoff.  We can't enable it for 32-bit generic because it does not
     work well with PPro-based chips.  */
  m_386 | m_K6_GEODE | m_ATHLON_K8_AMDFAM10 | m_CORE2 | m_GENERIC64,

  /* X86_TUNE_PUSH_MEMORY */
  m_386 | m_K6_GEODE | m_ATHLON_K8_AMDFAM10 | m_PENT4
  | m_NOCONA | m_CORE2 | m_GENERIC,

  /* X86_TUNE_ZERO_EXTEND_WITH_AND */

  /* X86_TUNE_USE_BIT_TEST */

  /* X86_TUNE_UNROLL_STRLEN */
  m_486 | m_PENT | m_PPRO | m_ATHLON_K8_AMDFAM10 | m_K6 | m_CORE2 | m_GENERIC,

  /* X86_TUNE_DEEP_BRANCH_PREDICTION */
  m_PPRO | m_K6_GEODE | m_ATHLON_K8_AMDFAM10 | m_PENT4
  | m_NOCONA | m_CORE2 | m_GENERIC,

  /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
     on simulation results.  But after P4 was made, no performance benefit
     was observed with branch hints.  They also increase the code size.
     As a result, icc never generates branch hints.  */

  /* X86_TUNE_DOUBLE_WITH_ADD */

  /* X86_TUNE_USE_SAHF */
  m_PPRO | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_PENT4
  | m_NOCONA | m_CORE2 | m_GENERIC,

  /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
     partial dependencies.  */
  m_ATHLON_K8_AMDFAM10 | m_PPRO | m_PENT4 | m_NOCONA
  | m_CORE2 | m_GENERIC | m_GEODE /* m_386 | m_K6 */,

  /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
     register stalls on the Generic32 compilation setting as well.  However,
     in the current implementation the partial register stalls are not
     eliminated very well: they can be introduced via subregs synthesized
     by combine and can happen in caller/callee saving sequences.  Because
     this option pays back little on PPro-based chips and conflicts with
     the partial-register dependencies used by Athlon/P4-based chips, it is
     better to leave it off for generic32 for now.  */

  /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
  m_CORE2 | m_GENERIC,

  /* X86_TUNE_USE_HIMODE_FIOP */
  m_386 | m_486 | m_K6_GEODE,

  /* X86_TUNE_USE_SIMODE_FIOP */
  ~(m_PPRO | m_ATHLON_K8_AMDFAM10 | m_PENT | m_CORE2 | m_GENERIC),

  /* X86_TUNE_USE_MOV0 */

  /* X86_TUNE_USE_CLTD */
  ~(m_PENT | m_K6 | m_CORE2 | m_GENERIC),

  /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx.  */

  /* X86_TUNE_SPLIT_LONG_MOVES */

  /* X86_TUNE_READ_MODIFY_WRITE */

  /* X86_TUNE_READ_MODIFY */

  /* X86_TUNE_PROMOTE_QIMODE */
  m_K6_GEODE | m_PENT | m_386 | m_486 | m_ATHLON_K8_AMDFAM10 | m_CORE2
  | m_GENERIC /* | m_PENT4 ? */,

  /* X86_TUNE_FAST_PREFIX */
  ~(m_PENT | m_486 | m_386),

  /* X86_TUNE_SINGLE_STRINGOP */
  m_386 | m_PENT4 | m_NOCONA,

  /* X86_TUNE_QIMODE_MATH */

  /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
     register stalls.  Just like X86_TUNE_PARTIAL_REG_STALL this option
     might be considered for Generic32 if our scheme for avoiding partial
     stalls was more effective.  */

  /* X86_TUNE_PROMOTE_QI_REGS */

  /* X86_TUNE_PROMOTE_HI_REGS */

  /* X86_TUNE_ADD_ESP_4: Enable if add/sub is preferred over 1/2 push/pop.  */
  m_ATHLON_K8_AMDFAM10 | m_K6_GEODE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,

  /* X86_TUNE_ADD_ESP_8 */
  m_ATHLON_K8_AMDFAM10 | m_PPRO | m_K6_GEODE | m_386
  | m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,

  /* X86_TUNE_SUB_ESP_4 */
  m_ATHLON_K8_AMDFAM10 | m_PPRO | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,

  /* X86_TUNE_SUB_ESP_8 */
  m_ATHLON_K8_AMDFAM10 | m_PPRO | m_386 | m_486
  | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,

  /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
     for DFmode copies.  */
  ~(m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
    | m_GENERIC | m_GEODE),

  /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
  m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,

  /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
     conflict here between PPro/Pentium4-based chips that treat 128-bit
     SSE registers as single units and K8-based chips that divide SSE
     registers into two 64-bit halves.  This knob promotes all store
     destinations to be 128-bit so as to allow register renaming on
     128-bit SSE units, but it usually results in one extra microop on
     64-bit SSE units.  Experimental results show that disabling this
     option on P4 brings over a 20% SPECfp regression, while enabling it
     on K8 brings roughly a 2.4% regression that can be partly masked by
     careful scheduling of moves.  */
  m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC | m_AMDFAM10,

  /* X86_TUNE_SSE_UNALIGNED_MOVE_OPTIMAL */

  /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and
     dependencies are resolved on SSE register parts instead of whole
     registers, so we may maintain just the lower part of scalar values
     in the proper format, leaving the upper part undefined.  */

  /* X86_TUNE_SSE_TYPELESS_STORES */
  m_ATHLON_K8_AMDFAM10,

  /* X86_TUNE_SSE_LOAD0_BY_PXOR */
  m_PPRO | m_PENT4 | m_NOCONA,

  /* X86_TUNE_MEMORY_MISMATCH_STALL */
  m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,

  /* X86_TUNE_PROLOGUE_USING_MOVE */
  m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC,

  /* X86_TUNE_EPILOGUE_USING_MOVE */
  m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC,

  /* X86_TUNE_SHIFT1 */

  /* X86_TUNE_USE_FFREEP */
  m_ATHLON_K8_AMDFAM10,

  /* X86_TUNE_INTER_UNIT_MOVES */
  ~(m_ATHLON_K8_AMDFAM10 | m_GENERIC),

  /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
     than 4 branch instructions in the 16 byte window.  */
  m_PPRO | m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,

  /* X86_TUNE_SCHEDULE */
  m_PPRO | m_ATHLON_K8_AMDFAM10 | m_K6_GEODE | m_PENT | m_CORE2 | m_GENERIC,

  /* X86_TUNE_USE_BT */
  m_ATHLON_K8_AMDFAM10,

  /* X86_TUNE_USE_INCDEC */
  ~(m_PENT4 | m_NOCONA | m_GENERIC),

  /* X86_TUNE_PAD_RETURNS */
  m_ATHLON_K8_AMDFAM10 | m_CORE2 | m_GENERIC,

  /* X86_TUNE_EXT_80387_CONSTANTS */
  m_K6_GEODE | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC,

  /* X86_TUNE_SHORTEN_X87_SSE */

  /* X86_TUNE_AVOID_VECTOR_DECODE */

  /* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have the same latency for
     HImode and SImode multiply, but the 386 and 486 do HImode multiply
     faster.  */

  /* X86_TUNE_SLOW_IMUL_IMM32_MEM: IMUL of a 32-bit constant and memory is
     a vector path on AMD machines.  */
  m_K8 | m_GENERIC64 | m_AMDFAM10,

  /* X86_TUNE_SLOW_IMUL_IMM8: IMUL of an 8-bit constant is a vector path on
     AMD machines.  */
  m_K8 | m_GENERIC64 | m_AMDFAM10,

  /* X86_TUNE_MOVE_M1_VIA_OR: On pentiums, it is faster to load -1 via OR
     than via MOV.  */

  /* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is,
     but one byte longer.  */

  /* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector decoded with memory
     operand that cannot be represented using a modRM byte.  The XOR
     replacement is long decoded, so this split helps here as well.  */
};
/* Feature tests against the various architecture variations.  */
unsigned int ix86_arch_features[X86_ARCH_LAST] = {
  /* X86_ARCH_CMOVE */
  m_PPRO | m_GEODE | m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA,

  /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486.  */

  /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for
     Pentium.  */

  /* X86_ARCH_XADD: Exchange and add was added for 80486.  */

  /* X86_ARCH_BSWAP: Byteswap was added for 80486.  */
};
static const unsigned int x86_accumulate_outgoing_args
  = m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC;

static const unsigned int x86_arch_always_fancy_math_387
  = m_PENT | m_PPRO | m_ATHLON_K8_AMDFAM10 | m_PENT4
    | m_NOCONA | m_CORE2 | m_GENERIC;

static enum stringop_alg stringop_alg = no_stringop;
/* In case the average insn count for single function invocation is
   lower than this constant, emit fast (but longer) prologue and
   epilogue code.  */
#define FAST_PROLOGUE_INSN_COUNT 20
/* Names for 8 (low), 8 (high), and 16-bit registers, respectively.  */
static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
/* Array of the smallest class containing reg number REGNO, indexed by
   REGNO.  Used by REGNO_REG_CLASS in i386.h.  */

enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
{
  /* ax, dx, cx, bx */
  AREG, DREG, CREG, BREG,
  /* si, di, bp, sp */
  SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
  /* FP registers */
  FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
  FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
  /* arg pointer */
  NON_Q_REGS,
  /* flags, fpsr, fpcr, frame */
  NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
  /* SSE registers */
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  SSE_REGS, SSE_REGS,
  /* MMX registers */
  MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
  MMX_REGS, MMX_REGS,
  /* REX registers */
  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
  /* SSE REX registers */
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  SSE_REGS, SSE_REGS,
};
1292 /* The "default" register map used in 32bit mode. */
1294 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
1296 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
1297 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
1298 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1299 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
1300 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
1301 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1302 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
static int const x86_64_int_parameter_registers[6] =
{
  5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
  FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
};

static int const x86_64_ms_abi_int_parameter_registers[4] =
{
  2 /*RCX*/, 1 /*RDX*/,
  FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
};

static int const x86_64_int_return_registers[4] =
{
  0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
};
1322 /* The "default" register map used in 64bit mode. */
1323 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
1325 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
1326 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
1327 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1328 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
1329 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
1330 8,9,10,11,12,13,14,15, /* extended integer registers */
1331 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
/* Define the register numbers to be used in Dwarf debugging information.
   The SVR4 reference port C compiler uses the following register numbers
   in its Dwarf output code:
	0 for %eax (gcc regno = 0)
	1 for %ecx (gcc regno = 2)
	2 for %edx (gcc regno = 1)
	3 for %ebx (gcc regno = 3)
	4 for %esp (gcc regno = 7)
	5 for %ebp (gcc regno = 6)
	6 for %esi (gcc regno = 4)
	7 for %edi (gcc regno = 5)

   The following three DWARF register numbers are never generated by
   the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
   believes these numbers have these meanings.
	8 for %eip (no gcc equivalent)
	9 for %eflags (gcc regno = 17)
	10 for %trapno (no gcc equivalent)

   It is not at all clear how we should number the FP stack registers
   for the x86 architecture.  If the version of SDB on x86/svr4 were
   a bit less brain dead with respect to floating-point then we would
   have a precedent to follow with respect to DWARF register numbers
   for x86 FP registers, but the SDB on x86/svr4 is so completely
   broken with respect to FP registers that it is hardly worth thinking
   of it as something to strive for compatibility with.

   The version of x86/svr4 SDB I have at the moment does (partially)
   seem to believe that DWARF register number 11 is associated with
   the x86 register %st(0), but that's about all.  Higher DWARF
   register numbers don't seem to be associated with anything in
   particular, and even for DWARF regno 11, SDB only seems to under-
   stand that it should say that a variable lives in %st(0) (when
   asked via an `=' command) if we said it was in DWARF regno 11,
   but SDB still prints garbage when asked for the value of the
   variable in question (via a `/' command).
   (Also note that the labels SDB prints for various FP stack regs
   when doing an `x' command are all wrong.)

   Note that these problems generally don't affect the native SVR4
   C compiler because it doesn't allow the use of -O with -g and
   because when it is *not* optimizing, it allocates a memory
   location for each floating-point variable, and the memory
   location is what gets described in the DWARF AT_location
   attribute for the variable in question.

   Regardless of the severe mental illness of the x86/svr4 SDB, we
   do something sensible here and we use the following DWARF
   register numbers.  Note that these are all stack-top-relative
   numbers.
	11 for %st(0) (gcc regno = 8)
	12 for %st(1) (gcc regno = 9)
	13 for %st(2) (gcc regno = 10)
	14 for %st(3) (gcc regno = 11)
	15 for %st(4) (gcc regno = 12)
	16 for %st(5) (gcc regno = 13)
	17 for %st(6) (gcc regno = 14)
	18 for %st(7) (gcc regno = 15)
*/
int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 2, 1, 3, 6, 7, 5, 4,		/* general regs */
  11, 12, 13, 14, 15, 16, 17, 18,	/* fp regs */
  -1, 9, -1, -1, -1,			/* arg, flags, fpsr, fpcr, frame */
  21, 22, 23, 24, 25, 26, 27, 28,	/* SSE registers */
  29, 30, 31, 32, 33, 34, 35, 36,	/* MMX registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended integer registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended SSE registers */
};
/* Test and compare insns in i386.md store the information needed to
   generate branch and scc insns here.  */

rtx ix86_compare_op0 = NULL_RTX;
rtx ix86_compare_op1 = NULL_RTX;
rtx ix86_compare_emitted = NULL_RTX;
/* Size of the register save area.  */
#define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
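/* Worked example, assuming the usual 64-bit values REGPARM_MAX == 6,
   UNITS_PER_WORD == 8 and SSE_REGPARM_MAX == 8: the va_arg register save
   area is 6*8 + 8*16 = 176 bytes, matching the x86-64 psABI layout of six
   integer register slots followed by eight 16-byte SSE slots.  */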
/* Define the structure for the machine field in struct function.  */

struct stack_local_entry GTY(())
{
  unsigned short mode;
  unsigned short n;
  rtx rtl;
  struct stack_local_entry *next;
};
/* Structure describing stack frame layout.
   Stack grows downward:

   [arguments]
					<- ARG_POINTER
   saved pc
   saved frame pointer if frame_pointer_needed
					<- HARD_FRAME_POINTER
   [saved regs]
   [padding1]		 \
   [va_arg registers]	  > to_allocate	<- FRAME_POINTER
   [frame]		 /
   [padding2]
  */
struct ix86_frame
{
  int nregs;
  int padding1;
  int va_arg_size;
  HOST_WIDE_INT frame;
  int padding2;
  int outgoing_arguments_size;
  int red_zone_size;

  HOST_WIDE_INT to_allocate;
  /* The offsets relative to ARG_POINTER.  */
  HOST_WIDE_INT frame_pointer_offset;
  HOST_WIDE_INT hard_frame_pointer_offset;
  HOST_WIDE_INT stack_pointer_offset;

  /* When save_regs_using_mov is set, emit prologue using
     move instead of push instructions.  */
  bool save_regs_using_mov;
};
/* Code model option.  */
enum cmodel ix86_cmodel;
/* Asm dialect.  */
enum asm_dialect ix86_asm_dialect = ASM_ATT;
/* TLS dialect.  */
enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;

/* Which unit we are generating floating point math for.  */
enum fpmath_unit ix86_fpmath;

/* Which cpu are we scheduling for.  */
enum processor_type ix86_tune;

/* Which instruction set architecture to use.  */
enum processor_type ix86_arch;

/* True if the SSE prefetch instruction is not a NOP.  */
int x86_prefetch_sse;

/* ix86_regparm_string as a number */
static int ix86_regparm;

/* -mstackrealign option */
extern int ix86_force_align_arg_pointer;
static const char ix86_force_align_arg_pointer_string[]
  = "force_align_arg_pointer";

/* Preferred alignment for stack boundary in bits.  */
unsigned int ix86_preferred_stack_boundary;

/* Values 1-5: see jump.c */
int ix86_branch_cost;

/* Variables which are this size or smaller are put in the data/bss
   or ldata/lbss sections.  */

int ix86_section_threshold = 65536;

/* Prefix built by ASM_GENERATE_INTERNAL_LABEL.  */
char internal_label_prefix[16];
int internal_label_prefix_len;
/* Register class used for passing given 64bit part of the argument.
   These represent classes as documented by the PS ABI, with the exception
   of the SSESF and SSEDF classes, which are basically SSE class; gcc will
   just use SF or DFmode moves instead of DImode moves to avoid reformatting
   penalties.

   Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
   whenever possible (the upper half does contain padding).  */
enum x86_64_reg_class
  {
    X86_64_NO_CLASS,
    X86_64_INTEGER_CLASS,
    X86_64_INTEGERSI_CLASS,
    X86_64_SSE_CLASS,
    X86_64_SSESF_CLASS,
    X86_64_SSEDF_CLASS,
    X86_64_SSEUP_CLASS,
    X86_64_X87_CLASS,
    X86_64_X87UP_CLASS,
    X86_64_COMPLEX_X87_CLASS,
    X86_64_MEMORY_CLASS
  };
static const char * const x86_64_reg_class_name[] =
{
  "no", "integer", "integerSI", "sse", "sseSF", "sseDF",
  "sseup", "x87", "x87up", "cplx87", "no"
};
1527 #define MAX_CLASSES 4
1529 /* Table of constants used by fldpi, fldln2, etc.... */
1530 static REAL_VALUE_TYPE ext_80387_constants_table [5];
1531 static bool ext_80387_constants_init = 0;
1534 static struct machine_function * ix86_init_machine_status (void);
1535 static rtx ix86_function_value (tree, tree, bool);
1536 static int ix86_function_regparm (tree, tree);
1537 static void ix86_compute_frame_layout (struct ix86_frame *);
1538 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
1542 /* The svr4 ABI for the i386 says that records and unions are returned
1543 in memory. */
1544 #ifndef DEFAULT_PCC_STRUCT_RETURN
1545 #define DEFAULT_PCC_STRUCT_RETURN 1
1546 #endif
1548 /* Implement TARGET_HANDLE_OPTION. */
1550 static bool
1551 ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
1558 target_flags &= ~MASK_3DNOW_A;
1559 target_flags_explicit |= MASK_3DNOW_A;
1566 target_flags &= ~(MASK_3DNOW | MASK_3DNOW_A);
1567 target_flags_explicit |= MASK_3DNOW | MASK_3DNOW_A;
1574 target_flags &= ~(MASK_SSE2 | MASK_SSE3 | MASK_SSSE3
1576 target_flags_explicit |= (MASK_SSE2 | MASK_SSE3 | MASK_SSSE3
1584 target_flags &= ~(MASK_SSE3 | MASK_SSSE3 | MASK_SSE4A);
1585 target_flags_explicit |= MASK_SSE3 | MASK_SSSE3 | MASK_SSE4A;
1592 target_flags &= ~(MASK_SSSE3 | MASK_SSE4A);
1593 target_flags_explicit |= MASK_SSSE3 | MASK_SSE4A;
1600 target_flags &= ~MASK_SSE4A;
1601 target_flags_explicit |= MASK_SSE4A;
1610 /* Sometimes certain combinations of command options do not make
1611 sense on a particular target machine. You can define a macro
1612 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1613 defined, is executed once just after all the command options have
1614 been parsed.
1616 Don't use this macro to turn on various extra optimizations for
1617 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1619 void
1620 override_options (void)
1623 int ix86_tune_defaulted = 0;
1624 unsigned int ix86_arch_mask, ix86_tune_mask;
1626 /* Comes from final.c -- no real reason to change it. */
1627 #define MAX_CODE_ALIGN 16
1629 static const struct ptt
1630 {
1631 const struct processor_costs *cost; /* Processor costs */
1632 const int target_enable; /* Target flags to enable. */
1633 const int target_disable; /* Target flags to disable. */
1634 const int align_loop; /* Default alignments. */
1635 const int align_loop_max_skip;
1636 const int align_jump;
1637 const int align_jump_max_skip;
1638 const int align_func;
1639 }
1640 const processor_target_table[PROCESSOR_max] =
1641 {
1642 {&i386_cost, 0, 0, 4, 3, 4, 3, 4},
1643 {&i486_cost, 0, 0, 16, 15, 16, 15, 16},
1644 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
1645 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
1646 {&geode_cost, 0, 0, 0, 0, 0, 0, 0},
1647 {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
1648 {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
1649 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
1650 {&k8_cost, 0, 0, 16, 7, 16, 7, 16},
1651 {&nocona_cost, 0, 0, 0, 0, 0, 0, 0},
1652 {&core2_cost, 0, 0, 16, 7, 16, 7, 16},
1653 {&generic32_cost, 0, 0, 16, 7, 16, 7, 16},
1654 {&generic64_cost, 0, 0, 16, 7, 16, 7, 16},
1655 {&amdfam10_cost, 0, 0, 32, 7, 32, 7, 32}
1656 };
1658 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
1659 static struct pta
1660 {
1661 const char *const name; /* processor name or nickname. */
1662 const enum processor_type processor;
1663 const enum pta_flags
1664 {
1665 PTA_SSE = 1 << 0,
1666 PTA_SSE2 = 1 << 1,
1667 PTA_SSE3 = 1 << 2,
1668 PTA_MMX = 1 << 3,
1669 PTA_PREFETCH_SSE = 1 << 4,
1670 PTA_3DNOW = 1 << 5,
1671 PTA_3DNOW_A = 1 << 6,
1672 PTA_64BIT = 1 << 7,
1673 PTA_SSSE3 = 1 << 8,
1674 PTA_CX16 = 1 << 9,
1675 PTA_POPCNT = 1 << 10,
1676 PTA_ABM = 1 << 11,
1677 PTA_SSE4A = 1 << 12,
1678 PTA_NO_SAHF = 1 << 13
1679 } flags;
1680 }
1681 const processor_alias_table[] =
1682 {
1683 {"i386", PROCESSOR_I386, 0},
1684 {"i486", PROCESSOR_I486, 0},
1685 {"i586", PROCESSOR_PENTIUM, 0},
1686 {"pentium", PROCESSOR_PENTIUM, 0},
1687 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
1688 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
1689 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1690 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1691 {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_PREFETCH_SSE | PTA_SSE},
1692 {"i686", PROCESSOR_PENTIUMPRO, 0},
1693 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
1694 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
1695 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1696 {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1697 {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE | PTA_SSE2},
1698 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1699 | PTA_MMX | PTA_PREFETCH_SSE},
1700 {"pentium4m", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1701 | PTA_MMX | PTA_PREFETCH_SSE},
1702 {"prescott", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3
1703 | PTA_MMX | PTA_PREFETCH_SSE},
1704 {"nocona", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_64BIT
1705 | PTA_MMX | PTA_PREFETCH_SSE
1706 | PTA_CX16 | PTA_NO_SAHF},
1707 {"core2", PROCESSOR_CORE2, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_SSSE3
1708 | PTA_64BIT | PTA_MMX
1709 | PTA_PREFETCH_SSE | PTA_CX16},
1710 {"geode", PROCESSOR_GEODE, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1712 {"k6", PROCESSOR_K6, PTA_MMX},
1713 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1714 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1715 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1717 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
1718 | PTA_3DNOW | PTA_3DNOW_A},
1719 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1720 | PTA_3DNOW_A | PTA_SSE},
1721 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1722 | PTA_3DNOW_A | PTA_SSE},
1723 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1724 | PTA_3DNOW_A | PTA_SSE},
1725 {"x86-64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_64BIT
1726 | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF},
1727 {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1728 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
1729 | PTA_NO_SAHF},
1730 {"opteron", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1731 | PTA_64BIT | PTA_3DNOW_A | PTA_SSE
1732 | PTA_SSE2 | PTA_NO_SAHF},
1733 {"athlon64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1734 | PTA_64BIT | PTA_3DNOW_A | PTA_SSE
1735 | PTA_SSE2 | PTA_NO_SAHF},
1736 {"athlon-fx", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1737 | PTA_64BIT | PTA_3DNOW_A | PTA_SSE
1738 | PTA_SSE2 | PTA_NO_SAHF},
1739 {"amdfam10", PROCESSOR_AMDFAM10, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1740 | PTA_64BIT | PTA_3DNOW_A | PTA_SSE
1741 | PTA_SSE2 | PTA_SSE3 | PTA_POPCNT
1742 | PTA_ABM | PTA_SSE4A | PTA_CX16},
1743 {"barcelona", PROCESSOR_AMDFAM10, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1744 | PTA_64BIT | PTA_3DNOW_A | PTA_SSE
1745 | PTA_SSE2 | PTA_SSE3 | PTA_POPCNT
1746 | PTA_ABM | PTA_SSE4A | PTA_CX16},
1747 {"generic32", PROCESSOR_GENERIC32, 0 /* flags are only used for -march switch. */ },
1748 {"generic64", PROCESSOR_GENERIC64, PTA_64BIT /* flags are only used for -march switch. */ },
1751 int const pta_size = ARRAY_SIZE (processor_alias_table);
1753 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1754 SUBTARGET_OVERRIDE_OPTIONS;
1755 #endif
1757 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
1758 SUBSUBTARGET_OVERRIDE_OPTIONS;
1759 #endif
1761 /* -fPIC is the default for x86_64. */
1762 if (TARGET_MACHO && TARGET_64BIT)
1765 /* Set the default values for switches whose default depends on TARGET_64BIT
1766 in case they weren't overwritten by command line options. */
1769 /* Mach-O doesn't support omitting the frame pointer for now. */
1770 if (flag_omit_frame_pointer == 2)
1771 flag_omit_frame_pointer = (TARGET_MACHO ? 0 : 1);
1772 if (flag_asynchronous_unwind_tables == 2)
1773 flag_asynchronous_unwind_tables = 1;
1774 if (flag_pcc_struct_return == 2)
1775 flag_pcc_struct_return = 0;
1779 if (flag_omit_frame_pointer == 2)
1780 flag_omit_frame_pointer = 0;
1781 if (flag_asynchronous_unwind_tables == 2)
1782 flag_asynchronous_unwind_tables = 0;
1783 if (flag_pcc_struct_return == 2)
1784 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
1787 /* Need to check -mtune=generic first. */
1788 if (ix86_tune_string)
1790 if (!strcmp (ix86_tune_string, "generic")
1791 || !strcmp (ix86_tune_string, "i686")
1792 /* As special support for cross compilers we read -mtune=native
1793 as -mtune=generic. With native compilers we won't see the
1794 -mtune=native, as it was changed by the driver. */
1795 || !strcmp (ix86_tune_string, "native"))
1798 ix86_tune_string = "generic64";
1800 ix86_tune_string = "generic32";
1802 else if (!strncmp (ix86_tune_string, "generic", 7))
1803 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
1807 if (ix86_arch_string)
1808 ix86_tune_string = ix86_arch_string;
1809 if (!ix86_tune_string)
1811 ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT];
1812 ix86_tune_defaulted = 1;
1815 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
1816 need to use a sensible tune option. */
1817 if (!strcmp (ix86_tune_string, "generic")
1818 || !strcmp (ix86_tune_string, "x86-64")
1819 || !strcmp (ix86_tune_string, "i686"))
1822 ix86_tune_string = "generic64";
1824 ix86_tune_string = "generic32";
1827 if (ix86_stringop_string)
1829 if (!strcmp (ix86_stringop_string, "rep_byte"))
1830 stringop_alg = rep_prefix_1_byte;
1831 else if (!strcmp (ix86_stringop_string, "libcall"))
1832 stringop_alg = libcall;
1833 else if (!strcmp (ix86_stringop_string, "rep_4byte"))
1834 stringop_alg = rep_prefix_4_byte;
1835 else if (!strcmp (ix86_stringop_string, "rep_8byte"))
1836 stringop_alg = rep_prefix_8_byte;
1837 else if (!strcmp (ix86_stringop_string, "byte_loop"))
1838 stringop_alg = loop_1_byte;
1839 else if (!strcmp (ix86_stringop_string, "loop"))
1840 stringop_alg = loop;
1841 else if (!strcmp (ix86_stringop_string, "unrolled_loop"))
1842 stringop_alg = unrolled_loop;
1844 error ("bad value (%s) for -mstringop-strategy= switch", ix86_stringop_string);
1846 if (!strcmp (ix86_tune_string, "x86-64"))
1847 warning (OPT_Wdeprecated, "-mtune=x86-64 is deprecated. Use -mtune=k8 or "
1848 "-mtune=generic instead as appropriate.");
1850 if (!ix86_arch_string)
1851 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
1852 if (!strcmp (ix86_arch_string, "generic"))
1853 error ("generic CPU can be used only for -mtune= switch");
1854 if (!strncmp (ix86_arch_string, "generic", 7))
1855 error ("bad value (%s) for -march= switch", ix86_arch_string);
1857 if (ix86_cmodel_string != 0)
1859 if (!strcmp (ix86_cmodel_string, "small"))
1860 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1861 else if (!strcmp (ix86_cmodel_string, "medium"))
1862 ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
1863 else if (!strcmp (ix86_cmodel_string, "large"))
1864 ix86_cmodel = flag_pic ? CM_LARGE_PIC : CM_LARGE;
1866 error ("code model %s does not support PIC mode", ix86_cmodel_string);
1867 else if (!strcmp (ix86_cmodel_string, "32"))
1868 ix86_cmodel = CM_32;
1869 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
1870 ix86_cmodel = CM_KERNEL;
1872 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
1876 /* For TARGET_64BIT_MS_ABI, force pic on, in order to enable the
1877 use of rip-relative addressing. This eliminates fixups that
1878 would otherwise be needed if this object is to be placed in a
1879 DLL, and is essentially just as efficient as direct addressing. */
1880 if (TARGET_64BIT_MS_ABI)
1881 ix86_cmodel = CM_SMALL_PIC, flag_pic = 1;
1882 else if (TARGET_64BIT)
1883 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1885 ix86_cmodel = CM_32;
1887 if (ix86_asm_string != 0)
1890 && !strcmp (ix86_asm_string, "intel"))
1891 ix86_asm_dialect = ASM_INTEL;
1892 else if (!strcmp (ix86_asm_string, "att"))
1893 ix86_asm_dialect = ASM_ATT;
1895 error ("bad value (%s) for -masm= switch", ix86_asm_string);
1897 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
1898 error ("code model %qs not supported in the %s bit mode",
1899 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
1900 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
1901 sorry ("%i-bit mode not compiled in",
1902 (target_flags & MASK_64BIT) ? 64 : 32);
1904 for (i = 0; i < pta_size; i++)
1905 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
1907 ix86_arch = processor_alias_table[i].processor;
1908 /* Default cpu tuning to the architecture. */
1909 ix86_tune = ix86_arch;
1910 if (processor_alias_table[i].flags & PTA_MMX
1911 && !(target_flags_explicit & MASK_MMX))
1912 target_flags |= MASK_MMX;
1913 if (processor_alias_table[i].flags & PTA_3DNOW
1914 && !(target_flags_explicit & MASK_3DNOW))
1915 target_flags |= MASK_3DNOW;
1916 if (processor_alias_table[i].flags & PTA_3DNOW_A
1917 && !(target_flags_explicit & MASK_3DNOW_A))
1918 target_flags |= MASK_3DNOW_A;
1919 if (processor_alias_table[i].flags & PTA_SSE
1920 && !(target_flags_explicit & MASK_SSE))
1921 target_flags |= MASK_SSE;
1922 if (processor_alias_table[i].flags & PTA_SSE2
1923 && !(target_flags_explicit & MASK_SSE2))
1924 target_flags |= MASK_SSE2;
1925 if (processor_alias_table[i].flags & PTA_SSE3
1926 && !(target_flags_explicit & MASK_SSE3))
1927 target_flags |= MASK_SSE3;
1928 if (processor_alias_table[i].flags & PTA_SSSE3
1929 && !(target_flags_explicit & MASK_SSSE3))
1930 target_flags |= MASK_SSSE3;
1931 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1932 x86_prefetch_sse = true;
1933 if (processor_alias_table[i].flags & PTA_CX16)
1934 x86_cmpxchg16b = true;
1935 if (processor_alias_table[i].flags & PTA_POPCNT
1936 && !(target_flags_explicit & MASK_POPCNT))
1937 target_flags |= MASK_POPCNT;
1938 if (processor_alias_table[i].flags & PTA_ABM
1939 && !(target_flags_explicit & MASK_ABM))
1940 target_flags |= MASK_ABM;
1941 if (processor_alias_table[i].flags & PTA_SSE4A
1942 && !(target_flags_explicit & MASK_SSE4A))
1943 target_flags |= MASK_SSE4A;
1944 if (!(TARGET_64BIT && (processor_alias_table[i].flags & PTA_NO_SAHF)))
1946 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1947 error ("CPU you selected does not support x86-64 "
1953 error ("bad value (%s) for -march= switch", ix86_arch_string);
1955 ix86_arch_mask = 1u << ix86_arch;
1956 for (i = 0; i < X86_ARCH_LAST; ++i)
1957 ix86_arch_features[i] &= ix86_arch_mask;
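/* Example (editor's note): with -march=core2 the -march loop above sets
   ix86_arch = PROCESSOR_CORE2 and, unless the user explicitly overrode
   them, enables MASK_MMX, MASK_SSE, MASK_SSE2, MASK_SSE3 and MASK_SSSE3
   per the PTA_* flags in the alias table, plus x86_prefetch_sse and
   x86_cmpxchg16b.  */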
1959 for (i = 0; i < pta_size; i++)
1960 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
1962 ix86_tune = processor_alias_table[i].processor;
1963 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1965 if (ix86_tune_defaulted)
1967 ix86_tune_string = "x86-64";
1968 for (i = 0; i < pta_size; i++)
1969 if (! strcmp (ix86_tune_string,
1970 processor_alias_table[i].name))
1972 ix86_tune = processor_alias_table[i].processor;
1975 error ("CPU you selected does not support x86-64 "
1978 /* Intel CPUs have always interpreted SSE prefetch instructions as
1979 NOPs; so, we can enable SSE prefetch instructions even when
1980 -mtune (rather than -march) points us to a processor that has them.
1981 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
1982 higher processors. */
1983 if (TARGET_CMOVE && (processor_alias_table[i].flags & PTA_PREFETCH_SSE))
1984 x86_prefetch_sse = true;
1988 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
1990 ix86_tune_mask = 1u << ix86_tune;
1991 for (i = 0; i < X86_TUNE_LAST; ++i)
1992 ix86_tune_features[i] &= ix86_tune_mask;
1995 ix86_cost = &size_cost;
1997 ix86_cost = processor_target_table[ix86_tune].cost;
1998 target_flags |= processor_target_table[ix86_tune].target_enable;
1999 target_flags &= ~processor_target_table[ix86_tune].target_disable;
2001 /* Arrange to set up i386_stack_locals for all functions. */
2002 init_machine_status = ix86_init_machine_status;
2004 /* Validate -mregparm= value. */
2005 if (ix86_regparm_string)
2008 warning (0, "-mregparm is ignored in 64-bit mode");
2009 i = atoi (ix86_regparm_string);
2010 if (i < 0 || i > REGPARM_MAX)
2011 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
2016 ix86_regparm = REGPARM_MAX;
2018 /* If the user has provided any of the -malign-* options,
2019 warn and use that value only if -falign-* is not set.
2020 Remove this code in GCC 3.2 or later. */
2021 if (ix86_align_loops_string)
2023 warning (0, "-malign-loops is obsolete, use -falign-loops");
2024 if (align_loops == 0)
2026 i = atoi (ix86_align_loops_string);
2027 if (i < 0 || i > MAX_CODE_ALIGN)
2028 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2030 align_loops = 1 << i;
2034 if (ix86_align_jumps_string)
2036 warning (0, "-malign-jumps is obsolete, use -falign-jumps");
2037 if (align_jumps == 0)
2039 i = atoi (ix86_align_jumps_string);
2040 if (i < 0 || i > MAX_CODE_ALIGN)
2041 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2043 align_jumps = 1 << i;
2047 if (ix86_align_funcs_string)
2049 warning (0, "-malign-functions is obsolete, use -falign-functions");
2050 if (align_functions == 0)
2052 i = atoi (ix86_align_funcs_string);
2053 if (i < 0 || i > MAX_CODE_ALIGN)
2054 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2056 align_functions = 1 << i;
2060 /* Default align_* from the processor table. */
2061 if (align_loops == 0)
2063 align_loops = processor_target_table[ix86_tune].align_loop;
2064 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
2066 if (align_jumps == 0)
2068 align_jumps = processor_target_table[ix86_tune].align_jump;
2069 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
2071 if (align_functions == 0)
2073 align_functions = processor_target_table[ix86_tune].align_func;
2076 /* Validate -mbranch-cost= value, or provide default. */
2077 ix86_branch_cost = ix86_cost->branch_cost;
2078 if (ix86_branch_cost_string)
2080 i = atoi (ix86_branch_cost_string);
2082 error ("-mbranch-cost=%d is not between 0 and 5", i);
2084 ix86_branch_cost = i;
2086 if (ix86_section_threshold_string)
2088 i = atoi (ix86_section_threshold_string);
2090 error ("-mlarge-data-threshold=%d is negative", i);
2092 ix86_section_threshold = i;
2095 if (ix86_tls_dialect_string)
2097 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
2098 ix86_tls_dialect = TLS_DIALECT_GNU;
2099 else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
2100 ix86_tls_dialect = TLS_DIALECT_GNU2;
2101 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
2102 ix86_tls_dialect = TLS_DIALECT_SUN;
2104 error ("bad value (%s) for -mtls-dialect= switch",
2105 ix86_tls_dialect_string);
2108 if (ix87_precision_string)
2110 i = atoi (ix87_precision_string);
2111 if (i != 32 && i != 64 && i != 80)
2112 error ("pc%d is not valid precision setting (32, 64 or 80)", i);
2115 /* Keep nonleaf frame pointers. */
2116 if (flag_omit_frame_pointer)
2117 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
2118 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
2119 flag_omit_frame_pointer = 1;
2121 /* If we're doing fast math, we don't care about comparison order
2122 wrt NaNs. This lets us use a shorter comparison sequence. */
2123 if (flag_finite_math_only)
2124 target_flags &= ~MASK_IEEE_FP;
2126 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
2127 since the insns won't need emulation. */
2128 if (x86_arch_always_fancy_math_387 & ix86_arch_mask)
2129 target_flags &= ~MASK_NO_FANCY_MATH_387;
2131 /* Likewise, if the target doesn't have a 387, or we've specified
2132 software floating point, don't use 387 inline intrinsics. */
2134 target_flags |= MASK_NO_FANCY_MATH_387;
2136 /* Turn on SSE3 builtins for -mssse3. */
2138 target_flags |= MASK_SSE3;
2140 /* Turn on SSE3 builtins for -msse4a. */
2142 target_flags |= MASK_SSE3;
2144 /* Turn on SSE2 builtins for -msse3. */
2146 target_flags |= MASK_SSE2;
2148 /* Turn on SSE builtins for -msse2. */
2150 target_flags |= MASK_SSE;
2152 /* Turn on MMX builtins for -msse. */
2155 target_flags |= MASK_MMX & ~target_flags_explicit;
2156 x86_prefetch_sse = true;
2159 /* Turn on MMX builtins for 3Dnow. */
2161 target_flags |= MASK_MMX;
2163 /* Turn on POPCNT builtins for -mabm. */
2165 target_flags |= MASK_POPCNT;
2170 warning (0, "-mrtd is ignored in 64bit mode");
2172 /* Enable by default the SSE and MMX builtins. Do allow the user to
2173 explicitly disable any of these. In particular, disabling SSE and
2174 MMX for kernel code is extremely useful. */
2176 |= ((MASK_SSE2 | MASK_SSE | MASK_MMX | TARGET_SUBTARGET64_DEFAULT)
2177 & ~target_flags_explicit);
2181 /* The i386 ABI does not specify a red zone. It still makes sense to use
2182 one when the programmer takes care to keep the stack from being clobbered. */
2183 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
2184 target_flags |= MASK_NO_RED_ZONE;
2187 /* Validate -mpreferred-stack-boundary= value, or provide default.
2188 The default of 128 bits is for the Pentium III's SSE __m128. We can't
2189 lower it when optimizing for size, since then object files compiled
2190 with -Os could not be mixed with ones compiled with -On. */
2191 ix86_preferred_stack_boundary = 128;
2192 if (ix86_preferred_stack_boundary_string)
2194 i = atoi (ix86_preferred_stack_boundary_string);
2195 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
2196 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
2197 TARGET_64BIT ? 4 : 2);
2199 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
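/* Worked example (editor's note): -mpreferred-stack-boundary=4 yields
   (1 << 4) * BITS_PER_UNIT = 16 * 8 = 128 bits, i.e. the 16-byte
   alignment that SSE __m128 spills require.  */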
2202 /* Accept -msseregparm only if at least SSE support is enabled. */
2203 if (TARGET_SSEREGPARM
2204 && ! TARGET_SSE)
2205 error ("-msseregparm used without SSE enabled");
2207 ix86_fpmath = TARGET_FPMATH_DEFAULT;
2208 if (ix86_fpmath_string != 0)
2210 if (! strcmp (ix86_fpmath_string, "387"))
2211 ix86_fpmath = FPMATH_387;
2212 else if (! strcmp (ix86_fpmath_string, "sse"))
2216 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2217 ix86_fpmath = FPMATH_387;
2220 ix86_fpmath = FPMATH_SSE;
2222 else if (! strcmp (ix86_fpmath_string, "387,sse")
2223 || ! strcmp (ix86_fpmath_string, "sse,387"))
2227 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2228 ix86_fpmath = FPMATH_387;
2230 else if (!TARGET_80387)
2232 warning (0, "387 instruction set disabled, using SSE arithmetics");
2233 ix86_fpmath = FPMATH_SSE;
2236 ix86_fpmath = FPMATH_SSE | FPMATH_387;
2239 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
2242 /* If the i387 is disabled, then do not return values in it. */
2244 target_flags &= ~MASK_FLOAT_RETURNS;
2246 if ((x86_accumulate_outgoing_args & ix86_tune_mask)
2247 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
2249 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
2251 /* ??? Unwind info is not correct around the CFG unless either a frame
2252 pointer is present or M_A_O_A is set. Fixing this requires rewriting
2253 unwind info generation to be aware of the CFG and propagating states
2254 around edges. */
2255 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
2256 || flag_exceptions || flag_non_call_exceptions)
2257 && flag_omit_frame_pointer
2258 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
2260 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
2261 warning (0, "unwind tables currently require either a frame pointer "
2262 "or -maccumulate-outgoing-args for correctness");
2263 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
2266 /* For sane SSE instruction set generation we need fcomi instruction.
2267 It is safe to enable all CMOVE instructions. */
2271 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
2274 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
2275 p = strchr (internal_label_prefix, 'X');
2276 internal_label_prefix_len = p - internal_label_prefix;
2280 /* When no scheduling description is available, disable the scheduler
2281 pass so it won't slow down compilation or make x87 code slower. */
2282 if (!TARGET_SCHEDULE)
2283 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
2285 if (!PARAM_SET_P (PARAM_SIMULTANEOUS_PREFETCHES))
2286 set_param_value ("simultaneous-prefetches",
2287 ix86_cost->simultaneous_prefetches);
2288 if (!PARAM_SET_P (PARAM_L1_CACHE_LINE_SIZE))
2289 set_param_value ("l1-cache-line-size", ix86_cost->prefetch_block);
2292 /* Return true if this goes in large data/bss. */
2294 static bool
2295 ix86_in_large_data_p (tree exp)
2297 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
2300 /* Functions are never large data. */
2301 if (TREE_CODE (exp) == FUNCTION_DECL)
2304 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
2306 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
2307 if (strcmp (section, ".ldata") == 0
2308 || strcmp (section, ".lbss") == 0)
2314 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
2316 /* If this is an incomplete type with size 0, then we can't put it
2317 in data because it might be too big when completed. */
2318 if (!size || size > ix86_section_threshold)
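/* Example (editor's sketch): with -mcmodel=medium and the default
   threshold of 65536, a definition such as

     static char big_buf[100000];

   counts as large data and is placed in .lbss rather than .bss, so only
   small data has to stay reachable through 32-bit offsets.  */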
2325 /* Switch to the appropriate section for output of DECL.
2326 DECL is either a `VAR_DECL' node or a constant of some sort.
2327 RELOC indicates whether forming the initial value of DECL requires
2328 link-time relocations. */
2330 static section * x86_64_elf_select_section (tree, int, unsigned HOST_WIDE_INT)
2331 ATTRIBUTE_UNUSED;
2333 static section *
2334 x86_64_elf_select_section (tree decl, int reloc,
2335 unsigned HOST_WIDE_INT align)
2337 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2338 && ix86_in_large_data_p (decl))
2340 const char *sname = NULL;
2341 unsigned int flags = SECTION_WRITE;
2342 switch (categorize_decl_for_section (decl, reloc))
2347 case SECCAT_DATA_REL:
2348 sname = ".ldata.rel";
2350 case SECCAT_DATA_REL_LOCAL:
2351 sname = ".ldata.rel.local";
2353 case SECCAT_DATA_REL_RO:
2354 sname = ".ldata.rel.ro";
2356 case SECCAT_DATA_REL_RO_LOCAL:
2357 sname = ".ldata.rel.ro.local";
2361 flags |= SECTION_BSS;
2364 case SECCAT_RODATA_MERGE_STR:
2365 case SECCAT_RODATA_MERGE_STR_INIT:
2366 case SECCAT_RODATA_MERGE_CONST:
2370 case SECCAT_SRODATA:
2377 /* We don't split these for the medium model. Place them into
2378 default sections and hope for the best. */
2383 /* We might get called with string constants, but get_named_section
2384 doesn't like them as they are not DECLs. Also, we need to set
2385 flags in that case. */
2387 return get_section (sname, flags, NULL);
2388 return get_named_section (decl, sname, reloc);
2391 return default_elf_select_section (decl, reloc, align);
2394 /* Build up a unique section name, expressed as a
2395 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
2396 RELOC indicates whether the initial value of EXP requires
2397 link-time relocations. */
2399 static void ATTRIBUTE_UNUSED
2400 x86_64_elf_unique_section (tree decl, int reloc)
2402 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2403 && ix86_in_large_data_p (decl))
2405 const char *prefix = NULL;
2406 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
2407 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
2409 switch (categorize_decl_for_section (decl, reloc))
2412 case SECCAT_DATA_REL:
2413 case SECCAT_DATA_REL_LOCAL:
2414 case SECCAT_DATA_REL_RO:
2415 case SECCAT_DATA_REL_RO_LOCAL:
2416 prefix = one_only ? ".gnu.linkonce.ld." : ".ldata.";
2419 prefix = one_only ? ".gnu.linkonce.lb." : ".lbss.";
2422 case SECCAT_RODATA_MERGE_STR:
2423 case SECCAT_RODATA_MERGE_STR_INIT:
2424 case SECCAT_RODATA_MERGE_CONST:
2425 prefix = one_only ? ".gnu.linkonce.lr." : ".lrodata.";
2427 case SECCAT_SRODATA:
2434 /* We don't split these for the medium model. Place them into
2435 default sections and hope for the best. */
2443 plen = strlen (prefix);
2445 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
2446 name = targetm.strip_name_encoding (name);
2447 nlen = strlen (name);
2449 string = alloca (nlen + plen + 1);
2450 memcpy (string, prefix, plen);
2451 memcpy (string + plen, name, nlen + 1);
2453 DECL_SECTION_NAME (decl) = build_string (nlen + plen, string);
2457 default_unique_section (decl, reloc);
2460 #ifdef COMMON_ASM_OP
2461 /* This says how to output assembler code to declare an
2462 uninitialized external linkage data object.
2464 For medium model x86-64 we need to use the .largecomm opcode for
2465 large objects. */
2466 void
2467 x86_elf_aligned_common (FILE *file,
2468 const char *name, unsigned HOST_WIDE_INT size,
2471 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2472 && size > (unsigned int)ix86_section_threshold)
2473 fprintf (file, ".largecomm\t");
2475 fprintf (file, "%s", COMMON_ASM_OP);
2476 assemble_name (file, name);
2477 fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
2478 size, align / BITS_PER_UNIT);
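/* Sample output (editor's note): for a hypothetical 131072-byte object
   big_buf with 256-bit alignment under -mcmodel=medium this emits

     .largecomm	big_buf,131072,32

   while smaller objects keep the ordinary COMMON_ASM_OP form.  */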
2482 /* Utility function for targets to use in implementing
2483 ASM_OUTPUT_ALIGNED_BSS. */
2486 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
2487 const char *name, unsigned HOST_WIDE_INT size,
2490 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2491 && size > (unsigned int)ix86_section_threshold)
2492 switch_to_section (get_named_section (decl, ".lbss", 0));
2494 switch_to_section (bss_section);
2495 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
2496 #ifdef ASM_DECLARE_OBJECT_NAME
2497 last_assemble_variable_decl = decl;
2498 ASM_DECLARE_OBJECT_NAME (file, name, decl);
2500 /* Standard thing is just output label for the object. */
2501 ASM_OUTPUT_LABEL (file, name);
2502 #endif /* ASM_DECLARE_OBJECT_NAME */
2503 ASM_OUTPUT_SKIP (file, size ? size : 1);
2507 optimization_options (int level, int size ATTRIBUTE_UNUSED)
2509 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends
2510 to make the problem of too few registers even worse. */
2511 #ifdef INSN_SCHEDULING
2513 flag_schedule_insns = 0;
2517 /* The Darwin libraries never set errno, so we might as well
2518 avoid calling them when that's the only reason we would. */
2519 flag_errno_math = 0;
2521 /* The default values of these switches depend on TARGET_64BIT, which
2522 is not known at this point. Mark these values with 2 and let the
2523 user override them. If there is no command line option specifying
2524 them, we will set the defaults in override_options. */
2526 flag_omit_frame_pointer = 2;
2527 flag_pcc_struct_return = 2;
2528 flag_asynchronous_unwind_tables = 2;
2529 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
2530 SUBTARGET_OPTIMIZATION_OPTIONS;
2534 /* Decide whether we can make a sibling call to a function. DECL is the
2535 declaration of the function being targeted by the call and EXP is the
2536 CALL_EXPR representing the call. */
2538 static bool
2539 ix86_function_ok_for_sibcall (tree decl, tree exp)
2544 /* If we are generating position-independent code, we cannot sibcall
2545 optimize any indirect call, or a direct call to a global function,
2546 as the PLT requires %ebx be live. */
2547 if (!TARGET_64BIT && flag_pic && (!decl || !targetm.binds_local_p (decl)))
2554 func = TREE_TYPE (CALL_EXPR_FN (exp));
2555 if (POINTER_TYPE_P (func))
2556 func = TREE_TYPE (func);
2559 /* Check that the return value locations are the same. For instance,
2560 if we are returning floats on the 80387 register stack, we cannot
2561 make a sibcall from a function that doesn't return a float to a
2562 function that does or, conversely, from a function that does return
2563 a float to a function that doesn't; the necessary stack adjustment
2564 would not be executed. This is also the place we notice
2565 differences in the return value ABI. Note that it is ok for one
2566 of the functions to have void return type as long as the return
2567 value of the other is passed in a register. */
2568 a = ix86_function_value (TREE_TYPE (exp), func, false);
2569 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
2571 if (STACK_REG_P (a) || STACK_REG_P (b))
2573 if (!rtx_equal_p (a, b))
2576 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
2578 else if (!rtx_equal_p (a, b))
2581 /* If this call is indirect, we'll need to be able to use a call-clobbered
2582 register for the address of the target function. Make sure that all
2583 such registers are not used for passing parameters. */
2584 if (!decl && !TARGET_64BIT)
2588 /* We're looking at the CALL_EXPR, we need the type of the function. */
2589 type = CALL_EXPR_FN (exp); /* pointer expression */
2590 type = TREE_TYPE (type); /* pointer type */
2591 type = TREE_TYPE (type); /* function type */
2593 if (ix86_function_regparm (type, NULL) >= 3)
2595 /* ??? Need to count the actual number of registers to be used,
2596 not the possible number of registers. Fix later. */
2601 /* Dllimport'd functions are also called indirectly. */
2602 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
2603 && decl && DECL_DLLIMPORT_P (decl)
2604 && ix86_function_regparm (TREE_TYPE (decl), NULL) >= 3)
2607 /* If we force-aligned the stack, then sibcalling would unalign the
2608 stack, which may break the called function. */
2609 if (cfun->machine->force_align_arg_pointer)
2612 /* Otherwise okay. That also includes certain types of indirect calls. */
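/* Example (editor's note): the return-value check above rejects

     double g (void);
     float f (void) { return g (); }

   as a sibcall; both values live on the x87 stack, but the SFmode and
   DFmode result rtxes are not rtx_equal_p, so f must still truncate
   g's result after the call returns.  */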
2616 /* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm"
2617 calling convention attributes;
2618 arguments as in struct attribute_spec.handler. */
2620 static tree
2621 ix86_handle_cconv_attribute (tree *node, tree name,
2622 tree args,
2623 int flags ATTRIBUTE_UNUSED,
2624 bool *no_add_attrs)
2626 if (TREE_CODE (*node) != FUNCTION_TYPE
2627 && TREE_CODE (*node) != METHOD_TYPE
2628 && TREE_CODE (*node) != FIELD_DECL
2629 && TREE_CODE (*node) != TYPE_DECL)
2631 warning (OPT_Wattributes, "%qs attribute only applies to functions",
2632 IDENTIFIER_POINTER (name));
2633 *no_add_attrs = true;
2637 /* Can combine regparm with all attributes but fastcall. */
2638 if (is_attribute_p ("regparm", name))
2642 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2644 error ("fastcall and regparm attributes are not compatible");
2647 cst = TREE_VALUE (args);
2648 if (TREE_CODE (cst) != INTEGER_CST)
2650 warning (OPT_Wattributes,
2651 "%qs attribute requires an integer constant argument",
2652 IDENTIFIER_POINTER (name));
2653 *no_add_attrs = true;
2655 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
2657 warning (OPT_Wattributes, "argument to %qs attribute larger than %d",
2658 IDENTIFIER_POINTER (name), REGPARM_MAX);
2659 *no_add_attrs = true;
2663 && lookup_attribute (ix86_force_align_arg_pointer_string,
2664 TYPE_ATTRIBUTES (*node))
2665 && compare_tree_int (cst, REGPARM_MAX-1))
2667 error ("%s functions limited to %d register parameters",
2668 ix86_force_align_arg_pointer_string, REGPARM_MAX-1);
2676 /* Do not warn when emulating the MS ABI. */
2677 if (!TARGET_64BIT_MS_ABI)
2678 warning (OPT_Wattributes, "%qs attribute ignored",
2679 IDENTIFIER_POINTER (name));
2680 *no_add_attrs = true;
2684 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
2685 if (is_attribute_p ("fastcall", name))
2687 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
2689 error ("fastcall and cdecl attributes are not compatible");
2691 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
2693 error ("fastcall and stdcall attributes are not compatible");
2695 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
2697 error ("fastcall and regparm attributes are not compatible");
2701 /* Can combine stdcall with fastcall (redundant), regparm and
2703 else if (is_attribute_p ("stdcall", name))
2705 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
2707 error ("stdcall and cdecl attributes are not compatible");
2709 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2711 error ("stdcall and fastcall attributes are not compatible");
2715 /* Can combine cdecl with regparm and sseregparm. */
2716 else if (is_attribute_p ("cdecl", name))
2718 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
2720 error ("stdcall and cdecl attributes are not compatible");
2722 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2724 error ("fastcall and cdecl attributes are not compatible");
2728 /* Can combine sseregparm with all attributes. */
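/* Usage example (editor's note): these declarations draw the errors
   above, while regparm alone is accepted:

     void f (int) __attribute__ ((fastcall, regparm (2)));   // error
     void g (int) __attribute__ ((regparm (2)));             // OK  */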
2733 /* Return 0 if the attributes for two types are incompatible, 1 if they
2734 are compatible, and 2 if they are nearly compatible (which causes a
2735 warning to be generated). */
2737 static int
2738 ix86_comp_type_attributes (tree type1, tree type2)
2740 /* Check for mismatch of non-default calling convention. */
2741 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
2743 if (TREE_CODE (type1) != FUNCTION_TYPE)
2746 /* Check for mismatched fastcall/regparm types. */
2747 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
2748 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
2749 || (ix86_function_regparm (type1, NULL)
2750 != ix86_function_regparm (type2, NULL)))
2753 /* Check for mismatched sseregparm types. */
2754 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
2755 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
2758 /* Check for mismatched return types (cdecl vs stdcall). */
2759 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
2760 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
2766 /* Return the regparm value for a function with the indicated TYPE and DECL.
2767 DECL may be NULL when calling function indirectly
2768 or considering a libcall. */
2770 static int
2771 ix86_function_regparm (tree type, tree decl)
2774 int regparm = ix86_regparm;
2779 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
2781 return TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
2783 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
2786 /* Use register calling convention for local functions when possible. */
2787 if (decl && flag_unit_at_a_time && !profile_flag)
2789 struct cgraph_local_info *i = cgraph_local_info (decl);
2792 int local_regparm, globals = 0, regno;
2795 /* Make sure no regparm register is taken by a
2796 global register variable. */
2797 for (local_regparm = 0; local_regparm < 3; local_regparm++)
2798 if (global_regs[local_regparm])
2801 /* We can't use regparm(3) for nested functions, as these pass the
2802 static chain pointer in the third argument. */
2803 if (local_regparm == 3
2804 && decl_function_context (decl)
2805 && !DECL_NO_STATIC_CHAIN (decl))
2808 /* If the function realigns its stack pointer, the prologue will
2809 clobber %ecx. If we've already generated code for the callee,
2810 the callee's DECL_STRUCT_FUNCTION is gone, so we fall back to
2811 scanning the attributes for the self-realigning property. */
2812 f = DECL_STRUCT_FUNCTION (decl);
2813 if (local_regparm == 3
2814 && (f ? !!f->machine->force_align_arg_pointer
2815 : !!lookup_attribute (ix86_force_align_arg_pointer_string,
2816 TYPE_ATTRIBUTES (TREE_TYPE (decl)))))
2819 /* Each global register variable increases register pressure, so the
2820 more global register variables there are, the less profitable the
2821 regparm optimization becomes, unless the user requested it explicitly. */
2822 for (regno = 0; regno < 6; regno++)
2823 if (global_regs[regno])
2826 = globals < local_regparm ? local_regparm - globals : 0;
2828 if (local_regparm > regparm)
2829 regparm = local_regparm;
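/* Example (editor's note): a static function promoted to regparm(3)
   here receives its first three integer arguments in %eax, %edx and
   %ecx instead of on the stack, exactly as if it had been declared

     static int f (int a, int b, int c) __attribute__ ((regparm (3)));  */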
2836 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
2837 DFmode (2) arguments in SSE registers for a function with the
2838 indicated TYPE and DECL. DECL may be NULL when calling function
2839 indirectly or considering a libcall. Otherwise return 0. */
2841 static int
2842 ix86_function_sseregparm (tree type, tree decl)
2844 gcc_assert (!TARGET_64BIT);
2846 /* Use SSE registers to pass SFmode and DFmode arguments if requested
2847 by the sseregparm attribute. */
2848 if (TARGET_SSEREGPARM
2849 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
2854 error ("Calling %qD with attribute sseregparm without "
2855 "SSE/SSE2 enabled", decl);
2857 error ("Calling %qT with attribute sseregparm without "
2858 "SSE/SSE2 enabled", type);
2865 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
2866 (and DFmode for SSE2) arguments in SSE registers. */
2867 if (decl && TARGET_SSE_MATH && flag_unit_at_a_time && !profile_flag)
2869 struct cgraph_local_info *i = cgraph_local_info (decl);
2871 return TARGET_SSE2 ? 2 : 1;
2877 /* Return true if EAX is live at the start of the function. Used by
2878 ix86_expand_prologue to determine if we need special help before
2879 calling allocate_stack_worker. */
2881 static bool
2882 ix86_eax_live_at_start_p (void)
2884 /* Cheat. Don't bother working forward from ix86_function_regparm
2885 to the function type to whether an actual argument is located in
2886 eax. Instead just look at cfg info, which is still close enough
2887 to correct at this point. This gives false positives for broken
2888 functions that might use uninitialized data that happens to be
2889 allocated in eax, but who cares? */
2890 return REGNO_REG_SET_P (ENTRY_BLOCK_PTR->il.rtl->global_live_at_end, 0);
2893 /* Return true if TYPE has a variable argument list. */
2895 static bool
2896 type_has_variadic_args_p (tree type)
2897 {
2898 tree t;
2900 for (t = TYPE_ARG_TYPES (type); t; t = TREE_CHAIN (t))
2901 if (t == void_list_node)
2902 return false;
2903 return true;
2904 }
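/* Example (editor's note): for "int f (int, ...)" the TYPE_ARG_TYPES
   list is not terminated by void_list_node, so this returns true; for
   "int g (int)" the terminating void_list_node is found and it returns
   false.  */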
2906 /* Value is the number of bytes of arguments automatically
2907 popped when returning from a subroutine call.
2908 FUNDECL is the declaration node of the function (as a tree),
2909 FUNTYPE is the data type of the function (as a tree),
2910 or for a library call it is an identifier node for the subroutine name.
2911 SIZE is the number of bytes of arguments passed on the stack.
2913 On the 80386, the RTD insn may be used to pop them if the number
2914 of args is fixed, but if the number is variable then the caller
2915 must pop them all. RTD can't be used for library calls now
2916 because the library is compiled with the Unix compiler.
2917 Use of RTD is a selectable option, since it is incompatible with
2918 standard Unix calling sequences. If the option is not selected,
2919 the caller must always pop the args.
2921 The attribute stdcall is equivalent to RTD on a per module basis. */
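/* Example (editor's note): a stdcall function taking three ints pops
   its 12 bytes of arguments itself with "ret $12"; its cdecl
   counterpart executes a plain "ret" and leaves the cleanup to the
   caller.  */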
2923 int
2924 ix86_return_pops_args (tree fundecl, tree funtype, int size)
2928 /* None of the 64-bit ABIs pop arguments. */
2932 rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
2934 /* Cdecl functions override -mrtd, and never pop the stack. */
2935 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype)))
2937 /* Stdcall and fastcall functions will pop the stack if not
2939 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
2940 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
2943 if (rtd && ! type_has_variadic_args_p (funtype))
2947 /* Lose any fake structure return argument if it is passed on the stack. */
2948 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
2949 && !KEEP_AGGREGATE_RETURN_POINTER)
2951 int nregs = ix86_function_regparm (funtype, fundecl);
2953 return GET_MODE_SIZE (Pmode);
2959 /* Argument support functions. */
2961 /* Return true when the given register may be used to pass function parameters. */
2962 int
2963 ix86_function_arg_regno_p (int regno)
2966 const int *parm_regs;
2971 return (regno < REGPARM_MAX
2972 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
2974 return (regno < REGPARM_MAX
2975 || (TARGET_MMX && MMX_REGNO_P (regno)
2976 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
2977 || (TARGET_SSE && SSE_REGNO_P (regno)
2978 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
2983 if (SSE_REGNO_P (regno) && TARGET_SSE)
2988 if (TARGET_SSE && SSE_REGNO_P (regno)
2989 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
2993 /* RAX is used as hidden argument to va_arg functions. */
2994 if (!TARGET_64BIT_MS_ABI && regno == 0)
2997 if (TARGET_64BIT_MS_ABI)
2998 parm_regs = x86_64_ms_abi_int_parameter_registers;
3000 parm_regs = x86_64_int_parameter_registers;
3001 for (i = 0; i < REGPARM_MAX; i++)
3002 if (regno == parm_regs[i])
3007 /* Return true if we do not know how to pass TYPE solely in registers. */
3009 static bool
3010 ix86_must_pass_in_stack (enum machine_mode mode, tree type)
3012 if (must_pass_in_stack_var_size_or_pad (mode, type))
3015 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
3016 The layout_type routine is crafty and tries to trick us into passing
3017 currently unsupported vector types on the stack by using TImode. */
3018 return (!TARGET_64BIT && mode == TImode
3019 && type && TREE_CODE (type) != VECTOR_TYPE);
3022 /* Initialize a variable CUM of type CUMULATIVE_ARGS
3023 for a call to a function whose data type is FNTYPE.
3024 For a library call, FNTYPE is 0. */
3026 void
3027 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
3028 tree fntype, /* tree ptr for function decl */
3029 rtx libname, /* SYMBOL_REF of library name or 0 */
3032 memset (cum, 0, sizeof (*cum));
3034 /* Set up the number of registers to use for passing arguments. */
3035 cum->nregs = ix86_regparm;
3037 cum->sse_nregs = SSE_REGPARM_MAX;
3039 cum->mmx_nregs = MMX_REGPARM_MAX;
3040 cum->warn_sse = true;
3041 cum->warn_mmx = true;
3042 cum->maybe_vaarg = (fntype ? type_has_variadic_args_p (fntype) : !libname);
3046 /* If there are variable arguments, then we won't pass anything
3047 in registers in 32-bit mode. */
3048 if (cum->maybe_vaarg)
3058 /* Use ecx and edx registers if function has fastcall attribute,
3059 else look for regparm information. */
3062 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
3068 cum->nregs = ix86_function_regparm (fntype, fndecl);
3071 /* Set up the number of SSE registers used for passing SFmode
3072 and DFmode arguments. Warn for mismatching ABI. */
3073 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl);
3077 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
3078 But in the case of vector types, it is some vector mode.
3080 When we have only some of our vector isa extensions enabled, then there
3081 are some modes for which vector_mode_supported_p is false. For these
3082 modes, the generic vector support in gcc will choose some non-vector mode
3083 in order to implement the type. By computing the natural mode, we'll
3084 select the proper ABI location for the operand and not depend on whatever
3085 the middle-end decides to do with these vector types. */
3087 static enum machine_mode
3088 type_natural_mode (tree type)
3090 enum machine_mode mode = TYPE_MODE (type);
3092 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
3094 HOST_WIDE_INT size = int_size_in_bytes (type);
3095 if ((size == 8 || size == 16)
3096 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
3097 && TYPE_VECTOR_SUBPARTS (type) > 1)
3099 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
3101 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
3102 mode = MIN_MODE_VECTOR_FLOAT;
3104 mode = MIN_MODE_VECTOR_INT;
3106 /* Get the mode which has this inner mode and number of units. */
3107 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
3108 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
3109 && GET_MODE_INNER (mode) == innermode)
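/* Example (editor's sketch): for a GNU C vector type such as

     typedef float v4sf __attribute__ ((vector_size (16)));

   the natural mode is V4SFmode even when SSE is disabled and the
   middle end would fall back to a non-vector mode, so the ABI slot
   chosen for the argument does not depend on the enabled ISA.  */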
3119 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
3120 this may not agree with the mode that the type system has chosen for the
3121 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
3122 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
3124 static rtx
3125 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
3130 if (orig_mode != BLKmode)
3131 tmp = gen_rtx_REG (orig_mode, regno);
3134 tmp = gen_rtx_REG (mode, regno);
3135 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
3136 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
3142 /* x86-64 register passing implementation. See the x86-64 psABI for
3143 details. The goal of this code is to classify each 8-byte chunk of the
3144 incoming argument by register class and assign registers accordingly. */
3146 /* Return the union class of CLASS1 and CLASS2.
3147 See the x86-64 PS ABI for details. */
3149 static enum x86_64_reg_class
3150 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
3152 /* Rule #1: If both classes are equal, this is the resulting class. */
3153 if (class1 == class2)
3156 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
3158 if (class1 == X86_64_NO_CLASS)
3160 if (class2 == X86_64_NO_CLASS)
3163 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
3164 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
3165 return X86_64_MEMORY_CLASS;
3167 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
3168 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
3169 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
3170 return X86_64_INTEGERSI_CLASS;
3171 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
3172 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
3173 return X86_64_INTEGER_CLASS;
3175 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
3177 if (class1 == X86_64_X87_CLASS
3178 || class1 == X86_64_X87UP_CLASS
3179 || class1 == X86_64_COMPLEX_X87_CLASS
3180 || class2 == X86_64_X87_CLASS
3181 || class2 == X86_64_X87UP_CLASS
3182 || class2 == X86_64_COMPLEX_X87_CLASS)
3183 return X86_64_MEMORY_CLASS;
3185 /* Rule #6: Otherwise class SSE is used. */
3186 return X86_64_SSE_CLASS;
3187 }
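/* Example (editor's note): merging INTEGERSI with SSESF yields
   INTEGERSI by rule #4; merging X87 with any non-memory class yields
   MEMORY by rule #5; merging SSEUP with SSEDF falls through to rule #6
   and yields SSE.  */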
3189 /* Classify the argument of type TYPE and mode MODE.
3190 CLASSES will be filled by the register class used to pass each word
3191 of the operand. The number of words is returned. In case the parameter
3192 should be passed in memory, 0 is returned. As a special case for zero
3193 sized containers, classes[0] will be NO_CLASS and 1 is returned.
3195 BIT_OFFSET is used internally for handling records and specifies the
3196 offset in bits modulo 256 to avoid overflow cases.
3198 See the x86-64 PS ABI for details.
3199 */
3201 static int
3202 classify_argument (enum machine_mode mode, tree type,
3203 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
3205 HOST_WIDE_INT bytes =
3206 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3207 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3209 /* Variable sized entities are always passed/returned in memory. */
3213 if (mode != VOIDmode
3214 && targetm.calls.must_pass_in_stack (mode, type))
3217 if (type && AGGREGATE_TYPE_P (type))
3221 enum x86_64_reg_class subclasses[MAX_CLASSES];
3223 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
3227 for (i = 0; i < words; i++)
3228 classes[i] = X86_64_NO_CLASS;
3230 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
3231 signal the memory class, so handle this as a special case. */
3234 classes[0] = X86_64_NO_CLASS;
3238 /* Classify each field of the record and merge the classes. */
3239 switch (TREE_CODE (type))
3242 /* And now merge the fields of the structure. */
3243 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3245 if (TREE_CODE (field) == FIELD_DECL)
3249 if (TREE_TYPE (field) == error_mark_node)
3252 /* Bitfields are always classified as integer. Handle them
3253 early, since later code would consider them to be
3254 misaligned integers. */
3255 if (DECL_BIT_FIELD (field))
3257 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
3258 i < ((int_bit_position (field) + (bit_offset % 64))
3259 + tree_low_cst (DECL_SIZE (field), 0)
3262 merge_classes (X86_64_INTEGER_CLASS,
3267 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3268 TREE_TYPE (field), subclasses,
3269 (int_bit_position (field)
3270 + bit_offset) % 256);
3273 for (i = 0; i < num; i++)
3276 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
3278 merge_classes (subclasses[i], classes[i + pos]);
3286 /* Arrays are handled as small records. */
3289 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
3290 TREE_TYPE (type), subclasses, bit_offset);
3294 /* The partial classes are now full classes. */
3295 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
3296 subclasses[0] = X86_64_SSE_CLASS;
3297 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
3298 subclasses[0] = X86_64_INTEGER_CLASS;
3300 for (i = 0; i < words; i++)
3301 classes[i] = subclasses[i % num];
3306 case QUAL_UNION_TYPE:
3307 /* Unions are similar to RECORD_TYPE but the offset is always 0. */
3309 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3311 if (TREE_CODE (field) == FIELD_DECL)
3315 if (TREE_TYPE (field) == error_mark_node)
3318 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3319 TREE_TYPE (field), subclasses,
3323 for (i = 0; i < num; i++)
3324 classes[i] = merge_classes (subclasses[i], classes[i]);
3333 /* Final merger cleanup. */
3334 for (i = 0; i < words; i++)
3336 /* If one class is MEMORY, everything should be passed in
3337 memory. */
3338 if (classes[i] == X86_64_MEMORY_CLASS)
3341 /* The X86_64_SSEUP_CLASS should be always preceded by
3342 X86_64_SSE_CLASS. */
3343 if (classes[i] == X86_64_SSEUP_CLASS
3344 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
3345 classes[i] = X86_64_SSE_CLASS;
3347 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
3348 if (classes[i] == X86_64_X87UP_CLASS
3349 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
3350 classes[i] = X86_64_SSE_CLASS;
3355 /* Compute the alignment needed. We align all types to their natural
3356 boundaries, with the exception of XFmode, which is aligned to 64 bits. */
3357 if (mode != VOIDmode && mode != BLKmode)
3359 int mode_alignment = GET_MODE_BITSIZE (mode);
3362 mode_alignment = 128;
3363 else if (mode == XCmode)
3364 mode_alignment = 256;
3365 if (COMPLEX_MODE_P (mode))
3366 mode_alignment /= 2;
3367 /* Misaligned fields are always returned in memory. */
3368 if (bit_offset % mode_alignment)
3372 /* For V1xx modes, just use the base mode. */
3373 if (VECTOR_MODE_P (mode)
3374 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
3375 mode = GET_MODE_INNER (mode);
3377 /* Classification of atomic types. */
3382 classes[0] = X86_64_SSE_CLASS;
3385 classes[0] = X86_64_SSE_CLASS;
3386 classes[1] = X86_64_SSEUP_CLASS;
3395 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3396 classes[0] = X86_64_INTEGERSI_CLASS;
3398 classes[0] = X86_64_INTEGER_CLASS;
3402 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
3407 if (!(bit_offset % 64))
3408 classes[0] = X86_64_SSESF_CLASS;
3410 classes[0] = X86_64_SSE_CLASS;
3413 classes[0] = X86_64_SSEDF_CLASS;
3416 classes[0] = X86_64_X87_CLASS;
3417 classes[1] = X86_64_X87UP_CLASS;
3420 classes[0] = X86_64_SSE_CLASS;
3421 classes[1] = X86_64_SSEUP_CLASS;
3424 classes[0] = X86_64_SSE_CLASS;
3427 classes[0] = X86_64_SSEDF_CLASS;
3428 classes[1] = X86_64_SSEDF_CLASS;
3431 classes[0] = X86_64_COMPLEX_X87_CLASS;
3434 /* These modes are larger than 16 bytes. */
3442 classes[0] = X86_64_SSE_CLASS;
3443 classes[1] = X86_64_SSEUP_CLASS;
3449 classes[0] = X86_64_SSE_CLASS;
3455 gcc_assert (VECTOR_MODE_P (mode));
3460 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
3462 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3463 classes[0] = X86_64_INTEGERSI_CLASS;
3465 classes[0] = X86_64_INTEGER_CLASS;
3466 classes[1] = X86_64_INTEGER_CLASS;
3467 return 1 + (bytes > 8);
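/* Worked example (editor's note): struct { double d; int i; } spans two
   eightbytes.  Classification gives classes[0] = X86_64_SSEDF_CLASS and
   classes[1] an integer class, so as a first argument the struct is
   passed in one SSE register and one integer register (%xmm0 and
   %rdi).  */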
3471 /* Examine the argument and set the number of registers required in each
3472 class. Return 0 iff the parameter should be passed in memory. */
3473 static int
3474 examine_argument (enum machine_mode mode, tree type, int in_return,
3475 int *int_nregs, int *sse_nregs)
3477 enum x86_64_reg_class class[MAX_CLASSES];
3478 int n = classify_argument (mode, type, class, 0);
3484 for (n--; n >= 0; n--)
3487 case X86_64_INTEGER_CLASS:
3488 case X86_64_INTEGERSI_CLASS:
3491 case X86_64_SSE_CLASS:
3492 case X86_64_SSESF_CLASS:
3493 case X86_64_SSEDF_CLASS:
3496 case X86_64_NO_CLASS:
3497 case X86_64_SSEUP_CLASS:
3499 case X86_64_X87_CLASS:
3500 case X86_64_X87UP_CLASS:
3504 case X86_64_COMPLEX_X87_CLASS:
3505 return in_return ? 2 : 0;
3506 case X86_64_MEMORY_CLASS:
3512 /* Construct container for the argument used by GCC interface. See
3513 FUNCTION_ARG for the detailed description. */
3515 static rtx
3516 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
3517 tree type, int in_return, int nintregs, int nsseregs,
3518 const int *intreg, int sse_regno)
3520 /* The following variables hold the static issued_error state. */
3521 static bool issued_sse_arg_error;
3522 static bool issued_sse_ret_error;
3523 static bool issued_x87_ret_error;
3525 enum machine_mode tmpmode;
3527 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3528 enum x86_64_reg_class class[MAX_CLASSES];
3532 int needed_sseregs, needed_intregs;
3533 rtx exp[MAX_CLASSES];
3536 n = classify_argument (mode, type, class, 0);
3539 if (!examine_argument (mode, type, in_return, &needed_intregs,
3542 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
3545 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
3546 some less clueful developer tries to use floating-point anyway. */
3547 if (needed_sseregs && !TARGET_SSE)
3551 if (!issued_sse_ret_error)
3553 error ("SSE register return with SSE disabled");
3554 issued_sse_ret_error = true;
3557 else if (!issued_sse_arg_error)
3559 error ("SSE register argument with SSE disabled");
3560 issued_sse_arg_error = true;
3565 /* Likewise, error if the ABI requires us to return values in the
3566 x87 registers and the user specified -mno-80387. */
3567 if (!TARGET_80387 && in_return)
3568 for (i = 0; i < n; i++)
3569 if (class[i] == X86_64_X87_CLASS
3570 || class[i] == X86_64_X87UP_CLASS
3571 || class[i] == X86_64_COMPLEX_X87_CLASS)
3573 if (!issued_x87_ret_error)
3575 error ("x87 register return with x87 disabled");
3576 issued_x87_ret_error = true;
3581 /* First construct the simple cases.  Avoid SCmode, since we want to use
3582    a single register to pass this type.  */
3583 if (n == 1 && mode != SCmode)
3586 case X86_64_INTEGER_CLASS:
3587 case X86_64_INTEGERSI_CLASS:
3588 return gen_rtx_REG (mode, intreg[0]);
3589 case X86_64_SSE_CLASS:
3590 case X86_64_SSESF_CLASS:
3591 case X86_64_SSEDF_CLASS:
3592 return gen_reg_or_parallel (mode, orig_mode, SSE_REGNO (sse_regno));
3593 case X86_64_X87_CLASS:
3594 case X86_64_COMPLEX_X87_CLASS:
3595 return gen_rtx_REG (mode, FIRST_STACK_REG);
3596 case X86_64_NO_CLASS:
3597 /* Zero sized array, struct or class. */
3602 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS
3604 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
3607 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
3608 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
3609 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
3610 && class[1] == X86_64_INTEGER_CLASS
3611 && (mode == CDImode || mode == TImode || mode == TFmode)
3612 && intreg[0] + 1 == intreg[1])
3613 return gen_rtx_REG (mode, intreg[0]);
3615 /* Otherwise figure out the entries of the PARALLEL. */
3616 for (i = 0; i < n; i++)
3620 case X86_64_NO_CLASS:
3622 case X86_64_INTEGER_CLASS:
3623 case X86_64_INTEGERSI_CLASS:
3624 /* Merge TImodes on aligned occasions here too. */
3625 if (i * 8 + 8 > bytes)
3626 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
3627 else if (class[i] == X86_64_INTEGERSI_CLASS)
3631 /* We've requested 24 bytes that we don't have a mode for.  Use DImode.  */
3632 if (tmpmode == BLKmode)
3634 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3635 gen_rtx_REG (tmpmode, *intreg),
3639 case X86_64_SSESF_CLASS:
3640 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3641 gen_rtx_REG (SFmode,
3642 SSE_REGNO (sse_regno)),
3646 case X86_64_SSEDF_CLASS:
3647 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3648 gen_rtx_REG (DFmode,
3649 SSE_REGNO (sse_regno)),
3653 case X86_64_SSE_CLASS:
3654 if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
3658 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3659 gen_rtx_REG (tmpmode,
3660 SSE_REGNO (sse_regno)),
3662 if (tmpmode == TImode)
3671 /* Empty aligned struct, union or class. */
3675 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
3676 for (i = 0; i < nexps; i++)
3677 XVECEXP (ret, 0, i) = exp [i];
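/* The PARALLEL built above describes, for each eightbyte, the register
   carrying it and its byte offset within the argument.  For the
   struct { double d; long l; } example it looks roughly like

       (parallel [(expr_list (reg:DF xmm0) (const_int 0))
                  (expr_list (reg:DI di)   (const_int 8))])

   which the middle end uses to shuffle the value between registers and
   memory.  */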
3681 /* Update the data in CUM to advance over an argument of mode MODE
3682 and data type TYPE. (TYPE is null for libcalls where that information
3683 may not be available.) */
3686 function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3687 tree type, HOST_WIDE_INT bytes, HOST_WIDE_INT words)
3703 cum->words += words;
3704 cum->nregs -= words;
3705 cum->regno += words;
3707 if (cum->nregs <= 0)
3715 if (cum->float_in_sse < 2)
3718 if (cum->float_in_sse < 1)
3729 if (!type || !AGGREGATE_TYPE_P (type))
3731 cum->sse_words += words;
3732 cum->sse_nregs -= 1;
3733 cum->sse_regno += 1;
3734 if (cum->sse_nregs <= 0)
3746 if (!type || !AGGREGATE_TYPE_P (type))
3748 cum->mmx_words += words;
3749 cum->mmx_nregs -= 1;
3750 cum->mmx_regno += 1;
3751 if (cum->mmx_nregs <= 0)
3762 function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3763 tree type, HOST_WIDE_INT words)
3765 int int_nregs, sse_nregs;
3767 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
3768 cum->words += words;
3769 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
3771 cum->nregs -= int_nregs;
3772 cum->sse_nregs -= sse_nregs;
3773 cum->regno += int_nregs;
3774 cum->sse_regno += sse_nregs;
3777 cum->words += words;
3781 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
3782 HOST_WIDE_INT words)
3784 /* Otherwise, this should be passed indirect. */
3785 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
3787 cum->words += words;
3796 function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3797 tree type, int named ATTRIBUTE_UNUSED)
3799 HOST_WIDE_INT bytes, words;
3801 if (mode == BLKmode)
3802 bytes = int_size_in_bytes (type);
3804 bytes = GET_MODE_SIZE (mode);
3805 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3808 mode = type_natural_mode (type);
3810 if (TARGET_64BIT_MS_ABI)
3811 function_arg_advance_ms_64 (cum, bytes, words);
3812 else if (TARGET_64BIT)
3813 function_arg_advance_64 (cum, mode, type, words);
3815 function_arg_advance_32 (cum, mode, type, bytes, words);
3818 /* Define where to put the arguments to a function.
3819 Value is zero to push the argument on the stack,
3820 or a hard register in which to store the argument.
3822 MODE is the argument's machine mode.
3823 TYPE is the data type of the argument (as a tree).
3824 This is null for libcalls where that information may not be available.
3826 CUM is a variable of type CUMULATIVE_ARGS which gives info about
3827 the preceding args and about the function being called.
3828 NAMED is nonzero if this argument is a named parameter
3829 (otherwise it is an extra parameter matching an ellipsis). */
3832 function_arg_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3833 enum machine_mode orig_mode, tree type,
3834 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
3836 static bool warnedsse, warnedmmx;
3838 /* Avoid the AL settings for the Unix64 ABI. */
3839 if (mode == VOIDmode)
3855 if (words <= cum->nregs)
3857 int regno = cum->regno;
3859 /* Fastcall allocates the first two DWORD (SImode) or
3860 smaller arguments to ECX and EDX. */
3863 if (mode == BLKmode || mode == DImode)
3866 /* ECX, not EAX, is the first allocated register.  */
3870 return gen_rtx_REG (mode, regno);
3875 if (cum->float_in_sse < 2)
3878 if (cum->float_in_sse < 1)
3888 if (!type || !AGGREGATE_TYPE_P (type))
3890 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
3893 warning (0, "SSE vector argument without SSE enabled "
3897 return gen_reg_or_parallel (mode, orig_mode,
3898 cum->sse_regno + FIRST_SSE_REG);
3906 if (!type || !AGGREGATE_TYPE_P (type))
3908 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
3911 warning (0, "MMX vector argument without MMX enabled "
3915 return gen_reg_or_parallel (mode, orig_mode,
3916 cum->mmx_regno + FIRST_MMX_REG);
3925 function_arg_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3926 enum machine_mode orig_mode, tree type)
3928 /* Handle a hidden AL argument containing the number of registers
3929 for varargs x86-64 functions. */
3930 if (mode == VOIDmode)
3931 return GEN_INT (cum->maybe_vaarg
3932 ? (cum->sse_nregs < 0
3937 return construct_container (mode, orig_mode, type, 0, cum->nregs,
3939 &x86_64_int_parameter_registers [cum->regno],
3944 function_arg_ms_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3945 enum machine_mode orig_mode, int named)
3949 /* Avoid the AL settings for the Unix64 ABI. */
3950 if (mode == VOIDmode)
3953 /* If we've run out of registers, it goes on the stack. */
3954 if (cum->nregs == 0)
3957 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
3959 /* Only floating point modes are passed in anything but integer regs. */
3960 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
3963 regno = cum->regno + FIRST_SSE_REG;
3968 /* Unnamed floating parameters are passed in both the
3969 SSE and integer registers. */
3970 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
3971 t2 = gen_rtx_REG (mode, regno);
3972 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
3973 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
3974 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
3978 return gen_reg_or_parallel (mode, orig_mode, regno);
3982 function_arg (CUMULATIVE_ARGS *cum, enum machine_mode omode,
3983 tree type, int named)
3985 enum machine_mode mode = omode;
3986 HOST_WIDE_INT bytes, words;
3988 if (mode == BLKmode)
3989 bytes = int_size_in_bytes (type);
3991 bytes = GET_MODE_SIZE (mode);
3992 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3994 /* To simplify the code below, represent vector types with a vector mode
3995 even if MMX/SSE are not active. */
3996 if (type && TREE_CODE (type) == VECTOR_TYPE)
3997 mode = type_natural_mode (type);
3999 if (TARGET_64BIT_MS_ABI)
4000 return function_arg_ms_64 (cum, mode, omode, named);
4001 else if (TARGET_64BIT)
4002 return function_arg_64 (cum, mode, omode, type);
4004 return function_arg_32 (cum, mode, omode, type, bytes, words);
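/* As a concrete 32-bit example: for

       void __attribute__((fastcall)) f (int a, int b, int c);

   function_arg_32 hands A to %ecx and B to %edx (ECX first, as noted
   above), and C - the third DWORD - ends up on the stack.  Under the
   64-bit ABIs the same query is answered by construct_container or the
   MS-ABI register table instead.  */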
4007 /* A C expression that indicates when an argument must be passed by
4008 reference. If nonzero for an argument, a copy of that argument is
4009 made in memory and a pointer to the argument is passed instead of
4010 the argument itself. The pointer is passed in whatever way is
4011 appropriate for passing a pointer to that type. */
4014 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
4015 enum machine_mode mode ATTRIBUTE_UNUSED,
4016 tree type, bool named ATTRIBUTE_UNUSED)
4018 if (TARGET_64BIT_MS_ABI)
4022 /* Arrays are passed by reference. */
4023 if (TREE_CODE (type) == ARRAY_TYPE)
4026 if (AGGREGATE_TYPE_P (type))
4028 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
4029 are passed by reference. */
4030 int el2 = exact_log2 (int_size_in_bytes (type));
4031 return !(el2 >= 0 && el2 <= 3);
4035 /* __m128 is passed by reference. */
4036 /* ??? How to handle complex? For now treat them as structs,
4037 and pass them by reference if they're too large. */
4038 if (GET_MODE_SIZE (mode) > 8)
4041 else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
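/* ix86_pass_by_reference thus answers, under the MS ABI rules above, e.g.

       struct { char c[3]; }  (3 bytes, not a power of two)  -> by reference
       struct { int a, b; }   (8 bytes, 2^3)                 -> by value
       __m128                 (16-byte scalar)               -> by reference

   whereas the SysV 64-bit branch passes only variable-sized types
   indirectly.  */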
4047 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
4048 ABI. Only called if TARGET_SSE. */
4050 contains_128bit_aligned_vector_p (tree type)
4052 enum machine_mode mode = TYPE_MODE (type);
4053 if (SSE_REG_MODE_P (mode)
4054 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
4056 if (TYPE_ALIGN (type) < 128)
4059 if (AGGREGATE_TYPE_P (type))
4061 /* Walk the aggregates recursively. */
4062 switch (TREE_CODE (type))
4066 case QUAL_UNION_TYPE:
4070 /* Walk all the structure fields. */
4071 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
4073 if (TREE_CODE (field) == FIELD_DECL
4074 && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
4081 /* Just for use if some language passes arrays by value.  */
4082 if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
4093 /* Gives the alignment boundary, in bits, of an argument with the
4094 specified mode and type. */
4097 ix86_function_arg_boundary (enum machine_mode mode, tree type)
4101 align = TYPE_ALIGN (type);
4103 align = GET_MODE_ALIGNMENT (mode);
4104 if (align < PARM_BOUNDARY)
4105 align = PARM_BOUNDARY;
4108 /* The i386 ABI defines all arguments to be 4-byte aligned.  We have to
4109    make an exception for SSE modes, since these require 128-bit alignment.
4112 The handling here differs from field_alignment. ICC aligns MMX
4113 arguments to 4 byte boundaries, while structure fields are aligned
4114 to 8 byte boundaries. */
4116 align = PARM_BOUNDARY;
4119 if (!SSE_REG_MODE_P (mode))
4120 align = PARM_BOUNDARY;
4124 if (!contains_128bit_aligned_vector_p (type))
4125 align = PARM_BOUNDARY;
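/* E.g. on ia32 a plain double argument is aligned to PARM_BOUNDARY
   (32 bits) despite its natural 64-bit alignment, while an __m128 or any
   aggregate containing a 128-bit vector keeps its 128-bit argument
   alignment.  */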
4133 /* Return true if N is a possible register number for a function value.  */
4136 ix86_function_value_regno_p (int regno)
4143 case FIRST_FLOAT_REG:
4144 if (TARGET_64BIT_MS_ABI)
4146 return TARGET_FLOAT_RETURNS_IN_80387;
4152 if (TARGET_MACHO || TARGET_64BIT)
4160 /* Define how to find the value returned by a function.
4161 VALTYPE is the data type of the value (as a tree).
4162 If the precise function being called is known, FUNC is its FUNCTION_DECL;
4163 otherwise, FUNC is 0. */
4166 function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
4167 tree fntype, tree fn)
4171 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
4172 we normally prevent this case when mmx is not available. However
4173 some ABIs may require the result to be returned like DImode. */
4174 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
4175 regno = TARGET_MMX ? FIRST_MMX_REG : 0;
4177 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
4178 we prevent this case when sse is not available. However some ABIs
4179 may require the result to be returned like integer TImode. */
4180 else if (mode == TImode
4181 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
4182 regno = TARGET_SSE ? FIRST_SSE_REG : 0;
4184 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
4185 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
4186 regno = FIRST_FLOAT_REG;
4188 /* Most things go in %eax. */
4191 /* Override FP return register with %xmm0 for local functions when
4192 SSE math is enabled or for functions with sseregparm attribute. */
4193 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
4195 int sse_level = ix86_function_sseregparm (fntype, fn);
4196 if ((sse_level >= 1 && mode == SFmode)
4197 || (sse_level == 2 && mode == DFmode))
4198 regno = FIRST_SSE_REG;
4201 return gen_rtx_REG (orig_mode, regno);
4205 function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
4210 /* Handle libcalls, which don't provide a type node. */
4211 if (valtype == NULL)
4223 return gen_rtx_REG (mode, FIRST_SSE_REG);
4226 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
4230 return gen_rtx_REG (mode, 0);
4234 ret = construct_container (mode, orig_mode, valtype, 1,
4235 REGPARM_MAX, SSE_REGPARM_MAX,
4236 x86_64_int_return_registers, 0);
4238 /* For zero-sized structures, construct_container returns NULL, but we
4239    need to keep the rest of the compiler happy by returning a meaningful value.  */
4241 ret = gen_rtx_REG (orig_mode, 0);
4247 function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode)
4249 unsigned int regno = 0;
4253 if (mode == SFmode || mode == DFmode)
4254 regno = FIRST_SSE_REG;
4255 else if (VECTOR_MODE_P (mode) || GET_MODE_SIZE (mode) == 16)
4256 regno = FIRST_SSE_REG;
4259 return gen_rtx_REG (orig_mode, regno);
4263 ix86_function_value_1 (tree valtype, tree fntype_or_decl,
4264 enum machine_mode orig_mode, enum machine_mode mode)
4269 if (fntype_or_decl && DECL_P (fntype_or_decl))
4270 fn = fntype_or_decl;
4271 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
4273 if (TARGET_64BIT_MS_ABI)
4274 return function_value_ms_64 (orig_mode, mode);
4275 else if (TARGET_64BIT)
4276 return function_value_64 (orig_mode, mode, valtype);
4278 return function_value_32 (orig_mode, mode, fntype, fn);
4282 ix86_function_value (tree valtype, tree fntype_or_decl,
4283 bool outgoing ATTRIBUTE_UNUSED)
4285 enum machine_mode mode, orig_mode;
4287 orig_mode = TYPE_MODE (valtype);
4288 mode = type_natural_mode (valtype);
4289 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
4293 ix86_libcall_value (enum machine_mode mode)
4295 return ix86_function_value_1 (NULL, NULL, mode, mode);
4298 /* Return true iff type is returned in memory. */
4301 return_in_memory_32 (tree type, enum machine_mode mode)
4305 if (mode == BLKmode)
4308 size = int_size_in_bytes (type);
4310 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
4313 if (VECTOR_MODE_P (mode) || mode == TImode)
4315 /* User-created vectors small enough to fit in EAX. */
4319 /* MMX/3dNow values are returned in MM0,
4320    except when it doesn't exist.  */
4322 return (TARGET_MMX ? 0 : 1);
4324 /* SSE values are returned in XMM0, except when it doesn't exist. */
4326 return (TARGET_SSE ? 0 : 1);
4341 return_in_memory_64 (tree type, enum machine_mode mode)
4343 int needed_intregs, needed_sseregs;
4344 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
4348 return_in_memory_ms_64 (tree type, enum machine_mode mode)
4350 HOST_WIDE_INT size = int_size_in_bytes (type);
4352 /* __m128 and friends are returned in xmm0. */
4353 if (size == 16 && VECTOR_MODE_P (mode))
4356 /* Otherwise, the size must be exactly 1, 2, 4 or 8 bytes.  */
4357 return (size != 1 && size != 2 && size != 4 && size != 8);
4361 ix86_return_in_memory (tree type)
4363 enum machine_mode mode = type_natural_mode (type);
4365 if (TARGET_64BIT_MS_ABI)
4366 return return_in_memory_ms_64 (type, mode);
4367 else if (TARGET_64BIT)
4368 return return_in_memory_64 (type, mode);
4370 return return_in_memory_32 (type, mode);
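/* A few data points: a 32-byte struct goes to memory under all three
   ABIs; struct { int x; } comes back in a register under both 64-bit
   ABIs (INTEGER class, resp. size 4 being one of 1/2/4/8), while the
   ia32 code above sends aggregates to memory unless MS_AGGREGATE_RETURN
   accepts them at 8 bytes or less.  */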
4373 /* When returning SSE vector types, we have a choice of either
4374 (1) being abi incompatible with a -march switch, or
4375 (2) generating an error.
4376 Given no good solution, I think the safest thing is one warning.
4377 The user won't be able to use -Werror, but....
4379 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
4380 called in response to actually generating a caller or callee that
4381 uses such a type. As opposed to RETURN_IN_MEMORY, which is called
4382 via aggregate_value_p for general type probing from tree-ssa. */
4385 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
4387 static bool warnedsse, warnedmmx;
4389 if (!TARGET_64BIT && type)
4391 /* Look at the return type of the function, not the function type. */
4392 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
4394 if (!TARGET_SSE && !warnedsse)
4397 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
4400 warning (0, "SSE vector return without SSE enabled "
4405 if (!TARGET_MMX && !warnedmmx)
4407 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
4410 warning (0, "MMX vector return without MMX enabled "
4420 /* Create the va_list data type. */
4423 ix86_build_builtin_va_list (void)
4425 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
4427 /* For i386 we use a plain pointer to the argument area.  */
4428 if (!TARGET_64BIT || TARGET_64BIT_MS_ABI)
4429 return build_pointer_type (char_type_node);
4431 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
4432 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
4434 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
4435 unsigned_type_node);
4436 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
4437 unsigned_type_node);
4438 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
4440 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
4443 va_list_gpr_counter_field = f_gpr;
4444 va_list_fpr_counter_field = f_fpr;
4446 DECL_FIELD_CONTEXT (f_gpr) = record;
4447 DECL_FIELD_CONTEXT (f_fpr) = record;
4448 DECL_FIELD_CONTEXT (f_ovf) = record;
4449 DECL_FIELD_CONTEXT (f_sav) = record;
4451 TREE_CHAIN (record) = type_decl;
4452 TYPE_NAME (record) = type_decl;
4453 TYPE_FIELDS (record) = f_gpr;
4454 TREE_CHAIN (f_gpr) = f_fpr;
4455 TREE_CHAIN (f_fpr) = f_ovf;
4456 TREE_CHAIN (f_ovf) = f_sav;
4458 layout_type (record);
4460 /* The correct type is an array type of one element. */
4461 return build_array_type (record, build_index_type (size_zero_node));
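/* The record built above matches the va_list mandated by the x86-64
   psABI; in C terms, roughly:

       typedef struct {
         unsigned int gp_offset;      (offset into reg_save_area for the
                                       next general register)
         unsigned int fp_offset;      (likewise for SSE registers)
         void *overflow_arg_area;     (next stack-passed argument)
         void *reg_save_area;         (register dump from the prologue)
       } __va_list_tag, va_list[1];

   The array-of-one-element trick makes va_list decay to a pointer when
   passed around, as the ABI requires.  */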
4464 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
4467 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
4477 if (! cfun->va_list_gpr_size && ! cfun->va_list_fpr_size)
4480 /* Indicate that we need to allocate space on the stack for the varargs save area.  */
4481 ix86_save_varrargs_registers = 1;
4482 cfun->stack_alignment_needed = 128;
4484 save_area = frame_pointer_rtx;
4485 set = get_varargs_alias_set ();
4487 for (i = cum->regno;
4489 && i < cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
4492 mem = gen_rtx_MEM (Pmode,
4493 plus_constant (save_area, i * UNITS_PER_WORD));
4494 MEM_NOTRAP_P (mem) = 1;
4495 set_mem_alias_set (mem, set);
4496 emit_move_insn (mem, gen_rtx_REG (Pmode,
4497 x86_64_int_parameter_registers[i]));
4500 if (cum->sse_nregs && cfun->va_list_fpr_size)
4502 /* Now emit code to save SSE registers.  The AX parameter contains the number
4503    of SSE parameter registers used to call this function.  We use the
4504    sse_prologue_save insn template, which produces a computed jump across
4505    the SSE saves.  We need some preparation work to get this working.  */
4507 label = gen_label_rtx ();
4508 label_ref = gen_rtx_LABEL_REF (Pmode, label);
4510 /* Compute the address to jump to:
4511    label - 5*eax + nnamed_sse_arguments*5  */
4512 tmp_reg = gen_reg_rtx (Pmode);
4513 nsse_reg = gen_reg_rtx (Pmode);
4514 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
4515 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
4516 gen_rtx_MULT (Pmode, nsse_reg,
4521 gen_rtx_CONST (DImode,
4522 gen_rtx_PLUS (DImode,
4524 GEN_INT (cum->sse_regno * 4))));
4526 emit_move_insn (nsse_reg, label_ref);
4527 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
4529 /* Compute the address of the memory block we save into.  We always use a
4530    pointer pointing 127 bytes after the first byte to store - this is needed
4531    to keep the instruction size limited to 4 bytes.  */
4532 tmp_reg = gen_reg_rtx (Pmode);
4533 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
4534 plus_constant (save_area,
4535 8 * REGPARM_MAX + 127)));
4536 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
4537 MEM_NOTRAP_P (mem) = 1;
4538 set_mem_alias_set (mem, set);
4539 set_mem_align (mem, BITS_PER_WORD);
4541 /* And finally do the dirty job! */
4542 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
4543 GEN_INT (cum->sse_regno), label));
4548 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
4550 int set = get_varargs_alias_set ();
4553 for (i = cum->regno; i < REGPARM_MAX; i++)
4557 mem = gen_rtx_MEM (Pmode,
4558 plus_constant (virtual_incoming_args_rtx,
4559 i * UNITS_PER_WORD));
4560 MEM_NOTRAP_P (mem) = 1;
4561 set_mem_alias_set (mem, set);
4563 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
4564 emit_move_insn (mem, reg);
4569 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4570 tree type, int *pretend_size ATTRIBUTE_UNUSED,
4573 CUMULATIVE_ARGS next_cum;
4577 /* This argument doesn't appear to be used anymore. Which is good,
4578 because the old code here didn't suppress rtl generation. */
4579 gcc_assert (!no_rtl);
4584 fntype = TREE_TYPE (current_function_decl);
4585 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
4586 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
4587 != void_type_node));
4589 /* For varargs, we do not want to skip the dummy va_dcl argument.
4590 For stdargs, we do want to skip the last named argument. */
4593 function_arg_advance (&next_cum, mode, type, 1);
4595 if (TARGET_64BIT_MS_ABI)
4596 setup_incoming_varargs_ms_64 (&next_cum);
4598 setup_incoming_varargs_64 (&next_cum);
4601 /* Implement va_start. */
4604 ix86_va_start (tree valist, rtx nextarg)
4606 HOST_WIDE_INT words, n_gpr, n_fpr;
4607 tree f_gpr, f_fpr, f_ovf, f_sav;
4608 tree gpr, fpr, ovf, sav, t;
4611 /* Only the 64-bit target needs something special.  */
4612 if (!TARGET_64BIT || TARGET_64BIT_MS_ABI)
4614 std_expand_builtin_va_start (valist, nextarg);
4618 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4619 f_fpr = TREE_CHAIN (f_gpr);
4620 f_ovf = TREE_CHAIN (f_fpr);
4621 f_sav = TREE_CHAIN (f_ovf);
4623 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
4624 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
4625 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
4626 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
4627 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
4629 /* Count number of gp and fp argument registers used. */
4630 words = current_function_args_info.words;
4631 n_gpr = current_function_args_info.regno;
4632 n_fpr = current_function_args_info.sse_regno;
4634 if (cfun->va_list_gpr_size)
4636 type = TREE_TYPE (gpr);
4637 t = build2 (GIMPLE_MODIFY_STMT, type, gpr,
4638 build_int_cst (type, n_gpr * 8));
4639 TREE_SIDE_EFFECTS (t) = 1;
4640 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4643 if (cfun->va_list_fpr_size)
4645 type = TREE_TYPE (fpr);
4646 t = build2 (GIMPLE_MODIFY_STMT, type, fpr,
4647 build_int_cst (type, n_fpr * 16 + 8*REGPARM_MAX));
4648 TREE_SIDE_EFFECTS (t) = 1;
4649 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4652 /* Find the overflow area. */
4653 type = TREE_TYPE (ovf);
4654 t = make_tree (type, virtual_incoming_args_rtx);
4656 t = build2 (PLUS_EXPR, type, t,
4657 build_int_cst (type, words * UNITS_PER_WORD));
4658 t = build2 (GIMPLE_MODIFY_STMT, type, ovf, t);
4659 TREE_SIDE_EFFECTS (t) = 1;
4660 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4662 if (cfun->va_list_gpr_size || cfun->va_list_fpr_size)
4664 /* Find the register save area.
4665    The prologue of the function saves it right above the stack frame.  */
4666 type = TREE_TYPE (sav);
4667 t = make_tree (type, frame_pointer_rtx);
4668 t = build2 (GIMPLE_MODIFY_STMT, type, sav, t);
4669 TREE_SIDE_EFFECTS (t) = 1;
4670 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
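/* After this, for a call such as f (int, ...) whose named argument
   consumed one GP register, the va_list holds roughly

       gp_offset         = 1 * 8          (48 once all six GP regs are used)
       fp_offset         = 48 + n_fpr * 16  (up to 48 + 8*16 = 176)
       overflow_arg_area = first incoming stack argument
       reg_save_area     = base of the prologue's register dump

   matching the 8-byte GP / 16-byte SSE slot sizes used throughout.  */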
4674 /* Implement va_arg. */
4677 ix86_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p)
4679 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
4680 tree f_gpr, f_fpr, f_ovf, f_sav;
4681 tree gpr, fpr, ovf, sav, t;
4683 tree lab_false, lab_over = NULL_TREE;
4688 enum machine_mode nat_mode;
4690 /* Only the 64-bit target needs something special.  */
4691 if (!TARGET_64BIT || TARGET_64BIT_MS_ABI)
4692 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
4694 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4695 f_fpr = TREE_CHAIN (f_gpr);
4696 f_ovf = TREE_CHAIN (f_fpr);
4697 f_sav = TREE_CHAIN (f_ovf);
4699 valist = build_va_arg_indirect_ref (valist);
4700 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
4701 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
4702 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
4703 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
4705 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
4707 type = build_pointer_type (type);
4708 size = int_size_in_bytes (type);
4709 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4711 nat_mode = type_natural_mode (type);
4712 container = construct_container (nat_mode, TYPE_MODE (type), type, 0,
4713 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
4715 /* Pull the value out of the saved registers. */
4717 addr = create_tmp_var (ptr_type_node, "addr");
4718 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
4722 int needed_intregs, needed_sseregs;
4724 tree int_addr, sse_addr;
4726 lab_false = create_artificial_label ();
4727 lab_over = create_artificial_label ();
4729 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
4731 need_temp = (!REG_P (container)
4732 && ((needed_intregs && TYPE_ALIGN (type) > 64)
4733 || TYPE_ALIGN (type) > 128));
4735 /* In case we are passing a structure, verify that it is a consecutive block
4736    in the register save area.  If not, we need to do moves.  */
4737 if (!need_temp && !REG_P (container))
4739 /* Verify that all registers are strictly consecutive.  */
4740 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
4744 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
4746 rtx slot = XVECEXP (container, 0, i);
4747 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
4748 || INTVAL (XEXP (slot, 1)) != i * 16)
4756 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
4758 rtx slot = XVECEXP (container, 0, i);
4759 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
4760 || INTVAL (XEXP (slot, 1)) != i * 8)
4772 int_addr = create_tmp_var (ptr_type_node, "int_addr");
4773 DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
4774 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
4775 DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();
4778 /* First ensure that we fit completely in registers. */
4781 t = build_int_cst (TREE_TYPE (gpr),
4782 (REGPARM_MAX - needed_intregs + 1) * 8);
4783 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
4784 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
4785 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
4786 gimplify_and_add (t, pre_p);
4790 t = build_int_cst (TREE_TYPE (fpr),
4791 (SSE_REGPARM_MAX - needed_sseregs + 1) * 16
4793 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
4794 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
4795 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
4796 gimplify_and_add (t, pre_p);
4799 /* Compute index to start of area used for integer regs. */
4802 /* int_addr = gpr + sav; */
4803 t = fold_convert (ptr_type_node, gpr);
4804 t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
4805 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, int_addr, t);
4806 gimplify_and_add (t, pre_p);
4810 /* sse_addr = fpr + sav; */
4811 t = fold_convert (ptr_type_node, fpr);
4812 t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
4813 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, sse_addr, t);
4814 gimplify_and_add (t, pre_p);
4819 tree temp = create_tmp_var (type, "va_arg_tmp");
4822 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
4823 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, addr, t);
4824 gimplify_and_add (t, pre_p);
4826 for (i = 0; i < XVECLEN (container, 0); i++)
4828 rtx slot = XVECEXP (container, 0, i);
4829 rtx reg = XEXP (slot, 0);
4830 enum machine_mode mode = GET_MODE (reg);
4831 tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
4832 tree addr_type = build_pointer_type (piece_type);
4835 tree dest_addr, dest;
4837 if (SSE_REGNO_P (REGNO (reg)))
4839 src_addr = sse_addr;
4840 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
4844 src_addr = int_addr;
4845 src_offset = REGNO (reg) * 8;
4847 src_addr = fold_convert (addr_type, src_addr);
4848 src_addr = fold_build2 (PLUS_EXPR, addr_type, src_addr,
4849 size_int (src_offset));
4850 src = build_va_arg_indirect_ref (src_addr);
4852 dest_addr = fold_convert (addr_type, addr);
4853 dest_addr = fold_build2 (PLUS_EXPR, addr_type, dest_addr,
4854 size_int (INTVAL (XEXP (slot, 1))));
4855 dest = build_va_arg_indirect_ref (dest_addr);
4857 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, dest, src);
4858 gimplify_and_add (t, pre_p);
4864 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
4865 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
4866 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (gpr), gpr, t);
4867 gimplify_and_add (t, pre_p);
4871 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
4872 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
4873 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (fpr), fpr, t);
4874 gimplify_and_add (t, pre_p);
4877 t = build1 (GOTO_EXPR, void_type_node, lab_over);
4878 gimplify_and_add (t, pre_p);
4880 t = build1 (LABEL_EXPR, void_type_node, lab_false);
4881 append_to_statement_list (t, pre_p);
4884 /* ... otherwise out of the overflow area. */
4886 /* Care for on-stack alignment if needed. */
4887 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64
4888 || integer_zerop (TYPE_SIZE (type)))
4892 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
4893 t = build2 (PLUS_EXPR, TREE_TYPE (ovf), ovf,
4894 build_int_cst (TREE_TYPE (ovf), align - 1));
4895 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
4896 build_int_cst (TREE_TYPE (t), -align));
4898 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
4900 t2 = build2 (GIMPLE_MODIFY_STMT, void_type_node, addr, t);
4901 gimplify_and_add (t2, pre_p);
4903 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
4904 build_int_cst (TREE_TYPE (t), rsize * UNITS_PER_WORD));
4905 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (ovf), ovf, t);
4906 gimplify_and_add (t, pre_p);
4910 t = build1 (LABEL_EXPR, void_type_node, lab_over);
4911 append_to_statement_list (t, pre_p);
4914 ptrtype = build_pointer_type (type);
4915 addr = fold_convert (ptrtype, addr);
4918 addr = build_va_arg_indirect_ref (addr);
4919 return build_va_arg_indirect_ref (addr);
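/* The net effect for, say, va_arg (ap, double) is GIMPLE along the
   lines of

       if (ap.fp_offset >= 176) goto stack;   (SSE save slots exhausted)
       addr = ap.reg_save_area + ap.fp_offset;
       ap.fp_offset += 16;
       goto done;
     stack:
       addr = ap.overflow_arg_area;
       ap.overflow_arg_area += 8;
     done:
       result = *(double *) addr;

   with the 176 coming from (SSE_REGPARM_MAX - 1 + 1) * 16 plus the
   48-byte GP area, per the bound check emitted above.  */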
4922 /* Return nonzero if OPNUM's MEM should be matched
4923 in movabs* patterns. */
4926 ix86_check_movabs (rtx insn, int opnum)
4930 set = PATTERN (insn);
4931 if (GET_CODE (set) == PARALLEL)
4932 set = XVECEXP (set, 0, 0);
4933 gcc_assert (GET_CODE (set) == SET);
4934 mem = XEXP (set, opnum);
4935 while (GET_CODE (mem) == SUBREG)
4936 mem = SUBREG_REG (mem);
4937 gcc_assert (MEM_P (mem));
4938 return (volatile_ok || !MEM_VOLATILE_P (mem));
4941 /* Initialize the table of extra 80387 mathematical constants. */
4944 init_ext_80387_constants (void)
4946 static const char * cst[5] =
4948 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
4949 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
4950 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
4951 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
4952 "3.1415926535897932385128089594061862044", /* 4: fldpi */
4956 for (i = 0; i < 5; i++)
4958 real_from_string (&ext_80387_constants_table[i], cst[i]);
4959 /* Ensure each constant is rounded to XFmode precision. */
4960 real_convert (&ext_80387_constants_table[i],
4961 XFmode, &ext_80387_constants_table[i]);
4964 ext_80387_constants_init = 1;
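/* These five values are, respectively, log10(2), ln(2), log2(e),
   log2(10) and pi - exactly the constants that the fldlg2, fldln2,
   fldl2e, fldl2t and fldpi instructions push in full 80-bit (XFmode)
   precision, which is why each string above is converted and then
   rounded to XFmode before use.  */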
4967 /* Return true if the constant is something that can be loaded with
4968 a special instruction. */
4971 standard_80387_constant_p (rtx x)
4973 enum machine_mode mode = GET_MODE (x);
4977 if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
4980 if (x == CONST0_RTX (mode))
4982 if (x == CONST1_RTX (mode))
4985 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
4987 /* For XFmode constants, try to find a special 80387 instruction when
4988 optimizing for size or on those CPUs that benefit from them. */
4990 && (optimize_size || TARGET_EXT_80387_CONSTANTS))
4994 if (! ext_80387_constants_init)
4995 init_ext_80387_constants ();
4997 for (i = 0; i < 5; i++)
4998 if (real_identical (&r, &ext_80387_constants_table[i]))
5002 /* A load of the constant -0.0 or -1.0 will be split into an
5003    fldz;fchs or fld1;fchs sequence.  */
5004 if (real_isnegzero (&r))
5006 if (real_identical (&r, &dconstm1))
5012 /* Return the opcode of the special instruction to be used to load
5016 standard_80387_constant_opcode (rtx x)
5018 switch (standard_80387_constant_p (x))
5042 /* Return the CONST_DOUBLE representing the 80387 constant that is
5043 loaded by the specified special instruction. The argument IDX
5044 matches the return value from standard_80387_constant_p. */
5047 standard_80387_constant_rtx (int idx)
5051 if (! ext_80387_constants_init)
5052 init_ext_80387_constants ();
5068 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
5072 /* Return 1 if MODE is a valid mode for SSE.  */
5074 standard_sse_mode_p (enum machine_mode mode)
5091 /* Return 1 if X is an FP constant we can load into an SSE register without using memory.
5094 standard_sse_constant_p (rtx x)
5096 enum machine_mode mode = GET_MODE (x);
5098 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
5100 if (vector_all_ones_operand (x, mode)
5101 && standard_sse_mode_p (mode))
5102 return TARGET_SSE2 ? 2 : -1;
5107 /* Return the opcode of the special instruction to be used to load
5111 standard_sse_constant_opcode (rtx insn, rtx x)
5113 switch (standard_sse_constant_p (x))
5116 if (get_attr_mode (insn) == MODE_V4SF)
5117 return "xorps\t%0, %0";
5118 else if (get_attr_mode (insn) == MODE_V2DF)
5119 return "xorpd\t%0, %0";
5121 return "pxor\t%0, %0";
5123 return "pcmpeqd\t%0, %0";
5128 /* Returns 1 if OP contains a symbol reference.  */
5131 symbolic_reference_mentioned_p (rtx op)
5136 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
5139 fmt = GET_RTX_FORMAT (GET_CODE (op));
5140 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
5146 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
5147 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
5151 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
5158 /* Return 1 if it is appropriate to emit `ret' instructions in the
5159 body of a function. Do this only if the epilogue is simple, needing a
5160 couple of insns. Prior to reloading, we can't tell how many registers
5161 must be saved, so return 0 then. Return 0 if there is no frame
5162 marker to de-allocate. */
5165 ix86_can_use_return_insn_p (void)
5167 struct ix86_frame frame;
5169 if (! reload_completed || frame_pointer_needed)
5172 /* Don't allow more than 32k bytes of popped arguments, since that's all
5173    we can do with one instruction.  */
5174 if (current_function_pops_args
5175 && current_function_args_size >= 32768)
5178 ix86_compute_frame_layout (&frame);
5179 return frame.to_allocate == 0 && frame.nregs == 0;
5182 /* Value should be nonzero if functions must have frame pointers.
5183 Zero means the frame pointer need not be set up (and parms may
5184 be accessed via the stack pointer) in functions that seem suitable. */
5187 ix86_frame_pointer_required (void)
5189 /* If we accessed previous frames, then the generated code expects
5190 to be able to access the saved ebp value in our frame. */
5191 if (cfun->machine->accesses_prev_frame)
5194 /* Several x86 OSes need a frame pointer for other reasons,
5195 usually pertaining to setjmp. */
5196 if (SUBTARGET_FRAME_POINTER_REQUIRED)
5199 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
5200 the frame pointer by default. Turn it back on now if we've not
5201 got a leaf function. */
5202 if (TARGET_OMIT_LEAF_FRAME_POINTER
5203 && (!current_function_is_leaf
5204 || ix86_current_function_calls_tls_descriptor))
5207 if (current_function_profile)
5213 /* Record that the current function accesses previous call frames. */
5216 ix86_setup_frame_addresses (void)
5218 cfun->machine->accesses_prev_frame = 1;
5221 #if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
5222 # define USE_HIDDEN_LINKONCE 1
5224 # define USE_HIDDEN_LINKONCE 0
5227 static int pic_labels_used;
5229 /* Fills in the label name that should be used for a pc thunk for
5230 the given register. */
5233 get_pc_thunk_name (char name[32], unsigned int regno)
5235 gcc_assert (!TARGET_64BIT);
5237 if (USE_HIDDEN_LINKONCE)
5238 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
5240 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
5244 /* This function generates code for -fpic that loads %ebx with
5245 the return address of the caller and then returns. */
5248 ix86_file_end (void)
5253 for (regno = 0; regno < 8; ++regno)
5257 if (! ((pic_labels_used >> regno) & 1))
5260 get_pc_thunk_name (name, regno);
5265 switch_to_section (darwin_sections[text_coal_section]);
5266 fputs ("\t.weak_definition\t", asm_out_file);
5267 assemble_name (asm_out_file, name);
5268 fputs ("\n\t.private_extern\t", asm_out_file);
5269 assemble_name (asm_out_file, name);
5270 fputs ("\n", asm_out_file);
5271 ASM_OUTPUT_LABEL (asm_out_file, name);
5275 if (USE_HIDDEN_LINKONCE)
5279 decl = build_decl (FUNCTION_DECL, get_identifier (name),
5281 TREE_PUBLIC (decl) = 1;
5282 TREE_STATIC (decl) = 1;
5283 DECL_ONE_ONLY (decl) = 1;
5285 (*targetm.asm_out.unique_section) (decl, 0);
5286 switch_to_section (get_named_section (decl, NULL, 0));
5288 (*targetm.asm_out.globalize_label) (asm_out_file, name);
5289 fputs ("\t.hidden\t", asm_out_file);
5290 assemble_name (asm_out_file, name);
5291 fputc ('\n', asm_out_file);
5292 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
5296 switch_to_section (text_section);
5297 ASM_OUTPUT_LABEL (asm_out_file, name);
5300 xops[0] = gen_rtx_REG (SImode, regno);
5301 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
5302 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
5303 output_asm_insn ("ret", xops);
5306 if (NEED_INDICATE_EXEC_STACK)
5307 file_end_indicate_exec_stack ();
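/* With USE_HIDDEN_LINKONCE the net effect is one comdat thunk per PIC
   register actually used, e.g. for %ebx:

       __i686.get_pc_thunk.bx:
               movl    (%esp), %ebx
               ret

   i.e. it copies the return address - the address of the instruction
   following the call - into the chosen register.  */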
5310 /* Emit code for the SET_GOT patterns. */
5313 output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
5319 if (TARGET_VXWORKS_RTP && flag_pic)
5321 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
5322 xops[2] = gen_rtx_MEM (Pmode,
5323 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
5324 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
5326 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
5327 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
5328 an unadorned address. */
5329 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
5330 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
5331 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
5335 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
5337 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
5339 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
5342 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
5344 output_asm_insn ("call\t%a2", xops);
5347 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
5348 is what will be referenced by the Mach-O PIC subsystem. */
5350 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
5353 (*targetm.asm_out.internal_label) (asm_out_file, "L",
5354 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
5357 output_asm_insn ("pop{l}\t%0", xops);
5362 get_pc_thunk_name (name, REGNO (dest));
5363 pic_labels_used |= 1 << REGNO (dest);
5365 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
5366 xops[2] = gen_rtx_MEM (QImode, xops[2]);
5367 output_asm_insn ("call\t%X2", xops);
5368 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
5369 is what will be referenced by the Mach-O PIC subsystem. */
5372 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
5374 targetm.asm_out.internal_label (asm_out_file, "L",
5375 CODE_LABEL_NUMBER (label));
5382 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
5383 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
5385 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
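/* Without the thunk (no deep branch prediction, or non-PIC), the
   sequence emitted above is the classic PC-load idiom, along the lines of

       call    1f
   1:  popl    %ebx
       addl    $_GLOBAL_OFFSET_TABLE_+[.-1b], %ebx

   whereas the thunk variant replaces the call/pop pair with a call to
   __i686.get_pc_thunk.bx followed by the same add.  */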
5390 /* Generate a "push" pattern for input ARG.  */
5395 return gen_rtx_SET (VOIDmode,
5397 gen_rtx_PRE_DEC (Pmode,
5398 stack_pointer_rtx)),
5402 /* Return >= 0 if there is an unused call-clobbered register available
5403 for the entire function. */
5406 ix86_select_alt_pic_regnum (void)
5408 if (current_function_is_leaf && !current_function_profile
5409 && !ix86_current_function_calls_tls_descriptor)
5412 for (i = 2; i >= 0; --i)
5413 if (!regs_ever_live[i])
5417 return INVALID_REGNUM;
5420 /* Return 1 if we need to save REGNO. */
5422 ix86_save_reg (unsigned int regno, int maybe_eh_return)
5424 if (pic_offset_table_rtx
5425 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
5426 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
5427 || current_function_profile
5428 || current_function_calls_eh_return
5429 || current_function_uses_const_pool))
5431 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
5436 if (current_function_calls_eh_return && maybe_eh_return)
5441 unsigned test = EH_RETURN_DATA_REGNO (i);
5442 if (test == INVALID_REGNUM)
5449 if (cfun->machine->force_align_arg_pointer
5450 && regno == REGNO (cfun->machine->force_align_arg_pointer))
5453 return (regs_ever_live[regno]
5454 && !call_used_regs[regno]
5455 && !fixed_regs[regno]
5456 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
5459 /* Return number of registers to be saved on the stack. */
5462 ix86_nsaved_regs (void)
5467 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
5468 if (ix86_save_reg (regno, true))
5473 /* Return the offset between two registers, one to be eliminated, and the other
5474 its replacement, at the start of a routine. */
5477 ix86_initial_elimination_offset (int from, int to)
5479 struct ix86_frame frame;
5480 ix86_compute_frame_layout (&frame);
5482 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
5483 return frame.hard_frame_pointer_offset;
5484 else if (from == FRAME_POINTER_REGNUM
5485 && to == HARD_FRAME_POINTER_REGNUM)
5486 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
5489 gcc_assert (to == STACK_POINTER_REGNUM);
5491 if (from == ARG_POINTER_REGNUM)
5492 return frame.stack_pointer_offset;
5494 gcc_assert (from == FRAME_POINTER_REGNUM);
5495 return frame.stack_pointer_offset - frame.frame_pointer_offset;
5499 /* Fill the ix86_frame structure describing the frame of the currently computed function.  */
5502 ix86_compute_frame_layout (struct ix86_frame *frame)
5504 HOST_WIDE_INT total_size;
5505 unsigned int stack_alignment_needed;
5506 HOST_WIDE_INT offset;
5507 unsigned int preferred_alignment;
5508 HOST_WIDE_INT size = get_frame_size ();
5510 frame->nregs = ix86_nsaved_regs ();
5513 stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
5514 preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
5516 /* During reload iteration the number of registers saved can change.
5517    Recompute the value as needed.  Do not recompute when the number of registers
5518    didn't change, as reload does multiple calls to the function and does not
5519    expect the decision to change within a single iteration.  */
5521 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
5523 int count = frame->nregs;
5525 cfun->machine->use_fast_prologue_epilogue_nregs = count;
5526 /* The fast prologue uses move instead of push to save registers.  This
5527    is significantly longer, but also executes faster, as modern hardware
5528    can execute the moves in parallel but can't do that for push/pop.

5530    Be careful about choosing which prologue to emit:  when the function
5531    takes many instructions to execute, we may as well use the slow version,
5532    likewise when the function is known to be outside a hot spot (known
5533    with profile feedback only).  Weight the size of the function by the
5534    number of registers to save, as it is cheap to use one or two push
5535    instructions but very slow to use many of them.  */
5537 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
5538 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
5539 || (flag_branch_probabilities
5540 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
5541 cfun->machine->use_fast_prologue_epilogue = false;
5543 cfun->machine->use_fast_prologue_epilogue
5544 = !expensive_function_p (count);
5546 if (TARGET_PROLOGUE_USING_MOVE
5547 && cfun->machine->use_fast_prologue_epilogue)
5548 frame->save_regs_using_mov = true;
5550 frame->save_regs_using_mov = false;
5553 /* Skip return address and saved base pointer. */
5554 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
5556 frame->hard_frame_pointer_offset = offset;
5558 /* Do some sanity checking of stack_alignment_needed and
5559    preferred_alignment, since the i386 port is the only one using these
5560    features, and they may break easily.  */
5562 gcc_assert (!size || stack_alignment_needed);
5563 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
5564 gcc_assert (preferred_alignment <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
5565 gcc_assert (stack_alignment_needed
5566 <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
5568 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
5569 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
5571 /* Register save area */
5572 offset += frame->nregs * UNITS_PER_WORD;
5575 if (ix86_save_varrargs_registers)
5577 offset += X86_64_VARARGS_SIZE;
5578 frame->va_arg_size = X86_64_VARARGS_SIZE;
5581 frame->va_arg_size = 0;
5583 /* Align start of frame for local function. */
5584 frame->padding1 = ((offset + stack_alignment_needed - 1)
5585 & -stack_alignment_needed) - offset;
5587 offset += frame->padding1;
5589 /* Frame pointer points here. */
5590 frame->frame_pointer_offset = offset;
5594 /* Add the outgoing arguments area.  It can be skipped if we eliminated
5595    all the function calls as dead code.
5596    Skipping is, however, impossible when the function calls alloca: the
5597    alloca expander assumes that the last current_function_outgoing_args_size
5598    bytes of the stack frame are unused.  */
5599 if (ACCUMULATE_OUTGOING_ARGS
5600 && (!current_function_is_leaf || current_function_calls_alloca
5601 || ix86_current_function_calls_tls_descriptor))
5603 offset += current_function_outgoing_args_size;
5604 frame->outgoing_arguments_size = current_function_outgoing_args_size;
5607 frame->outgoing_arguments_size = 0;
5609 /* Align the stack boundary.  Only needed if we're calling another function or using alloca.  */
5611 if (!current_function_is_leaf || current_function_calls_alloca
5612 || ix86_current_function_calls_tls_descriptor)
5613 frame->padding2 = ((offset + preferred_alignment - 1)
5614 & -preferred_alignment) - offset;
5616 frame->padding2 = 0;
5618 offset += frame->padding2;
5620 /* We've reached the end of the stack frame.  */
5621 frame->stack_pointer_offset = offset;
5623 /* Size the prologue needs to allocate.  */
5624 frame->to_allocate =
5625 (size + frame->padding1 + frame->padding2
5626 + frame->outgoing_arguments_size + frame->va_arg_size);
5628 if ((!frame->to_allocate && frame->nregs <= 1)
5629 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
5630 frame->save_regs_using_mov = false;
5632 if (TARGET_RED_ZONE && current_function_sp_is_unchanging
5633 && current_function_is_leaf
5634 && !ix86_current_function_calls_tls_descriptor)
5636 frame->red_zone_size = frame->to_allocate;
5637 if (frame->save_regs_using_mov)
5638 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
5639 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
5640 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
5643 frame->red_zone_size = 0;
5644 frame->to_allocate -= frame->red_zone_size;
5645 frame->stack_pointer_offset -= frame->red_zone_size;
5647 fprintf (stderr, "\n");
5648 fprintf (stderr, "nregs: %ld\n", (long)frame->nregs);
5649 fprintf (stderr, "size: %ld\n", (long)size);
5650 fprintf (stderr, "alignment1: %ld\n", (long)stack_alignment_needed);
5651 fprintf (stderr, "padding1: %ld\n", (long)frame->padding1);
5652 fprintf (stderr, "va_arg: %ld\n", (long)frame->va_arg_size);
5653 fprintf (stderr, "padding2: %ld\n", (long)frame->padding2);
5654 fprintf (stderr, "to_allocate: %ld\n", (long)frame->to_allocate);
5655 fprintf (stderr, "red_zone_size: %ld\n", (long)frame->red_zone_size);
5656 fprintf (stderr, "frame_pointer_offset: %ld\n", (long)frame->frame_pointer_offset);
5657 fprintf (stderr, "hard_frame_pointer_offset: %ld\n",
5658 (long)frame->hard_frame_pointer_offset);
5659 fprintf (stderr, "stack_pointer_offset: %ld\n", (long)frame->stack_pointer_offset);
5660 fprintf (stderr, "current_function_is_leaf: %ld\n", (long)current_function_is_leaf);
5661 fprintf (stderr, "current_function_calls_alloca: %ld\n", (long)current_function_calls_alloca);
5662 fprintf (stderr, "x86_current_function_calls_tls_descriptor: %ld\n", (long)ix86_current_function_calls_tls_descriptor);
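/* Pictorially, the layout computed above is roughly (addresses grow
   downwards):

       return address                    <- entry %esp
       saved %ebp                        (if frame_pointer_needed)
       saved registers                   (nregs words, push or mov)
       va-arg register save area         (64-bit varargs only)
       padding1                          (stack_alignment_needed)
                                         <- frame_pointer_offset
       local variables                   (size)
       outgoing arguments
       padding2                          (preferred_alignment)
                                         <- stack_pointer_offset

   with to_allocate covering everything below the register save area,
   minus whatever the red zone absorbs for x86-64 leaf functions.  */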
5666 /* Emit code to save registers in the prologue. */
5669 ix86_emit_save_regs (void)
5674 for (regno = FIRST_PSEUDO_REGISTER; regno-- > 0; )
5675 if (ix86_save_reg (regno, true))
5677 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
5678 RTX_FRAME_RELATED_P (insn) = 1;
5682 /* Emit code to save registers using MOV insns.  The first register
5683    is stored at POINTER + OFFSET.  */
5685 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
5690 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5691 if (ix86_save_reg (regno, true))
5693 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
5695 gen_rtx_REG (Pmode, regno));
5696 RTX_FRAME_RELATED_P (insn) = 1;
5697 offset += UNITS_PER_WORD;
5701 /* Expand prologue or epilogue stack adjustment.
5702 The pattern exists to put a dependency on all ebp-based memory accesses.
5703 STYLE should be negative if instructions should be marked as frame related,
5704 zero if the %r11 register is live and cannot be freely used, and positive otherwise.  */
5708 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
5713 insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
5714 else if (x86_64_immediate_operand (offset, DImode))
5715 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
5719 /* r11 is used by indirect sibcall return as well, set before the
5720 epilogue and used after the epilogue. ATM indirect sibcall
5721 shouldn't be used together with huge frame sizes in one
5722 function because of the frame_size check in sibcall.c. */
5724 r11 = gen_rtx_REG (DImode, R11_REG);
5725 insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
5727 RTX_FRAME_RELATED_P (insn) = 1;
5728 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
5732 RTX_FRAME_RELATED_P (insn) = 1;
5735 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
5738 ix86_internal_arg_pointer (void)
5740 bool has_force_align_arg_pointer =
5741 (0 != lookup_attribute (ix86_force_align_arg_pointer_string,
5742 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))));
5743 if ((FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN
5744 && DECL_NAME (current_function_decl)
5745 && MAIN_NAME_P (DECL_NAME (current_function_decl))
5746 && DECL_FILE_SCOPE_P (current_function_decl))
5747 || ix86_force_align_arg_pointer
5748 || has_force_align_arg_pointer)
5750 /* Nested functions can't realign the stack due to a register conflict.  */
5752 if (DECL_CONTEXT (current_function_decl)
5753 && TREE_CODE (DECL_CONTEXT (current_function_decl)) == FUNCTION_DECL)
5755 if (ix86_force_align_arg_pointer)
5756 warning (0, "-mstackrealign ignored for nested functions");
5757 if (has_force_align_arg_pointer)
5758 error ("%s not supported for nested functions",
5759 ix86_force_align_arg_pointer_string);
5760 return virtual_incoming_args_rtx;
5762 cfun->machine->force_align_arg_pointer = gen_rtx_REG (Pmode, 2);
5763 return copy_to_reg (cfun->machine->force_align_arg_pointer);
5766 return virtual_incoming_args_rtx;
5769 /* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook.
5770 This is called from dwarf2out.c to emit call frame instructions
5771 for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs. */
5773 ix86_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
5775 rtx unspec = SET_SRC (pattern);
5776 gcc_assert (GET_CODE (unspec) == UNSPEC);
5780 case UNSPEC_REG_SAVE:
5781 dwarf2out_reg_save_reg (label, XVECEXP (unspec, 0, 0),
5782 SET_DEST (pattern));
5784 case UNSPEC_DEF_CFA:
5785 dwarf2out_def_cfa (label, REGNO (SET_DEST (pattern)),
5786 INTVAL (XVECEXP (unspec, 0, 0)));
5793 /* Expand the prologue into a bunch of separate insns. */
5796 ix86_expand_prologue (void)
5800 struct ix86_frame frame;
5801 HOST_WIDE_INT allocate;
5803 ix86_compute_frame_layout (&frame);
5805 if (cfun->machine->force_align_arg_pointer)
5809 /* Grab the argument pointer. */
5810 x = plus_constant (stack_pointer_rtx, 4);
5811 y = cfun->machine->force_align_arg_pointer;
5812 insn = emit_insn (gen_rtx_SET (VOIDmode, y, x));
5813 RTX_FRAME_RELATED_P (insn) = 1;
5815 /* The unwind info consists of two parts: install the fafp as the cfa,
5816 and record the fafp as the "save register" of the stack pointer.
5817 The latter is there in order that the unwinder can see where it
5818 should restore the stack pointer across the and insn. */
5819 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx), UNSPEC_DEF_CFA);
5820 x = gen_rtx_SET (VOIDmode, y, x);
5821 RTX_FRAME_RELATED_P (x) = 1;
5822 y = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, stack_pointer_rtx),
5824 y = gen_rtx_SET (VOIDmode, cfun->machine->force_align_arg_pointer, y);
5825 RTX_FRAME_RELATED_P (y) = 1;
5826 x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y));
5827 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
5828 REG_NOTES (insn) = x;
5830 /* Align the stack. */
5831 emit_insn (gen_andsi3 (stack_pointer_rtx, stack_pointer_rtx,
5834 /* And here we cheat like madmen with the unwind info. We force the
5835 cfa register back to sp+4, which is exactly what it was at the
5836 start of the function. Re-pushing the return address results in
5837 the return at the same spot relative to the cfa, and thus is
5838 correct wrt the unwind info. */
5839 x = cfun->machine->force_align_arg_pointer;
5840 x = gen_frame_mem (Pmode, plus_constant (x, -4));
5841 insn = emit_insn (gen_push (x));
5842 RTX_FRAME_RELATED_P (insn) = 1;
5845 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, x), UNSPEC_DEF_CFA);
5846 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
5847 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
5848 REG_NOTES (insn) = x;
5851 /* Note: AT&T enter does NOT have reversed args. Enter is probably
5852 slower on all targets. Also sdb doesn't like it. */
5854 if (frame_pointer_needed)
5856 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
5857 RTX_FRAME_RELATED_P (insn) = 1;
5859 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
5860 RTX_FRAME_RELATED_P (insn) = 1;
5863 allocate = frame.to_allocate;
5865 if (!frame.save_regs_using_mov)
5866 ix86_emit_save_regs ();
5868 allocate += frame.nregs * UNITS_PER_WORD;
5870 /* When using the red zone we may start register saving before allocating
5871    the stack frame, saving one cycle of the prologue.  */
5872 if (TARGET_RED_ZONE && frame.save_regs_using_mov)
5873 ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
5874 : stack_pointer_rtx,
5875 -frame.nregs * UNITS_PER_WORD);
5879 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
5880 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5881 GEN_INT (-allocate), -1);
5884 /* Only valid for Win32. */
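/* An added note (an illustrative sketch, not from the original sources):
   this path moves the allocation size into %eax and emits the
   allocate_stack_worker pattern, a chkstk-style helper that touches the
   stack a page at a time, which Win32 requires so that the guard page
   is hit in order for large allocations. */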
5885 rtx eax = gen_rtx_REG (Pmode, 0);
5889 gcc_assert (!TARGET_64BIT || TARGET_64BIT_MS_ABI);
5891 if (TARGET_64BIT_MS_ABI)
5894 eax_live = ix86_eax_live_at_start_p ();
5898 emit_insn (gen_push (eax));
5899 allocate -= UNITS_PER_WORD;
5902 emit_move_insn (eax, GEN_INT (allocate));
5905 insn = gen_allocate_stack_worker_64 (eax);
5907 insn = gen_allocate_stack_worker_32 (eax);
5908 insn = emit_insn (insn);
5909 RTX_FRAME_RELATED_P (insn) = 1;
5910 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
5911 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
5912 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
5913 t, REG_NOTES (insn));
5917 if (frame_pointer_needed)
5918 t = plus_constant (hard_frame_pointer_rtx,
5921 - frame.nregs * UNITS_PER_WORD);
5923 t = plus_constant (stack_pointer_rtx, allocate);
5924 emit_move_insn (eax, gen_rtx_MEM (Pmode, t));
5928 if (frame.save_regs_using_mov && !TARGET_RED_ZONE)
5930 if (!frame_pointer_needed || !frame.to_allocate)
5931 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
5933 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
5934 -frame.nregs * UNITS_PER_WORD);
5937 pic_reg_used = false;
5938 if (pic_offset_table_rtx
5939 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
5940 || current_function_profile))
5942 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
5944 if (alt_pic_reg_used != INVALID_REGNUM)
5945 REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
5947 pic_reg_used = true;
5954 if (ix86_cmodel == CM_LARGE_PIC)
5956 rtx tmp_reg = gen_rtx_REG (DImode,
5957 FIRST_REX_INT_REG + 3 /* R11 */);
5958 rtx label = gen_label_rtx ();
5960 LABEL_PRESERVE_P (label) = 1;
5961 gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
5962 insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx, label));
5963 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
5964 insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
5965 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
5966 insn = emit_insn (gen_adddi3 (pic_offset_table_rtx,
5967 pic_offset_table_rtx, tmp_reg));
5970 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
5973 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
5975 /* Even with accurate pre-reload life analysis, we can wind up
5976 deleting all references to the pic register after reload.
5977 Consider the case where cross-jumping unifies two sides of a branch
5978 controlled by a comparison vs the only read from a global;
5979 in that case, allow the set_got to be deleted, though we're
5980 too late to do anything about the ebx save in the prologue. */
5981 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
5984 /* Prevent function calls from being scheduled before the call to mcount.
5985 In the pic_reg_used case, make sure that the got load isn't deleted. */
5986 if (current_function_profile)
5987 emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
5990 /* Emit code to restore saved registers using MOV insns. First register
5991 is restored from POINTER + OFFSET. */
5993 ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
5994 int maybe_eh_return)
5997 rtx base_address = gen_rtx_MEM (Pmode, pointer);
5999 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
6000 if (ix86_save_reg (regno, maybe_eh_return))
6002 /* Ensure that adjust_address won't be forced to produce a pointer
6003 out of the range allowed by the x86-64 instruction set. */
6004 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
6008 r11 = gen_rtx_REG (DImode, R11_REG);
6009 emit_move_insn (r11, GEN_INT (offset));
6010 emit_insn (gen_adddi3 (r11, r11, pointer));
6011 base_address = gen_rtx_MEM (Pmode, r11);
6014 emit_move_insn (gen_rtx_REG (Pmode, regno),
6015 adjust_address (base_address, Pmode, offset));
6016 offset += UNITS_PER_WORD;
6020 /* Restore function stack, frame, and registers. */
6023 ix86_expand_epilogue (int style)
6026 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
6027 struct ix86_frame frame;
6028 HOST_WIDE_INT offset;
6030 ix86_compute_frame_layout (&frame);
6032 /* Calculate start of saved registers relative to ebp. Special care
6033 must be taken for the normal return case of a function using
6034 eh_return: the eax and edx registers are marked as saved, but not
6035 restored along this path. */
6036 offset = frame.nregs;
6037 if (current_function_calls_eh_return && style != 2)
6039 offset *= -UNITS_PER_WORD;
6041 /* If we're only restoring one register and sp is not valid, then
6042 use a move instruction to restore the register, since it's
6043 less work than reloading sp and popping the register.
6045 The default code results in a stack adjustment using an add/lea instruction,
6046 while this code results in a LEAVE instruction (or its discrete equivalent),
6047 so it is profitable in some other cases as well, especially when there
6048 are no registers to restore. We also use this code when TARGET_USE_LEAVE
6049 and there is exactly one register to pop. This heuristic may need some
6050 tuning in the future. */
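/* A concrete instance (an illustrative sketch): with a frame pointer, no
   saved registers and a nonzero local frame, the path below ends the
   epilogue with a single 'leave' instead of an add-to-%esp adjustment
   followed by a pop of %ebp. */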
6051 if ((!sp_valid && frame.nregs <= 1)
6052 || (TARGET_EPILOGUE_USING_MOVE
6053 && cfun->machine->use_fast_prologue_epilogue
6054 && (frame.nregs > 1 || frame.to_allocate))
6055 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
6056 || (frame_pointer_needed && TARGET_USE_LEAVE
6057 && cfun->machine->use_fast_prologue_epilogue
6058 && frame.nregs == 1)
6059 || current_function_calls_eh_return)
6061 /* Restore registers. We can use ebp or esp to address the memory
6062 locations. If both are available, default to ebp, since offsets
6063 are known to be small. The only exception is esp pointing directly to the
6064 end of the block of saved registers, where we may simplify the addressing mode. */
6067 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
6068 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
6069 frame.to_allocate, style == 2);
6071 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
6072 offset, style == 2);
6074 /* eh_return epilogues need %ecx added to the stack pointer. */
6077 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
6079 if (frame_pointer_needed)
6081 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
6082 tmp = plus_constant (tmp, UNITS_PER_WORD);
6083 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
6085 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
6086 emit_move_insn (hard_frame_pointer_rtx, tmp);
6088 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
6093 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
6094 tmp = plus_constant (tmp, (frame.to_allocate
6095 + frame.nregs * UNITS_PER_WORD));
6096 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
6099 else if (!frame_pointer_needed)
6100 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
6101 GEN_INT (frame.to_allocate
6102 + frame.nregs * UNITS_PER_WORD),
6104 /* If not an i386, mov & pop is faster than "leave". */
6105 else if (TARGET_USE_LEAVE || optimize_size
6106 || !cfun->machine->use_fast_prologue_epilogue)
6107 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
6110 pro_epilogue_adjust_stack (stack_pointer_rtx,
6111 hard_frame_pointer_rtx,
6114 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
6116 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
6121 /* First step is to deallocate the stack frame so that we can
6122 pop the registers. */
6125 gcc_assert (frame_pointer_needed);
6126 pro_epilogue_adjust_stack (stack_pointer_rtx,
6127 hard_frame_pointer_rtx,
6128 GEN_INT (offset), style);
6130 else if (frame.to_allocate)
6131 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
6132 GEN_INT (frame.to_allocate), style);
6134 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
6135 if (ix86_save_reg (regno, false))
6138 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
6140 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
6142 if (frame_pointer_needed)
6144 /* Leave results in shorter dependency chains on CPUs that are
6145 able to grok it fast. */
6146 if (TARGET_USE_LEAVE)
6147 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
6148 else if (TARGET_64BIT)
6149 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
6151 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
6155 if (cfun->machine->force_align_arg_pointer)
6157 emit_insn (gen_addsi3 (stack_pointer_rtx,
6158 cfun->machine->force_align_arg_pointer,
6162 /* Sibcall epilogues don't want a return instruction. */
6166 if (current_function_pops_args && current_function_args_size)
6168 rtx popc = GEN_INT (current_function_pops_args);
6170 /* i386 can only pop 64K bytes. If asked to pop more, pop
6171 return address, do an explicit add, and jump indirectly to the caller. */
6174 if (current_function_pops_args >= 65536)
6176 rtx ecx = gen_rtx_REG (SImode, 2);
6178 /* There is no "pascal" calling convention in any 64bit ABI. */
6179 gcc_assert (!TARGET_64BIT);
6181 emit_insn (gen_popsi1 (ecx));
6182 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
6183 emit_jump_insn (gen_return_indirect_internal (ecx));
6186 emit_jump_insn (gen_return_pop_internal (popc));
6189 emit_jump_insn (gen_return_internal ());
6192 /* Reset from the function's potential modifications. */
6195 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
6196 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
6198 if (pic_offset_table_rtx)
6199 REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
6201 /* Mach-O doesn't support labels at the end of objects, so if
6202 it looks like we might want one, insert a NOP. */
6204 rtx insn = get_last_insn ();
6207 && NOTE_LINE_NUMBER (insn) != NOTE_INSN_DELETED_LABEL)
6208 insn = PREV_INSN (insn);
6212 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_DELETED_LABEL)))
6213 fputs ("\tnop\n", file);
6219 /* Extract the parts of an RTL expression that is a valid memory address
6220 for an instruction. Return 0 if the structure of the address is
6221 grossly off. Return -1 if the address contains ASHIFT, so it is not
6222 strictly valid, but is still used for computing the length of an lea instruction. */
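/* A worked example (illustrative, not from the original sources): the
   address (plus (plus (mult (reg %eax) (const_int 4)) (reg %ebx))
   (const_int 12)), i.e. 12(%ebx,%eax,4) in AT&T syntax, decomposes into
   base = %ebx, index = %eax, scale = 4, disp = (const_int 12),
   seg = SEG_DEFAULT. */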
6225 ix86_decompose_address (rtx addr, struct ix86_address *out)
6227 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
6228 rtx base_reg, index_reg;
6229 HOST_WIDE_INT scale = 1;
6230 rtx scale_rtx = NULL_RTX;
6232 enum ix86_address_seg seg = SEG_DEFAULT;
6234 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
6236 else if (GET_CODE (addr) == PLUS)
6246 addends[n++] = XEXP (op, 1);
6249 while (GET_CODE (op) == PLUS);
6254 for (i = n; i >= 0; --i)
6257 switch (GET_CODE (op))
6262 index = XEXP (op, 0);
6263 scale_rtx = XEXP (op, 1);
6267 if (XINT (op, 1) == UNSPEC_TP
6268 && TARGET_TLS_DIRECT_SEG_REFS
6269 && seg == SEG_DEFAULT)
6270 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
6299 else if (GET_CODE (addr) == MULT)
6301 index = XEXP (addr, 0); /* index*scale */
6302 scale_rtx = XEXP (addr, 1);
6304 else if (GET_CODE (addr) == ASHIFT)
6308 /* We're called for lea too, which implements ashift on occasion. */
6309 index = XEXP (addr, 0);
6310 tmp = XEXP (addr, 1);
6311 if (!CONST_INT_P (tmp))
6313 scale = INTVAL (tmp);
6314 if ((unsigned HOST_WIDE_INT) scale > 3)
6320 disp = addr; /* displacement */
6322 /* Extract the integral value of scale. */
6325 if (!CONST_INT_P (scale_rtx))
6327 scale = INTVAL (scale_rtx);
6330 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
6331 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
6333 /* Allow arg pointer and stack pointer as index if there is no scaling. */
6334 if (base_reg && index_reg && scale == 1
6335 && (index_reg == arg_pointer_rtx
6336 || index_reg == frame_pointer_rtx
6337 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
6340 tmp = base, base = index, index = tmp;
6341 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
6344 /* Special case: %ebp cannot be encoded as a base without a displacement. */
6345 if ((base_reg == hard_frame_pointer_rtx
6346 || base_reg == frame_pointer_rtx
6347 || base_reg == arg_pointer_rtx) && !disp)
6350 /* Special case: on K6, [%esi] makes the instruction vector decoded.
6351 Avoid this by transforming to [%esi+0]. */
6352 if (ix86_tune == PROCESSOR_K6 && !optimize_size
6353 && base_reg && !index_reg && !disp
6355 && REGNO_REG_CLASS (REGNO (base_reg)) == SIREG)
6358 /* Special case: encode reg+reg instead of reg*2. */
6359 if (!base && index && scale && scale == 2)
6360 base = index, base_reg = index_reg, scale = 1;
6362 /* Special case: scaling cannot be encoded without base or displacement. */
6363 if (!base && !disp && index && scale != 1)
6375 /* Return cost of the memory address x.
6376 For i386, it is better to use a complex address than let gcc copy
6377 the address into a reg and make a new pseudo. But not if the address
6378 requires two regs - that would mean more pseudos with longer lifetimes. */
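/* A rough sketch of the costing below: a nonzero displacement or a segment
   override makes an address cheaper (more complex references are
   preferred), while a base or index that is not a hard register makes it
   more expensive, and two such distinct registers more expensive still. */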
6381 ix86_address_cost (rtx x)
6383 struct ix86_address parts;
6385 int ok = ix86_decompose_address (x, &parts);
6389 if (parts.base && GET_CODE (parts.base) == SUBREG)
6390 parts.base = SUBREG_REG (parts.base);
6391 if (parts.index && GET_CODE (parts.index) == SUBREG)
6392 parts.index = SUBREG_REG (parts.index);
6394 /* More complex memory references are better. */
6395 if (parts.disp && parts.disp != const0_rtx)
6397 if (parts.seg != SEG_DEFAULT)
6400 /* Attempt to minimize number of registers in the address. */
6402 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
6404 && (!REG_P (parts.index)
6405 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
6409 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
6411 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
6412 && parts.base != parts.index)
6415 /* AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
6416 since its predecode logic can't detect the length of instructions
6417 and decoding degenerates to vector decoded. Increase the cost of such
6418 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
6419 to split such addresses or even refuse such addresses at all.
6421 The following addressing modes are affected:
6422 [base+scale*index]
6423 [scale*index+disp]
6424 [base+index]
6426 The first and last cases may be avoidable by explicitly coding the zero in
6427 the memory address, but I don't have an AMD-K6 machine handy to check this theory. */
6431 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
6432 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
6433 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
6439 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
6440 this is used to form addresses to local data when -fPIC is in effect. */
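/* E.g. (an illustrative form, with a hypothetical symbol):
   (minus (symbol_ref "_foo") (symbol_ref "<pic base>"))
   is accepted by the test below. */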
6444 darwin_local_data_pic (rtx disp)
6446 if (GET_CODE (disp) == MINUS)
6448 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
6449 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
6450 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
6452 const char *sym_name = XSTR (XEXP (disp, 1), 0);
6453 if (! strcmp (sym_name, "<pic base>"))
6461 /* Determine if a given RTX is a valid constant. We already know this
6462 satisfies CONSTANT_P. */
6465 legitimate_constant_p (rtx x)
6467 switch (GET_CODE (x))
6472 if (GET_CODE (x) == PLUS)
6474 if (!CONST_INT_P (XEXP (x, 1)))
6479 if (TARGET_MACHO && darwin_local_data_pic (x))
6482 /* Only some unspecs are valid as "constants". */
6483 if (GET_CODE (x) == UNSPEC)
6484 switch (XINT (x, 1))
6489 return TARGET_64BIT;
6492 x = XVECEXP (x, 0, 0);
6493 return (GET_CODE (x) == SYMBOL_REF
6494 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
6496 x = XVECEXP (x, 0, 0);
6497 return (GET_CODE (x) == SYMBOL_REF
6498 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
6503 /* We must have drilled down to a symbol. */
6504 if (GET_CODE (x) == LABEL_REF)
6506 if (GET_CODE (x) != SYMBOL_REF)
6511 /* TLS symbols are never valid. */
6512 if (SYMBOL_REF_TLS_MODEL (x))
6515 /* DLLIMPORT symbols are never valid. */
6516 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
6517 && SYMBOL_REF_DLLIMPORT_P (x))
6522 if (GET_MODE (x) == TImode
6523 && x != CONST0_RTX (TImode)
6529 if (x == CONST0_RTX (GET_MODE (x)))
6537 /* Otherwise we handle everything else in the move patterns. */
6541 /* Determine if it's legal to put X into the constant pool. This
6542 is not possible for the address of thread-local symbols, which
6543 is checked above. */
6546 ix86_cannot_force_const_mem (rtx x)
6548 /* We can always put integral constants and vectors in memory. */
6549 switch (GET_CODE (x))
6559 return !legitimate_constant_p (x);
6562 /* Determine if a given RTX is a valid constant address. */
6565 constant_address_p (rtx x)
6567 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
6570 /* Nonzero if the constant value X is a legitimate general operand
6571 when generating PIC code. It is given that flag_pic is on and
6572 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
6575 legitimate_pic_operand_p (rtx x)
6579 switch (GET_CODE (x))
6582 inner = XEXP (x, 0);
6583 if (GET_CODE (inner) == PLUS
6584 && CONST_INT_P (XEXP (inner, 1)))
6585 inner = XEXP (inner, 0);
6587 /* Only some unspecs are valid as "constants". */
6588 if (GET_CODE (inner) == UNSPEC)
6589 switch (XINT (inner, 1))
6594 return TARGET_64BIT;
6596 x = XVECEXP (inner, 0, 0);
6597 return (GET_CODE (x) == SYMBOL_REF
6598 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
6606 return legitimate_pic_address_disp_p (x);
6613 /* Determine if a given CONST RTX is a valid memory displacement in PIC mode. */
6617 legitimate_pic_address_disp_p (rtx disp)
6621 /* In 64bit mode we can allow direct addresses of symbols and labels
6622 when they are not dynamic symbols. */
6625 rtx op0 = disp, op1;
6627 switch (GET_CODE (disp))
6633 if (GET_CODE (XEXP (disp, 0)) != PLUS)
6635 op0 = XEXP (XEXP (disp, 0), 0);
6636 op1 = XEXP (XEXP (disp, 0), 1);
6637 if (!CONST_INT_P (op1)
6638 || INTVAL (op1) >= 16*1024*1024
6639 || INTVAL (op1) < -16*1024*1024)
6641 if (GET_CODE (op0) == LABEL_REF)
6643 if (GET_CODE (op0) != SYMBOL_REF)
6648 /* TLS references should always be enclosed in UNSPEC. */
6649 if (SYMBOL_REF_TLS_MODEL (op0))
6651 if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0)
6652 && ix86_cmodel != CM_LARGE_PIC)
6660 if (GET_CODE (disp) != CONST)
6662 disp = XEXP (disp, 0);
6666 /* It is unsafe to allow PLUS expressions here; that would defeat the limit
6667 on the allowed distance into the GOT table. We should not need these anyway. */
6668 if (GET_CODE (disp) != UNSPEC
6669 || (XINT (disp, 1) != UNSPEC_GOTPCREL
6670 && XINT (disp, 1) != UNSPEC_GOTOFF
6671 && XINT (disp, 1) != UNSPEC_PLTOFF))
6674 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
6675 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
6681 if (GET_CODE (disp) == PLUS)
6683 if (!CONST_INT_P (XEXP (disp, 1)))
6685 disp = XEXP (disp, 0);
6689 if (TARGET_MACHO && darwin_local_data_pic (disp))
6692 if (GET_CODE (disp) != UNSPEC)
6695 switch (XINT (disp, 1))
6700 /* We need to check for both symbols and labels because VxWorks loads
6701 text labels with @GOT rather than @GOTOFF. See gotoff_operand for details. */
6703 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
6704 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
6706 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
6707 While the ABI also specifies a 32bit relocation, we don't produce it in the
6708 small PIC model at all. */
6709 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
6710 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
6712 return gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
6714 case UNSPEC_GOTTPOFF:
6715 case UNSPEC_GOTNTPOFF:
6716 case UNSPEC_INDNTPOFF:
6719 disp = XVECEXP (disp, 0, 0);
6720 return (GET_CODE (disp) == SYMBOL_REF
6721 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
6723 disp = XVECEXP (disp, 0, 0);
6724 return (GET_CODE (disp) == SYMBOL_REF
6725 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
6727 disp = XVECEXP (disp, 0, 0);
6728 return (GET_CODE (disp) == SYMBOL_REF
6729 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
6735 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
6736 memory address for an instruction. The MODE argument is the machine mode
6737 for the MEM expression that wants to use this address.
6739 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
6740 convert common non-canonical forms to canonical form so that they will be recognized. */
6744 legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
6745 rtx addr, int strict)
6747 struct ix86_address parts;
6748 rtx base, index, disp;
6749 HOST_WIDE_INT scale;
6750 const char *reason = NULL;
6751 rtx reason_rtx = NULL_RTX;
6753 if (ix86_decompose_address (addr, &parts) <= 0)
6755 reason = "decomposition failed";
6760 index = parts.index;
6762 scale = parts.scale;
6764 /* Validate base register.
6766 Don't allow SUBREG's that span more than a word here. It can lead to spill
6767 failures when the base is one word out of a two word structure, which is
6768 represented internally as a DImode int. */
6777 else if (GET_CODE (base) == SUBREG
6778 && REG_P (SUBREG_REG (base))
6779 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
6781 reg = SUBREG_REG (base);
6784 reason = "base is not a register";
6788 if (GET_MODE (base) != Pmode)
6790 reason = "base is not in Pmode";
6794 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
6795 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
6797 reason = "base is not valid";
6802 /* Validate index register.
6804 Don't allow SUBREG's that span more than a word here -- same as above. */
6813 else if (GET_CODE (index) == SUBREG
6814 && REG_P (SUBREG_REG (index))
6815 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
6817 reg = SUBREG_REG (index);
6820 reason = "index is not a register";
6824 if (GET_MODE (index) != Pmode)
6826 reason = "index is not in Pmode";
6830 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
6831 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
6833 reason = "index is not valid";
6838 /* Validate scale factor. */
6841 reason_rtx = GEN_INT (scale);
6844 reason = "scale without index";
6848 if (scale != 2 && scale != 4 && scale != 8)
6850 reason = "scale is not a valid multiplier";
6855 /* Validate displacement. */
6860 if (GET_CODE (disp) == CONST
6861 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
6862 switch (XINT (XEXP (disp, 0), 1))
6864 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
6865 used. While the ABI also specifies 32bit relocations, we don't produce
6866 them at all and use IP-relative addressing instead. */
6869 gcc_assert (flag_pic);
6871 goto is_legitimate_pic;
6872 reason = "64bit address unspec";
6875 case UNSPEC_GOTPCREL:
6876 gcc_assert (flag_pic);
6877 goto is_legitimate_pic;
6879 case UNSPEC_GOTTPOFF:
6880 case UNSPEC_GOTNTPOFF:
6881 case UNSPEC_INDNTPOFF:
6887 reason = "invalid address unspec";
6891 else if (SYMBOLIC_CONST (disp)
6895 && MACHOPIC_INDIRECT
6896 && !machopic_operand_p (disp)
6902 if (TARGET_64BIT && (index || base))
6904 /* foo@dtpoff(%rX) is ok. */
6905 if (GET_CODE (disp) != CONST
6906 || GET_CODE (XEXP (disp, 0)) != PLUS
6907 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
6908 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
6909 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
6910 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
6912 reason = "non-constant pic memory reference";
6916 else if (! legitimate_pic_address_disp_p (disp))
6918 reason = "displacement is an invalid pic construct";
6922 /* This code used to verify that a symbolic pic displacement
6923 includes the pic_offset_table_rtx register.
6925 While this is a good idea, unfortunately these constructs may
6926 be created by the "adds using lea" optimization for incorrect code.
6935 Such code is nonsensical, but results in addressing the
6936 GOT table with a pic_offset_table_rtx base. We can't
6937 just refuse it easily, since it gets matched by the
6938 "addsi3" pattern, which later gets split to lea in the
6939 case the output register differs from the input. While this
6940 could be handled by a separate addsi pattern for this case
6941 that never results in lea, disabling this test seems to be
6942 the easier and correct fix for the crash. */
6944 else if (GET_CODE (disp) != LABEL_REF
6945 && !CONST_INT_P (disp)
6946 && (GET_CODE (disp) != CONST
6947 || !legitimate_constant_p (disp))
6948 && (GET_CODE (disp) != SYMBOL_REF
6949 || !legitimate_constant_p (disp)))
6951 reason = "displacement is not constant";
6954 else if (TARGET_64BIT
6955 && !x86_64_immediate_operand (disp, VOIDmode))
6957 reason = "displacement is out of range";
6962 /* Everything looks valid. */
6969 /* Return a unique alias set for the GOT. */
6971 static HOST_WIDE_INT
6972 ix86_GOT_alias_set (void)
6974 static HOST_WIDE_INT set = -1;
6976 set = new_alias_set ();
6980 /* Return a legitimate reference for ORIG (an address) using the
6981 register REG. If REG is 0, a new pseudo is generated.
6983 There are two types of references that must be handled:
6985 1. Global data references must load the address from the GOT, via
6986 the PIC reg. An insn is emitted to do this load, and the reg is
6989 2. Static data references, constant pool addresses, and code labels
6990 compute the address as an offset from the GOT, whose base is in
6991 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
6992 differentiate them from global data objects. The returned
6993 address is the PIC reg + an unspec constant.
6995 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
6996 reg also appears in the address. */
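/* An illustrative sketch of the two forms (sym is a hypothetical symbol):
   global data is reached through a GOT load such as
   (mem (plus pic_reg (const (unspec [sym] UNSPEC_GOT)))), i.e.
   sym@GOT(%ebx), while local data is the plain sum
   (plus pic_reg (const (unspec [sym] UNSPEC_GOTOFF))), i.e.
   sym@GOTOFF(%ebx), with no memory load. */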
6999 legitimize_pic_address (rtx orig, rtx reg)
7006 if (TARGET_MACHO && !TARGET_64BIT)
7009 reg = gen_reg_rtx (Pmode);
7010 /* Use the generic Mach-O PIC machinery. */
7011 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
7015 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
7017 else if (TARGET_64BIT
7018 && ix86_cmodel != CM_SMALL_PIC
7019 && gotoff_operand (addr, Pmode))
7022 /* This symbol may be referenced via a displacement from the PIC
7023 base address (@GOTOFF). */
7025 if (reload_in_progress)
7026 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
7027 if (GET_CODE (addr) == CONST)
7028 addr = XEXP (addr, 0);
7029 if (GET_CODE (addr) == PLUS)
7031 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
7033 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
7036 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
7037 new = gen_rtx_CONST (Pmode, new);
7039 tmpreg = gen_reg_rtx (Pmode);
7042 emit_move_insn (tmpreg, new);
7046 new = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
7047 tmpreg, 1, OPTAB_DIRECT);
7050 else new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
7052 else if (!TARGET_64BIT && gotoff_operand (addr, Pmode))
7054 /* This symbol may be referenced via a displacement from the PIC
7055 base address (@GOTOFF). */
7057 if (reload_in_progress)
7058 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
7059 if (GET_CODE (addr) == CONST)
7060 addr = XEXP (addr, 0);
7061 if (GET_CODE (addr) == PLUS)
7063 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
7065 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
7068 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
7069 new = gen_rtx_CONST (Pmode, new);
7070 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
7074 emit_move_insn (reg, new);
7078 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
7079 /* We can't use @GOTOFF for text labels on VxWorks;
7080 see gotoff_operand. */
7081 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
7083 /* Given that we've already handled dllimport variables separately
7084 in legitimize_address, and all other variables should satisfy
7085 legitimate_pic_address_disp_p, we should never arrive here. */
7086 gcc_assert (!TARGET_64BIT_MS_ABI);
7088 if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
7090 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
7091 new = gen_rtx_CONST (Pmode, new);
7092 new = gen_const_mem (Pmode, new);
7093 set_mem_alias_set (new, ix86_GOT_alias_set ());
7096 reg = gen_reg_rtx (Pmode);
7097 /* Use gen_movsi directly; otherwise the address is loaded
7098 into a register for CSE. We don't want to CSE these addresses;
7099 instead we CSE addresses from the GOT table, so skip this. */
7100 emit_insn (gen_movsi (reg, new));
7105 /* This symbol must be referenced via a load from the
7106 Global Offset Table (@GOT). */
7108 if (reload_in_progress)
7109 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
7110 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
7111 new = gen_rtx_CONST (Pmode, new);
7113 new = force_reg (Pmode, new);
7114 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
7115 new = gen_const_mem (Pmode, new);
7116 set_mem_alias_set (new, ix86_GOT_alias_set ());
7119 reg = gen_reg_rtx (Pmode);
7120 emit_move_insn (reg, new);
7126 if (CONST_INT_P (addr)
7127 && !x86_64_immediate_operand (addr, VOIDmode))
7131 emit_move_insn (reg, addr);
7135 new = force_reg (Pmode, addr);
7137 else if (GET_CODE (addr) == CONST)
7139 addr = XEXP (addr, 0);
7141 /* We must match stuff we generate before. Assume the only
7142 unspecs that can get here are ours. Not that we could do
7143 anything with them anyway.... */
7144 if (GET_CODE (addr) == UNSPEC
7145 || (GET_CODE (addr) == PLUS
7146 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
7148 gcc_assert (GET_CODE (addr) == PLUS);
7150 if (GET_CODE (addr) == PLUS)
7152 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
7154 /* Check first to see if this is a constant offset from a @GOTOFF
7155 symbol reference. */
7156 if (gotoff_operand (op0, Pmode)
7157 && CONST_INT_P (op1))
7161 if (reload_in_progress)
7162 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
7163 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
7165 new = gen_rtx_PLUS (Pmode, new, op1);
7166 new = gen_rtx_CONST (Pmode, new);
7167 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
7171 emit_move_insn (reg, new);
7177 if (INTVAL (op1) < -16*1024*1024
7178 || INTVAL (op1) >= 16*1024*1024)
7180 if (!x86_64_immediate_operand (op1, Pmode))
7181 op1 = force_reg (Pmode, op1);
7182 new = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
7188 base = legitimize_pic_address (XEXP (addr, 0), reg);
7189 new = legitimize_pic_address (XEXP (addr, 1),
7190 base == reg ? NULL_RTX : reg);
7192 if (CONST_INT_P (new))
7193 new = plus_constant (base, INTVAL (new));
7196 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
7198 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
7199 new = XEXP (new, 1);
7201 new = gen_rtx_PLUS (Pmode, base, new);
7209 /* Load the thread pointer. If TO_REG is true, force it into a register. */
7212 get_thread_pointer (int to_reg)
7216 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
7220 reg = gen_reg_rtx (Pmode);
7221 insn = gen_rtx_SET (VOIDmode, reg, tp);
7222 insn = emit_insn (insn);
7227 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
7228 false if we expect this to be used for a memory address and true if
7229 we expect to load the address into a register. */
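/* A hedged example: in the local-exec model with GNU TLS, the address of a
   variable x comes back as (plus tp (const (unspec [x] UNSPEC_NTPOFF)));
   when TARGET_TLS_DIRECT_SEG_REFS is on and FOR_MOV is false, the thread
   pointer stays an UNSPEC_TP so the whole address can be emitted as a
   %gs:/%fs: segment reference. */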
7232 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
7234 rtx dest, base, off, pic, tp;
7239 case TLS_MODEL_GLOBAL_DYNAMIC:
7240 dest = gen_reg_rtx (Pmode);
7241 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
7243 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
7245 rtx rax = gen_rtx_REG (Pmode, 0), insns;
7248 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
7249 insns = get_insns ();
7252 emit_libcall_block (insns, dest, rax, x);
7254 else if (TARGET_64BIT && TARGET_GNU2_TLS)
7255 emit_insn (gen_tls_global_dynamic_64 (dest, x));
7257 emit_insn (gen_tls_global_dynamic_32 (dest, x));
7259 if (TARGET_GNU2_TLS)
7261 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
7263 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
7267 case TLS_MODEL_LOCAL_DYNAMIC:
7268 base = gen_reg_rtx (Pmode);
7269 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
7271 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
7273 rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
7276 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
7277 insns = get_insns ();
7280 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
7281 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
7282 emit_libcall_block (insns, base, rax, note);
7284 else if (TARGET_64BIT && TARGET_GNU2_TLS)
7285 emit_insn (gen_tls_local_dynamic_base_64 (base));
7287 emit_insn (gen_tls_local_dynamic_base_32 (base));
7289 if (TARGET_GNU2_TLS)
7291 rtx x = ix86_tls_module_base ();
7293 set_unique_reg_note (get_last_insn (), REG_EQUIV,
7294 gen_rtx_MINUS (Pmode, x, tp));
7297 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
7298 off = gen_rtx_CONST (Pmode, off);
7300 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
7302 if (TARGET_GNU2_TLS)
7304 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
7306 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
7311 case TLS_MODEL_INITIAL_EXEC:
7315 type = UNSPEC_GOTNTPOFF;
7319 if (reload_in_progress)
7320 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
7321 pic = pic_offset_table_rtx;
7322 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
7324 else if (!TARGET_ANY_GNU_TLS)
7326 pic = gen_reg_rtx (Pmode);
7327 emit_insn (gen_set_got (pic));
7328 type = UNSPEC_GOTTPOFF;
7333 type = UNSPEC_INDNTPOFF;
7336 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
7337 off = gen_rtx_CONST (Pmode, off);
7339 off = gen_rtx_PLUS (Pmode, pic, off);
7340 off = gen_const_mem (Pmode, off);
7341 set_mem_alias_set (off, ix86_GOT_alias_set ());
7343 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7345 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
7346 off = force_reg (Pmode, off);
7347 return gen_rtx_PLUS (Pmode, base, off);
7351 base = get_thread_pointer (true);
7352 dest = gen_reg_rtx (Pmode);
7353 emit_insn (gen_subsi3 (dest, base, off));
7357 case TLS_MODEL_LOCAL_EXEC:
7358 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
7359 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7360 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
7361 off = gen_rtx_CONST (Pmode, off);
7363 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7365 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
7366 return gen_rtx_PLUS (Pmode, base, off);
7370 base = get_thread_pointer (true);
7371 dest = gen_reg_rtx (Pmode);
7372 emit_insn (gen_subsi3 (dest, base, off));
7383 /* Create or return the unique __imp_DECL dllimport symbol corresponding to DECL. */
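/* For example (illustrative, foo is a hypothetical symbol): a dllimport
   symbol foo is referenced through a load from the pointer named
   *__imp__foo, built below by prefixing the stripped assembler name. */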
7386 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
7387 htab_t dllimport_map;
7390 get_dllimport_decl (tree decl)
7392 struct tree_map *h, in;
7396 size_t namelen, prefixlen;
7402 dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);
7404 in.hash = htab_hash_pointer (decl);
7405 in.base.from = decl;
7406 loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
7411 *loc = h = ggc_alloc (sizeof (struct tree_map));
7413 h->base.from = decl;
7414 h->to = to = build_decl (VAR_DECL, NULL, ptr_type_node);
7415 DECL_ARTIFICIAL (to) = 1;
7416 DECL_IGNORED_P (to) = 1;
7417 DECL_EXTERNAL (to) = 1;
7418 TREE_READONLY (to) = 1;
7420 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
7421 name = targetm.strip_name_encoding (name);
7422 if (name[0] == FASTCALL_PREFIX)
7428 prefix = "*__imp__";
7430 namelen = strlen (name);
7431 prefixlen = strlen (prefix);
7432 imp_name = alloca (namelen + prefixlen + 1);
7433 memcpy (imp_name, prefix, prefixlen);
7434 memcpy (imp_name + prefixlen, name, namelen + 1);
7436 name = ggc_alloc_string (imp_name, namelen + prefixlen);
7437 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
7438 SET_SYMBOL_REF_DECL (rtl, to);
7439 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL;
7441 rtl = gen_const_mem (Pmode, rtl);
7442 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
7444 SET_DECL_RTL (to, rtl);
7449 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
7450 true if we require the result be a register. */
7453 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
7458 gcc_assert (SYMBOL_REF_DECL (symbol));
7459 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol));
7461 x = DECL_RTL (imp_decl);
7463 x = force_reg (Pmode, x);
7467 /* Try machine-dependent ways of modifying an illegitimate address
7468 to be legitimate. If we find one, return the new, valid address.
7469 This macro is used in only one place: `memory_address' in explow.c.
7471 OLDX is the address as it was before break_out_memory_refs was called.
7472 In some cases it is useful to look at this to decide what needs to be done.
7474 MODE and WIN are passed so that this macro can use
7475 GO_IF_LEGITIMATE_ADDRESS.
7477 It is always safe for this macro to do nothing. It exists to recognize
7478 opportunities to optimize the output.
7480 For the 80386, we handle X+REG by loading X into a register R and
7481 using R+REG. R will go in a general reg and indexing will be used.
7482 However, if REG is a broken-out memory address or multiplication,
7483 nothing needs to be done because REG can certainly go in a general reg.
7485 When -fpic is used, special handling is needed for symbolic references.
7486 See comments by legitimize_pic_address in i386.c for details. */
7489 legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
7494 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
7496 return legitimize_tls_address (x, log, false);
7497 if (GET_CODE (x) == CONST
7498 && GET_CODE (XEXP (x, 0)) == PLUS
7499 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
7500 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
7502 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0), log, false);
7503 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
7506 if (flag_pic && SYMBOLIC_CONST (x))
7507 return legitimize_pic_address (x, 0);
7509 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
7511 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (x))
7512 return legitimize_dllimport_symbol (x, true);
7513 if (GET_CODE (x) == CONST
7514 && GET_CODE (XEXP (x, 0)) == PLUS
7515 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
7516 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (x, 0), 0)))
7518 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (x, 0), 0), true);
7519 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
7523 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
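/* For example: (ashift (reg) (const_int 3)) is rewritten below as
   (mult (reg) (const_int 8)). */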
7524 if (GET_CODE (x) == ASHIFT
7525 && CONST_INT_P (XEXP (x, 1))
7526 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
7529 log = INTVAL (XEXP (x, 1));
7530 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
7531 GEN_INT (1 << log));
7534 if (GET_CODE (x) == PLUS)
7536 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
7538 if (GET_CODE (XEXP (x, 0)) == ASHIFT
7539 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
7540 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
7543 log = INTVAL (XEXP (XEXP (x, 0), 1));
7544 XEXP (x, 0) = gen_rtx_MULT (Pmode,
7545 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
7546 GEN_INT (1 << log));
7549 if (GET_CODE (XEXP (x, 1)) == ASHIFT
7550 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
7551 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
7554 log = INTVAL (XEXP (XEXP (x, 1), 1));
7555 XEXP (x, 1) = gen_rtx_MULT (Pmode,
7556 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
7557 GEN_INT (1 << log));
7560 /* Put multiply first if it isn't already. */
7561 if (GET_CODE (XEXP (x, 1)) == MULT)
7563 rtx tmp = XEXP (x, 0);
7564 XEXP (x, 0) = XEXP (x, 1);
7569 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
7570 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
7571 created by virtual register instantiation, register elimination, and
7572 similar optimizations. */
7573 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
7576 x = gen_rtx_PLUS (Pmode,
7577 gen_rtx_PLUS (Pmode, XEXP (x, 0),
7578 XEXP (XEXP (x, 1), 0)),
7579 XEXP (XEXP (x, 1), 1));
7583 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
7584 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
7585 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
7586 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
7587 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
7588 && CONSTANT_P (XEXP (x, 1)))
7591 rtx other = NULL_RTX;
7593 if (CONST_INT_P (XEXP (x, 1)))
7595 constant = XEXP (x, 1);
7596 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
7598 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
7600 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
7601 other = XEXP (x, 1);
7609 x = gen_rtx_PLUS (Pmode,
7610 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
7611 XEXP (XEXP (XEXP (x, 0), 1), 0)),
7612 plus_constant (other, INTVAL (constant)));
7616 if (changed && legitimate_address_p (mode, x, FALSE))
7619 if (GET_CODE (XEXP (x, 0)) == MULT)
7622 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
7625 if (GET_CODE (XEXP (x, 1)) == MULT)
7628 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
7632 && REG_P (XEXP (x, 1))
7633 && REG_P (XEXP (x, 0)))
7636 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
7639 x = legitimize_pic_address (x, 0);
7642 if (changed && legitimate_address_p (mode, x, FALSE))
7645 if (REG_P (XEXP (x, 0)))
7647 rtx temp = gen_reg_rtx (Pmode);
7648 rtx val = force_operand (XEXP (x, 1), temp);
7650 emit_move_insn (temp, val);
7656 else if (REG_P (XEXP (x, 1)))
7658 rtx temp = gen_reg_rtx (Pmode);
7659 rtx val = force_operand (XEXP (x, 0), temp);
7661 emit_move_insn (temp, val);
7671 /* Print an integer constant expression in assembler syntax. Addition
7672 and subtraction are the only arithmetic that may appear in these
7673 expressions. FILE is the stdio stream to write to, X is the rtx, and
7674 CODE is the operand print code from the output string. */
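/* For instance (illustrative): a (const (unspec [sym] UNSPEC_GOTOFF))
   operand comes out as "sym@GOTOFF" via the UNSPEC case below. */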
7677 output_pic_addr_const (FILE *file, rtx x, int code)
7681 switch (GET_CODE (x))
7684 gcc_assert (flag_pic);
7689 if (! TARGET_MACHO || TARGET_64BIT)
7690 output_addr_const (file, x);
7693 const char *name = XSTR (x, 0);
7695 /* Mark the decl as referenced so that cgraph will
7696 output the function. */
7697 if (SYMBOL_REF_DECL (x))
7698 mark_decl_referenced (SYMBOL_REF_DECL (x));
7701 if (MACHOPIC_INDIRECT
7702 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
7703 name = machopic_indirection_name (x, /*stub_p=*/true);
7705 assemble_name (file, name);
7707 if (!TARGET_MACHO && !TARGET_64BIT_MS_ABI
7708 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
7709 fputs ("@PLT", file);
7716 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
7717 assemble_name (asm_out_file, buf);
7721 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
7725 /* This used to output parentheses around the expression,
7726 but that does not work on the 386 (either ATT or BSD assembler). */
7727 output_pic_addr_const (file, XEXP (x, 0), code);
7731 if (GET_MODE (x) == VOIDmode)
7733 /* We can use %d if the number is <32 bits and positive. */
7734 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
7735 fprintf (file, "0x%lx%08lx",
7736 (unsigned long) CONST_DOUBLE_HIGH (x),
7737 (unsigned long) CONST_DOUBLE_LOW (x));
7739 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
7742 /* We can't handle floating point constants;
7743 PRINT_OPERAND must handle them. */
7744 output_operand_lossage ("floating constant misused");
7748 /* Some assemblers need integer constants to appear first. */
7749 if (CONST_INT_P (XEXP (x, 0)))
7751 output_pic_addr_const (file, XEXP (x, 0), code);
7753 output_pic_addr_const (file, XEXP (x, 1), code);
7757 gcc_assert (CONST_INT_P (XEXP (x, 1)));
7758 output_pic_addr_const (file, XEXP (x, 1), code);
7760 output_pic_addr_const (file, XEXP (x, 0), code);
7766 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
7767 output_pic_addr_const (file, XEXP (x, 0), code);
7769 output_pic_addr_const (file, XEXP (x, 1), code);
7771 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
7775 gcc_assert (XVECLEN (x, 0) == 1);
7776 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
7777 switch (XINT (x, 1))
7780 fputs ("@GOT", file);
7783 fputs ("@GOTOFF", file);
7786 fputs ("@PLTOFF", file);
7788 case UNSPEC_GOTPCREL:
7789 fputs ("@GOTPCREL(%rip)", file);
7791 case UNSPEC_GOTTPOFF:
7792 /* FIXME: This might be @TPOFF in Sun ld too. */
7793 fputs ("@GOTTPOFF", file);
7796 fputs ("@TPOFF", file);
7800 fputs ("@TPOFF", file);
7802 fputs ("@NTPOFF", file);
7805 fputs ("@DTPOFF", file);
7807 case UNSPEC_GOTNTPOFF:
7809 fputs ("@GOTTPOFF(%rip)", file);
7811 fputs ("@GOTNTPOFF", file);
7813 case UNSPEC_INDNTPOFF:
7814 fputs ("@INDNTPOFF", file);
7817 output_operand_lossage ("invalid UNSPEC as operand");
7823 output_operand_lossage ("invalid expression as operand");
7827 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
7828 We need to emit DTP-relative relocations. */
7830 static void ATTRIBUTE_UNUSED
7831 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
7833 fputs (ASM_LONG, file);
7834 output_addr_const (file, x);
7835 fputs ("@DTPOFF", file);
7841 fputs (", 0", file);
7848 /* In the name of slightly smaller debug output, and to cater to
7849 general assembler lossage, recognize PIC+GOTOFF and turn it back
7850 into a direct symbol reference.
7852 On Darwin, this is necessary to avoid a crash, because Darwin
7853 has a different PIC label for each routine but the DWARF debugging
7854 information is not associated with any particular routine, so it's
7855 necessary to remove references to the PIC label from RTL stored by
7856 the DWARF output code. */
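/* Illustrative examples (sym is hypothetical):
   (plus pic_reg (const (unspec [sym] UNSPEC_GOTOFF))) delegitimizes back
   to plain sym, and on x86-64 a load of the form
   (mem (const (unspec [sym] UNSPEC_GOTPCREL))) likewise yields sym. */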
7859 ix86_delegitimize_address (rtx orig_x)
7862 /* reg_addend is NULL or a multiple of some register. */
7863 rtx reg_addend = NULL_RTX;
7864 /* const_addend is NULL or a const_int. */
7865 rtx const_addend = NULL_RTX;
7866 /* This is the result, or NULL. */
7867 rtx result = NULL_RTX;
7874 if (GET_CODE (x) != CONST
7875 || GET_CODE (XEXP (x, 0)) != UNSPEC
7876 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
7879 return XVECEXP (XEXP (x, 0), 0, 0);
7882 if (GET_CODE (x) != PLUS
7883 || GET_CODE (XEXP (x, 1)) != CONST)
7886 if (REG_P (XEXP (x, 0))
7887 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
7888 /* %ebx + GOT/GOTOFF */
7890 else if (GET_CODE (XEXP (x, 0)) == PLUS)
7892 /* %ebx + %reg * scale + GOT/GOTOFF */
7893 reg_addend = XEXP (x, 0);
7894 if (REG_P (XEXP (reg_addend, 0))
7895 && REGNO (XEXP (reg_addend, 0)) == PIC_OFFSET_TABLE_REGNUM)
7896 reg_addend = XEXP (reg_addend, 1);
7897 else if (REG_P (XEXP (reg_addend, 1))
7898 && REGNO (XEXP (reg_addend, 1)) == PIC_OFFSET_TABLE_REGNUM)
7899 reg_addend = XEXP (reg_addend, 0);
7902 if (!REG_P (reg_addend)
7903 && GET_CODE (reg_addend) != MULT
7904 && GET_CODE (reg_addend) != ASHIFT)
7910 x = XEXP (XEXP (x, 1), 0);
7911 if (GET_CODE (x) == PLUS
7912 && CONST_INT_P (XEXP (x, 1)))
7914 const_addend = XEXP (x, 1);
7918 if (GET_CODE (x) == UNSPEC
7919 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x))
7920 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))))
7921 result = XVECEXP (x, 0, 0);
7923 if (TARGET_MACHO && darwin_local_data_pic (x)
7925 result = XEXP (x, 0);
7931 result = gen_rtx_PLUS (Pmode, result, const_addend);
7933 result = gen_rtx_PLUS (Pmode, reg_addend, result);
7937 /* If X is a machine specific address (i.e. a symbol or label being
7938 referenced as a displacement from the GOT implemented using an
7939 UNSPEC), then return the base term. Otherwise return X. */
7942 ix86_find_base_term (rtx x)
7948 if (GET_CODE (x) != CONST)
7951 if (GET_CODE (term) == PLUS
7952 && (CONST_INT_P (XEXP (term, 1))
7953 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
7954 term = XEXP (term, 0);
7955 if (GET_CODE (term) != UNSPEC
7956 || XINT (term, 1) != UNSPEC_GOTPCREL)
7959 term = XVECEXP (term, 0, 0);
7961 if (GET_CODE (term) != SYMBOL_REF
7962 && GET_CODE (term) != LABEL_REF)
7968 term = ix86_delegitimize_address (x);
7970 if (GET_CODE (term) != SYMBOL_REF
7971 && GET_CODE (term) != LABEL_REF)
7978 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
7983 if (mode == CCFPmode || mode == CCFPUmode)
7985 enum rtx_code second_code, bypass_code;
7986 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
7987 gcc_assert (bypass_code == UNKNOWN && second_code == UNKNOWN);
7988 code = ix86_fp_compare_code_to_integer (code);
7992 code = reverse_condition (code);
8003 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
8007 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
8008 Those same assemblers have the same but opposite lossage on cmov. */
8009 gcc_assert (mode == CCmode);
8010 suffix = fp ? "nbe" : "a";
8030 gcc_assert (mode == CCmode);
8052 gcc_assert (mode == CCmode);
8053 suffix = fp ? "nb" : "ae";
8056 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
8060 gcc_assert (mode == CCmode);
8064 suffix = fp ? "u" : "p";
8067 suffix = fp ? "nu" : "np";
8072 fputs (suffix, file);
8075 /* Print the name of register X to FILE based on its machine mode and number.
8076 If CODE is 'w', pretend the mode is HImode.
8077 If CODE is 'b', pretend the mode is QImode.
8078 If CODE is 'k', pretend the mode is SImode.
8079 If CODE is 'q', pretend the mode is DImode.
8080 If CODE is 'h', pretend the reg is the 'high' byte register.
8081 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op. */
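/* For example (an illustrative summary of the cases below): given hard
   register %eax, code 'b' prints "al", 'w' prints "ax", 'k' prints "eax",
   'q' prints "rax" and 'h' prints "ah", each with a "%" prefix in the
   AT&T dialect. */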
8084 print_reg (rtx x, int code, FILE *file)
8086 gcc_assert (REGNO (x) != ARG_POINTER_REGNUM
8087 && REGNO (x) != FRAME_POINTER_REGNUM
8088 && REGNO (x) != FLAGS_REG
8089 && REGNO (x) != FPSR_REG
8090 && REGNO (x) != FPCR_REG);
8092 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
8095 if (code == 'w' || MMX_REG_P (x))
8097 else if (code == 'b')
8099 else if (code == 'k')
8101 else if (code == 'q')
8103 else if (code == 'y')
8105 else if (code == 'h')
8108 code = GET_MODE_SIZE (GET_MODE (x));
8110 /* Irritatingly, AMD extended registers use a different naming convention
8111 from the normal registers. */
8112 if (REX_INT_REG_P (x))
8114 gcc_assert (TARGET_64BIT);
8118 error ("extended registers have no high halves");
8121 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
8124 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
8127 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
8130 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
8133 error ("unsupported operand size for extended register");
8141 if (STACK_TOP_P (x))
8143 fputs ("st(0)", file);
8150 if (! ANY_FP_REG_P (x))
8151 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
8156 fputs (hi_reg_name[REGNO (x)], file);
8159 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
8161 fputs (qi_reg_name[REGNO (x)], file);
8164 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
8166 fputs (qi_high_reg_name[REGNO (x)], file);
8173 /* Locate some local-dynamic symbol still in use by this function
8174 so that we can print its name in some tls_local_dynamic_base pattern. */
8178 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
8182 if (GET_CODE (x) == SYMBOL_REF
8183 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
8185 cfun->machine->some_ld_name = XSTR (x, 0);
8193 get_some_local_dynamic_name (void)
8197 if (cfun->machine->some_ld_name)
8198 return cfun->machine->some_ld_name;
8200 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
8202 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
8203 return cfun->machine->some_ld_name;
8209 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
8210 C -- print opcode suffix for set/cmov insn.
8211 c -- like C, but print reversed condition
8212 F,f -- likewise, but for floating-point.
8213 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
8215 R -- print the prefix for register names.
8216 z -- print the opcode suffix for the size of the current operand.
8217 * -- print a star (in certain assembler syntax)
8218 A -- print an absolute memory reference.
8219 w -- print the operand as if it's a "word" (HImode) even if it isn't.
8220 s -- print a shift double count, followed by the assembler's argument delimiter.
8222 b -- print the QImode name of the register for the indicated operand.
8223 %b0 would print %al if operands[0] is reg 0.
8224 w -- likewise, print the HImode name of the register.
8225 k -- likewise, print the SImode name of the register.
8226 q -- likewise, print the DImode name of the register.
8227 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
8228 y -- print "st(0)" instead of "st" as a register.
8229 D -- print condition for SSE cmp instruction.
8230 P -- if PIC, print an @PLT suffix.
8231 X -- don't print any sort of PIC '@' suffix for a symbol.
8232 & -- print some in-use local-dynamic symbol name.
8233 H -- print a memory address offset by 8; used for sse high-parts
8237 print_operand (FILE *file, rtx x, int code)
8244 if (ASSEMBLER_DIALECT == ASM_ATT)
8249 assemble_name (file, get_some_local_dynamic_name ());
8253 switch (ASSEMBLER_DIALECT)
8260 /* Intel syntax. For absolute addresses, registers should not
8261 be surrounded by braces. */
8265 PRINT_OPERAND (file, x, 0);
8275 PRINT_OPERAND (file, x, 0);
8280 if (ASSEMBLER_DIALECT == ASM_ATT)
8285 if (ASSEMBLER_DIALECT == ASM_ATT)
8290 if (ASSEMBLER_DIALECT == ASM_ATT)
8295 if (ASSEMBLER_DIALECT == ASM_ATT)
8300 if (ASSEMBLER_DIALECT == ASM_ATT)
8305 if (ASSEMBLER_DIALECT == ASM_ATT)
8310 /* 387 opcodes don't get size suffixes if the operands are registers. */
8312 if (STACK_REG_P (x))
8315 /* Likewise if using Intel opcodes. */
8316 if (ASSEMBLER_DIALECT == ASM_INTEL)
8319 /* This is the size of the op, derived from the size of the operand. */
8320 switch (GET_MODE_SIZE (GET_MODE (x)))
8327 #ifdef HAVE_GAS_FILDS_FISTS
8333 if (GET_MODE (x) == SFmode)
8348 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
8350 #ifdef GAS_MNEMONICS
8376 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
8378 PRINT_OPERAND (file, x, 0);
8384 /* A little bit of braindamage here. The SSE compare instructions
8385 use completely different names for the comparisons than the
8386 fp conditional moves do. */
8387 switch (GET_CODE (x))
8402 fputs ("unord", file);
8406 fputs ("neq", file);
8410 fputs ("nlt", file);
8414 fputs ("nle", file);
8417 fputs ("ord", file);
8424 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8425 if (ASSEMBLER_DIALECT == ASM_ATT)
8427 switch (GET_MODE (x))
8429 case HImode: putc ('w', file); break;
8431 case SFmode: putc ('l', file); break;
8433 case DFmode: putc ('q', file); break;
8434 default: gcc_unreachable ();
8441 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
8444 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8445 if (ASSEMBLER_DIALECT == ASM_ATT)
8448 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
8451 /* Like above, but reverse condition */
8453 /* Check to see if argument to %c is really a constant
8454 and not a condition code which needs to be reversed. */
8455 if (!COMPARISON_P (x))
8457 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
8460 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
8463 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8464 if (ASSEMBLER_DIALECT == ASM_ATT)
8467 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
8471 /* It doesn't actually matter what mode we use here, as we're
8472 only going to use this for printing. */
8473 x = adjust_address_nv (x, DImode, 8);
8480 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
8483 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
8486 int pred_val = INTVAL (XEXP (x, 0));
8488 if (pred_val < REG_BR_PROB_BASE * 45 / 100
8489 || pred_val > REG_BR_PROB_BASE * 55 / 100)
8491 int taken = pred_val > REG_BR_PROB_BASE / 2;
8492 int cputaken = final_forward_branch_p (current_output_insn) == 0;
8494 /* Emit hints only in the case where the default branch prediction
8495 heuristics would fail. */
8496 if (taken != cputaken)
8498 /* We use 3e (DS) prefix for taken branches and
8499 2e (CS) prefix for not taken branches. */
8501 fputs ("ds ; ", file);
8503 fputs ("cs ; ", file);
8510 output_operand_lossage ("invalid operand code '%c'", code);
8515 print_reg (x, code, file);
8519 /* No `byte ptr' prefix for call instructions. */
8520 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
8523 switch (GET_MODE_SIZE (GET_MODE (x)))
8525 case 1: size = "BYTE"; break;
8526 case 2: size = "WORD"; break;
8527 case 4: size = "DWORD"; break;
8528 case 8: size = "QWORD"; break;
8529 case 12: size = "XWORD"; break;
8530 case 16: size = "XMMWORD"; break;
8535 /* Check for explicit size override (codes 'b', 'w' and 'k') */
8538 else if (code == 'w')
8540 else if (code == 'k')
8544 fputs (" PTR ", file);
8548 /* Avoid (%rip) for call operands. */
8549 if (CONSTANT_ADDRESS_P (x) && code == 'P'
8550 && !CONST_INT_P (x))
8551 output_addr_const (file, x);
8552 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
8553 output_operand_lossage ("invalid constraints for operand");
8558 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
8563 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8564 REAL_VALUE_TO_TARGET_SINGLE (r, l);
8566 if (ASSEMBLER_DIALECT == ASM_ATT)
8568 fprintf (file, "0x%08lx", l);
8571 /* These float cases don't actually occur as immediate operands. */
8572 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
8576 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
8577 fprintf (file, "%s", dstr);
8580 else if (GET_CODE (x) == CONST_DOUBLE
8581 && GET_MODE (x) == XFmode)
8585 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
8586 fprintf (file, "%s", dstr);
8591 /* We have patterns that allow zero sets of memory, for instance.
8592 In 64-bit mode, we should probably support all 8-byte vectors,
8593 since we can in fact encode that into an immediate. */
8594 if (GET_CODE (x) == CONST_VECTOR)
8596 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
8602 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
8604 if (ASSEMBLER_DIALECT == ASM_ATT)
8607 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
8608 || GET_CODE (x) == LABEL_REF)
8610 if (ASSEMBLER_DIALECT == ASM_ATT)
8613 fputs ("OFFSET FLAT:", file);
8616 if (CONST_INT_P (x))
8617 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
8619 output_pic_addr_const (file, x, code);
8621 output_addr_const (file, x);
8625 /* Print a memory operand whose address is ADDR. */
8628 print_operand_address (FILE *file, rtx addr)
8630 struct ix86_address parts;
8631 rtx base, index, disp;
8633 int ok = ix86_decompose_address (addr, &parts);
8638 index = parts.index;
8640 scale = parts.scale;
8648 if (USER_LABEL_PREFIX[0] == 0)
8650 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
8656 if (!base && !index)
8658 /* A displacement-only address requires special attention. */
8660 if (CONST_INT_P (disp))
8662 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
8664 if (USER_LABEL_PREFIX[0] == 0)
8666 fputs ("ds:", file);
8668 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
8671 output_pic_addr_const (file, disp, 0);
8673 output_addr_const (file, disp);
8675 /* Use the one-byte-shorter RIP-relative addressing for 64-bit mode. */
8678 if (GET_CODE (disp) == CONST
8679 && GET_CODE (XEXP (disp, 0)) == PLUS
8680 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
8681 disp = XEXP (XEXP (disp, 0), 0);
8682 if (GET_CODE (disp) == LABEL_REF
8683 || (GET_CODE (disp) == SYMBOL_REF
8684 && SYMBOL_REF_TLS_MODEL (disp) == 0))
8685 fputs ("(%rip)", file);
8690 if (ASSEMBLER_DIALECT == ASM_ATT)
8695 output_pic_addr_const (file, disp, 0);
8696 else if (GET_CODE (disp) == LABEL_REF)
8697 output_asm_label (disp);
8699 output_addr_const (file, disp);
8704 print_reg (base, 0, file);
8708 print_reg (index, 0, file);
8710 fprintf (file, ",%d", scale);
8716 rtx offset = NULL_RTX;
8720 /* Pull out the offset of a symbol; print any symbol itself. */
8721 if (GET_CODE (disp) == CONST
8722 && GET_CODE (XEXP (disp, 0)) == PLUS
8723 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
8725 offset = XEXP (XEXP (disp, 0), 1);
8726 disp = gen_rtx_CONST (VOIDmode,
8727 XEXP (XEXP (disp, 0), 0));
8731 output_pic_addr_const (file, disp, 0);
8732 else if (GET_CODE (disp) == LABEL_REF)
8733 output_asm_label (disp);
8734 else if (CONST_INT_P (disp))
8737 output_addr_const (file, disp);
8743 print_reg (base, 0, file);
8746 if (INTVAL (offset) >= 0)
8748 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
8752 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
8759 print_reg (index, 0, file);
8761 fprintf (file, "*%d", scale);
8769 output_addr_const_extra (FILE *file, rtx x)
8773 if (GET_CODE (x) != UNSPEC)
8776 op = XVECEXP (x, 0, 0);
8777 switch (XINT (x, 1))
8779 case UNSPEC_GOTTPOFF:
8780 output_addr_const (file, op);
8781 /* FIXME: This might be @TPOFF in Sun ld. */
8782 fputs ("@GOTTPOFF", file);
8785 output_addr_const (file, op);
8786 fputs ("@TPOFF", file);
8789 output_addr_const (file, op);
8791 fputs ("@TPOFF", file);
8793 fputs ("@NTPOFF", file);
8796 output_addr_const (file, op);
8797 fputs ("@DTPOFF", file);
8799 case UNSPEC_GOTNTPOFF:
8800 output_addr_const (file, op);
8802 fputs ("@GOTTPOFF(%rip)", file);
8804 fputs ("@GOTNTPOFF", file);
8806 case UNSPEC_INDNTPOFF:
8807 output_addr_const (file, op);
8808 fputs ("@INDNTPOFF", file);
8818 /* Split one or more DImode RTL references into pairs of SImode
8819 references. The RTL can be REG, offsettable MEM, integer constant, or
8820 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
8821 split and "num" is its length. lo_half and hi_half are output arrays
8822 that parallel "operands". */
8825 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
8829 rtx op = operands[num];
8831 /* simplify_subreg refuses to split volatile memory addresses,
8832 but we still have to handle them. */
8835 lo_half[num] = adjust_address (op, SImode, 0);
8836 hi_half[num] = adjust_address (op, SImode, 4);
8840 lo_half[num] = simplify_gen_subreg (SImode, op,
8841 GET_MODE (op) == VOIDmode
8842 ? DImode : GET_MODE (op), 0);
8843 hi_half[num] = simplify_gen_subreg (SImode, op,
8844 GET_MODE (op) == VOIDmode
8845 ? DImode : GET_MODE (op), 4);
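/* Illustrative sketch (not part of GCC; the helper name is
   hypothetical): the same split expressed on a plain 64-bit integer,
   little-endian as on x86.  */
#if 0
static void
split_u64_example (unsigned long long x, unsigned int *lo, unsigned int *hi)
{
  *lo = (unsigned int) x;          /* bytes 0..3, adjust_address (op, SImode, 0) */
  *hi = (unsigned int) (x >> 32);  /* bytes 4..7, adjust_address (op, SImode, 4) */
}
#endif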
8849 /* Split one or more TImode RTL references into pairs of DImode
8850 references. The RTL can be REG, offsettable MEM, integer constant, or
8851 CONST_DOUBLE. "operands" is a pointer to an array of TImode RTL to
8852 split and "num" is its length. lo_half and hi_half are output arrays
8853 that parallel "operands". */
8856 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
8860 rtx op = operands[num];
8862 /* simplify_subreg refuses to split volatile memory addresses, but we
8863 still have to handle them. */
8866 lo_half[num] = adjust_address (op, DImode, 0);
8867 hi_half[num] = adjust_address (op, DImode, 8);
8871 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
8872 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
8877 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
8878 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
8879 is the expression of the binary operation. The output may either be
8880 emitted here, or returned to the caller, like all output_* functions.
8882 There is no guarantee that the operands are the same mode, as they
8883 might be within FLOAT or FLOAT_EXTEND expressions. */
8885 #ifndef SYSV386_COMPAT
8886 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
8887 wants to fix the assemblers because that causes incompatibility
8888 with gcc. No-one wants to fix gcc because that causes
8889 incompatibility with assemblers... You can use the option of
8890 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
8891 #define SYSV386_COMPAT 1
8895 output_387_binary_op (rtx insn, rtx *operands)
8897 static char buf[30];
8900 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
8902 #ifdef ENABLE_CHECKING
8903 /* Even if we do not want to check the inputs, this documents the input
8904 constraints, which helps in understanding the following code. */
8905 if (STACK_REG_P (operands[0])
8906 && ((REG_P (operands[1])
8907 && REGNO (operands[0]) == REGNO (operands[1])
8908 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
8909 || (REG_P (operands[2])
8910 && REGNO (operands[0]) == REGNO (operands[2])
8911 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
8912 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
8915 gcc_assert (is_sse);
8918 switch (GET_CODE (operands[3]))
8921 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8922 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8930 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8931 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8939 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8940 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8948 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8949 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8963 if (GET_MODE (operands[0]) == SFmode)
8964 strcat (buf, "ss\t{%2, %0|%0, %2}");
8966 strcat (buf, "sd\t{%2, %0|%0, %2}");
8971 switch (GET_CODE (operands[3]))
8975 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
8977 rtx temp = operands[2];
8978 operands[2] = operands[1];
8982 /* We know operands[0] == operands[1]. */
8984 if (MEM_P (operands[2]))
8990 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
8992 if (STACK_TOP_P (operands[0]))
8993 /* How is it that we are storing to a dead operand[2]?
8994 Well, presumably operands[1] is dead too. We can't
8995 store the result to st(0) as st(0) gets popped on this
8996 instruction. Instead store to operands[2] (which I
8997 think has to be st(1)). st(1) will be popped later.
8998 gcc <= 2.8.1 didn't have this check and generated
8999 assembly code that the Unixware assembler rejected. */
9000 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
9002 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
9006 if (STACK_TOP_P (operands[0]))
9007 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
9009 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
9014 if (MEM_P (operands[1]))
9020 if (MEM_P (operands[2]))
9026 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
9029 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
9030 derived assemblers, confusingly reverse the direction of
9031 the operation for fsub{r} and fdiv{r} when the
9032 destination register is not st(0). The Intel assembler
9033 doesn't have this brain damage. Read !SYSV386_COMPAT to
9034 figure out what the hardware really does. */
9035 if (STACK_TOP_P (operands[0]))
9036 p = "{p\t%0, %2|rp\t%2, %0}";
9038 p = "{rp\t%2, %0|p\t%0, %2}";
9040 if (STACK_TOP_P (operands[0]))
9041 /* As above for fmul/fadd, we can't store to st(0). */
9042 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
9044 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
9049 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
9052 if (STACK_TOP_P (operands[0]))
9053 p = "{rp\t%0, %1|p\t%1, %0}";
9055 p = "{p\t%1, %0|rp\t%0, %1}";
9057 if (STACK_TOP_P (operands[0]))
9058 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
9060 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
9065 if (STACK_TOP_P (operands[0]))
9067 if (STACK_TOP_P (operands[1]))
9068 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
9070 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
9073 else if (STACK_TOP_P (operands[1]))
9076 p = "{\t%1, %0|r\t%0, %1}";
9078 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
9084 p = "{r\t%2, %0|\t%0, %2}";
9086 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
9099 /* Return needed mode for entity in optimize_mode_switching pass. */
9102 ix86_mode_needed (int entity, rtx insn)
9104 enum attr_i387_cw mode;
9106 /* The mode UNINITIALIZED is used to store the control word after a
9107 function call or ASM pattern. The mode ANY specifies that the function
9108 has no requirements on the control word and makes no changes in the
9109 bits we are interested in. */
9112 || (NONJUMP_INSN_P (insn)
9113 && (asm_noperands (PATTERN (insn)) >= 0
9114 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
9115 return I387_CW_UNINITIALIZED;
9117 if (recog_memoized (insn) < 0)
9120 mode = get_attr_i387_cw (insn);
9125 if (mode == I387_CW_TRUNC)
9130 if (mode == I387_CW_FLOOR)
9135 if (mode == I387_CW_CEIL)
9140 if (mode == I387_CW_MASK_PM)
9151 /* Output code to initialize control word copies used by trunc?f?i and
9152 rounding patterns. CURRENT_MODE is set to the current control word,
9153 while NEW_MODE is set to the new control word. */
9156 emit_i387_cw_initialization (int mode)
9158 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
9163 rtx reg = gen_reg_rtx (HImode);
9165 emit_insn (gen_x86_fnstcw_1 (stored_mode));
9166 emit_move_insn (reg, copy_rtx (stored_mode));
9168 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL || optimize_size)
9173 /* round toward zero (truncate) */
9174 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
9175 slot = SLOT_CW_TRUNC;
9179 /* round down toward -oo */
9180 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
9181 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
9182 slot = SLOT_CW_FLOOR;
9186 /* round up toward +oo */
9187 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
9188 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
9189 slot = SLOT_CW_CEIL;
9192 case I387_CW_MASK_PM:
9193 /* mask precision exception for nearbyint() */
9194 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
9195 slot = SLOT_CW_MASK_PM;
9207 /* round toward zero (truncate) */
9208 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
9209 slot = SLOT_CW_TRUNC;
9213 /* round down toward -oo */
9214 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
9215 slot = SLOT_CW_FLOOR;
9219 /* round up toward +oo */
9220 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
9221 slot = SLOT_CW_CEIL;
9224 case I387_CW_MASK_PM:
9225 /* mask precision exception for nearbyint() */
9226 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
9227 slot = SLOT_CW_MASK_PM;
9235 gcc_assert (slot < MAX_386_STACK_LOCALS);
9237 new_mode = assign_386_stack_local (HImode, slot);
9238 emit_move_insn (new_mode, reg);
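/* Illustrative sketch (not part of GCC; the helper name is
   hypothetical): the control word edits above on a plain 16-bit value.
   Bits 10-11 are the x87 rounding-control field; bit 5 masks the
   precision exception.  */
#if 0
static unsigned short
x87_cw_example (unsigned short cw, int rounding)
{
  switch (rounding)
    {
    case 0: return cw | 0x0c00;              /* RC = 11: truncate */
    case 1: return (cw & ~0x0c00) | 0x0400;  /* RC = 01: round down */
    case 2: return (cw & ~0x0c00) | 0x0800;  /* RC = 10: round up */
    default: return cw | 0x0020;             /* PM = 1: mask precision */
    }
}
#endif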
9241 /* Output code for INSN to convert a float to a signed int. OPERANDS
9242 are the insn operands. The output may be [HSD]Imode and the input
9243 operand may be [SDX]Fmode. */
9246 output_fix_trunc (rtx insn, rtx *operands, int fisttp)
9248 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
9249 int dimode_p = GET_MODE (operands[0]) == DImode;
9250 int round_mode = get_attr_i387_cw (insn);
9252 /* Jump through a hoop or two for DImode, since the hardware has no
9253 non-popping instruction. We used to do this a different way, but
9254 that was somewhat fragile and broke with post-reload splitters. */
9255 if ((dimode_p || fisttp) && !stack_top_dies)
9256 output_asm_insn ("fld\t%y1", operands);
9258 gcc_assert (STACK_TOP_P (operands[1]));
9259 gcc_assert (MEM_P (operands[0]));
9260 gcc_assert (GET_MODE (operands[1]) != TFmode);
9263 output_asm_insn ("fisttp%z0\t%0", operands);
9266 if (round_mode != I387_CW_ANY)
9267 output_asm_insn ("fldcw\t%3", operands);
9268 if (stack_top_dies || dimode_p)
9269 output_asm_insn ("fistp%z0\t%0", operands);
9271 output_asm_insn ("fist%z0\t%0", operands);
9272 if (round_mode != I387_CW_ANY)
9273 output_asm_insn ("fldcw\t%2", operands);
9279 /* Output code for x87 ffreep insn. The OPNO argument, which may only
9280 have the values zero or one, indicates the ffreep insn's operand
9281 from the OPERANDS array. */
9284 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
9286 if (TARGET_USE_FFREEP)
9287 #if HAVE_AS_IX86_FFREEP
9288 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
9291 static char retval[] = ".word\t0xc_df";
9292 int regno = REGNO (operands[opno]);
9294 gcc_assert (FP_REGNO_P (regno));
9296 retval[9] = '0' + (regno - FIRST_STACK_REG);
9301 return opno ? "fstp\t%y1" : "fstp\t%y0";
9305 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
9306 should be used. UNORDERED_P is true when fucom should be used. */
9309 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
9312 rtx cmp_op0, cmp_op1;
9313 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
9317 cmp_op0 = operands[0];
9318 cmp_op1 = operands[1];
9322 cmp_op0 = operands[1];
9323 cmp_op1 = operands[2];
9328 if (GET_MODE (operands[0]) == SFmode)
9330 return "ucomiss\t{%1, %0|%0, %1}";
9332 return "comiss\t{%1, %0|%0, %1}";
9335 return "ucomisd\t{%1, %0|%0, %1}";
9337 return "comisd\t{%1, %0|%0, %1}";
9340 gcc_assert (STACK_TOP_P (cmp_op0));
9342 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
9344 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
9348 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
9349 return output_387_ffreep (operands, 1);
9352 return "ftst\n\tfnstsw\t%0";
9355 if (STACK_REG_P (cmp_op1)
9357 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
9358 && REGNO (cmp_op1) != FIRST_STACK_REG)
9360 /* If the top of the 387 stack dies, and the other operand
9361 is also a stack register that dies, then this must be an
9362 `fcompp' float compare. */
9366 /* There is no double popping fcomi variant. Fortunately,
9367 eflags is immune from the fstp's cc clobbering. */
9369 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
9371 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
9372 return output_387_ffreep (operands, 0);
9377 return "fucompp\n\tfnstsw\t%0";
9379 return "fcompp\n\tfnstsw\t%0";
9384 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
9386 static const char * const alt[16] =
9388 "fcom%z2\t%y2\n\tfnstsw\t%0",
9389 "fcomp%z2\t%y2\n\tfnstsw\t%0",
9390 "fucom%z2\t%y2\n\tfnstsw\t%0",
9391 "fucomp%z2\t%y2\n\tfnstsw\t%0",
9393 "ficom%z2\t%y2\n\tfnstsw\t%0",
9394 "ficomp%z2\t%y2\n\tfnstsw\t%0",
9398 "fcomi\t{%y1, %0|%0, %y1}",
9399 "fcomip\t{%y1, %0|%0, %y1}",
9400 "fucomi\t{%y1, %0|%0, %y1}",
9401 "fucomip\t{%y1, %0|%0, %y1}",
9412 mask = eflags_p << 3;
9413 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
9414 mask |= unordered_p << 1;
9415 mask |= stack_top_dies;
9417 gcc_assert (mask < 16);
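/* For example, eflags_p = 1, unordered_p = 0, a non-integer operand
   and stack_top_dies = 1 give mask = 9, selecting "fcomip" from the
   table above.  */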
9426 ix86_output_addr_vec_elt (FILE *file, int value)
9428 const char *directive = ASM_LONG;
9432 directive = ASM_QUAD;
9434 gcc_assert (!TARGET_64BIT);
9437 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
9441 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
9443 const char *directive = ASM_LONG;
9446 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
9447 directive = ASM_QUAD;
9449 gcc_assert (!TARGET_64BIT);
9451 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
9452 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
9453 fprintf (file, "%s%s%d-%s%d\n",
9454 directive, LPREFIX, value, LPREFIX, rel);
9455 else if (HAVE_AS_GOTOFF_IN_DATA)
9456 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
9458 else if (TARGET_MACHO)
9460 fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
9461 machopic_output_function_base_name (file);
9462 fprintf(file, "\n");
9466 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
9467 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
9470 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
9474 ix86_expand_clear (rtx dest)
9478 /* We play register width games, which are only valid after reload. */
9479 gcc_assert (reload_completed);
9481 /* Avoid HImode and its attendant prefix byte. */
9482 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
9483 dest = gen_rtx_REG (SImode, REGNO (dest));
9484 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
9486 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
9487 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
9489 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
9490 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
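/* Size illustration: "xorl %eax, %eax" encodes in 2 bytes where
   "movl $0, %eax" takes 5, but the xor form clobbers the flags --
   hence the CLOBBER attached above.  */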
9496 /* X is an unchanging MEM. If it is a constant pool reference, return
9497 the constant pool rtx, else NULL. */
9500 maybe_get_pool_constant (rtx x)
9502 x = ix86_delegitimize_address (XEXP (x, 0));
9504 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
9505 return get_pool_constant (x);
9511 ix86_expand_move (enum machine_mode mode, rtx operands[])
9513 int strict = (reload_in_progress || reload_completed);
9515 enum tls_model model;
9520 if (GET_CODE (op1) == SYMBOL_REF)
9522 model = SYMBOL_REF_TLS_MODEL (op1);
9525 op1 = legitimize_tls_address (op1, model, true);
9526 op1 = force_operand (op1, op0);
9530 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
9531 && SYMBOL_REF_DLLIMPORT_P (op1))
9532 op1 = legitimize_dllimport_symbol (op1, false);
9534 else if (GET_CODE (op1) == CONST
9535 && GET_CODE (XEXP (op1, 0)) == PLUS
9536 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
9538 rtx addend = XEXP (XEXP (op1, 0), 1);
9539 rtx symbol = XEXP (XEXP (op1, 0), 0);
9542 model = SYMBOL_REF_TLS_MODEL (symbol);
9544 tmp = legitimize_tls_address (symbol, model, true);
9545 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
9546 && SYMBOL_REF_DLLIMPORT_P (symbol))
9547 tmp = legitimize_dllimport_symbol (symbol, true);
9551 tmp = force_operand (tmp, NULL);
9552 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
9553 op0, 1, OPTAB_DIRECT);
9559 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
9561 if (TARGET_MACHO && !TARGET_64BIT)
9566 rtx temp = ((reload_in_progress
9567 || ((op0 && REG_P (op0))
9569 ? op0 : gen_reg_rtx (Pmode));
9570 op1 = machopic_indirect_data_reference (op1, temp);
9571 op1 = machopic_legitimize_pic_address (op1, mode,
9572 temp == op1 ? 0 : temp);
9574 else if (MACHOPIC_INDIRECT)
9575 op1 = machopic_indirect_data_reference (op1, 0);
9583 op1 = force_reg (Pmode, op1);
9584 else if (!TARGET_64BIT || !x86_64_movabs_operand (op1, Pmode))
9586 rtx reg = no_new_pseudos ? op0 : NULL_RTX;
9587 op1 = legitimize_pic_address (op1, reg);
9596 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
9597 || !push_operand (op0, mode))
9599 op1 = force_reg (mode, op1);
9601 if (push_operand (op0, mode)
9602 && ! general_no_elim_operand (op1, mode))
9603 op1 = copy_to_mode_reg (mode, op1);
9605 /* Force large constants in 64-bit compilation into registers
9606 to get them CSEed. */
9607 if (TARGET_64BIT && mode == DImode
9608 && immediate_operand (op1, mode)
9609 && !x86_64_zext_immediate_operand (op1, VOIDmode)
9610 && !register_operand (op0, mode)
9611 && optimize && !reload_completed && !reload_in_progress)
9612 op1 = copy_to_mode_reg (mode, op1);
9614 if (FLOAT_MODE_P (mode))
9616 /* If we are loading a floating point constant to a register,
9617 force the value to memory now, since we'll get better code
9618 out of the back end. */
9622 else if (GET_CODE (op1) == CONST_DOUBLE)
9624 op1 = validize_mem (force_const_mem (mode, op1));
9625 if (!register_operand (op0, mode))
9627 rtx temp = gen_reg_rtx (mode);
9628 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
9629 emit_move_insn (op0, temp);
9636 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
9640 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
9642 rtx op0 = operands[0], op1 = operands[1];
9644 /* Force constants other than zero into memory. We do not know how
9645 the instructions used to build constants modify the upper 64 bits
9646 of the register; once we have that information, we may be able
9647 to handle some of them more efficiently. */
9648 if ((reload_in_progress | reload_completed) == 0
9649 && register_operand (op0, mode)
9651 && standard_sse_constant_p (op1) <= 0)
9652 op1 = validize_mem (force_const_mem (mode, op1));
9654 /* Make operand1 a register if it isn't already. */
9656 && !register_operand (op0, mode)
9657 && !register_operand (op1, mode))
9659 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
9663 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
9666 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
9667 straight to ix86_expand_vector_move. */
9668 /* Code generation for scalar reg-reg moves of single and double precision data:
9669 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
9673 if (x86_sse_partial_reg_dependency == true)
9678 Code generation for scalar loads of double precision data:
9679 if (x86_sse_split_regs == true)
9680 movlpd mem, reg (gas syntax)
9684 Code generation for unaligned packed loads of single precision data
9685 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
9686 if (x86_sse_unaligned_move_optimal)
9689 if (x86_sse_partial_reg_dependency == true)
9701 Code generation for unaligned packed loads of double precision data
9702 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
9703 if (x86_sse_unaligned_move_optimal)
9706 if (x86_sse_split_regs == true)
9719 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
9728 /* If we're optimizing for size, movups is the smallest. */
9731 op0 = gen_lowpart (V4SFmode, op0);
9732 op1 = gen_lowpart (V4SFmode, op1);
9733 emit_insn (gen_sse_movups (op0, op1));
9737 /* ??? If we have typed data, then it would appear that using
9738 movdqu is the only way to get unaligned data loaded with integer type. */
9740 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
9742 op0 = gen_lowpart (V16QImode, op0);
9743 op1 = gen_lowpart (V16QImode, op1);
9744 emit_insn (gen_sse2_movdqu (op0, op1));
9748 if (TARGET_SSE2 && mode == V2DFmode)
9752 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
9754 op0 = gen_lowpart (V2DFmode, op0);
9755 op1 = gen_lowpart (V2DFmode, op1);
9756 emit_insn (gen_sse2_movupd (op0, op1));
9760 /* When SSE registers are split into halves, we can avoid
9761 writing to the top half twice. */
9762 if (TARGET_SSE_SPLIT_REGS)
9764 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
9769 /* ??? Not sure about the best option for the Intel chips.
9770 The following would seem to satisfy; the register is
9771 entirely cleared, breaking the dependency chain. We
9772 then store to the upper half, with a dependency depth
9773 of one. A rumor has it that Intel recommends two movsd
9774 followed by an unpacklpd, but this is unconfirmed. And
9775 given that the dependency depth of the unpacklpd would
9776 still be one, I'm not sure why this would be better. */
9777 zero = CONST0_RTX (V2DFmode);
9780 m = adjust_address (op1, DFmode, 0);
9781 emit_insn (gen_sse2_loadlpd (op0, zero, m));
9782 m = adjust_address (op1, DFmode, 8);
9783 emit_insn (gen_sse2_loadhpd (op0, op0, m));
9787 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
9789 op0 = gen_lowpart (V4SFmode, op0);
9790 op1 = gen_lowpart (V4SFmode, op1);
9791 emit_insn (gen_sse_movups (op0, op1));
9795 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
9796 emit_move_insn (op0, CONST0_RTX (mode));
9798 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
9800 if (mode != V4SFmode)
9801 op0 = gen_lowpart (V4SFmode, op0);
9802 m = adjust_address (op1, V2SFmode, 0);
9803 emit_insn (gen_sse_loadlps (op0, op0, m));
9804 m = adjust_address (op1, V2SFmode, 8);
9805 emit_insn (gen_sse_loadhps (op0, op0, m));
9808 else if (MEM_P (op0))
9810 /* If we're optimizing for size, movups is the smallest. */
9813 op0 = gen_lowpart (V4SFmode, op0);
9814 op1 = gen_lowpart (V4SFmode, op1);
9815 emit_insn (gen_sse_movups (op0, op1));
9819 /* ??? Similar to above, only less clear because of quote
9820 typeless stores unquote. */
9821 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
9822 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
9824 op0 = gen_lowpart (V16QImode, op0);
9825 op1 = gen_lowpart (V16QImode, op1);
9826 emit_insn (gen_sse2_movdqu (op0, op1));
9830 if (TARGET_SSE2 && mode == V2DFmode)
9832 m = adjust_address (op0, DFmode, 0);
9833 emit_insn (gen_sse2_storelpd (m, op1));
9834 m = adjust_address (op0, DFmode, 8);
9835 emit_insn (gen_sse2_storehpd (m, op1));
9839 if (mode != V4SFmode)
9840 op1 = gen_lowpart (V4SFmode, op1);
9841 m = adjust_address (op0, V2SFmode, 0);
9842 emit_insn (gen_sse_storelps (m, op1));
9843 m = adjust_address (op0, V2SFmode, 8);
9844 emit_insn (gen_sse_storehps (m, op1));
9851 /* Expand a push in MODE. This is some mode for which we do not support
9852 proper push instructions, at least from the registers that we expect
9853 the value to live in. */
9856 ix86_expand_push (enum machine_mode mode, rtx x)
9860 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
9861 GEN_INT (-GET_MODE_SIZE (mode)),
9862 stack_pointer_rtx, 1, OPTAB_DIRECT);
9863 if (tmp != stack_pointer_rtx)
9864 emit_move_insn (stack_pointer_rtx, tmp);
9866 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
9867 emit_move_insn (tmp, x);
9870 /* Helper function of ix86_fixup_binary_operands to canonicalize
9871 operand order. Returns true if the operands should be swapped. */
9874 ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
9877 rtx dst = operands[0];
9878 rtx src1 = operands[1];
9879 rtx src2 = operands[2];
9881 /* If the operation is not commutative, we can't do anything. */
9882 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
9885 /* Highest priority is that src1 should match dst. */
9886 if (rtx_equal_p (dst, src1))
9888 if (rtx_equal_p (dst, src2))
9891 /* Next highest priority is that immediate constants come second. */
9892 if (immediate_operand (src2, mode))
9894 if (immediate_operand (src1, mode))
9897 /* Lowest priority is that memory references should come second. */
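/* For example, for a commutative PLUS with dst == src2, the operands
   are swapped so that src1 matches dst: (r1 = c + r1) becomes
   (r1 = r1 + c), which also moves the immediate into the second slot.  */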
9907 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
9908 destination to use for the operation. If different from the true
9909 destination in operands[0], a copy operation will be required. */
9912 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
9915 rtx dst = operands[0];
9916 rtx src1 = operands[1];
9917 rtx src2 = operands[2];
9919 /* Canonicalize operand order. */
9920 if (ix86_swap_binary_operands_p (code, mode, operands))
9927 /* Both source operands cannot be in memory. */
9928 if (MEM_P (src1) && MEM_P (src2))
9930 /* Optimization: Only read from memory once. */
9931 if (rtx_equal_p (src1, src2))
9933 src2 = force_reg (mode, src2);
9937 src2 = force_reg (mode, src2);
9940 /* If the destination is memory, and we do not have matching source
9941 operands, do things in registers. */
9942 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
9943 dst = gen_reg_rtx (mode);
9945 /* Source 1 cannot be a constant. */
9946 if (CONSTANT_P (src1))
9947 src1 = force_reg (mode, src1);
9949 /* Source 1 cannot be a non-matching memory. */
9950 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
9951 src1 = force_reg (mode, src1);
9958 /* Similarly, but assume that the destination has already been set up properly. */
9962 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
9963 enum machine_mode mode, rtx operands[])
9965 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
9966 gcc_assert (dst == operands[0]);
9969 /* Attempt to expand a binary operator. Make the expansion closer to the
9970 actual machine than just general_operand, which would allow 3 separate
9971 memory references (one output, two input) in a single insn. */
9974 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
9977 rtx src1, src2, dst, op, clob;
9979 dst = ix86_fixup_binary_operands (code, mode, operands);
9983 /* Emit the instruction. */
9985 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
9986 if (reload_in_progress)
9988 /* Reload doesn't know about the flags register, and doesn't know that
9989 it doesn't want to clobber it. We can only do this with PLUS. */
9990 gcc_assert (code == PLUS);
9995 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
9996 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
9999 /* Fix up the destination if needed. */
10000 if (dst != operands[0])
10001 emit_move_insn (operands[0], dst);
10004 /* Return TRUE or FALSE depending on whether the binary operator meets the
10005 appropriate constraints. */
10008 ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
10011 rtx dst = operands[0];
10012 rtx src1 = operands[1];
10013 rtx src2 = operands[2];
10015 /* Both source operands cannot be in memory. */
10016 if (MEM_P (src1) && MEM_P (src2))
10019 /* Canonicalize operand order for commutative operators. */
10020 if (ix86_swap_binary_operands_p (code, mode, operands))
10027 /* If the destination is memory, we must have a matching source operand. */
10028 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
10031 /* Source 1 cannot be a constant. */
10032 if (CONSTANT_P (src1))
10035 /* Source 1 cannot be a non-matching memory. */
10036 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
10042 /* Attempt to expand a unary operator. Make the expansion closer to the
10043 actual machine than just general_operand, which would allow 2 separate
10044 memory references (one output, one input) in a single insn. */
10047 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
10050 int matching_memory;
10051 rtx src, dst, op, clob;
10056 /* If the destination is memory, and we do not have matching source
10057 operands, do things in registers. */
10058 matching_memory = 0;
10061 if (rtx_equal_p (dst, src))
10062 matching_memory = 1;
10064 dst = gen_reg_rtx (mode);
10067 /* When source operand is memory, destination must match. */
10068 if (MEM_P (src) && !matching_memory)
10069 src = force_reg (mode, src);
10071 /* Emit the instruction. */
10073 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
10074 if (reload_in_progress || code == NOT)
10076 /* Reload doesn't know about the flags register, and doesn't know that
10077 it doesn't want to clobber it. */
10078 gcc_assert (code == NOT);
10083 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
10084 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
10087 /* Fix up the destination if needed. */
10088 if (dst != operands[0])
10089 emit_move_insn (operands[0], dst);
10092 /* Return TRUE or FALSE depending on whether the unary operator meets the
10093 appropriate constraints. */
10096 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
10097 enum machine_mode mode ATTRIBUTE_UNUSED,
10098 rtx operands[2] ATTRIBUTE_UNUSED)
10100 /* If one of the operands is memory, source and destination must match. */
10101 if ((MEM_P (operands[0])
10102 || MEM_P (operands[1]))
10103 && ! rtx_equal_p (operands[0], operands[1]))
10108 /* Post-reload splitter for converting an SF or DFmode value in an
10109 SSE register into an unsigned SImode. */
10112 ix86_split_convert_uns_si_sse (rtx operands[])
10114 enum machine_mode vecmode;
10115 rtx value, large, zero_or_two31, input, two31, x;
10117 large = operands[1];
10118 zero_or_two31 = operands[2];
10119 input = operands[3];
10120 two31 = operands[4];
10121 vecmode = GET_MODE (large);
10122 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
10124 /* Load up the value into the low element. We must ensure that the other
10125 elements are valid floats -- zero is the easiest such value. */
10128 if (vecmode == V4SFmode)
10129 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
10131 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
10135 input = gen_rtx_REG (vecmode, REGNO (input));
10136 emit_move_insn (value, CONST0_RTX (vecmode));
10137 if (vecmode == V4SFmode)
10138 emit_insn (gen_sse_movss (value, value, input));
10140 emit_insn (gen_sse2_movsd (value, value, input));
10143 emit_move_insn (large, two31);
10144 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
10146 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
10147 emit_insn (gen_rtx_SET (VOIDmode, large, x));
10149 x = gen_rtx_AND (vecmode, zero_or_two31, large);
10150 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
10152 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
10153 emit_insn (gen_rtx_SET (VOIDmode, value, x));
10155 large = gen_rtx_REG (V4SImode, REGNO (large));
10156 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
10158 x = gen_rtx_REG (V4SImode, REGNO (value));
10159 if (vecmode == V4SFmode)
10160 emit_insn (gen_sse2_cvttps2dq (x, value));
10162 emit_insn (gen_sse2_cvttpd2dq (x, value));
10165 emit_insn (gen_xorv4si3 (value, value, large));
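/* Illustrative sketch (not part of GCC; the function name is
   hypothetical): the same flow on scalars.  cvttsd2si only covers
   [0, 2^31), so inputs of 2^31 and above are reduced by 2^31 before
   converting, and the final XOR puts the top bit back -- exactly what
   the mask/AND/MINUS/XOR sequence above does element-wise.  */
#if 0
static unsigned int
double_to_uns32_example (double x)
{
  if (x >= 0x1.0p31)            /* the LE mask computed above */
    return ((unsigned int) (int) (x - 0x1.0p31)) ^ 0x80000000u;
  return (unsigned int) (int) x;
}
#endif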
10168 /* Convert an unsigned DImode value into a DFmode, using only SSE.
10169 Expects the 64-bit DImode to be supplied in a pair of integral
10170 registers. Requires SSE2; will use SSE3 if available. For x86_32,
10171 -mfpmath=sse, !optimize_size only. */
10174 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
10176 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
10177 rtx int_xmm, fp_xmm;
10178 rtx biases, exponents;
10181 int_xmm = gen_reg_rtx (V4SImode);
10182 if (TARGET_INTER_UNIT_MOVES)
10183 emit_insn (gen_movdi_to_sse (int_xmm, input));
10184 else if (TARGET_SSE_SPLIT_REGS)
10186 emit_insn (gen_rtx_CLOBBER (VOIDmode, int_xmm));
10187 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
10191 x = gen_reg_rtx (V2DImode);
10192 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
10193 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
10196 x = gen_rtx_CONST_VECTOR (V4SImode,
10197 gen_rtvec (4, GEN_INT (0x43300000UL),
10198 GEN_INT (0x45300000UL),
10199 const0_rtx, const0_rtx));
10200 exponents = validize_mem (force_const_mem (V4SImode, x));
10202 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
10203 emit_insn (gen_sse2_punpckldq (int_xmm, int_xmm, exponents));
10205 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
10206 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
10207 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
10208 (0x1.0p84 + double(fp_value_hi_xmm)).
10209 Note these exponents differ by 32. */
10211 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
10213 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
10214 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
10215 real_ldexp (&bias_lo_rvt, &dconst1, 52);
10216 real_ldexp (&bias_hi_rvt, &dconst1, 84);
10217 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
10218 x = const_double_from_real_value (bias_hi_rvt, DFmode);
10219 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
10220 biases = validize_mem (force_const_mem (V2DFmode, biases));
10221 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
10223 /* Add the upper and lower DFmode values together. */
10225 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
10228 x = copy_to_mode_reg (V2DFmode, fp_xmm);
10229 emit_insn (gen_sse2_unpckhpd (fp_xmm, fp_xmm, fp_xmm));
10230 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
10233 ix86_expand_vector_extract (false, target, fp_xmm, 0);
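/* Illustrative sketch (not part of GCC; names are hypothetical, and the
   union punning assumes x86's little-endian IEEE layout): packing a
   32-bit half under the exponent word 0x43300000 yields 2^52 + lo;
   under 0x45300000 it yields 2^84 + hi * 2^32.  Subtracting the biases
   and summing reconstructs the unsigned 64-bit value with a single
   final rounding.  */
#if 0
static double
uns64_to_double_example (unsigned long long x)
{
  union { unsigned long long u; double d; } lo, hi;
  lo.u = 0x4330000000000000ULL | (x & 0xffffffffULL); /* 2^52 + lo32 */
  hi.u = 0x4530000000000000ULL | (x >> 32);           /* 2^84 + hi32 * 2^32 */
  return (hi.d - 0x1.0p84) + (lo.d - 0x1.0p52);
}
#endif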
10236 /* Convert an unsigned SImode value into a DFmode. Only currently used
10237 for SSE, but applicable anywhere. */
10240 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
10242 REAL_VALUE_TYPE TWO31r;
10245 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
10246 NULL, 1, OPTAB_DIRECT);
10248 fp = gen_reg_rtx (DFmode);
10249 emit_insn (gen_floatsidf2 (fp, x));
10251 real_ldexp (&TWO31r, &dconst1, 31);
10252 x = const_double_from_real_value (TWO31r, DFmode);
10254 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
10256 emit_move_insn (target, x);
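/* Illustrative sketch (not part of GCC; the name is hypothetical, and
   two's complement wraparound is assumed): the input is biased down
   into the signed range, converted, and the bias is added back in
   DFmode, where 2^31 is exact.  */
#if 0
static double
uns32_to_double_example (unsigned int x)
{
  int biased = (int) (x ^ 0x80000000u);  /* == x - 2^31 modulo 2^32 */
  return (double) biased + 0x1.0p31;
}
#endif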
10259 /* Convert a signed DImode value into a DFmode. Only used for SSE in
10260 32-bit mode; otherwise we have a direct convert instruction. */
10263 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
10265 REAL_VALUE_TYPE TWO32r;
10266 rtx fp_lo, fp_hi, x;
10268 fp_lo = gen_reg_rtx (DFmode);
10269 fp_hi = gen_reg_rtx (DFmode);
10271 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
10273 real_ldexp (&TWO32r, &dconst1, 32);
10274 x = const_double_from_real_value (TWO32r, DFmode);
10275 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
10277 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
10279 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
10282 emit_move_insn (target, x);
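/* Illustrative sketch (not part of GCC; names are hypothetical, with an
   arithmetic right shift assumed): the signed high word is scaled by
   2^32 and the unsigned low word is added, mirroring the MULT and PLUS
   emitted above.  */
#if 0
static double
int64_to_double_example (long long x)
{
  double hi = (double) (int) (x >> 32);  /* signed high word */
  double lo = uns32_to_double_example ((unsigned int) x);
  return hi * 0x1.0p32 + lo;
}
#endif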
10285 /* Convert an unsigned SImode value into a SFmode, using only SSE.
10286 For x86_32, -mfpmath=sse, !optimize_size only. */
10288 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
10290 REAL_VALUE_TYPE ONE16r;
10291 rtx fp_hi, fp_lo, int_hi, int_lo, x;
10293 real_ldexp (&ONE16r, &dconst1, 16);
10294 x = const_double_from_real_value (ONE16r, SFmode);
10295 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
10296 NULL, 0, OPTAB_DIRECT);
10297 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
10298 NULL, 0, OPTAB_DIRECT);
10299 fp_hi = gen_reg_rtx (SFmode);
10300 fp_lo = gen_reg_rtx (SFmode);
10301 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
10302 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
10303 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
10305 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
10307 if (!rtx_equal_p (target, fp_hi))
10308 emit_move_insn (target, fp_hi);
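/* Illustrative sketch (not part of GCC; the name is hypothetical):
   each 16-bit half converts to SFmode exactly, the high half is scaled
   by the exact factor 2^16, and the final addition is the only
   rounding step -- the same MULT/PLUS shape as the expansion above.  */
#if 0
static float
uns32_to_float_example (unsigned int x)
{
  float hi = (float) (int) (x >> 16);
  float lo = (float) (int) (x & 0xffff);
  return hi * 0x1.0p16f + lo;
}
#endif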
10311 /* A subroutine of ix86_build_signbit_mask_vector. If VECT is true,
10312 then replicate the value for all elements of the vector register. */
10316 ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
10323 v = gen_rtvec (4, value, value, value, value);
10325 v = gen_rtvec (4, value, CONST0_RTX (SFmode),
10326 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
10327 return gen_rtx_CONST_VECTOR (V4SFmode, v);
10331 v = gen_rtvec (2, value, value);
10333 v = gen_rtvec (2, value, CONST0_RTX (DFmode));
10334 return gen_rtx_CONST_VECTOR (V2DFmode, v);
10337 gcc_unreachable ();
10341 /* A subroutine of ix86_expand_fp_absneg_operator and copysign expanders.
10342 Create a mask for the sign bit in MODE for an SSE register. If VECT is
10343 true, then replicate the mask for all elements of the vector register.
10344 If INVERT is true, then create a mask excluding the sign bit. */
10347 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
10349 enum machine_mode vec_mode;
10350 HOST_WIDE_INT hi, lo;
10355 /* Find the sign bit, sign extended to 2*HWI. */
10356 if (mode == SFmode)
10357 lo = 0x80000000, hi = lo < 0;
10358 else if (HOST_BITS_PER_WIDE_INT >= 64)
10359 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
10361 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
10364 lo = ~lo, hi = ~hi;
10366 /* Force this value into the low part of a fp vector constant. */
10367 mask = immed_double_const (lo, hi, mode == SFmode ? SImode : DImode);
10368 mask = gen_lowpart (mode, mask);
10370 v = ix86_build_const_vector (mode, vect, mask);
10371 vec_mode = (mode == SFmode) ? V4SFmode : V2DFmode;
10372 return force_reg (vec_mode, v);
10375 /* Generate code for floating point ABS or NEG. */
10378 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
10381 rtx mask, set, use, clob, dst, src;
10382 bool matching_memory;
10383 bool use_sse = false;
10384 bool vector_mode = VECTOR_MODE_P (mode);
10385 enum machine_mode elt_mode = mode;
10389 elt_mode = GET_MODE_INNER (mode);
10392 else if (TARGET_SSE_MATH)
10393 use_sse = SSE_FLOAT_MODE_P (mode);
10395 /* NEG and ABS performed with SSE use bitwise mask operations.
10396 Create the appropriate mask now. */
10398 mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS);
10405 /* If the destination is memory, and we don't have matching source
10406 operands or we're using the x87, do things in registers. */
10407 matching_memory = false;
10410 if (use_sse && rtx_equal_p (dst, src))
10411 matching_memory = true;
10413 dst = gen_reg_rtx (mode);
10415 if (MEM_P (src) && !matching_memory)
10416 src = force_reg (mode, src);
10420 set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
10421 set = gen_rtx_SET (VOIDmode, dst, set);
10426 set = gen_rtx_fmt_e (code, mode, src);
10427 set = gen_rtx_SET (VOIDmode, dst, set);
10430 use = gen_rtx_USE (VOIDmode, mask);
10431 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
10432 emit_insn (gen_rtx_PARALLEL (VOIDmode,
10433 gen_rtvec (3, set, use, clob)));
10439 if (dst != operands[0])
10440 emit_move_insn (operands[0], dst);
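/* Illustrative sketch (not part of GCC; names are hypothetical, union
   punning as on x86): the bitwise identities behind the mask.  NEG
   flips the DFmode sign bit with XOR; ABS clears it with AND.  */
#if 0
static double
sse_neg_example (double x)
{
  union { double d; unsigned long long u; } v;
  v.d = x;
  v.u ^= 0x8000000000000000ULL;   /* XOR with the sign-bit mask */
  return v.d;
}

static double
sse_abs_example (double x)
{
  union { double d; unsigned long long u; } v;
  v.d = x;
  v.u &= ~0x8000000000000000ULL;  /* AND with the inverted mask */
  return v.d;
}
#endif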
10443 /* Expand a copysign operation. Special case operand 0 being a constant. */
10446 ix86_expand_copysign (rtx operands[])
10448 enum machine_mode mode, vmode;
10449 rtx dest, op0, op1, mask, nmask;
10451 dest = operands[0];
10455 mode = GET_MODE (dest);
10456 vmode = mode == SFmode ? V4SFmode : V2DFmode;
10458 if (GET_CODE (op0) == CONST_DOUBLE)
10462 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
10463 op0 = simplify_unary_operation (ABS, mode, op0, mode);
10465 if (op0 == CONST0_RTX (mode))
10466 op0 = CONST0_RTX (vmode);
10469 if (mode == SFmode)
10470 v = gen_rtvec (4, op0, CONST0_RTX (SFmode),
10471 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
10473 v = gen_rtvec (2, op0, CONST0_RTX (DFmode));
10474 op0 = force_reg (vmode, gen_rtx_CONST_VECTOR (vmode, v));
10477 mask = ix86_build_signbit_mask (mode, 0, 0);
10479 if (mode == SFmode)
10480 emit_insn (gen_copysignsf3_const (dest, op0, op1, mask));
10482 emit_insn (gen_copysigndf3_const (dest, op0, op1, mask));
10486 nmask = ix86_build_signbit_mask (mode, 0, 1);
10487 mask = ix86_build_signbit_mask (mode, 0, 0);
10489 if (mode == SFmode)
10490 emit_insn (gen_copysignsf3_var (dest, NULL, op0, op1, nmask, mask));
10492 emit_insn (gen_copysigndf3_var (dest, NULL, op0, op1, nmask, mask));
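/* Illustrative sketch (not part of GCC; the name is hypothetical): the
   two-mask bit operation the splitters below expand into -- keep the
   magnitude bits of MAG and take only the sign bit of SGN.  */
#if 0
static double
sse_copysign_example (double mag, double sgn)
{
  union { double d; unsigned long long u; } m, s;
  m.d = mag;
  s.d = sgn;
  m.u = (m.u & ~0x8000000000000000ULL) | (s.u & 0x8000000000000000ULL);
  return m.d;
}
#endif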
10496 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
10497 be a constant, and so has already been expanded into a vector constant. */
10500 ix86_split_copysign_const (rtx operands[])
10502 enum machine_mode mode, vmode;
10503 rtx dest, op0, op1, mask, x;
10505 dest = operands[0];
10508 mask = operands[3];
10510 mode = GET_MODE (dest);
10511 vmode = GET_MODE (mask);
10513 dest = simplify_gen_subreg (vmode, dest, mode, 0);
10514 x = gen_rtx_AND (vmode, dest, mask);
10515 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10517 if (op0 != CONST0_RTX (vmode))
10519 x = gen_rtx_IOR (vmode, dest, op0);
10520 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10524 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
10525 so we have to do two masks. */
10528 ix86_split_copysign_var (rtx operands[])
10530 enum machine_mode mode, vmode;
10531 rtx dest, scratch, op0, op1, mask, nmask, x;
10533 dest = operands[0];
10534 scratch = operands[1];
10537 nmask = operands[4];
10538 mask = operands[5];
10540 mode = GET_MODE (dest);
10541 vmode = GET_MODE (mask);
10543 if (rtx_equal_p (op0, op1))
10545 /* Shouldn't happen often (it's useless, obviously), but when it does
10546 we'd generate incorrect code if we continue below. */
10547 emit_move_insn (dest, op0);
10551 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
10553 gcc_assert (REGNO (op1) == REGNO (scratch));
10555 x = gen_rtx_AND (vmode, scratch, mask);
10556 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
10559 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
10560 x = gen_rtx_NOT (vmode, dest);
10561 x = gen_rtx_AND (vmode, x, op0);
10562 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10566 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
10568 x = gen_rtx_AND (vmode, scratch, mask);
10570 else /* alternative 2,4 */
10572 gcc_assert (REGNO (mask) == REGNO (scratch));
10573 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
10574 x = gen_rtx_AND (vmode, scratch, op1);
10576 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
10578 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
10580 dest = simplify_gen_subreg (vmode, op0, mode, 0);
10581 x = gen_rtx_AND (vmode, dest, nmask);
10583 else /* alternative 3,4 */
10585 gcc_assert (REGNO (nmask) == REGNO (dest));
10587 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
10588 x = gen_rtx_AND (vmode, dest, op0);
10590 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10593 x = gen_rtx_IOR (vmode, dest, scratch);
10594 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10597 /* Return TRUE or FALSE depending on whether the first SET in INSN
10598 has source and destination with matching CC modes, and whether the
10599 CC mode is at least as constrained as REQ_MODE. */
10602 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
10605 enum machine_mode set_mode;
10607 set = PATTERN (insn);
10608 if (GET_CODE (set) == PARALLEL)
10609 set = XVECEXP (set, 0, 0);
10610 gcc_assert (GET_CODE (set) == SET);
10611 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
10613 set_mode = GET_MODE (SET_DEST (set));
10617 if (req_mode != CCNOmode
10618 && (req_mode != CCmode
10619 || XEXP (SET_SRC (set), 1) != const0_rtx))
10623 if (req_mode == CCGCmode)
10627 if (req_mode == CCGOCmode || req_mode == CCNOmode)
10631 if (req_mode == CCZmode)
10638 gcc_unreachable ();
10641 return (GET_MODE (SET_SRC (set)) == set_mode);
10644 /* Generate insn patterns to do an integer compare of OPERANDS. */
10647 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
10649 enum machine_mode cmpmode;
10652 cmpmode = SELECT_CC_MODE (code, op0, op1);
10653 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
10655 /* This is very simple, but making the interface the same as in the
10656 FP case makes the rest of the code easier. */
10657 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
10658 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
10660 /* Return the test that should be put into the flags user, i.e.
10661 the bcc, scc, or cmov instruction. */
10662 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
10665 /* Figure out whether to use ordered or unordered fp comparisons.
10666 Return the appropriate mode to use. */
10669 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
10671 /* ??? In order to make all comparisons reversible, we do all comparisons
10672 non-trapping when compiling for IEEE. Once gcc is able to distinguish
10673 all forms of trapping and nontrapping comparisons, we can make inequality
10674 comparisons trapping again, since it results in better code when using
10675 FCOM based compares. */
10676 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
10680 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
10682 enum machine_mode mode = GET_MODE (op0);
10684 if (SCALAR_FLOAT_MODE_P (mode))
10686 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
10687 return ix86_fp_compare_mode (code);
10692 /* Only zero flag is needed. */
10693 case EQ: /* ZF=0 */
10694 case NE: /* ZF!=0 */
10696 /* Codes needing carry flag. */
10697 case GEU: /* CF=0 */
10698 case GTU: /* CF=0 & ZF=0 */
10699 case LTU: /* CF=1 */
10700 case LEU: /* CF=1 | ZF=1 */
10702 /* Codes possibly doable only with sign flag when
10703 comparing against zero. */
10704 case GE: /* SF=OF or SF=0 */
10705 case LT: /* SF<>OF or SF=1 */
10706 if (op1 == const0_rtx)
10709 /* For other cases the Carry flag is not required. */
10711 /* Codes doable only with the sign flag when comparing
10712 against zero, but we lack a jump instruction for it,
10713 so we need to use relational tests against overflow,
10714 which thus needs to be zero. */
10715 case GT: /* ZF=0 & SF=OF */
10716 case LE: /* ZF=1 | SF<>OF */
10717 if (op1 == const0_rtx)
10721 /* strcmp patterns do (use flags) and combine may ask us for a proper mode. */
10726 gcc_unreachable ();
10730 /* Return the fixed registers used for condition codes. */
10733 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
10740 /* If two condition code modes are compatible, return a condition code
10741 mode which is compatible with both. Otherwise, return
10744 static enum machine_mode
10745 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
10750 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
10753 if ((m1 == CCGCmode && m2 == CCGOCmode)
10754 || (m1 == CCGOCmode && m2 == CCGCmode))
10760 gcc_unreachable ();
10782 /* These are only compatible with themselves, which we already know. */
10788 /* Split comparison code CODE into comparisons we can do using branch
10789 instructions. BYPASS_CODE is the comparison code for the branch that
10790 will branch around FIRST_CODE and SECOND_CODE. If one of the branches
10791 is not required, its value is set to UNKNOWN.
10792 We never require more than two branches. */
10795 ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
10796 enum rtx_code *first_code,
10797 enum rtx_code *second_code)
10799 *first_code = code;
10800 *bypass_code = UNKNOWN;
10801 *second_code = UNKNOWN;
10803 /* The fcomi comparison sets flags as follows:
10813 case GT: /* GTU - CF=0 & ZF=0 */
10814 case GE: /* GEU - CF=0 */
10815 case ORDERED: /* PF=0 */
10816 case UNORDERED: /* PF=1 */
10817 case UNEQ: /* EQ - ZF=1 */
10818 case UNLT: /* LTU - CF=1 */
10819 case UNLE: /* LEU - CF=1 | ZF=1 */
10820 case LTGT: /* EQ - ZF=0 */
10822 case LT: /* LTU - CF=1 - fails on unordered */
10823 *first_code = UNLT;
10824 *bypass_code = UNORDERED;
10826 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
10827 *first_code = UNLE;
10828 *bypass_code = UNORDERED;
10830 case EQ: /* EQ - ZF=1 - fails on unordered */
10831 *first_code = UNEQ;
10832 *bypass_code = UNORDERED;
10834 case NE: /* NE - ZF=0 - fails on unordered */
10835 *first_code = LTGT;
10836 *second_code = UNORDERED;
10838 case UNGE: /* GEU - CF=0 - fails on unordered */
10840 *second_code = UNORDERED;
10842 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
10844 *second_code = UNORDERED;
10847 gcc_unreachable ();
10849 if (!TARGET_IEEE_FP)
10851 *second_code = UNKNOWN;
10852 *bypass_code = UNKNOWN;
10856 /* Return the cost of a comparison done using fcom + arithmetic operations on AX.
10857 All following functions use the number of instructions as a cost metric.
10858 In the future this should be tweaked to compute bytes for optimize_size and
10859 take into account the performance of various instructions on various CPUs. */
10861 ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
10863 if (!TARGET_IEEE_FP)
10865 /* The cost of code output by ix86_expand_fp_compare. */
10889 gcc_unreachable ();
10893 /* Return cost of comparison done using fcomi operation.
10894 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10896 ix86_fp_comparison_fcomi_cost (enum rtx_code code)
10898 enum rtx_code bypass_code, first_code, second_code;
10899 /* Return an arbitrarily high cost when the instruction is not supported - this
10900 prevents gcc from using it. */
10903 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10904 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 2;
10907 /* Return cost of comparison done using sahf operation.
10908 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10910 ix86_fp_comparison_sahf_cost (enum rtx_code code)
10912 enum rtx_code bypass_code, first_code, second_code;
10913 /* Return an arbitrarily high cost when the instruction is not preferred - this
10914 prevents gcc from using it. */
10915 if (!(TARGET_SAHF && (TARGET_USE_SAHF || optimize_size)))
10917 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10918 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 3;
10921 /* Compute cost of the comparison done using any method.
10922 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10924 ix86_fp_comparison_cost (enum rtx_code code)
10926 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
10929 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
10930 sahf_cost = ix86_fp_comparison_sahf_cost (code);
10932 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
10933 if (min > sahf_cost)
10935 if (min > fcomi_cost)
10940 /* Return true if we should use an FCOMI instruction for this fp comparison. */
10944 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
10946 enum rtx_code swapped_code = swap_condition (code);
10948 return ((ix86_fp_comparison_cost (code)
10949 == ix86_fp_comparison_fcomi_cost (code))
10950 || (ix86_fp_comparison_cost (swapped_code)
10951 == ix86_fp_comparison_fcomi_cost (swapped_code)));
10954 /* Swap, force into registers, or otherwise massage the two operands
10955 to a fp comparison. The operands are updated in place; the new
10956 comparison code is returned. */
10958 static enum rtx_code
10959 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
10961 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
10962 rtx op0 = *pop0, op1 = *pop1;
10963 enum machine_mode op_mode = GET_MODE (op0);
10964 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
10966 /* All of the unordered compare instructions only work on registers.
10967 The same is true of the fcomi compare instructions. The XFmode
10968 compare instructions require registers except when comparing
10969 against zero or when converting operand 1 from fixed point to floating point. */
10973 && (fpcmp_mode == CCFPUmode
10974 || (op_mode == XFmode
10975 && ! (standard_80387_constant_p (op0) == 1
10976 || standard_80387_constant_p (op1) == 1)
10977 && GET_CODE (op1) != FLOAT)
10978 || ix86_use_fcomi_compare (code)))
10980 op0 = force_reg (op_mode, op0);
10981 op1 = force_reg (op_mode, op1);
10985 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
10986 things around if they appear profitable, otherwise force op0
10987 into a register. */
10989 if (standard_80387_constant_p (op0) == 0
10991 && ! (standard_80387_constant_p (op1) == 0
10995 tmp = op0, op0 = op1, op1 = tmp;
10996 code = swap_condition (code);
11000 op0 = force_reg (op_mode, op0);
11002 if (CONSTANT_P (op1))
11004 int tmp = standard_80387_constant_p (op1);
11006 op1 = validize_mem (force_const_mem (op_mode, op1));
11010 op1 = force_reg (op_mode, op1);
11013 op1 = force_reg (op_mode, op1);
11017 /* Try to rearrange the comparison to make it cheaper. */
11018 if (ix86_fp_comparison_cost (code)
11019 > ix86_fp_comparison_cost (swap_condition (code))
11020 && (REG_P (op1) || !no_new_pseudos))
11023 tmp = op0, op0 = op1, op1 = tmp;
11024 code = swap_condition (code);
11026 op0 = force_reg (op_mode, op0);
11034 /* Convert comparison codes we use to represent FP comparison to integer
11035 code that will result in a proper branch. Return UNKNOWN if no such code is available. */
11039 ix86_fp_compare_code_to_integer (enum rtx_code code)
11068 /* Generate insn patterns to do a floating point compare of OPERANDS. */
11071 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
11072 rtx *second_test, rtx *bypass_test)
11074 enum machine_mode fpcmp_mode, intcmp_mode;
11076 int cost = ix86_fp_comparison_cost (code);
11077 enum rtx_code bypass_code, first_code, second_code;
11079 fpcmp_mode = ix86_fp_compare_mode (code);
11080 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
11083 *second_test = NULL_RTX;
11085 *bypass_test = NULL_RTX;
11087 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11089 /* Do fcomi/sahf based test when profitable. */
11090 if ((TARGET_CMOVE || TARGET_SAHF)
11091 && (bypass_code == UNKNOWN || bypass_test)
11092 && (second_code == UNKNOWN || second_test)
11093 && ix86_fp_comparison_arithmetics_cost (code) > cost)
11097 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
11098 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
11104 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
11105 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
11107 scratch = gen_reg_rtx (HImode);
11108 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
11109 emit_insn (gen_x86_sahf_1 (scratch));
11112 /* The FP codes work out to act like unsigned. */
11113 intcmp_mode = fpcmp_mode;
11115 if (bypass_code != UNKNOWN)
11116 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
11117 gen_rtx_REG (intcmp_mode, FLAGS_REG),
11119 if (second_code != UNKNOWN)
11120 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
11121 gen_rtx_REG (intcmp_mode, FLAGS_REG),
11126 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
11127 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
11128 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
11130 scratch = gen_reg_rtx (HImode);
11131 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
11133 /* In the unordered case, we have to check C2 for NaNs, which
11134 doesn't happen to work out to anything nice combination-wise.
11135 So do some bit twiddling on the value we've got in AH to come
11136 up with an appropriate set of condition codes. */
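/* Background for the magic constants below (a sketch, not part of the
   emitted sequence): after fnstsw, AH holds the x87 condition bits
   C0 (0x01), C2 (0x04) and C3 (0x40).  fcom leaves all three clear for
   ">", C0 set for "<", C3 set for "==", and all three set for unordered,
   so 0x45 masks exactly the interesting bits:  */
#if 0
static int
x87_gt (unsigned char ah)	/* non-IEEE GT: none of C0/C2/C3 set.  */
{
  return (ah & 0x45) == 0;
}

static int
x87_eq_ieee (unsigned char ah)	/* IEEE EQ: C3 set, C0 and C2 clear.  */
{
  return (ah & 0x45) == 0x40;
}
#endif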
11138 intcmp_mode = CCNOmode;
11143 if (code == GT || !TARGET_IEEE_FP)
11145 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
11150 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11151 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
11152 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
11153 intcmp_mode = CCmode;
11159 if (code == LT && TARGET_IEEE_FP)
11161 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11162 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
11163 intcmp_mode = CCmode;
11168 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
11174 if (code == GE || !TARGET_IEEE_FP)
11176 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
11181 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11182 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
11189 if (code == LE && TARGET_IEEE_FP)
11191 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11192 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
11193 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
11194 intcmp_mode = CCmode;
11199 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
11205 if (code == EQ && TARGET_IEEE_FP)
11207 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11208 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
11209 intcmp_mode = CCmode;
11214 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
11221 if (code == NE && TARGET_IEEE_FP)
11223 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11224 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
11230 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
11236 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
11240 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
11245 gcc_unreachable ();
11249 /* Return the test that should be put into the flags user, i.e.
11250 the bcc, scc, or cmov instruction. */
11251 return gen_rtx_fmt_ee (code, VOIDmode,
11252 gen_rtx_REG (intcmp_mode, FLAGS_REG),
11257 ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
11260 op0 = ix86_compare_op0;
11261 op1 = ix86_compare_op1;
11264 *second_test = NULL_RTX;
11266 *bypass_test = NULL_RTX;
11268 if (ix86_compare_emitted)
11270 ret = gen_rtx_fmt_ee (code, VOIDmode, ix86_compare_emitted, const0_rtx);
11271 ix86_compare_emitted = NULL_RTX;
11273 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
11275 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
11276 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
11277 second_test, bypass_test);
11280 ret = ix86_expand_int_compare (code, op0, op1);
11285 /* Return true if the CODE will result in a nontrivial jump sequence. */
11287 ix86_fp_jump_nontrivial_p (enum rtx_code code)
11289 enum rtx_code bypass_code, first_code, second_code;
11292 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11293 return bypass_code != UNKNOWN || second_code != UNKNOWN;
11297 ix86_expand_branch (enum rtx_code code, rtx label)
11301 /* If we have emitted a compare insn, go straight to simple.
11302 ix86_expand_compare won't emit anything if ix86_compare_emitted is non-NULL. */
11304 if (ix86_compare_emitted)
11307 switch (GET_MODE (ix86_compare_op0))
11313 tmp = ix86_expand_compare (code, NULL, NULL);
11314 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
11315 gen_rtx_LABEL_REF (VOIDmode, label),
11317 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
11326 enum rtx_code bypass_code, first_code, second_code;
11328 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
11329 &ix86_compare_op1);
11331 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11333 /* Check whether we will use the natural sequence with one jump. If
11334 so, we can expand the jump early. Otherwise delay expansion by
11335 creating a compound insn so as not to confuse optimizers. */
11336 if (bypass_code == UNKNOWN && second_code == UNKNOWN
11339 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
11340 gen_rtx_LABEL_REF (VOIDmode, label),
11341 pc_rtx, NULL_RTX, NULL_RTX);
11345 tmp = gen_rtx_fmt_ee (code, VOIDmode,
11346 ix86_compare_op0, ix86_compare_op1);
11347 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
11348 gen_rtx_LABEL_REF (VOIDmode, label),
11350 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
11352 use_fcomi = ix86_use_fcomi_compare (code);
11353 vec = rtvec_alloc (3 + !use_fcomi);
11354 RTVEC_ELT (vec, 0) = tmp;
11356 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
11358 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
11361 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
11363 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
11372 /* Expand DImode branch into multiple compare+branch. */
11374 rtx lo[2], hi[2], label2;
11375 enum rtx_code code1, code2, code3;
11376 enum machine_mode submode;
11378 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
11380 tmp = ix86_compare_op0;
11381 ix86_compare_op0 = ix86_compare_op1;
11382 ix86_compare_op1 = tmp;
11383 code = swap_condition (code);
11385 if (GET_MODE (ix86_compare_op0) == DImode)
11387 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
11388 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
11393 split_ti (&ix86_compare_op0, 1, lo+0, hi+0);
11394 split_ti (&ix86_compare_op1, 1, lo+1, hi+1);
11398 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
11399 avoid two branches. This costs one extra insn, so disable when
11400 optimizing for size. */
11402 if ((code == EQ || code == NE)
11404 || hi[1] == const0_rtx || lo[1] == const0_rtx))
11409 if (hi[1] != const0_rtx)
11410 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
11411 NULL_RTX, 0, OPTAB_WIDEN);
11414 if (lo[1] != const0_rtx)
11415 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
11416 NULL_RTX, 0, OPTAB_WIDEN);
11418 tmp = expand_binop (submode, ior_optab, xor1, xor0,
11419 NULL_RTX, 0, OPTAB_WIDEN);
11421 ix86_compare_op0 = tmp;
11422 ix86_compare_op1 = const0_rtx;
11423 ix86_expand_branch (code, label);
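/* A sketch (not part of GCC) of the identity used above: the halves of
   two double-word values are pairwise equal iff the OR of their XORs is
   zero, so EQ/NE needs only one compare against zero:  */
#if 0
static int
wide_eq (unsigned int lo0, unsigned int hi0,
         unsigned int lo1, unsigned int hi1)
{
  return ((hi0 ^ hi1) | (lo0 ^ lo1)) == 0;
}
#endif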
11427 /* Otherwise, if we are doing less-than or greater-or-equal-than,
11428 op1 is a constant and the low word is zero, then we can just
11429 examine the high word. */
11431 if (CONST_INT_P (hi[1]) && lo[1] == const0_rtx)
11434 case LT: case LTU: case GE: case GEU:
11435 ix86_compare_op0 = hi[0];
11436 ix86_compare_op1 = hi[1];
11437 ix86_expand_branch (code, label);
11443 /* Otherwise, we need two or three jumps. */
11445 label2 = gen_label_rtx ();
11448 code2 = swap_condition (code);
11449 code3 = unsigned_condition (code);
11453 case LT: case GT: case LTU: case GTU:
11456 case LE: code1 = LT; code2 = GT; break;
11457 case GE: code1 = GT; code2 = LT; break;
11458 case LEU: code1 = LTU; code2 = GTU; break;
11459 case GEU: code1 = GTU; code2 = LTU; break;
11461 case EQ: code1 = UNKNOWN; code2 = NE; break;
11462 case NE: code2 = UNKNOWN; break;
11465 gcc_unreachable ();
11470 * if (hi(a) < hi(b)) goto true;
11471 * if (hi(a) > hi(b)) goto false;
11472 * if (lo(a) < lo(b)) goto true;
11476 ix86_compare_op0 = hi[0];
11477 ix86_compare_op1 = hi[1];
11479 if (code1 != UNKNOWN)
11480 ix86_expand_branch (code1, label);
11481 if (code2 != UNKNOWN)
11482 ix86_expand_branch (code2, label2);
11484 ix86_compare_op0 = lo[0];
11485 ix86_compare_op1 = lo[1];
11486 ix86_expand_branch (code3, label);
11488 if (code2 != UNKNOWN)
11489 emit_label (label2);
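/* A sketch (not part of GCC) of the jump sequence built above for a
   signed double-word "a < b": decide on the high words, and fall back to
   an unsigned compare of the low words only when they are equal.  */
#if 0
static int
wide_lt (int hi0, unsigned int lo0, int hi1, unsigned int lo1)
{
  if (hi0 < hi1)
    return 1;		/* code1 jump, to "true" (label).  */
  if (hi0 > hi1)
    return 0;		/* code2 jump, to "false" (label2).  */
  return lo0 < lo1;	/* code3: unsigned compare of the low words.  */
}
#endif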
11494 gcc_unreachable ();
11498 /* Split branch based on floating point condition. */
11500 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
11501 rtx target1, rtx target2, rtx tmp, rtx pushed)
11503 rtx second, bypass;
11504 rtx label = NULL_RTX;
11506 int bypass_probability = -1, second_probability = -1, probability = -1;
11509 if (target2 != pc_rtx)
11512 code = reverse_condition_maybe_unordered (code);
11517 condition = ix86_expand_fp_compare (code, op1, op2,
11518 tmp, &second, &bypass);
11520 /* Remove pushed operand from stack. */
11522 ix86_free_from_memory (GET_MODE (pushed));
11524 if (split_branch_probability >= 0)
11526 /* Distribute the probabilities across the jumps.
11527 Assume the BYPASS and SECOND jumps always test for UNORDERED. */
11529 probability = split_branch_probability;
11531 /* A value of 1 is low enough that the probability does not need
11532 to be updated. Later we may run some experiments and see
11533 if unordered values are more frequent in practice. */
11535 bypass_probability = 1;
11537 second_probability = 1;
11539 if (bypass != NULL_RTX)
11541 label = gen_label_rtx ();
11542 i = emit_jump_insn (gen_rtx_SET
11544 gen_rtx_IF_THEN_ELSE (VOIDmode,
11546 gen_rtx_LABEL_REF (VOIDmode,
11549 if (bypass_probability >= 0)
11551 = gen_rtx_EXPR_LIST (REG_BR_PROB,
11552 GEN_INT (bypass_probability),
11555 i = emit_jump_insn (gen_rtx_SET
11557 gen_rtx_IF_THEN_ELSE (VOIDmode,
11558 condition, target1, target2)));
11559 if (probability >= 0)
11561 = gen_rtx_EXPR_LIST (REG_BR_PROB,
11562 GEN_INT (probability),
11564 if (second != NULL_RTX)
11566 i = emit_jump_insn (gen_rtx_SET
11568 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
11570 if (second_probability >= 0)
11572 = gen_rtx_EXPR_LIST (REG_BR_PROB,
11573 GEN_INT (second_probability),
11576 if (label != NULL_RTX)
11577 emit_label (label);
11581 ix86_expand_setcc (enum rtx_code code, rtx dest)
11583 rtx ret, tmp, tmpreg, equiv;
11584 rtx second_test, bypass_test;
11586 if (GET_MODE (ix86_compare_op0) == (TARGET_64BIT ? TImode : DImode))
11587 return 0; /* FAIL */
11589 gcc_assert (GET_MODE (dest) == QImode);
11591 ret = ix86_expand_compare (code, &second_test, &bypass_test);
11592 PUT_MODE (ret, QImode);
11597 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
11598 if (bypass_test || second_test)
11600 rtx test = second_test;
11602 rtx tmp2 = gen_reg_rtx (QImode);
11605 gcc_assert (!second_test);
11606 test = bypass_test;
11608 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
11610 PUT_MODE (test, QImode);
11611 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
11614 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
11616 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
11619 /* Attach a REG_EQUAL note describing the comparison result. */
11620 if (ix86_compare_op0 && ix86_compare_op1)
11622 equiv = simplify_gen_relational (code, QImode,
11623 GET_MODE (ix86_compare_op0),
11624 ix86_compare_op0, ix86_compare_op1);
11625 set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
11628 return 1; /* DONE */
11631 /* Expand comparison setting or clearing carry flag. Return true when
11632 successful and set pop for the operation. */
11634 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
11636 enum machine_mode mode =
11637 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
11639 /* Do not handle double-word (DImode/TImode) compares that go through the
11640 special path. We also can't deal with FP compares yet; this would be possible to add. */
11641 if (mode == (TARGET_64BIT ? TImode : DImode))
11644 if (SCALAR_FLOAT_MODE_P (mode))
11646 rtx second_test = NULL, bypass_test = NULL;
11647 rtx compare_op, compare_seq;
11649 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
11651 /* Shortcut: the following common codes never translate
11652 into carry flag compares. */
11653 if (code == EQ || code == NE || code == UNEQ || code == LTGT
11654 || code == ORDERED || code == UNORDERED)
11657 /* These comparisons require the zero flag; swap operands so they won't. */
11658 if ((code == GT || code == UNLE || code == LE || code == UNGT)
11659 && !TARGET_IEEE_FP)
11664 code = swap_condition (code);
11667 /* Try to expand the comparison and verify that we end up with a carry
11668 flag based comparison. This fails to be true only when we decide to expand the
11669 comparison using arithmetic, which is not too common a scenario. */
11671 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
11672 &second_test, &bypass_test);
11673 compare_seq = get_insns ();
11676 if (second_test || bypass_test)
11678 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
11679 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
11680 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
11682 code = GET_CODE (compare_op);
11683 if (code != LTU && code != GEU)
11685 emit_insn (compare_seq);
11689 if (!INTEGRAL_MODE_P (mode))
11697 /* Convert a==0 into (unsigned)a<1. */
11700 if (op1 != const0_rtx)
11703 code = (code == EQ ? LTU : GEU);
11706 /* Convert a>b into b<a or a>=b+1. */
11709 if (CONST_INT_P (op1))
11711 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
11712 /* Bail out on overflow. We can still swap the operands, but that
11713 would force loading of the constant into a register. */
11714 if (op1 == const0_rtx
11715 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
11717 code = (code == GTU ? GEU : LTU);
11724 code = (code == GTU ? LTU : GEU);
11728 /* Convert a>=0 into (unsigned)a<0x80000000. */
11731 if (mode == DImode || op1 != const0_rtx)
11733 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
11734 code = (code == LT ? GEU : LTU);
11738 if (mode == DImode || op1 != constm1_rtx)
11740 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
11741 code = (code == LE ? GEU : LTU);
11747 /* Swapping operands may cause a constant to appear as the first operand. */
11748 if (!nonimmediate_operand (op0, VOIDmode))
11750 if (no_new_pseudos)
11752 op0 = force_reg (mode, op0);
11754 ix86_compare_op0 = op0;
11755 ix86_compare_op1 = op1;
11756 *pop = ix86_expand_compare (code, NULL, NULL);
11757 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
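/* A sketch (not part of GCC) of the integer rewrites above; each turns
   the requested relation into an LTU/GEU test whose outcome lands in the
   carry flag:  */
#if 0
static int
cf_eq0 (unsigned int a)		/* a == 0  becomes  a <u 1.  */
{
  return a < 1;
}

static int
cf_gtu (unsigned int a, unsigned int b)	/* a >u b  becomes  a >=u b+1,  */
{					/* valid only if b+1 does not wrap.  */
  return a >= b + 1;
}

static int
cf_ge0 (int a)			/* a >= 0  becomes  (unsigned) a <u 2^31.  */
{
  return (unsigned int) a < 0x80000000u;
}
#endif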
11762 ix86_expand_int_movcc (rtx operands[])
11764 enum rtx_code code = GET_CODE (operands[1]), compare_code;
11765 rtx compare_seq, compare_op;
11766 rtx second_test, bypass_test;
11767 enum machine_mode mode = GET_MODE (operands[0]);
11768 bool sign_bit_compare_p = false;
11771 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
11772 compare_seq = get_insns ();
11775 compare_code = GET_CODE (compare_op);
11777 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
11778 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
11779 sign_bit_compare_p = true;
11781 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
11782 HImode insns, we'd be swallowed in word prefix ops. */
11784 if ((mode != HImode || TARGET_FAST_PREFIX)
11785 && (mode != (TARGET_64BIT ? TImode : DImode))
11786 && CONST_INT_P (operands[2])
11787 && CONST_INT_P (operands[3]))
11789 rtx out = operands[0];
11790 HOST_WIDE_INT ct = INTVAL (operands[2]);
11791 HOST_WIDE_INT cf = INTVAL (operands[3]);
11792 HOST_WIDE_INT diff;
11795 /* Sign bit compares are better done using shifts than we do by using sbb. */
11797 if (sign_bit_compare_p
11798 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
11799 ix86_compare_op1, &compare_op))
11801 /* Detect overlap between destination and compare sources. */
11804 if (!sign_bit_compare_p)
11806 bool fpcmp = false;
11808 compare_code = GET_CODE (compare_op);
11810 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
11811 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
11814 compare_code = ix86_fp_compare_code_to_integer (compare_code);
11817 /* To simplify the rest of the code, restrict to the GEU case. */
11818 if (compare_code == LTU)
11820 HOST_WIDE_INT tmp = ct;
11823 compare_code = reverse_condition (compare_code);
11824 code = reverse_condition (code);
11829 PUT_CODE (compare_op,
11830 reverse_condition_maybe_unordered
11831 (GET_CODE (compare_op)));
11833 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
11837 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
11838 || reg_overlap_mentioned_p (out, ix86_compare_op1))
11839 tmp = gen_reg_rtx (mode);
11841 if (mode == DImode)
11842 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
11844 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
11848 if (code == GT || code == GE)
11849 code = reverse_condition (code);
11852 HOST_WIDE_INT tmp = ct;
11857 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
11858 ix86_compare_op1, VOIDmode, 0, -1);
11871 tmp = expand_simple_binop (mode, PLUS,
11873 copy_rtx (tmp), 1, OPTAB_DIRECT);
11884 tmp = expand_simple_binop (mode, IOR,
11886 copy_rtx (tmp), 1, OPTAB_DIRECT);
11888 else if (diff == -1 && ct)
11898 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
11900 tmp = expand_simple_binop (mode, PLUS,
11901 copy_rtx (tmp), GEN_INT (cf),
11902 copy_rtx (tmp), 1, OPTAB_DIRECT);
11910 * andl cf - ct, dest
11920 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
11923 tmp = expand_simple_binop (mode, AND,
11925 gen_int_mode (cf - ct, mode),
11926 copy_rtx (tmp), 1, OPTAB_DIRECT);
11928 tmp = expand_simple_binop (mode, PLUS,
11929 copy_rtx (tmp), GEN_INT (ct),
11930 copy_rtx (tmp), 1, OPTAB_DIRECT);
11933 if (!rtx_equal_p (tmp, out))
11934 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
11936 return 1; /* DONE */
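/* A sketch (not part of GCC) of the branch-free select built above: sbb
   materializes the carry flag as a 0/-1 mask, which AND and PLUS then
   turn into either constant:  */
#if 0
static int
select_via_sbb (unsigned int a, unsigned int b, int ct, int cf)
{
  int mask = -(int) (a < b);		/* sbb: 0 or -1 from the carry.  */
  return ((cf - ct) & mask) + ct;	/* mask ? cf : ct  */
}
#endif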
11941 enum machine_mode cmp_mode = GET_MODE (ix86_compare_op0);
11944 tmp = ct, ct = cf, cf = tmp;
11947 if (SCALAR_FLOAT_MODE_P (cmp_mode))
11949 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
11951 /* We may be reversing an unordered compare to a normal compare, which
11952 is not valid in general (we may convert a non-trapping condition
11953 to a trapping one); however, on i386 we currently emit all
11954 comparisons unordered. */
11955 compare_code = reverse_condition_maybe_unordered (compare_code);
11956 code = reverse_condition_maybe_unordered (code);
11960 compare_code = reverse_condition (compare_code);
11961 code = reverse_condition (code);
11965 compare_code = UNKNOWN;
11966 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
11967 && CONST_INT_P (ix86_compare_op1))
11969 if (ix86_compare_op1 == const0_rtx
11970 && (code == LT || code == GE))
11971 compare_code = code;
11972 else if (ix86_compare_op1 == constm1_rtx)
11976 else if (code == GT)
11981 /* Optimize dest = (op0 < 0) ? -1 : cf. */
11982 if (compare_code != UNKNOWN
11983 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
11984 && (cf == -1 || ct == -1))
11986 /* If the lea code below could be used, only optimize
11987 if it results in a 2-insn sequence. */
11989 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
11990 || diff == 3 || diff == 5 || diff == 9)
11991 || (compare_code == LT && ct == -1)
11992 || (compare_code == GE && cf == -1))
11995 * notl op1 (if necessary)
12003 code = reverse_condition (code);
12006 out = emit_store_flag (out, code, ix86_compare_op0,
12007 ix86_compare_op1, VOIDmode, 0, -1);
12009 out = expand_simple_binop (mode, IOR,
12011 out, 1, OPTAB_DIRECT);
12012 if (out != operands[0])
12013 emit_move_insn (operands[0], out);
12015 return 1; /* DONE */
12020 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
12021 || diff == 3 || diff == 5 || diff == 9)
12022 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
12024 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
12030 * lea cf(dest*(ct-cf)),dest
12034 * This also catches the degenerate setcc-only case.
12040 out = emit_store_flag (out, code, ix86_compare_op0,
12041 ix86_compare_op1, VOIDmode, 0, 1);
12044 /* On x86_64 the lea instruction operates on Pmode, so we need
12045 to get the arithmetic done in the proper mode to match. */
12047 tmp = copy_rtx (out);
12051 out1 = copy_rtx (out);
12052 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
12056 tmp = gen_rtx_PLUS (mode, tmp, out1);
12062 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
12065 if (!rtx_equal_p (tmp, out))
12068 out = force_operand (tmp, copy_rtx (out));
12070 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
12072 if (!rtx_equal_p (out, operands[0]))
12073 emit_move_insn (operands[0], copy_rtx (out));
12075 return 1; /* DONE */
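/* A sketch (not part of GCC) of the setcc + lea path above: when ct - cf
   is 1, 2, 3, 4, 5, 8 or 9, the whole select is a 0/1 flag scaled and
   offset by a single lea:  */
#if 0
static int
select_via_lea (unsigned int a, unsigned int b, int ct, int cf)
{
  int flag = a < b;			/* setcc: 0 or 1.  */
  return cf + flag * (ct - cf);		/* lea cf(flag*(ct-cf)), dest  */
}
#endif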
12079 * General case: Jumpful:
12080 * xorl dest,dest cmpl op1, op2
12081 * cmpl op1, op2 movl ct, dest
12082 * setcc dest jcc 1f
12083 * decl dest movl cf, dest
12084 * andl (cf-ct),dest 1:
12087 * Size 20. Size 14.
12089 * This is reasonably steep, but branch mispredict costs are
12090 * high on modern cpus, so consider failing only if optimizing for size. */
12094 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
12095 && BRANCH_COST >= 2)
12099 enum machine_mode cmp_mode = GET_MODE (ix86_compare_op0);
12104 if (SCALAR_FLOAT_MODE_P (cmp_mode))
12106 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
12108 /* We may be reversing an unordered compare to a normal compare,
12109 which is not valid in general (we may convert a non-trapping
12110 condition to a trapping one); however, on i386 we currently
12111 emit all comparisons unordered. */
12112 code = reverse_condition_maybe_unordered (code);
12116 code = reverse_condition (code);
12117 if (compare_code != UNKNOWN)
12118 compare_code = reverse_condition (compare_code);
12122 if (compare_code != UNKNOWN)
12124 /* notl op1 (if needed)
12129 For x < 0 (resp. x <= -1) there will be no notl,
12130 so if possible swap the constants to get rid of the complement.
12132 True/false will be -1/0 while code below (store flag
12133 followed by decrement) is 0/-1, so the constants need
12134 to be exchanged once more. */
12136 if (compare_code == GE || !cf)
12138 code = reverse_condition (code);
12143 HOST_WIDE_INT tmp = cf;
12148 out = emit_store_flag (out, code, ix86_compare_op0,
12149 ix86_compare_op1, VOIDmode, 0, -1);
12153 out = emit_store_flag (out, code, ix86_compare_op0,
12154 ix86_compare_op1, VOIDmode, 0, 1);
12156 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
12157 copy_rtx (out), 1, OPTAB_DIRECT);
12160 out = expand_simple_binop (mode, AND, copy_rtx (out),
12161 gen_int_mode (cf - ct, mode),
12162 copy_rtx (out), 1, OPTAB_DIRECT);
12164 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
12165 copy_rtx (out), 1, OPTAB_DIRECT);
12166 if (!rtx_equal_p (out, operands[0]))
12167 emit_move_insn (operands[0], copy_rtx (out));
12169 return 1; /* DONE */
12173 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
12175 /* Try a few more things with specific constants and a variable. */
12178 rtx var, orig_out, out, tmp;
12180 if (BRANCH_COST <= 2)
12181 return 0; /* FAIL */
12183 /* If one of the two operands is an interesting constant, load a
12184 constant with the above and mask it in with a logical operation. */
12186 if (CONST_INT_P (operands[2]))
12189 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
12190 operands[3] = constm1_rtx, op = and_optab;
12191 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
12192 operands[3] = const0_rtx, op = ior_optab;
12194 return 0; /* FAIL */
12196 else if (CONST_INT_P (operands[3]))
12199 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
12200 operands[2] = constm1_rtx, op = and_optab;
12201 else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
12202 operands[2] = const0_rtx, op = ior_optab;
12204 return 0; /* FAIL */
12207 return 0; /* FAIL */
12209 orig_out = operands[0];
12210 tmp = gen_reg_rtx (mode);
12213 /* Recurse to get the constant loaded. */
12214 if (ix86_expand_int_movcc (operands) == 0)
12215 return 0; /* FAIL */
12217 /* Mask in the interesting variable. */
12218 out = expand_binop (mode, op, var, tmp, orig_out, 0,
12220 if (!rtx_equal_p (out, orig_out))
12221 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
12223 return 1; /* DONE */
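/* A sketch (not part of GCC) of the recursion above: the recursive movcc
   call loads 0/-1 into a temporary, and a single AND or IOR then masks
   the variable in:  */
#if 0
static int
movcc_var_or_zero (int cond, int var)	/* cond ? var : 0  */
{
  int tmp = cond ? -1 : 0;	/* loaded by the recursive call.  */
  return tmp & var;
}

static int
movcc_var_or_m1 (int cond, int var)	/* cond ? var : -1  */
{
  int tmp = cond ? 0 : -1;
  return tmp | var;
}
#endif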
12227 * For comparison with above,
12237 if (! nonimmediate_operand (operands[2], mode))
12238 operands[2] = force_reg (mode, operands[2]);
12239 if (! nonimmediate_operand (operands[3], mode))
12240 operands[3] = force_reg (mode, operands[3]);
12242 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
12244 rtx tmp = gen_reg_rtx (mode);
12245 emit_move_insn (tmp, operands[3]);
12248 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
12250 rtx tmp = gen_reg_rtx (mode);
12251 emit_move_insn (tmp, operands[2]);
12255 if (! register_operand (operands[2], VOIDmode)
12257 || ! register_operand (operands[3], VOIDmode)))
12258 operands[2] = force_reg (mode, operands[2]);
12261 && ! register_operand (operands[3], VOIDmode))
12262 operands[3] = force_reg (mode, operands[3]);
12264 emit_insn (compare_seq);
12265 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
12266 gen_rtx_IF_THEN_ELSE (mode,
12267 compare_op, operands[2],
12270 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
12271 gen_rtx_IF_THEN_ELSE (mode,
12273 copy_rtx (operands[3]),
12274 copy_rtx (operands[0]))));
12276 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
12277 gen_rtx_IF_THEN_ELSE (mode,
12279 copy_rtx (operands[2]),
12280 copy_rtx (operands[0]))));
12282 return 1; /* DONE */
12285 /* Swap, force into registers, or otherwise massage the two operands
12286 to an sse comparison with a mask result. Thus we differ a bit from
12287 ix86_prepare_fp_compare_args which expects to produce a flags result.
12289 The DEST operand exists to help determine whether to commute commutative
12290 operators. The POP0/POP1 operands are updated in place. The new
12291 comparison code is returned, or UNKNOWN if not implementable. */
12293 static enum rtx_code
12294 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
12295 rtx *pop0, rtx *pop1)
12303 /* We have no LTGT as an operator. We could implement it with
12304 NE & ORDERED, but this requires an extra temporary. It's
12305 not clear that it's worth it. */
12312 /* These are supported directly. */
12319 /* For commutative operators, try to canonicalize the destination
12320 operand to be first in the comparison - this helps reload to
12321 avoid extra moves. */
12322 if (!dest || !rtx_equal_p (dest, *pop1))
12330 /* These are not supported directly. Swap the comparison operands
12331 to transform into something that is supported. */
12335 code = swap_condition (code);
12339 gcc_unreachable ();
12345 /* Detect conditional moves that exactly match min/max operational
12346 semantics. Note that this is IEEE safe, as long as we don't
12347 interchange the operands.
12349 Returns FALSE if this conditional move doesn't match a MIN/MAX,
12350 and TRUE if the operation is successful and instructions are emitted. */
12353 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
12354 rtx cmp_op1, rtx if_true, rtx if_false)
12356 enum machine_mode mode;
12362 else if (code == UNGE)
12365 if_true = if_false;
12371 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
12373 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
12378 mode = GET_MODE (dest);
12380 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
12381 but MODE may be a vector mode and thus not appropriate. */
12382 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
12384 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
12387 if_true = force_reg (mode, if_true);
12388 v = gen_rtvec (2, if_true, if_false);
12389 tmp = gen_rtx_UNSPEC (mode, v, u);
12393 code = is_min ? SMIN : SMAX;
12394 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
12397 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
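/* A sketch (not part of GCC) of why operand order matters above:
   minss/maxss compute "a < b ? a : b" (resp. ">"), returning the second
   operand for unordered inputs and for +0.0/-0.0 pairs, so swapping the
   operands changes the result for NaNs and signed zeros:  */
#if 0
static double
minss_semantics (double a, double b)
{
  return a < b ? a : b;		/* a NaN on either side yields b.  */
}
#endif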
12401 /* Expand an sse vector comparison. Return the register with the result. */
12404 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
12405 rtx op_true, rtx op_false)
12407 enum machine_mode mode = GET_MODE (dest);
12410 cmp_op0 = force_reg (mode, cmp_op0);
12411 if (!nonimmediate_operand (cmp_op1, mode))
12412 cmp_op1 = force_reg (mode, cmp_op1);
12415 || reg_overlap_mentioned_p (dest, op_true)
12416 || reg_overlap_mentioned_p (dest, op_false))
12417 dest = gen_reg_rtx (mode);
12419 x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
12420 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
12425 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
12426 operations. This is used for both scalar and vector conditional moves. */
12429 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
12431 enum machine_mode mode = GET_MODE (dest);
12434 if (op_false == CONST0_RTX (mode))
12436 op_true = force_reg (mode, op_true);
12437 x = gen_rtx_AND (mode, cmp, op_true);
12438 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
12440 else if (op_true == CONST0_RTX (mode))
12442 op_false = force_reg (mode, op_false);
12443 x = gen_rtx_NOT (mode, cmp);
12444 x = gen_rtx_AND (mode, x, op_false);
12445 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
12449 op_true = force_reg (mode, op_true);
12450 op_false = force_reg (mode, op_false);
12452 t2 = gen_reg_rtx (mode);
12454 t3 = gen_reg_rtx (mode);
12458 x = gen_rtx_AND (mode, op_true, cmp);
12459 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
12461 x = gen_rtx_NOT (mode, cmp);
12462 x = gen_rtx_AND (mode, x, op_false);
12463 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
12465 x = gen_rtx_IOR (mode, t3, t2);
12466 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
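/* A sketch (not part of GCC) of the AND/ANDN/IOR select just emitted,
   written with scalar integers standing in for vector lanes; CMP is the
   all-zeros/all-ones mask produced by the SSE compare:  */
#if 0
static unsigned int
sse_select (unsigned int cmp, unsigned int t, unsigned int f)
{
  return (t & cmp) | (~cmp & f);	/* per-lane cmp ? t : f  */
}
#endif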
12470 /* Expand a floating-point conditional move. Return true if successful. */
12473 ix86_expand_fp_movcc (rtx operands[])
12475 enum machine_mode mode = GET_MODE (operands[0]);
12476 enum rtx_code code = GET_CODE (operands[1]);
12477 rtx tmp, compare_op, second_test, bypass_test;
12479 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
12481 enum machine_mode cmode;
12483 /* Since we have no cmove for sse registers, don't force bad register
12484 allocation just to gain access to it. Deny movcc when the
12485 comparison mode doesn't match the move mode. */
12486 cmode = GET_MODE (ix86_compare_op0);
12487 if (cmode == VOIDmode)
12488 cmode = GET_MODE (ix86_compare_op1);
12492 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
12494 &ix86_compare_op1);
12495 if (code == UNKNOWN)
12498 if (ix86_expand_sse_fp_minmax (operands[0], code, ix86_compare_op0,
12499 ix86_compare_op1, operands[2],
12503 tmp = ix86_expand_sse_cmp (operands[0], code, ix86_compare_op0,
12504 ix86_compare_op1, operands[2], operands[3]);
12505 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
12509 /* The floating point conditional move instructions don't directly
12510 support conditions resulting from a signed integer comparison. */
12512 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
12517 if (!fcmov_comparison_operator (compare_op, VOIDmode))
12519 gcc_assert (!second_test && !bypass_test);
12520 tmp = gen_reg_rtx (QImode);
12521 ix86_expand_setcc (code, tmp);
12523 ix86_compare_op0 = tmp;
12524 ix86_compare_op1 = const0_rtx;
12525 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
12527 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
12529 tmp = gen_reg_rtx (mode);
12530 emit_move_insn (tmp, operands[3]);
12533 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
12535 tmp = gen_reg_rtx (mode);
12536 emit_move_insn (tmp, operands[2]);
12540 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
12541 gen_rtx_IF_THEN_ELSE (mode, compare_op,
12542 operands[2], operands[3])));
12544 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
12545 gen_rtx_IF_THEN_ELSE (mode, bypass_test,
12546 operands[3], operands[0])));
12548 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
12549 gen_rtx_IF_THEN_ELSE (mode, second_test,
12550 operands[2], operands[0])));
12555 /* Expand a floating-point vector conditional move; a vcond operation
12556 rather than a movcc operation. */
12559 ix86_expand_fp_vcond (rtx operands[])
12561 enum rtx_code code = GET_CODE (operands[3]);
12564 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
12565 &operands[4], &operands[5]);
12566 if (code == UNKNOWN)
12569 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
12570 operands[5], operands[1], operands[2]))
12573 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
12574 operands[1], operands[2]);
12575 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
12579 /* Expand a signed integral vector conditional move. */
12582 ix86_expand_int_vcond (rtx operands[])
12584 enum machine_mode mode = GET_MODE (operands[0]);
12585 enum rtx_code code = GET_CODE (operands[3]);
12586 bool negate = false;
12589 cop0 = operands[4];
12590 cop1 = operands[5];
12592 /* Canonicalize the comparison to EQ, GT, GTU. */
12603 code = reverse_condition (code);
12609 code = reverse_condition (code);
12615 code = swap_condition (code);
12616 x = cop0, cop0 = cop1, cop1 = x;
12620 gcc_unreachable ();
12623 /* Unsigned parallel compare is not supported by the hardware. Play some
12624 tricks to turn this into a signed comparison against 0. */
12627 cop0 = force_reg (mode, cop0);
12635 /* Perform a parallel modulo subtraction. */
12636 t1 = gen_reg_rtx (mode);
12637 emit_insn (gen_subv4si3 (t1, cop0, cop1));
12639 /* Extract the original sign bit of op0. */
12640 mask = GEN_INT (-0x80000000);
12641 mask = gen_rtx_CONST_VECTOR (mode,
12642 gen_rtvec (4, mask, mask, mask, mask));
12643 mask = force_reg (mode, mask);
12644 t2 = gen_reg_rtx (mode);
12645 emit_insn (gen_andv4si3 (t2, cop0, mask));
12647 /* XOR it back into the result of the subtraction. This results
12648 in the sign bit set iff we saw unsigned underflow. */
12649 x = gen_reg_rtx (mode);
12650 emit_insn (gen_xorv4si3 (x, t1, t2));
12658 /* Perform a parallel unsigned saturating subtraction. */
12659 x = gen_reg_rtx (mode);
12660 emit_insn (gen_rtx_SET (VOIDmode, x,
12661 gen_rtx_US_MINUS (mode, cop0, cop1)));
12668 gcc_unreachable ();
12672 cop1 = CONST0_RTX (mode);
12675 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
12676 operands[1+negate], operands[2-negate]);
12678 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
12679 operands[2-negate]);
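/* A sketch (not part of GCC; the exact sequence emitted above differs)
   of the classic reduction behind the unsigned cases: flipping the sign
   bit of both operands turns an unsigned compare into the signed compare
   that pcmpgt provides:  */
#if 0
static int
gtu_via_signed (unsigned int a, unsigned int b)
{
  return (int) (a ^ 0x80000000u) > (int) (b ^ 0x80000000u);
}
#endif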
12683 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
12684 true if we should do zero extension, else sign extension. HIGH_P is
12685 true if we want the N/2 high elements, else the low elements. */
12688 ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
12690 enum machine_mode imode = GET_MODE (operands[1]);
12691 rtx (*unpack)(rtx, rtx, rtx);
12698 unpack = gen_vec_interleave_highv16qi;
12700 unpack = gen_vec_interleave_lowv16qi;
12704 unpack = gen_vec_interleave_highv8hi;
12706 unpack = gen_vec_interleave_lowv8hi;
12710 unpack = gen_vec_interleave_highv4si;
12712 unpack = gen_vec_interleave_lowv4si;
12715 gcc_unreachable ();
12718 dest = gen_lowpart (imode, operands[0]);
12721 se = force_reg (imode, CONST0_RTX (imode));
12723 se = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
12724 operands[1], pc_rtx, pc_rtx);
12726 emit_insn (unpack (dest, operands[1], se));
12729 /* Expand conditional increment or decrement using adc/sbb instructions.
12730 The default case using setcc followed by the conditional move can be
12731 done by generic code. */
12733 ix86_expand_int_addcc (rtx operands[])
12735 enum rtx_code code = GET_CODE (operands[1]);
12737 rtx val = const0_rtx;
12738 bool fpcmp = false;
12739 enum machine_mode mode = GET_MODE (operands[0]);
12741 if (operands[3] != const1_rtx
12742 && operands[3] != constm1_rtx)
12744 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
12745 ix86_compare_op1, &compare_op))
12747 code = GET_CODE (compare_op);
12749 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
12750 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
12753 code = ix86_fp_compare_code_to_integer (code);
12760 PUT_CODE (compare_op,
12761 reverse_condition_maybe_unordered
12762 (GET_CODE (compare_op)));
12764 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
12766 PUT_MODE (compare_op, mode);
12768 /* Construct either adc or sbb insn. */
12769 if ((code == LTU) == (operands[3] == constm1_rtx))
12771 switch (GET_MODE (operands[0]))
12774 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
12777 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
12780 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
12783 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
12786 gcc_unreachable ();
12791 switch (GET_MODE (operands[0]))
12794 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
12797 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
12800 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
12803 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
12806 gcc_unreachable ();
12809 return 1; /* DONE */
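/* A sketch (not part of GCC) of the expansion above: once the compare
   leaves the condition in the carry flag, "adc $0" adds one iff the
   carry is set and "sbb $0" subtracts one likewise:  */
#if 0
static int
cond_inc (int x, unsigned int a, unsigned int b)
{
  return x + (a < b);		/* cmp; adc $0, x  */
}

static int
cond_dec (int x, unsigned int a, unsigned int b)
{
  return x - (a < b);		/* cmp; sbb $0, x  */
}
#endif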
12813 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
12814 works for floating point parameters and non-offsettable memories.
12815 For pushes, it returns just stack offsets; the values will be saved
12816 in the right order. At most three parts are generated. */
12819 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
12824 size = mode == XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
12826 size = (GET_MODE_SIZE (mode) + 4) / 8;
12828 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
12829 gcc_assert (size >= 2 && size <= 3);
12831 /* Optimize constant pool references to immediates. This is used by fp
12832 moves that force all constants to memory to allow combining. */
12833 if (MEM_P (operand) && MEM_READONLY_P (operand))
12835 rtx tmp = maybe_get_pool_constant (operand);
12840 if (MEM_P (operand) && !offsettable_memref_p (operand))
12842 /* The only non-offsettable memories we handle are pushes. */
12843 int ok = push_operand (operand, VOIDmode);
12847 operand = copy_rtx (operand);
12848 PUT_MODE (operand, Pmode);
12849 parts[0] = parts[1] = parts[2] = operand;
12853 if (GET_CODE (operand) == CONST_VECTOR)
12855 enum machine_mode imode = int_mode_for_mode (mode);
12856 /* Caution: if we looked through a constant pool memory above,
12857 the operand may actually have a different mode now. That's
12858 ok, since we want to pun this all the way back to an integer. */
12859 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
12860 gcc_assert (operand != NULL);
12866 if (mode == DImode)
12867 split_di (&operand, 1, &parts[0], &parts[1]);
12870 if (REG_P (operand))
12872 gcc_assert (reload_completed);
12873 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
12874 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
12876 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
12878 else if (offsettable_memref_p (operand))
12880 operand = adjust_address (operand, SImode, 0);
12881 parts[0] = operand;
12882 parts[1] = adjust_address (operand, SImode, 4);
12884 parts[2] = adjust_address (operand, SImode, 8);
12886 else if (GET_CODE (operand) == CONST_DOUBLE)
12891 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
12895 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
12896 parts[2] = gen_int_mode (l[2], SImode);
12899 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
12902 gcc_unreachable ();
12904 parts[1] = gen_int_mode (l[1], SImode);
12905 parts[0] = gen_int_mode (l[0], SImode);
12908 gcc_unreachable ();
12913 if (mode == TImode)
12914 split_ti (&operand, 1, &parts[0], &parts[1]);
12915 if (mode == XFmode || mode == TFmode)
12917 enum machine_mode upper_mode = mode == XFmode ? SImode : DImode;
12918 if (REG_P (operand))
12920 gcc_assert (reload_completed);
12921 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
12922 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
12924 else if (offsettable_memref_p (operand))
12926 operand = adjust_address (operand, DImode, 0);
12927 parts[0] = operand;
12928 parts[1] = adjust_address (operand, upper_mode, 8);
12930 else if (GET_CODE (operand) == CONST_DOUBLE)
12935 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
12936 real_to_target (l, &r, mode);
12938 /* Do not use a shift by 32, to avoid a warning on 32-bit systems. */
12939 if (HOST_BITS_PER_WIDE_INT >= 64)
12942 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
12943 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
12946 parts[0] = immed_double_const (l[0], l[1], DImode);
12948 if (upper_mode == SImode)
12949 parts[1] = gen_int_mode (l[2], SImode);
12950 else if (HOST_BITS_PER_WIDE_INT >= 64)
12953 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
12954 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
12957 parts[1] = immed_double_const (l[2], l[3], DImode);
12960 gcc_unreachable ();
12967 /* Emit insns to perform a move or push of DI, DF, and XF values.
12968 Return false when normal moves are needed; true when all required
12969 insns have been emitted. Operands 2-4 contain the input values
12970 in the correct order; operands 5-7 contain the output values. */
12973 ix86_split_long_move (rtx operands[])
12978 int collisions = 0;
12979 enum machine_mode mode = GET_MODE (operands[0]);
12981 /* The DFmode expanders may ask us to move a double.
12982 For a 64-bit target this is a single move. By hiding the fact
12983 here we simplify the i386.md splitters. */
12984 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
12986 /* Optimize constant pool references to immediates. This is used by
12987 fp moves that force all constants to memory to allow combining. */
12989 if (MEM_P (operands[1])
12990 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
12991 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
12992 operands[1] = get_pool_constant (XEXP (operands[1], 0));
12993 if (push_operand (operands[0], VOIDmode))
12995 operands[0] = copy_rtx (operands[0]);
12996 PUT_MODE (operands[0], Pmode);
12999 operands[0] = gen_lowpart (DImode, operands[0]);
13000 operands[1] = gen_lowpart (DImode, operands[1]);
13001 emit_move_insn (operands[0], operands[1]);
13005 /* The only non-offsettable memory we handle is push. */
13006 if (push_operand (operands[0], VOIDmode))
13009 gcc_assert (!MEM_P (operands[0])
13010 || offsettable_memref_p (operands[0]));
13012 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
13013 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
13015 /* When emitting a push, take care of source operands on the stack. */
13016 if (push && MEM_P (operands[1])
13017 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
13020 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
13021 XEXP (part[1][2], 0));
13022 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
13023 XEXP (part[1][1], 0));
13026 /* We need to do the copy in the right order in case an address register
13027 of the source overlaps the destination. */
13028 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
13030 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
13032 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
13035 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
13038 /* Collision in the middle part can be handled by reordering. */
13039 if (collisions == 1 && nparts == 3
13040 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
13043 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
13044 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
13047 /* If there are more collisions, we can't handle them by reordering.
13048 Do an lea to the last part and use only one colliding move. */
13049 else if (collisions > 1)
13055 base = part[0][nparts - 1];
13057 /* Handle the case when the last part isn't valid for lea.
13058 This happens in 64-bit mode when storing the 12-byte XFmode. */
13059 if (GET_MODE (base) != Pmode)
13060 base = gen_rtx_REG (Pmode, REGNO (base));
13062 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
13063 part[1][0] = replace_equiv_address (part[1][0], base);
13064 part[1][1] = replace_equiv_address (part[1][1],
13065 plus_constant (base, UNITS_PER_WORD));
13067 part[1][2] = replace_equiv_address (part[1][2],
13068 plus_constant (base, 8));
13078 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
13079 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
13080 emit_move_insn (part[0][2], part[1][2]);
13085 /* In 64-bit mode we don't have a 32-bit push available. In case this is
13086 a register, it is OK - we will just use the larger counterpart. We also
13087 retype the memory - this comes from an attempt to avoid a REX prefix
13088 when moving the second half of a TFmode value. */
13089 if (GET_MODE (part[1][1]) == SImode)
13091 switch (GET_CODE (part[1][1]))
13094 part[1][1] = adjust_address (part[1][1], DImode, 0);
13098 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
13102 gcc_unreachable ();
13105 if (GET_MODE (part[1][0]) == SImode)
13106 part[1][0] = part[1][1];
13109 emit_move_insn (part[0][1], part[1][1]);
13110 emit_move_insn (part[0][0], part[1][0]);
13114 /* Choose the correct order so as not to overwrite the source before it is copied. */
13115 if ((REG_P (part[0][0])
13116 && REG_P (part[1][1])
13117 && (REGNO (part[0][0]) == REGNO (part[1][1])
13119 && REGNO (part[0][0]) == REGNO (part[1][2]))))
13121 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
13125 operands[2] = part[0][2];
13126 operands[3] = part[0][1];
13127 operands[4] = part[0][0];
13128 operands[5] = part[1][2];
13129 operands[6] = part[1][1];
13130 operands[7] = part[1][0];
13134 operands[2] = part[0][1];
13135 operands[3] = part[0][0];
13136 operands[5] = part[1][1];
13137 operands[6] = part[1][0];
13144 operands[2] = part[0][0];
13145 operands[3] = part[0][1];
13146 operands[4] = part[0][2];
13147 operands[5] = part[1][0];
13148 operands[6] = part[1][1];
13149 operands[7] = part[1][2];
13153 operands[2] = part[0][0];
13154 operands[3] = part[0][1];
13155 operands[5] = part[1][0];
13156 operands[6] = part[1][1];
13160 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
13163 if (CONST_INT_P (operands[5])
13164 && operands[5] != const0_rtx
13165 && REG_P (operands[2]))
13167 if (CONST_INT_P (operands[6])
13168 && INTVAL (operands[6]) == INTVAL (operands[5]))
13169 operands[6] = operands[2];
13172 && CONST_INT_P (operands[7])
13173 && INTVAL (operands[7]) == INTVAL (operands[5]))
13174 operands[7] = operands[2];
13178 && CONST_INT_P (operands[6])
13179 && operands[6] != const0_rtx
13180 && REG_P (operands[3])
13181 && CONST_INT_P (operands[7])
13182 && INTVAL (operands[7]) == INTVAL (operands[6]))
13183 operands[7] = operands[3];
13186 emit_move_insn (operands[2], operands[5]);
13187 emit_move_insn (operands[3], operands[6]);
13189 emit_move_insn (operands[4], operands[7]);
13194 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
13195 left shift by a constant, either using a single shift or
13196 a sequence of add instructions. */
13199 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
13203 emit_insn ((mode == DImode
13205 : gen_adddi3) (operand, operand, operand));
13207 else if (!optimize_size
13208 && count * ix86_cost->add <= ix86_cost->shift_const)
13211 for (i = 0; i < count; i++)
13213 emit_insn ((mode == DImode
13215 : gen_adddi3) (operand, operand, operand));
13219 emit_insn ((mode == DImode
13221 : gen_ashldi3) (operand, operand, GEN_INT (count)));
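/* A sketch (not part of GCC) of the add-chain alternative chosen above
   when COUNT additions cost no more than one shift by a constant:  */
#if 0
static unsigned int
shl_by_adds (unsigned int x, int count)
{
  int i;
  for (i = 0; i < count; i++)
    x += x;			/* each add doubles, i.e. shifts left by 1.  */
  return x;
}
#endif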
13225 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
13227 rtx low[2], high[2];
13229 const int single_width = mode == DImode ? 32 : 64;
13231 if (CONST_INT_P (operands[2]))
13233 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
13234 count = INTVAL (operands[2]) & (single_width * 2 - 1);
13236 if (count >= single_width)
13238 emit_move_insn (high[0], low[1]);
13239 emit_move_insn (low[0], const0_rtx);
13241 if (count > single_width)
13242 ix86_expand_ashl_const (high[0], count - single_width, mode);
13246 if (!rtx_equal_p (operands[0], operands[1]))
13247 emit_move_insn (operands[0], operands[1]);
13248 emit_insn ((mode == DImode
13250 : gen_x86_64_shld) (high[0], low[0], GEN_INT (count)));
13251 ix86_expand_ashl_const (low[0], count, mode);
13256 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
13258 if (operands[1] == const1_rtx)
13260 /* Assuming we've chosen QImode-capable registers, then 1 << N
13261 can be done with two 32/64-bit shifts, no branches, no cmoves. */
13262 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
13264 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
13266 ix86_expand_clear (low[0]);
13267 ix86_expand_clear (high[0]);
13268 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (single_width)));
13270 d = gen_lowpart (QImode, low[0]);
13271 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
13272 s = gen_rtx_EQ (QImode, flags, const0_rtx);
13273 emit_insn (gen_rtx_SET (VOIDmode, d, s));
13275 d = gen_lowpart (QImode, high[0]);
13276 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
13277 s = gen_rtx_NE (QImode, flags, const0_rtx);
13278 emit_insn (gen_rtx_SET (VOIDmode, d, s));
13281 /* Otherwise, we can get the same results by manually performing
13282 a bit extract operation on bit 5/6, and then performing the two
13283 shifts. The two methods of getting 0/1 into low/high are exactly
13284 the same size. Avoiding the shift in the bit extract case helps
13285 pentium4 a bit; no one else seems to care much either way. */
13290 if (TARGET_PARTIAL_REG_STALL && !optimize_size)
13291 x = gen_rtx_ZERO_EXTEND (mode == DImode ? SImode : DImode, operands[2]);
13293 x = gen_lowpart (mode == DImode ? SImode : DImode, operands[2]);
13294 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
13296 emit_insn ((mode == DImode
13298 : gen_lshrdi3) (high[0], high[0], GEN_INT (mode == DImode ? 5 : 6)));
13299 emit_insn ((mode == DImode
13301 : gen_anddi3) (high[0], high[0], GEN_INT (1)));
13302 emit_move_insn (low[0], high[0]);
13303 emit_insn ((mode == DImode
13305 : gen_xordi3) (low[0], low[0], GEN_INT (1)));
13308 emit_insn ((mode == DImode
13310 : gen_ashldi3) (low[0], low[0], operands[2]));
13311 emit_insn ((mode == DImode
13313 : gen_ashldi3) (high[0], high[0], operands[2]));
13317 if (operands[1] == constm1_rtx)
13319 /* For -1 << N, we can avoid the shld instruction, because we
13320 know that we're shifting 0...31/63 ones into a -1. */
13321 emit_move_insn (low[0], constm1_rtx);
13323 emit_move_insn (high[0], low[0]);
13325 emit_move_insn (high[0], constm1_rtx);
13329 if (!rtx_equal_p (operands[0], operands[1]))
13330 emit_move_insn (operands[0], operands[1]);
13332 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
13333 emit_insn ((mode == DImode
13335 : gen_x86_64_shld) (high[0], low[0], operands[2]));
13338 emit_insn ((mode == DImode ? gen_ashlsi3 : gen_ashldi3) (low[0], low[0], operands[2]));
13340 if (TARGET_CMOVE && scratch)
13342 ix86_expand_clear (scratch);
13343 emit_insn ((mode == DImode
13344 ? gen_x86_shift_adj_1
13345 : gen_x86_64_shift_adj) (high[0], low[0], operands[2], scratch));
13348 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
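/* A sketch (not part of GCC) of the variable double-word left shift just
   split: shld feeds the bits shifted out of the low word into the high
   word, and the adjustment step fixes up counts of 32 and above, since
   the hardware masks the shift count:  */
#if 0
static void
dw_ashl (unsigned int *lo, unsigned int *hi, unsigned int n)
{
  unsigned int c = n & 31;			/* hardware-masked count.  */
  *hi = (*hi << c) | (c ? *lo >> (32 - c) : 0);	/* shld  */
  *lo <<= c;
  if (n & 32)					/* adjust for n >= 32.  */
    {
      *hi = *lo;
      *lo = 0;
    }
}
#endif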
13352 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
13354 rtx low[2], high[2];
13356 const int single_width = mode == DImode ? 32 : 64;
13358 if (CONST_INT_P (operands[2]))
13360 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
13361 count = INTVAL (operands[2]) & (single_width * 2 - 1);
13363 if (count == single_width * 2 - 1)
13365 emit_move_insn (high[0], high[1]);
13366 emit_insn ((mode == DImode
13368 : gen_ashrdi3) (high[0], high[0],
13369 GEN_INT (single_width - 1)));
13370 emit_move_insn (low[0], high[0]);
13373 else if (count >= single_width)
13375 emit_move_insn (low[0], high[1]);
13376 emit_move_insn (high[0], low[0]);
13377 emit_insn ((mode == DImode
13379 : gen_ashrdi3) (high[0], high[0],
13380 GEN_INT (single_width - 1)));
13381 if (count > single_width)
13382 emit_insn ((mode == DImode
13384 : gen_ashrdi3) (low[0], low[0],
13385 GEN_INT (count - single_width)));
13389 if (!rtx_equal_p (operands[0], operands[1]))
13390 emit_move_insn (operands[0], operands[1]);
13391 emit_insn ((mode == DImode
13393 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
13394 emit_insn ((mode == DImode
13396 : gen_ashrdi3) (high[0], high[0], GEN_INT (count)));
13401 if (!rtx_equal_p (operands[0], operands[1]))
13402 emit_move_insn (operands[0], operands[1]);
13404 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
13406 emit_insn ((mode == DImode
13408 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
13409 emit_insn ((mode == DImode
13411 : gen_ashrdi3) (high[0], high[0], operands[2]));
13413 if (TARGET_CMOVE && scratch)
13415 emit_move_insn (scratch, high[0]);
13416 emit_insn ((mode == DImode
13418 : gen_ashrdi3) (scratch, scratch,
13419 GEN_INT (single_width - 1)));
13420 emit_insn ((mode == DImode
13421 ? gen_x86_shift_adj_1
13422 : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
13426 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
13431 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
13433 rtx low[2], high[2];
13435 const int single_width = mode == DImode ? 32 : 64;
13437 if (CONST_INT_P (operands[2]))
13439 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
13440 count = INTVAL (operands[2]) & (single_width * 2 - 1);
13442 if (count >= single_width)
13444 emit_move_insn (low[0], high[1]);
13445 ix86_expand_clear (high[0]);
13447 if (count > single_width)
13448 emit_insn ((mode == DImode
13450 : gen_lshrdi3) (low[0], low[0],
13451 GEN_INT (count - single_width)));
13455 if (!rtx_equal_p (operands[0], operands[1]))
13456 emit_move_insn (operands[0], operands[1]);
13457 emit_insn ((mode == DImode
13459 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
13460 emit_insn ((mode == DImode
13462 : gen_lshrdi3) (high[0], high[0], GEN_INT (count)));
13467 if (!rtx_equal_p (operands[0], operands[1]))
13468 emit_move_insn (operands[0], operands[1]);
13470 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
13472 emit_insn ((mode == DImode
13474 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
13475 emit_insn ((mode == DImode
13477 : gen_lshrdi3) (high[0], high[0], operands[2]));
13479 /* Heh. By reversing the arguments, we can reuse this pattern. */
13480 if (TARGET_CMOVE && scratch)
13482 ix86_expand_clear (scratch);
13483 emit_insn ((mode == DImode
13484 ? gen_x86_shift_adj_1
13485 : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
13489 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
13493 /* Predict the just-emitted jump instruction to be taken with probability PROB. */
13495 predict_jump (int prob)
13497 rtx insn = get_last_insn ();
13498 gcc_assert (JUMP_P (insn));
13500 = gen_rtx_EXPR_LIST (REG_BR_PROB,
13505 /* Helper function for the string operations below.  Test whether
13506 VARIABLE is aligned to VALUE bytes.  If so, jump to the label. */
13508 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
13510 rtx label = gen_label_rtx ();
13511 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
13512 if (GET_MODE (variable) == DImode)
13513 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
13515 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
13516 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
13519 predict_jump (REG_BR_PROB_BASE * 50 / 100);
13521 predict_jump (REG_BR_PROB_BASE * 90 / 100);
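/* A minimal C model of the test emitted above (an illustration, not the
   expander itself): the jump to the returned label is taken when VARIABLE
   is already aligned, and the prediction just attached says how likely
   that is (50% in epilogues, 90% in prologues).  */
#if 0
if ((variable & value) == 0)   /* aligned: nothing to peel off */
  goto label;
#endif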
13525 /* Adjust COUNTER by the VALUE. */
13527 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
13529 if (GET_MODE (countreg) == DImode)
13530 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
13532 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
13535 /* Zero-extend the possibly-SImode EXP to a Pmode register. */
13537 ix86_zero_extend_to_Pmode (rtx exp)
13540 if (GET_MODE (exp) == VOIDmode)
13541 return force_reg (Pmode, exp);
13542 if (GET_MODE (exp) == Pmode)
13543 return copy_to_mode_reg (Pmode, exp);
13544 r = gen_reg_rtx (Pmode);
13545 emit_insn (gen_zero_extendsidi2 (r, exp));
13549 /* Divide COUNTREG by SCALE. */
13551 scale_counter (rtx countreg, int scale)
13554 rtx piece_size_mask;
13558 if (CONST_INT_P (countreg))
13559 return GEN_INT (INTVAL (countreg) / scale);
13560 gcc_assert (REG_P (countreg));
13562 piece_size_mask = GEN_INT (scale - 1);
13563 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
13564 GEN_INT (exact_log2 (scale)),
13565 NULL, 1, OPTAB_DIRECT);
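/* For example, with SImode chunks (scale == 4) a constant count of 25
   becomes GEN_INT (6), while a runtime count is shifted right by
   exact_log2 (4) == 2; the remaining 25 % 4 bytes are left for the
   epilogue code to handle.  */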
13569 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
13570 DImode for constant loop counts. */
13572 static enum machine_mode
13573 counter_mode (rtx count_exp)
13575 if (GET_MODE (count_exp) != VOIDmode)
13576 return GET_MODE (count_exp);
13577 if (GET_CODE (count_exp) != CONST_INT)
13579 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
13584 /* When SRCPTR is non-NULL, output a simple loop to move memory
13585 pointed to by SRCPTR to DESTPTR via chunks of MODE, unrolled UNROLL times;
13586 the overall size is COUNT, specified in bytes. When SRCPTR is NULL, output the
13587 equivalent loop to set memory by VALUE (supposed to be in MODE).
13589 The size is rounded down to a whole number of chunks moved at once.
13590 SRCMEM and DESTMEM provide MEM rtx to feed proper aliasing info. */
13594 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
13595 rtx destptr, rtx srcptr, rtx value,
13596 rtx count, enum machine_mode mode, int unroll,
13599 rtx out_label, top_label, iter, tmp;
13600 enum machine_mode iter_mode = counter_mode (count);
13601 rtx piece_size = GEN_INT (GET_MODE_SIZE (mode) * unroll);
13602 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
13608 top_label = gen_label_rtx ();
13609 out_label = gen_label_rtx ();
13610 iter = gen_reg_rtx (iter_mode);
13612 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
13613 NULL, 1, OPTAB_DIRECT);
13614 /* Those two should combine. */
13615 if (piece_size == const1_rtx)
13617 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
13619 predict_jump (REG_BR_PROB_BASE * 10 / 100);
13621 emit_move_insn (iter, const0_rtx);
13623 emit_label (top_label);
13625 tmp = convert_modes (Pmode, iter_mode, iter, true);
13626 x_addr = gen_rtx_PLUS (Pmode, destptr, tmp);
13627 destmem = change_address (destmem, mode, x_addr);
13631 y_addr = gen_rtx_PLUS (Pmode, srcptr, copy_rtx (tmp));
13632 srcmem = change_address (srcmem, mode, y_addr);
13634 /* When unrolling for chips that reorder memory reads and writes,
13635 we can save registers by using a single temporary.
13636 Also, using 4 temporaries is overkill in 32-bit mode. */
13637 if (!TARGET_64BIT && 0)
13639 for (i = 0; i < unroll; i++)
13644 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
13646 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
13648 emit_move_insn (destmem, srcmem);
13654 gcc_assert (unroll <= 4);
13655 for (i = 0; i < unroll; i++)
13657 tmpreg[i] = gen_reg_rtx (mode);
13661 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
13663 emit_move_insn (tmpreg[i], srcmem);
13665 for (i = 0; i < unroll; i++)
13670 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
13672 emit_move_insn (destmem, tmpreg[i]);
13677 for (i = 0; i < unroll; i++)
13681 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
13682 emit_move_insn (destmem, value);
13685 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
13686 true, OPTAB_LIB_WIDEN);
13688 emit_move_insn (iter, tmp);
13690 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
13692 if (expected_size != -1)
13694 expected_size /= GET_MODE_SIZE (mode) * unroll;
13695 if (expected_size == 0)
13697 else if (expected_size > REG_BR_PROB_BASE)
13698 predict_jump (REG_BR_PROB_BASE - 1);
13700 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
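/* Worked example: REG_BR_PROB_BASE is 10000, so for expected_size == 20
   iterations this computes 10000 - (10000 + 10) / 20 == 9500, i.e. the
   backward branch is predicted taken 95% of the time, matching a
   one-in-twenty chance of exiting on each iteration.  */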
13703 predict_jump (REG_BR_PROB_BASE * 80 / 100);
13704 iter = ix86_zero_extend_to_Pmode (iter);
13705 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
13706 true, OPTAB_LIB_WIDEN);
13707 if (tmp != destptr)
13708 emit_move_insn (destptr, tmp);
13711 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
13712 true, OPTAB_LIB_WIDEN);
13714 emit_move_insn (srcptr, tmp);
13716 emit_label (out_label);
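/* Illustrative sketch (an assumption about the emitted shape, not part of
   the compiler; dest/src/count stand in for the pointer and counter
   registers, CHUNK/UNROLL for GET_MODE_SIZE (mode) and unroll):  */
#if 0
size = count & ~(CHUNK * UNROLL - 1);   /* piece_size_mask */
if (size == 0)     /* guard emitted only for 1-byte pieces, where the
                      callers cannot rule out count == 0 */
  goto out;
iter = 0;
do
  {
    /* UNROLL moves of CHUNK bytes each, or stores of VALUE for memset.  */
    memcpy (dest + iter, src + iter, CHUNK * UNROLL);
    iter += CHUNK * UNROLL;
  }
while (iter < size);
dest += iter;
src += iter;
out:;
#endif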
13719 /* Output "rep; mov" instruction.
13720 Arguments have the same meaning as for the previous function. */
13722 expand_movmem_via_rep_mov (rtx destmem, rtx srcmem,
13723 rtx destptr, rtx srcptr,
13725 enum machine_mode mode)
13731 /* If the size is known, it is shorter to use rep movs. */
13732 if (mode == QImode && CONST_INT_P (count)
13733 && !(INTVAL (count) & 3))
13736 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
13737 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
13738 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
13739 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
13740 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
13741 if (mode != QImode)
13743 destexp = gen_rtx_ASHIFT (Pmode, countreg,
13744 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
13745 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
13746 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
13747 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
13748 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
13752 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
13753 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
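/* What the rep-move insn emitted below amounts to, in C terms (an
   illustration; chunk_t stands for the QImode/SImode/DImode chunk).
   COUNTREG is the number of chunks, and DESTEXP/SRCEXP describe the final
   pointer values so the side effects are visible to the RTL optimizers.  */
#if 0
while (countreg--)             /* rep; movs{b,l,q} */
  {
    *(chunk_t *) destptr = *(chunk_t *) srcptr;
    destptr += sizeof (chunk_t);
    srcptr += sizeof (chunk_t);
  }
#endif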
13755 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
13759 /* Output "rep; stos" instruction.
13760 Arguments have the same meaning as for the previous function. */
13762 expand_setmem_via_rep_stos (rtx destmem, rtx destptr, rtx value,
13764 enum machine_mode mode)
13769 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
13770 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
13771 value = force_reg (mode, gen_lowpart (mode, value));
13772 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
13773 if (mode != QImode)
13775 destexp = gen_rtx_ASHIFT (Pmode, countreg,
13776 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
13777 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
13780 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
13781 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
13785 emit_strmov (rtx destmem, rtx srcmem,
13786 rtx destptr, rtx srcptr, enum machine_mode mode, int offset)
13788 rtx src = adjust_automodify_address_nv (srcmem, mode, srcptr, offset);
13789 rtx dest = adjust_automodify_address_nv (destmem, mode, destptr, offset);
13790 emit_insn (gen_strmov (destptr, dest, srcptr, src));
13793 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
13795 expand_movmem_epilogue (rtx destmem, rtx srcmem,
13796 rtx destptr, rtx srcptr, rtx count, int max_size)
13799 if (CONST_INT_P (count))
13801 HOST_WIDE_INT countval = INTVAL (count);
13804 if ((countval & 0x10) && max_size > 16)
13808 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
13809 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset + 8);
13812 gcc_unreachable ();
13815 if ((countval & 0x08) && max_size > 8)
13818 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
13821 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
13822 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset + 4);
13826 if ((countval & 0x04) && max_size > 4)
13828 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
13831 if ((countval & 0x02) && max_size > 2)
13833 emit_strmov (destmem, srcmem, destptr, srcptr, HImode, offset);
13836 if ((countval & 0x01) && max_size > 1)
13838 emit_strmov (destmem, srcmem, destptr, srcptr, QImode, offset);
13845 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
13846 count, 1, OPTAB_DIRECT);
13847 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
13848 count, QImode, 1, 4);
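/* Illustrative sketch of the constant-count branch above (not GCC code;
   d/s stand for the destination and source pointers, and memcpy for the
   emitted strmov pairs).  A 13-byte tail with max_size == 16 decomposes
   as 8 + 4 + 1:  */
#if 0
if (countval & 8) { memcpy (d, s, 8); d += 8; s += 8; }
if (countval & 4) { memcpy (d, s, 4); d += 4; s += 4; }
if (countval & 2) { memcpy (d, s, 2); d += 2; s += 2; }
if (countval & 1) *d = *s;
#endif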
13852 /* When there are stringops, we can cheaply increase dest and src pointers.
13853 Otherwise we save code size by maintaining an offset (zero is readily
13854 available from the preceding rep operation) and using x86 addressing modes.
13856 if (TARGET_SINGLE_STRINGOP)
13860 rtx label = ix86_expand_aligntest (count, 4, true);
13861 src = change_address (srcmem, SImode, srcptr);
13862 dest = change_address (destmem, SImode, destptr);
13863 emit_insn (gen_strmov (destptr, dest, srcptr, src));
13864 emit_label (label);
13865 LABEL_NUSES (label) = 1;
13869 rtx label = ix86_expand_aligntest (count, 2, true);
13870 src = change_address (srcmem, HImode, srcptr);
13871 dest = change_address (destmem, HImode, destptr);
13872 emit_insn (gen_strmov (destptr, dest, srcptr, src));
13873 emit_label (label);
13874 LABEL_NUSES (label) = 1;
13878 rtx label = ix86_expand_aligntest (count, 1, true);
13879 src = change_address (srcmem, QImode, srcptr);
13880 dest = change_address (destmem, QImode, destptr);
13881 emit_insn (gen_strmov (destptr, dest, srcptr, src));
13882 emit_label (label);
13883 LABEL_NUSES (label) = 1;
13888 rtx offset = force_reg (Pmode, const0_rtx);
13893 rtx label = ix86_expand_aligntest (count, 4, true);
13894 src = change_address (srcmem, SImode, srcptr);
13895 dest = change_address (destmem, SImode, destptr);
13896 emit_move_insn (dest, src);
13897 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
13898 true, OPTAB_LIB_WIDEN);
13900 emit_move_insn (offset, tmp);
13901 emit_label (label);
13902 LABEL_NUSES (label) = 1;
13906 rtx label = ix86_expand_aligntest (count, 2, true);
13907 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
13908 src = change_address (srcmem, HImode, tmp);
13909 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
13910 dest = change_address (destmem, HImode, tmp);
13911 emit_move_insn (dest, src);
13912 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
13913 true, OPTAB_LIB_WIDEN);
13915 emit_move_insn (offset, tmp);
13916 emit_label (label);
13917 LABEL_NUSES (label) = 1;
13921 rtx label = ix86_expand_aligntest (count, 1, true);
13922 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
13923 src = change_address (srcmem, QImode, tmp);
13924 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
13925 dest = change_address (destmem, QImode, tmp);
13926 emit_move_insn (dest, src);
13927 emit_label (label);
13928 LABEL_NUSES (label) = 1;
13933 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
13935 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
13936 rtx count, int max_size)
13939 expand_simple_binop (counter_mode (count), AND, count,
13940 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
13941 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
13942 gen_lowpart (QImode, value), count, QImode,
13946 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
13948 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx count, int max_size)
13952 if (CONST_INT_P (count))
13954 HOST_WIDE_INT countval = INTVAL (count);
13957 if ((countval & 0x10) && max_size > 16)
13961 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
13962 emit_insn (gen_strset (destptr, dest, value));
13963 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset + 8);
13964 emit_insn (gen_strset (destptr, dest, value));
13967 gcc_unreachable ();
13970 if ((countval & 0x08) && max_size > 8)
13974 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
13975 emit_insn (gen_strset (destptr, dest, value));
13979 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
13980 emit_insn (gen_strset (destptr, dest, value));
13981 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset + 4);
13982 emit_insn (gen_strset (destptr, dest, value));
13986 if ((countval & 0x04) && max_size > 4)
13988 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
13989 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
13992 if ((countval & 0x02) && max_size > 2)
13994 dest = adjust_automodify_address_nv (destmem, HImode, destptr, offset);
13995 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
13998 if ((countval & 0x01) && max_size > 1)
14000 dest = adjust_automodify_address_nv (destmem, QImode, destptr, offset);
14001 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
14008 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
14013 rtx label = ix86_expand_aligntest (count, 16, true);
14016 dest = change_address (destmem, DImode, destptr);
14017 emit_insn (gen_strset (destptr, dest, value));
14018 emit_insn (gen_strset (destptr, dest, value));
14022 dest = change_address (destmem, SImode, destptr);
14023 emit_insn (gen_strset (destptr, dest, value));
14024 emit_insn (gen_strset (destptr, dest, value));
14025 emit_insn (gen_strset (destptr, dest, value));
14026 emit_insn (gen_strset (destptr, dest, value));
14028 emit_label (label);
14029 LABEL_NUSES (label) = 1;
14033 rtx label = ix86_expand_aligntest (count, 8, true);
14036 dest = change_address (destmem, DImode, destptr);
14037 emit_insn (gen_strset (destptr, dest, value));
14041 dest = change_address (destmem, SImode, destptr);
14042 emit_insn (gen_strset (destptr, dest, value));
14043 emit_insn (gen_strset (destptr, dest, value));
14045 emit_label (label);
14046 LABEL_NUSES (label) = 1;
14050 rtx label = ix86_expand_aligntest (count, 4, true);
14051 dest = change_address (destmem, SImode, destptr);
14052 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
14053 emit_label (label);
14054 LABEL_NUSES (label) = 1;
14058 rtx label = ix86_expand_aligntest (count, 2, true);
14059 dest = change_address (destmem, HImode, destptr);
14060 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
14061 emit_label (label);
14062 LABEL_NUSES (label) = 1;
14066 rtx label = ix86_expand_aligntest (count, 1, true);
14067 dest = change_address (destmem, QImode, destptr);
14068 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
14069 emit_label (label);
14070 LABEL_NUSES (label) = 1;
14074 /* Copy enough from SRC to DEST to align DEST, known to be aligned by ALIGN,
14075 to DESIRED_ALIGNMENT. */
14077 expand_movmem_prologue (rtx destmem, rtx srcmem,
14078 rtx destptr, rtx srcptr, rtx count,
14079 int align, int desired_alignment)
14081 if (align <= 1 && desired_alignment > 1)
14083 rtx label = ix86_expand_aligntest (destptr, 1, false);
14084 srcmem = change_address (srcmem, QImode, srcptr);
14085 destmem = change_address (destmem, QImode, destptr);
14086 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
14087 ix86_adjust_counter (count, 1);
14088 emit_label (label);
14089 LABEL_NUSES (label) = 1;
14091 if (align <= 2 && desired_alignment > 2)
14093 rtx label = ix86_expand_aligntest (destptr, 2, false);
14094 srcmem = change_address (srcmem, HImode, srcptr);
14095 destmem = change_address (destmem, HImode, destptr);
14096 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
14097 ix86_adjust_counter (count, 2);
14098 emit_label (label);
14099 LABEL_NUSES (label) = 1;
14101 if (align <= 4 && desired_alignment > 4)
14103 rtx label = ix86_expand_aligntest (destptr, 4, false);
14104 srcmem = change_address (srcmem, SImode, srcptr);
14105 destmem = change_address (destmem, SImode, destptr);
14106 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
14107 ix86_adjust_counter (count, 4);
14108 emit_label (label);
14109 LABEL_NUSES (label) = 1;
14111 gcc_assert (desired_alignment <= 8);
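/* A C model of the prologue just emitted (illustrative; d/s/count are
   stand-ins for the destination, source and counter registers), aligning
   from ALIGN == 1 up to DESIRED_ALIGNMENT == 8:  */
#if 0
if ((uintptr_t) d & 1) { *d++ = *s++; count -= 1; }
if ((uintptr_t) d & 2) { memcpy (d, s, 2); d += 2; s += 2; count -= 2; }
if ((uintptr_t) d & 4) { memcpy (d, s, 4); d += 4; s += 4; count -= 4; }
#endif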
14114 /* Set enough of DEST, known to be aligned by ALIGN, to align it
14115 to DESIRED_ALIGNMENT. */
14117 expand_setmem_prologue (rtx destmem, rtx destptr, rtx value, rtx count,
14118 int align, int desired_alignment)
14120 if (align <= 1 && desired_alignment > 1)
14122 rtx label = ix86_expand_aligntest (destptr, 1, false);
14123 destmem = change_address (destmem, QImode, destptr);
14124 emit_insn (gen_strset (destptr, destmem, gen_lowpart (QImode, value)));
14125 ix86_adjust_counter (count, 1);
14126 emit_label (label);
14127 LABEL_NUSES (label) = 1;
14129 if (align <= 2 && desired_alignment > 2)
14131 rtx label = ix86_expand_aligntest (destptr, 2, false);
14132 destmem = change_address (destmem, HImode, destptr);
14133 emit_insn (gen_strset (destptr, destmem, gen_lowpart (HImode, value)));
14134 ix86_adjust_counter (count, 2);
14135 emit_label (label);
14136 LABEL_NUSES (label) = 1;
14138 if (align <= 4 && desired_alignment > 4)
14140 rtx label = ix86_expand_aligntest (destptr, 4, false);
14141 destmem = change_address (destmem, SImode, destptr);
14142 emit_insn (gen_strset (destptr, destmem, gen_lowpart (SImode, value)));
14143 ix86_adjust_counter (count, 4);
14144 emit_label (label);
14145 LABEL_NUSES (label) = 1;
14147 gcc_assert (desired_alignment <= 8);
14150 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
14151 static enum stringop_alg
14152 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
14153 int *dynamic_check)
14155 const struct stringop_algs * algs;
14157 *dynamic_check = -1;
14159 algs = &ix86_cost->memset[TARGET_64BIT != 0];
14161 algs = &ix86_cost->memcpy[TARGET_64BIT != 0];
14162 if (stringop_alg != no_stringop)
14163 return stringop_alg;
14164 /* rep; movq or rep; movl is the smallest variant. */
14165 else if (optimize_size)
14167 if (!count || (count & 3))
14168 return rep_prefix_1_byte;
14170 return rep_prefix_4_byte;
14172 /* Very tiny blocks are best handled via the loop; REP is expensive to set up.
14174 else if (expected_size != -1 && expected_size < 4)
14175 return loop_1_byte;
14176 else if (expected_size != -1)
14179 enum stringop_alg alg = libcall;
14180 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
14182 gcc_assert (algs->size[i].max);
14183 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
14185 if (algs->size[i].alg != libcall)
14186 alg = algs->size[i].alg;
14187 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
14188 the last non-libcall inline algorithm. */
14189 if (TARGET_INLINE_ALL_STRINGOPS)
14191 /* When the current size is best copied by a libcall,
14192 but we are still forced to inline, run the heuristic below
14193 that will pick the code for medium-sized blocks. */
14194 if (alg != libcall)
14199 return algs->size[i].alg;
14202 gcc_assert (TARGET_INLINE_ALL_STRINGOPS);
14204 /* When asked to inline the call anyway, try to pick a meaningful choice.
14205 We look for the maximal size of block that is faster to copy by hand and
14206 take blocks of at most that size, guessing that the average size will
14207 be roughly half of the block.
14209 If this turns out to be bad, we might simply specify the preferred
14210 choice in ix86_costs. */
14211 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
14212 && algs->unknown_size == libcall)
14215 enum stringop_alg alg;
14218 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
14219 if (algs->size[i].alg != libcall && algs->size[i].alg)
14220 max = algs->size[i].max;
14223 alg = decide_alg (count, max / 2, memset, dynamic_check);
14224 gcc_assert (*dynamic_check == -1);
14225 gcc_assert (alg != libcall);
14226 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
14227 *dynamic_check = max;
14230 return algs->unknown_size;
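/* Illustrative example (the sizes and algorithms are made up; only the
   shape, which mirrors DUMMY_STRINGOP_ALGS, is real): an entry lists
   {max, alg} pairs scanned in order, the first pair whose max covers the
   expected size wins, and max == -1 acts as a catch-all.  */
#if 0
static const struct stringop_algs example =
  {libcall, {{24, loop}, {128, rep_prefix_4_byte}, {-1, libcall}}};
/* expected_size 16   -> loop
   expected_size 100  -> rep_prefix_4_byte
   expected_size 4096 -> libcall, unless forced to inline  */
#endif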
14233 /* Decide on alignment. We know that the operand is already aligned to ALIGN
14234 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
14236 decide_alignment (int align,
14237 enum stringop_alg alg,
14240 int desired_align = 0;
14244 gcc_unreachable ();
14246 case unrolled_loop:
14247 desired_align = GET_MODE_SIZE (Pmode);
14249 case rep_prefix_8_byte:
14252 case rep_prefix_4_byte:
14253 /* PentiumPro has special logic triggering for 8-byte-aligned blocks,
14254 copying a whole cache line at once. */
14255 if (TARGET_PENTIUMPRO)
14260 case rep_prefix_1_byte:
14261 /* PentiumPro has special logic triggering for 8-byte-aligned blocks,
14262 copying a whole cache line at once. */
14263 if (TARGET_PENTIUMPRO)
14277 if (desired_align < align)
14278 desired_align = align;
14279 if (expected_size != -1 && expected_size < 4)
14280 desired_align = align;
14281 return desired_align;
14284 /* Return the smallest power of 2 greater than VAL. */
14286 smallest_pow2_greater_than (int val)
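/* Illustrative values, assuming the usual "ret = 1; while (ret <= val)
   ret <<= 1;" style of implementation: 0 -> 1, 3 -> 4, 4 -> 8, 8 -> 16.
   Note the strict "greater than": an exact power of two is bumped to the
   next one.  */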
14294 /* Expand string move (memcpy) operation. Use i386 string operations when
14295 profitable. ix86_expand_setmem contains similar code. The code depends upon
14296 the architecture, block size and alignment, but always has the same
14299 1) Prologue guard: a conditional that jumps up to the epilogue for small
14300 blocks that can be handled by the epilogue alone. This is faster, but
14301 also needed for correctness, since the prologue assumes the block is
14302 larger than the desired alignment.
14304 Optional dynamic check for size and libcall for large
14305 blocks is emitted here too, with -minline-stringops-dynamically.
14307 2) Prologue: copy first few bytes in order to get destination aligned
14308 to DESIRED_ALIGN. It is emitted only when ALIGN is less than
14309 DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be copied.
14310 We emit either a jump tree for power-of-two-sized blocks, or a byte loop.
14312 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
14313 with specified algorithm.
14315 4) Epilogue: code copying the tail of the block that is too small to be
14316 handled by the main body (or up to the size guarded by the prologue guard). */
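/* Illustrative shape of the emitted code, as a sketch (assuming a
   rep_prefix_4_byte main loop and desired alignment 4; rep_movsl is a
   stand-in for the string insn):

     if (count < epilogue_size_needed) goto epilogue;      1) guard
     while ((uintptr_t) dst & 3)                           2) prologue
       { *dst++ = *src++; count--; }
     rep_movsl (&dst, &src, count >> 2);                   3) main body
     count &= 3;
   epilogue:                                               4) epilogue
     while (count--) *dst++ = *src++;  */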
14319 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
14320 rtx expected_align_exp, rtx expected_size_exp)
14326 rtx jump_around_label = NULL;
14327 HOST_WIDE_INT align = 1;
14328 unsigned HOST_WIDE_INT count = 0;
14329 HOST_WIDE_INT expected_size = -1;
14330 int size_needed = 0, epilogue_size_needed;
14331 int desired_align = 0;
14332 enum stringop_alg alg;
14335 if (CONST_INT_P (align_exp))
14336 align = INTVAL (align_exp);
14337 /* The i386 can do misaligned access at a reasonably increased cost. */
14338 if (CONST_INT_P (expected_align_exp)
14339 && INTVAL (expected_align_exp) > align)
14340 align = INTVAL (expected_align_exp);
14341 if (CONST_INT_P (count_exp))
14342 count = expected_size = INTVAL (count_exp);
14343 if (CONST_INT_P (expected_size_exp) && count == 0)
14344 expected_size = INTVAL (expected_size_exp);
14346 /* Step 0: Decide on the preferred algorithm, desired alignment and
14347 size of chunks to be copied by the main loop. */
14349 alg = decide_alg (count, expected_size, false, &dynamic_check);
14350 desired_align = decide_alignment (align, alg, expected_size);
14352 if (!TARGET_ALIGN_STRINGOPS)
14353 align = desired_align;
14355 if (alg == libcall)
14357 gcc_assert (alg != no_stringop);
14359 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
14360 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
14361 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
14366 gcc_unreachable ();
14368 size_needed = GET_MODE_SIZE (Pmode);
14370 case unrolled_loop:
14371 size_needed = GET_MODE_SIZE (Pmode) * (TARGET_64BIT ? 4 : 2);
14373 case rep_prefix_8_byte:
14376 case rep_prefix_4_byte:
14379 case rep_prefix_1_byte:
14385 epilogue_size_needed = size_needed;
14387 /* Step 1: Prologue guard. */
14389 /* Alignment code needs count to be in a register. */
14390 if (CONST_INT_P (count_exp) && desired_align > align)
14392 enum machine_mode mode = SImode;
14393 if (TARGET_64BIT && (count & ~0xffffffff))
14395 count_exp = force_reg (mode, count_exp);
14397 gcc_assert (desired_align >= 1 && align >= 1);
14399 /* Ensure that the alignment prologue won't copy past the end of the block. */
14400 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
14402 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
14403 /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
14404 Make sure it is a power of 2. */
14405 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
14407 label = gen_label_rtx ();
14408 emit_cmp_and_jump_insns (count_exp,
14409 GEN_INT (epilogue_size_needed),
14410 LTU, 0, counter_mode (count_exp), 1, label);
14411 if (CONST_INT_P (count_exp))
14413 else if (expected_size == -1 || expected_size < epilogue_size_needed)
14414 predict_jump (REG_BR_PROB_BASE * 60 / 100);
14416 predict_jump (REG_BR_PROB_BASE * 20 / 100);
14418 /* Emit code to decide at runtime whether a library call or inline code should be used.
14420 if (dynamic_check != -1)
14422 rtx hot_label = gen_label_rtx ();
14423 jump_around_label = gen_label_rtx ();
14424 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
14425 LEU, 0, GET_MODE (count_exp), 1, hot_label);
14426 predict_jump (REG_BR_PROB_BASE * 90 / 100);
14427 emit_block_move_via_libcall (dst, src, count_exp, false);
14428 emit_jump (jump_around_label);
14429 emit_label (hot_label);
14432 /* Step 2: Alignment prologue. */
14434 if (desired_align > align)
14436 /* Except for the first move in the epilogue, we no longer know
14437 the constant offset in aliasing info. It doesn't seem worth
14438 the pain to maintain it for the first move, so throw the info away early.
14440 src = change_address (src, BLKmode, srcreg);
14441 dst = change_address (dst, BLKmode, destreg);
14442 expand_movmem_prologue (dst, src, destreg, srcreg, count_exp, align,
14445 if (label && size_needed == 1)
14447 emit_label (label);
14448 LABEL_NUSES (label) = 1;
14452 /* Step 3: Main loop. */
14458 gcc_unreachable ();
14460 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
14461 count_exp, QImode, 1, expected_size);
14464 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
14465 count_exp, Pmode, 1, expected_size);
14467 case unrolled_loop:
14468 /* Unroll only by a factor of 2 in 32-bit mode, since we don't have enough
14469 registers for 4 temporaries anyway. */
14470 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
14471 count_exp, Pmode, TARGET_64BIT ? 4 : 2,
14474 case rep_prefix_8_byte:
14475 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
14478 case rep_prefix_4_byte:
14479 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
14482 case rep_prefix_1_byte:
14483 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
14487 /* Properly adjust the offsets of SRC and DEST memory for aliasing. */
14488 if (CONST_INT_P (count_exp))
14490 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
14491 (count / size_needed) * size_needed);
14492 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
14493 (count / size_needed) * size_needed);
14497 src = change_address (src, BLKmode, srcreg);
14498 dst = change_address (dst, BLKmode, destreg);
14501 /* Step 4: Epilogue to copy the remaining bytes. */
14505 /* When the main loop is done, COUNT_EXP might hold the original count,
14506 while we want to copy only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
14507 Epilogue code will actually copy COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
14508 bytes. Compensate if needed. */
14510 if (size_needed < epilogue_size_needed)
14513 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
14514 GEN_INT (size_needed - 1), count_exp, 1,
14516 if (tmp != count_exp)
14517 emit_move_insn (count_exp, tmp);
14519 emit_label (label);
14520 LABEL_NUSES (label) = 1;
14523 if (count_exp != const0_rtx && epilogue_size_needed > 1)
14524 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
14525 epilogue_size_needed);
14526 if (jump_around_label)
14527 emit_label (jump_around_label);
14531 /* Helper function for memset. For a QImode value 0xXY produce
14532 0xXYXYXYXY of the width specified by MODE. This is essentially
14533 a * 0x01010101, but we can do slightly better than
14534 synth_mult by unwinding the sequence by hand on CPUs with slow multiply. */
14537 promote_duplicated_reg (enum machine_mode mode, rtx val)
14539 enum machine_mode valmode = GET_MODE (val);
14541 int nops = mode == DImode ? 3 : 2;
14543 gcc_assert (mode == SImode || mode == DImode);
14544 if (val == const0_rtx)
14545 return copy_to_mode_reg (mode, const0_rtx);
14546 if (CONST_INT_P (val))
14548 HOST_WIDE_INT v = INTVAL (val) & 255;
14552 if (mode == DImode)
14553 v |= (v << 16) << 16;
14554 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
14557 if (valmode == VOIDmode)
14559 if (valmode != QImode)
14560 val = gen_lowpart (QImode, val);
14561 if (mode == QImode)
14563 if (!TARGET_PARTIAL_REG_STALL)
14565 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
14566 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
14567 <= (ix86_cost->shift_const + ix86_cost->add) * nops
14568 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
14570 rtx reg = convert_modes (mode, QImode, val, true);
14571 tmp = promote_duplicated_reg (mode, const1_rtx);
14572 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
14577 rtx reg = convert_modes (mode, QImode, val, true);
14579 if (!TARGET_PARTIAL_REG_STALL)
14580 if (mode == SImode)
14581 emit_insn (gen_movsi_insv_1 (reg, reg));
14583 emit_insn (gen_movdi_insv_1_rex64 (reg, reg));
14586 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
14587 NULL, 1, OPTAB_DIRECT);
14589 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
14591 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
14592 NULL, 1, OPTAB_DIRECT);
14593 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
14594 if (mode == SImode)
14596 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
14597 NULL, 1, OPTAB_DIRECT);
14598 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
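/* Worked example of the shift/or unwinding above, for SImode and the
   QImode value 0xab:
     v  = 0x000000ab
     v |= v << 8;    ->  0x0000abab
     v |= v << 16;   ->  0xabababab
   DImode adds one more step, v |= v << 32.  */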
14603 /* Duplicate value VAL using promote_duplicated_reg into the maximal size that
14604 will be needed by the main loop copying SIZE_NEEDED chunks and by the prologue
14605 getting the alignment from ALIGN to DESIRED_ALIGN. */
14607 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align, int align)
14612 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
14613 promoted_val = promote_duplicated_reg (DImode, val);
14614 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
14615 promoted_val = promote_duplicated_reg (SImode, val);
14616 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
14617 promoted_val = promote_duplicated_reg (HImode, val);
14619 promoted_val = val;
14621 return promoted_val;
14624 /* Expand a string set operation (memset). Use i386 string operations when
14625 profitable. See the ix86_expand_movmem comment for an explanation of the
14626 individual steps performed. */
14628 ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
14629 rtx expected_align_exp, rtx expected_size_exp)
14634 rtx jump_around_label = NULL;
14635 HOST_WIDE_INT align = 1;
14636 unsigned HOST_WIDE_INT count = 0;
14637 HOST_WIDE_INT expected_size = -1;
14638 int size_needed = 0, epilogue_size_needed;
14639 int desired_align = 0;
14640 enum stringop_alg alg;
14641 rtx promoted_val = NULL;
14642 bool force_loopy_epilogue = false;
14645 if (CONST_INT_P (align_exp))
14646 align = INTVAL (align_exp);
14647 /* The i386 can do misaligned access at a reasonably increased cost. */
14648 if (CONST_INT_P (expected_align_exp)
14649 && INTVAL (expected_align_exp) > align)
14650 align = INTVAL (expected_align_exp);
14651 if (CONST_INT_P (count_exp))
14652 count = expected_size = INTVAL (count_exp);
14653 if (CONST_INT_P (expected_size_exp) && count == 0)
14654 expected_size = INTVAL (expected_size_exp);
14656 /* Step 0: Decide on the preferred algorithm, desired alignment and
14657 size of chunks to be copied by the main loop. */
14659 alg = decide_alg (count, expected_size, true, &dynamic_check);
14660 desired_align = decide_alignment (align, alg, expected_size);
14662 if (!TARGET_ALIGN_STRINGOPS)
14663 align = desired_align;
14665 if (alg == libcall)
14667 gcc_assert (alg != no_stringop);
14669 count_exp = copy_to_mode_reg (counter_mode (count_exp), count_exp);
14670 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
14675 gcc_unreachable ();
14677 size_needed = GET_MODE_SIZE (Pmode);
14679 case unrolled_loop:
14680 size_needed = GET_MODE_SIZE (Pmode) * 4;
14682 case rep_prefix_8_byte:
14685 case rep_prefix_4_byte:
14688 case rep_prefix_1_byte:
14693 epilogue_size_needed = size_needed;
14695 /* Step 1: Prologue guard. */
14697 /* Alignment code needs count to be in a register. */
14698 if (CONST_INT_P (count_exp) && desired_align > align)
14700 enum machine_mode mode = SImode;
14701 if (TARGET_64BIT && (count & ~0xffffffff))
14703 count_exp = force_reg (mode, count_exp);
14705 /* Do the cheap promotion to allow better CSE across the
14706 main loop and epilogue (i.e. one load of the big constant in
14707 front of all code). */
14708 if (CONST_INT_P (val_exp))
14709 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
14710 desired_align, align);
14711 /* Ensure that the alignment prologue won't copy past the end of the block. */
14712 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
14714 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
14715 /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
14716 Make sure it is a power of 2. */
14717 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
14719 /* To improve performance of small blocks, we jump around the VAL
14720 promoting code. This means that if the promoted VAL is not constant,
14721 we might not use it in the epilogue and have to use a byte loop instead.
14723 if (epilogue_size_needed > 2 && !promoted_val)
14724 force_loopy_epilogue = true;
14725 label = gen_label_rtx ();
14726 emit_cmp_and_jump_insns (count_exp,
14727 GEN_INT (epilogue_size_needed),
14728 LTU, 0, counter_mode (count_exp), 1, label);
14729 if (CONST_INT_P (count_exp))
14731 else if (expected_size == -1 || expected_size <= epilogue_size_needed)
14732 predict_jump (REG_BR_PROB_BASE * 60 / 100);
14734 predict_jump (REG_BR_PROB_BASE * 20 / 100);
14736 if (dynamic_check != -1)
14738 rtx hot_label = gen_label_rtx ();
14739 jump_around_label = gen_label_rtx ();
14740 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
14741 LEU, 0, counter_mode (count_exp), 1, hot_label);
14742 predict_jump (REG_BR_PROB_BASE * 90 / 100);
14743 set_storage_via_libcall (dst, count_exp, val_exp, false);
14744 emit_jump (jump_around_label);
14745 emit_label (hot_label);
14748 /* Step 2: Alignment prologue. */
14750 /* Do the expensive promotion once we have branched off the small blocks. */
14752 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
14753 desired_align, align);
14754 gcc_assert (desired_align >= 1 && align >= 1);
14756 if (desired_align > align)
14758 /* Except for the first move in the epilogue, we no longer know
14759 the constant offset in aliasing info. It doesn't seem worth
14760 the pain to maintain it for the first move, so throw the info away early.
14762 dst = change_address (dst, BLKmode, destreg);
14763 expand_setmem_prologue (dst, destreg, promoted_val, count_exp, align,
14766 if (label && size_needed == 1)
14768 emit_label (label);
14769 LABEL_NUSES (label) = 1;
14773 /* Step 3: Main loop. */
14779 gcc_unreachable ();
14781 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
14782 count_exp, QImode, 1, expected_size);
14785 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
14786 count_exp, Pmode, 1, expected_size);
14788 case unrolled_loop:
14789 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
14790 count_exp, Pmode, 4, expected_size);
14792 case rep_prefix_8_byte:
14793 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
14796 case rep_prefix_4_byte:
14797 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
14800 case rep_prefix_1_byte:
14801 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
14805 /* Properly adjust the offset of DEST memory for aliasing. */
14806 if (CONST_INT_P (count_exp))
14807 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
14808 (count / size_needed) * size_needed);
14810 dst = change_address (dst, BLKmode, destreg);
14812 /* Step 4: Epilogue to copy the remaining bytes. */
14816 /* When the main loop is done, COUNT_EXP might hold the original count,
14817 while we want to set only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
14818 Epilogue code will actually set COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
14819 bytes. Compensate if needed. */
14821 if (size_needed < desired_align - align)
14824 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
14825 GEN_INT (size_needed - 1), count_exp, 1,
14827 size_needed = desired_align - align + 1;
14828 if (tmp != count_exp)
14829 emit_move_insn (count_exp, tmp);
14831 emit_label (label);
14832 LABEL_NUSES (label) = 1;
14834 if (count_exp != const0_rtx && epilogue_size_needed > 1)
14836 if (force_loopy_epilogue)
14837 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
14840 expand_setmem_epilogue (dst, destreg, promoted_val, count_exp,
14843 if (jump_around_label)
14844 emit_label (jump_around_label);
14848 /* Expand the appropriate insns for doing strlen if not just doing
14851 out = result, initialized with the start address
14852 align_rtx = alignment of the address.
14853 scratch = scratch register, initialized with the start address when
14854 not aligned, otherwise undefined
14856 This is just the body. It needs the initializations mentioned above and
14857 some address computing at the end. These things are done in i386.md. */
14860 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
14864 rtx align_2_label = NULL_RTX;
14865 rtx align_3_label = NULL_RTX;
14866 rtx align_4_label = gen_label_rtx ();
14867 rtx end_0_label = gen_label_rtx ();
14869 rtx tmpreg = gen_reg_rtx (SImode);
14870 rtx scratch = gen_reg_rtx (SImode);
14874 if (CONST_INT_P (align_rtx))
14875 align = INTVAL (align_rtx);
14877 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
14879 /* Is there a known alignment and is it less than 4? */
14882 rtx scratch1 = gen_reg_rtx (Pmode);
14883 emit_move_insn (scratch1, out);
14884 /* Is there a known alignment and is it not 2? */
14887 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
14888 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
14890 /* Leave just the 3 lower bits. */
14891 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
14892 NULL_RTX, 0, OPTAB_WIDEN);
14894 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
14895 Pmode, 1, align_4_label);
14896 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
14897 Pmode, 1, align_2_label);
14898 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
14899 Pmode, 1, align_3_label);
14903 /* Since the alignment is 2, we have to check 2 or 0 bytes;
14904 check whether it is aligned to 4 bytes. */
14906 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
14907 NULL_RTX, 0, OPTAB_WIDEN);
14909 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
14910 Pmode, 1, align_4_label);
14913 mem = change_address (src, QImode, out);
14915 /* Now compare the bytes. */
14917 /* Compare the first n unaligned bytes on a byte-by-byte basis. */
14918 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
14919 QImode, 1, end_0_label);
14921 /* Increment the address. */
14923 emit_insn (gen_adddi3 (out, out, const1_rtx));
14925 emit_insn (gen_addsi3 (out, out, const1_rtx));
14927 /* Not needed with an alignment of 2 */
14930 emit_label (align_2_label);
14932 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
14936 emit_insn (gen_adddi3 (out, out, const1_rtx));
14938 emit_insn (gen_addsi3 (out, out, const1_rtx));
14940 emit_label (align_3_label);
14943 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
14947 emit_insn (gen_adddi3 (out, out, const1_rtx));
14949 emit_insn (gen_addsi3 (out, out, const1_rtx));
14952 /* Generate a loop to check 4 bytes at a time. It is not a good idea to
14953 align this loop; that only makes programs huge and does not help to speed them up.
14955 emit_label (align_4_label);
14957 mem = change_address (src, SImode, out);
14958 emit_move_insn (scratch, mem);
14960 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
14962 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
14964 /* This formula yields a nonzero result iff one of the bytes is zero.
14965 This saves three branches inside the loop and many cycles. */
14967 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
14968 emit_insn (gen_one_cmplsi2 (scratch, scratch));
14969 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
14970 emit_insn (gen_andsi3 (tmpreg, tmpreg,
14971 gen_int_mode (0x80808080, SImode)));
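/* Worked example of the zero-byte test (an illustration):
     scratch = 0x11002233            (second-highest byte is zero)
     tmpreg  = scratch - 0x01010101  = 0x0fff2132
     tmpreg &= ~scratch              = 0x0eff0100
     tmpreg &= 0x80808080            = 0x00800000  -> nonzero, so a zero
   byte was found.  With no zero byte, each byte of scratch - 0x01010101
   either keeps its high bit clear or has it masked off by ~scratch, so
   the result is zero.  */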
14972 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
14977 rtx reg = gen_reg_rtx (SImode);
14978 rtx reg2 = gen_reg_rtx (Pmode);
14979 emit_move_insn (reg, tmpreg);
14980 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
14982 /* If zero is not in the first two bytes, move two bytes forward. */
14983 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
14984 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
14985 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
14986 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
14987 gen_rtx_IF_THEN_ELSE (SImode, tmp,
14990 /* Emit lea manually to avoid clobbering the flags. */
14991 emit_insn (gen_rtx_SET (SImode, reg2,
14992 gen_rtx_PLUS (Pmode, out, const2_rtx)));
14994 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
14995 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
14996 emit_insn (gen_rtx_SET (VOIDmode, out,
14997 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
15004 rtx end_2_label = gen_label_rtx ();
15005 /* Is zero in the first two bytes? */
15007 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
15008 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
15009 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
15010 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
15011 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
15013 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
15014 JUMP_LABEL (tmp) = end_2_label;
15016 /* Not in the first two. Move two bytes forward. */
15017 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
15019 emit_insn (gen_adddi3 (out, out, const2_rtx));
15021 emit_insn (gen_addsi3 (out, out, const2_rtx));
15023 emit_label (end_2_label);
15027 /* Avoid branch in fixing the byte. */
15028 tmpreg = gen_lowpart (QImode, tmpreg);
15029 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
15030 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, FLAGS_REG), const0_rtx);
15032 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
15034 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));
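/* As I read the branch-free fixup above: OUT points 4 bytes past the
   word, and bit 7 of TMPREG's low byte is set exactly when the zero is
   the first of the two remaining candidate bytes.  The addqi3_cc doubles
   TMPREG so that bit becomes the carry flag, and the subtract-with-borrow
   then computes OUT -= 3 + carry, i.e. OUT -= 4 or OUT -= 3.  */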
15036 emit_label (end_0_label);
15039 /* Expand strlen. */
15042 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
15044 rtx addr, scratch1, scratch2, scratch3, scratch4;
15046 /* The generic case of the strlen expander is long. Avoid expanding it
15047 unless TARGET_INLINE_ALL_STRINGOPS. */
15049 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
15050 && !TARGET_INLINE_ALL_STRINGOPS
15052 && (!CONST_INT_P (align) || INTVAL (align) < 4))
15055 addr = force_reg (Pmode, XEXP (src, 0));
15056 scratch1 = gen_reg_rtx (Pmode);
15058 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
15061 /* Well, it seems that some optimizers do not combine a call like
15062 foo (strlen (bar), strlen (bar));
15063 when the move and the subtraction are done here. They do calculate
15064 the length just once when these instructions are done inside of
15065 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
15066 often used and I use one fewer register for the lifetime of
15067 output_strlen_unroll() this is better. */
15069 emit_move_insn (out, addr);
15071 ix86_expand_strlensi_unroll_1 (out, src, align);
15073 /* strlensi_unroll_1 returns the address of the zero at the end of
15074 the string, like memchr(), so compute the length by subtracting
15075 the start address. */
15077 emit_insn (gen_subdi3 (out, out, addr));
15079 emit_insn (gen_subsi3 (out, out, addr));
15084 scratch2 = gen_reg_rtx (Pmode);
15085 scratch3 = gen_reg_rtx (Pmode);
15086 scratch4 = force_reg (Pmode, constm1_rtx);
15088 emit_move_insn (scratch3, addr);
15089 eoschar = force_reg (QImode, eoschar);
15091 src = replace_equiv_address_nv (src, scratch3);
15093 /* If .md starts supporting :P, this can be done in .md. */
15094 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
15095 scratch4), UNSPEC_SCAS);
15096 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
15099 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
15100 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
15104 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
15105 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
15111 /* For a given symbol (function), construct code to compute the address of its
15112 PLT entry in the large x86-64 PIC model. */
15114 construct_plt_address (rtx symbol)
15116 rtx tmp = gen_reg_rtx (Pmode);
15117 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
15119 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
15120 gcc_assert (ix86_cmodel == CM_LARGE_PIC);
15122 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
15123 emit_insn (gen_adddi3 (tmp, tmp, pic_offset_table_rtx));
15128 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
15129 rtx callarg2 ATTRIBUTE_UNUSED,
15130 rtx pop, int sibcall)
15132 rtx use = NULL, call;
15134 if (pop == const0_rtx)
15136 gcc_assert (!TARGET_64BIT || !pop);
15138 if (TARGET_MACHO && !TARGET_64BIT)
15141 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
15142 fnaddr = machopic_indirect_call_target (fnaddr);
15147 /* Static functions and indirect calls don't need the pic register. */
15148 if (flag_pic && (!TARGET_64BIT || ix86_cmodel == CM_LARGE_PIC)
15149 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
15150 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
15151 use_reg (&use, pic_offset_table_rtx);
15154 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
15156 rtx al = gen_rtx_REG (QImode, 0);
15157 emit_move_insn (al, callarg2);
15158 use_reg (&use, al);
15161 if (ix86_cmodel == CM_LARGE_PIC
15162 && GET_CODE (fnaddr) == MEM
15163 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
15164 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
15165 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
15166 else if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
15168 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
15169 fnaddr = gen_rtx_MEM (QImode, fnaddr);
15171 if (sibcall && TARGET_64BIT
15172 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
15175 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
15176 fnaddr = gen_rtx_REG (Pmode, R11_REG);
15177 emit_move_insn (fnaddr, addr);
15178 fnaddr = gen_rtx_MEM (QImode, fnaddr);
15181 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
15183 call = gen_rtx_SET (VOIDmode, retval, call);
15186 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
15187 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
15188 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
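/* Illustrative RTL shape built here for a call that pops its own
   arguments, e.g. a stdcall function popping 8 bytes (modes and register
   numbers are only indicative):
     (parallel [(set (reg:SI ax)
                     (call (mem:QI (symbol_ref "f")) (const_int 4)))
                (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int 8)))])  */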
15191 call = emit_call_insn (call);
15193 CALL_INSN_FUNCTION_USAGE (call) = use;
15197 /* Clear stack slot assignments remembered from previous functions.
15198 This is called from INIT_EXPANDERS once before RTL is emitted for each function. */
15201 static struct machine_function *
15202 ix86_init_machine_status (void)
15204 struct machine_function *f;
15206 f = ggc_alloc_cleared (sizeof (struct machine_function));
15207 f->use_fast_prologue_epilogue_nregs = -1;
15208 f->tls_descriptor_call_expanded_p = 0;
15213 /* Return a MEM corresponding to a stack slot with mode MODE.
15214 Allocate a new slot if necessary.
15216 The RTL for a function can have several slots available: N is
15217 which slot to use. */
15220 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
15222 struct stack_local_entry *s;
15224 gcc_assert (n < MAX_386_STACK_LOCALS);
15226 for (s = ix86_stack_locals; s; s = s->next)
15227 if (s->mode == mode && s->n == n)
15228 return copy_rtx (s->rtl);
15230 s = (struct stack_local_entry *)
15231 ggc_alloc (sizeof (struct stack_local_entry));
15234 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
15236 s->next = ix86_stack_locals;
15237 ix86_stack_locals = s;
15241 /* Construct the SYMBOL_REF for the tls_get_addr function. */
15243 static GTY(()) rtx ix86_tls_symbol;
15245 ix86_tls_get_addr (void)
15248 if (!ix86_tls_symbol)
15250 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
15251 (TARGET_ANY_GNU_TLS
15253 ? "___tls_get_addr"
15254 : "__tls_get_addr");
15257 return ix86_tls_symbol;
15260 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
15262 static GTY(()) rtx ix86_tls_module_base_symbol;
15264 ix86_tls_module_base (void)
15267 if (!ix86_tls_module_base_symbol)
15269 ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode,
15270 "_TLS_MODULE_BASE_");
15271 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
15272 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
15275 return ix86_tls_module_base_symbol;
15278 /* Calculate the length of the memory address in the instruction
15279 encoding. Does not include the one-byte modrm, opcode, or prefix. */
15282 memory_address_length (rtx addr)
15284 struct ix86_address parts;
15285 rtx base, index, disp;
15289 if (GET_CODE (addr) == PRE_DEC
15290 || GET_CODE (addr) == POST_INC
15291 || GET_CODE (addr) == PRE_MODIFY
15292 || GET_CODE (addr) == POST_MODIFY)
15295 ok = ix86_decompose_address (addr, &parts);
15298 if (parts.base && GET_CODE (parts.base) == SUBREG)
15299 parts.base = SUBREG_REG (parts.base);
15300 if (parts.index && GET_CODE (parts.index) == SUBREG)
15301 parts.index = SUBREG_REG (parts.index);
15304 index = parts.index;
15309 - esp as the base always wants an index,
15310 - ebp as the base always wants a displacement. */
15312 /* Register Indirect. */
15313 if (base && !index && !disp)
15315 /* esp (for its index) and ebp (for its displacement) need
15316 the two-byte modrm form. */
15317 if (addr == stack_pointer_rtx
15318 || addr == arg_pointer_rtx
15319 || addr == frame_pointer_rtx
15320 || addr == hard_frame_pointer_rtx)
15324 /* Direct Addressing. */
15325 else if (disp && !base && !index)
15330 /* Find the length of the displacement constant. */
15333 if (base && satisfies_constraint_K (disp))
15338 /* ebp always wants a displacement. */
15339 else if (base == hard_frame_pointer_rtx)
15342 /* An index requires the two-byte modrm form.... */
15344 /* ...like esp, which always wants an index. */
15345 || base == stack_pointer_rtx
15346 || base == arg_pointer_rtx
15347 || base == frame_pointer_rtx)
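/* Some illustrative return values (bytes beyond the modrm, opcode and
   prefix bytes, per the comment above):
     (%eax)           -> 0   plain register indirect
     (%esp)           -> 1   SIB byte required
     4(%ebp)          -> 1   disp8 forced by ebp as the base
     sym(,%eax,4)     -> 5   SIB byte + disp32
     8(%ebx,%ecx,2)   -> 2   SIB byte + disp8  */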
15354 /* Compute default value for "length_immediate" attribute. When SHORTFORM
15355 is set, expect that the insn has an 8-bit immediate alternative. */
15357 ix86_attr_length_immediate_default (rtx insn, int shortform)
15361 extract_insn_cached (insn);
15362 for (i = recog_data.n_operands - 1; i >= 0; --i)
15363 if (CONSTANT_P (recog_data.operand[i]))
15366 if (shortform && satisfies_constraint_K (recog_data.operand[i]))
15370 switch (get_attr_mode (insn))
15381 /* Immediates for DImode instructions are encoded as 32-bit sign-extended values. */
15386 fatal_insn ("unknown insn mode", insn);
15392 /* Compute default value for "length_address" attribute. */
15394 ix86_attr_length_address_default (rtx insn)
15398 if (get_attr_type (insn) == TYPE_LEA)
15400 rtx set = PATTERN (insn);
15402 if (GET_CODE (set) == PARALLEL)
15403 set = XVECEXP (set, 0, 0);
15405 gcc_assert (GET_CODE (set) == SET);
15407 return memory_address_length (SET_SRC (set));
15410 extract_insn_cached (insn);
15411 for (i = recog_data.n_operands - 1; i >= 0; --i)
15412 if (MEM_P (recog_data.operand[i]))
15414 return memory_address_length (XEXP (recog_data.operand[i], 0));
15420 /* Return the maximum number of instructions a CPU can issue. */
15423 ix86_issue_rate (void)
15427 case PROCESSOR_PENTIUM:
15431 case PROCESSOR_PENTIUMPRO:
15432 case PROCESSOR_PENTIUM4:
15433 case PROCESSOR_ATHLON:
15435 case PROCESSOR_AMDFAM10:
15436 case PROCESSOR_NOCONA:
15437 case PROCESSOR_GENERIC32:
15438 case PROCESSOR_GENERIC64:
15441 case PROCESSOR_CORE2:
15449 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
15450 by DEP_INSN and nothing else set by DEP_INSN. */
15453 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
15457 /* Simplify the test for uninteresting insns. */
15458 if (insn_type != TYPE_SETCC
15459 && insn_type != TYPE_ICMOV
15460 && insn_type != TYPE_FCMOV
15461 && insn_type != TYPE_IBR)
15464 if ((set = single_set (dep_insn)) != 0)
15466 set = SET_DEST (set);
15469 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
15470 && XVECLEN (PATTERN (dep_insn), 0) == 2
15471 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
15472 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
15474 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
15475 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
15480 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
15483 /* This test is true if the dependent insn reads the flags but
15484 not any other potentially set register. */
15485 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
15488 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
15494 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
15495 address with operands set by DEP_INSN. */
15498 ix86_agi_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
15502 if (insn_type == TYPE_LEA
15505 addr = PATTERN (insn);
15507 if (GET_CODE (addr) == PARALLEL)
15508 addr = XVECEXP (addr, 0, 0);
15510 gcc_assert (GET_CODE (addr) == SET);
15512 addr = SET_SRC (addr);
15517 extract_insn_cached (insn);
15518 for (i = recog_data.n_operands - 1; i >= 0; --i)
15519 if (MEM_P (recog_data.operand[i]))
15521 addr = XEXP (recog_data.operand[i], 0);
15528 return modified_in_p (addr, dep_insn);
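/* Example of the dependence detected here, a Pentium-style AGI stall
   (illustrative assembly):
       addl %ecx, %ebx
       movl (%ebx), %eax    ; address uses %ebx, set by the previous insn
   ix86_adjust_cost below charges extra latency for such pairs.  */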
15532 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
15534 enum attr_type insn_type, dep_insn_type;
15535 enum attr_memory memory;
15537 int dep_insn_code_number;
15539 /* Anti and output dependencies have zero cost on all CPUs. */
15540 if (REG_NOTE_KIND (link) != 0)
15543 dep_insn_code_number = recog_memoized (dep_insn);
15545 /* If we can't recognize the insns, we can't really do anything. */
15546 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
15549 insn_type = get_attr_type (insn);
15550 dep_insn_type = get_attr_type (dep_insn);
15554 case PROCESSOR_PENTIUM:
15555 /* Address Generation Interlock adds a cycle of latency. */
15556 if (ix86_agi_dependent (insn, dep_insn, insn_type))
15559 /* ??? Compares pair with jump/setcc. */
15560 if (ix86_flags_dependent (insn, dep_insn, insn_type))
15563 /* Floating point stores require the value to be ready one extra cycle earlier. */
15564 if (insn_type == TYPE_FMOV
15565 && get_attr_memory (insn) == MEMORY_STORE
15566 && !ix86_agi_dependent (insn, dep_insn, insn_type))
15570 case PROCESSOR_PENTIUMPRO:
15571 memory = get_attr_memory (insn);
15573 /* INT->FP conversion is expensive. */
15574 if (get_attr_fp_int_src (dep_insn))
15577 /* There is one cycle extra latency between an FP op and a store. */
15578 if (insn_type == TYPE_FMOV
15579 && (set = single_set (dep_insn)) != NULL_RTX
15580 && (set2 = single_set (insn)) != NULL_RTX
15581 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
15582 && MEM_P (SET_DEST (set2)))
15585 /* Show the ability of the reorder buffer to hide the latency of a load
15586 by executing it in parallel with the previous instruction, when the
15587 previous instruction is not needed to compute the address. */
15588 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
15589 && !ix86_agi_dependent (insn, dep_insn, insn_type))
15591 /* Claim moves take one cycle, as the core can issue one load at a
15592 time and the next load can start a cycle later. */
15593 if (dep_insn_type == TYPE_IMOV
15594 || dep_insn_type == TYPE_FMOV)
15602 memory = get_attr_memory (insn);
15604 /* The esp dependency is resolved before the instruction is really finished. */
15606 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
15607 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
15610 /* INT->FP conversion is expensive. */
15611 if (get_attr_fp_int_src (dep_insn))
15614 /* Show the ability of the reorder buffer to hide the latency of a load
15615 by executing it in parallel with the previous instruction, when the
15616 previous instruction is not needed to compute the address. */
15617 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
15618 && !ix86_agi_dependent (insn, dep_insn, insn_type))
15620 /* Claim moves take one cycle, as the core can issue one load at a
15621 time and the next load can start a cycle later. */
15622 if (dep_insn_type == TYPE_IMOV
15623 || dep_insn_type == TYPE_FMOV)
15632 case PROCESSOR_ATHLON:
15634 case PROCESSOR_AMDFAM10:
15635 case PROCESSOR_GENERIC32:
15636 case PROCESSOR_GENERIC64:
15637 memory = get_attr_memory (insn);
15639 /* Show the ability of the reorder buffer to hide the latency of a load
15640 by executing it in parallel with the previous instruction, when the
15641 previous instruction is not needed to compute the address. */
15642 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
15643 && !ix86_agi_dependent (insn, dep_insn, insn_type))
15645 enum attr_unit unit = get_attr_unit (insn);
15648 /* Because of the difference between the lengths of the integer and
15649 floating-point unit pipeline preparation stages, memory operands
15650 for floating point are cheaper.
15652 ??? For Athlon the difference is most probably 2. */
15653 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
15656 loadcost = TARGET_ATHLON ? 2 : 0;
15658 if (cost >= loadcost)
  cost -= loadcost;
else
  cost = 0;
15671 /* How many alternative schedules to try. This should be as wide as the
15672 scheduling freedom in the DFA, but no wider. Making this value too
15673 large results in extra work for the scheduler. */
15676 ia32_multipass_dfa_lookahead (void)
15678 if (ix86_tune == PROCESSOR_PENTIUM)
15681 if (ix86_tune == PROCESSOR_PENTIUMPRO
15682 || ix86_tune == PROCESSOR_K6)
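  /* A hedged sketch of the values (assumed): Pentium gets a lookahead of 2
     to match its two integer pipes, the PentiumPro/K6 case gets 1, and all
     other tunings get 0, i.e. no multipass lookahead.  */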
15690 /* Compute the alignment given to a constant that is being placed in memory.
15691 EXP is the constant and ALIGN is the alignment that the object would ordinarily have.
15693 The value of this function is used instead of that alignment to align the object. */
15697 ix86_constant_alignment (tree exp, int align)
15699 if (TREE_CODE (exp) == REAL_CST)
15701 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
15703 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
15706 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
15707 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
15708 return BITS_PER_WORD;
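/* Worked example (illustrative, not from the surrounding code): a DFmode
   constant such as 3.14 that would ordinarily get align == 32 is raised to
   64 by the REAL_CST case above, so the 8-byte FP load stays naturally
   aligned.  */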
15713 /* Compute the alignment for a static variable.
15714 TYPE is the data type, and ALIGN is the alignment that
15715 the object would ordinarily have. The value of this function is used
15716 instead of that alignment to align the object. */
15719 ix86_data_alignment (tree type, int align)
15721 int max_align = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
15723 if (AGGREGATE_TYPE_P (type)
15724 && TYPE_SIZE (type)
15725 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
15726 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
15727 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
15728 && align < max_align)
15731 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
15732 to a 16-byte boundary. */
15735 if (AGGREGATE_TYPE_P (type)
15736 && TYPE_SIZE (type)
15737 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
15738 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
15739 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
15743 if (TREE_CODE (type) == ARRAY_TYPE)
15745 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
15747 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
15750 else if (TREE_CODE (type) == COMPLEX_TYPE)
15753 if (TYPE_MODE (type) == DCmode && align < 64)
15755 if (TYPE_MODE (type) == XCmode && align < 128)
15758 else if ((TREE_CODE (type) == RECORD_TYPE
15759 || TREE_CODE (type) == UNION_TYPE
15760 || TREE_CODE (type) == QUAL_UNION_TYPE)
15761 && TYPE_FIELDS (type))
15763 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
15765 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
15768 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
15769 || TREE_CODE (type) == INTEGER_TYPE)
15771 if (TYPE_MODE (type) == DFmode && align < 64)
15773 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
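/* Worked example (illustrative): for a file-scope "double d[4]" the type
   size is 256 bits, so with !optimize_size the aggregate test above raises
   the alignment to max_align, and the x86-64 >= 128-bit rule independently
   guarantees a 16-byte boundary.  */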
15780 /* Compute the alignment for a local variable.
15781 TYPE is the data type, and ALIGN is the alignment that
15782 the object would ordinarily have. The value of this function is used
15783 instead of that alignment to align the object. */
15786 ix86_local_alignment (tree type, int align)
15788 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
15789 to a 16-byte boundary. */
15792 if (AGGREGATE_TYPE_P (type)
15793 && TYPE_SIZE (type)
15794 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
15795 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
15796 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
15799 if (TREE_CODE (type) == ARRAY_TYPE)
15801 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
15803 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
15806 else if (TREE_CODE (type) == COMPLEX_TYPE)
15808 if (TYPE_MODE (type) == DCmode && align < 64)
15810 if (TYPE_MODE (type) == XCmode && align < 128)
15813 else if ((TREE_CODE (type) == RECORD_TYPE
15814 || TREE_CODE (type) == UNION_TYPE
15815 || TREE_CODE (type) == QUAL_UNION_TYPE)
15816 && TYPE_FIELDS (type))
15818 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
15820 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
15823 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
15824 || TREE_CODE (type) == INTEGER_TYPE)
15827 if (TYPE_MODE (type) == DFmode && align < 64)
15829 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
15835 /* Emit RTL insns to initialize the variable parts of a trampoline.
15836 FNADDR is an RTX for the address of the function's pure code.
15837 CXT is an RTX for the static chain value for the function. */
15839 x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
15843 /* Compute offset from the end of the jmp to the target function. */
15844 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
15845 plus_constant (tramp, 10),
15846 NULL_RTX, 1, OPTAB_DIRECT);
15847 emit_move_insn (gen_rtx_MEM (QImode, tramp),
15848 gen_int_mode (0xb9, QImode));
15849 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
15850 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
15851 gen_int_mode (0xe9, QImode));
15852 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
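      /* The ten bytes emitted above decode as follows (a sketch; byte
         offsets follow the plus_constant calls, and disp is relative to
         tramp + 10, the address of the next instruction):

	    0:  b9 <cxt:4>	movl  $<static chain>, %ecx
	    5:  e9 <disp:4>	jmp   <fnaddr>  (rel32)  */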
15857 /* Try to load the address using the shorter movl instead of movabs.
15858 We may want to support movq for kernel mode, but the kernel does not
15859 use trampolines at the moment. */
15860 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
15862 fnaddr = copy_to_mode_reg (DImode, fnaddr);
15863 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
15864 gen_int_mode (0xbb41, HImode));
15865 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
15866 gen_lowpart (SImode, fnaddr));
15871 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
15872 gen_int_mode (0xbb49, HImode));
15873 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
15877 /* Load static chain using movabs to r10. */
15878 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
15879 gen_int_mode (0xba49, HImode));
15880 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
15883 /* Jump to r11. */
15884 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
15885 gen_int_mode (0xff49, HImode));
15886 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
15887 gen_int_mode (0xe3, QImode));
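      /* Decoded, the 64-bit sequence above is (a sketch; the HImode
         constants are stored little-endian, so 0xbb49 emits 49 bb):

	    41 bb <imm32>	movl   $fnaddr, %r11d	(short form)
	 or 49 bb <imm64>	movabs $fnaddr, %r11
	    49 ba <imm64>	movabs $cxt, %r10
	    49 ff e3		jmp    *%r11  */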
15889 gcc_assert (offset <= TRAMPOLINE_SIZE);
15892 #ifdef ENABLE_EXECUTE_STACK
15893 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
15894 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
15898 /* Codes for all the SSE/MMX builtins. */
15901 IX86_BUILTIN_ADDPS,
15902 IX86_BUILTIN_ADDSS,
15903 IX86_BUILTIN_DIVPS,
15904 IX86_BUILTIN_DIVSS,
15905 IX86_BUILTIN_MULPS,
15906 IX86_BUILTIN_MULSS,
15907 IX86_BUILTIN_SUBPS,
15908 IX86_BUILTIN_SUBSS,
15910 IX86_BUILTIN_CMPEQPS,
15911 IX86_BUILTIN_CMPLTPS,
15912 IX86_BUILTIN_CMPLEPS,
15913 IX86_BUILTIN_CMPGTPS,
15914 IX86_BUILTIN_CMPGEPS,
15915 IX86_BUILTIN_CMPNEQPS,
15916 IX86_BUILTIN_CMPNLTPS,
15917 IX86_BUILTIN_CMPNLEPS,
15918 IX86_BUILTIN_CMPNGTPS,
15919 IX86_BUILTIN_CMPNGEPS,
15920 IX86_BUILTIN_CMPORDPS,
15921 IX86_BUILTIN_CMPUNORDPS,
15922 IX86_BUILTIN_CMPEQSS,
15923 IX86_BUILTIN_CMPLTSS,
15924 IX86_BUILTIN_CMPLESS,
15925 IX86_BUILTIN_CMPNEQSS,
15926 IX86_BUILTIN_CMPNLTSS,
15927 IX86_BUILTIN_CMPNLESS,
15928 IX86_BUILTIN_CMPNGTSS,
15929 IX86_BUILTIN_CMPNGESS,
15930 IX86_BUILTIN_CMPORDSS,
15931 IX86_BUILTIN_CMPUNORDSS,
15933 IX86_BUILTIN_COMIEQSS,
15934 IX86_BUILTIN_COMILTSS,
15935 IX86_BUILTIN_COMILESS,
15936 IX86_BUILTIN_COMIGTSS,
15937 IX86_BUILTIN_COMIGESS,
15938 IX86_BUILTIN_COMINEQSS,
15939 IX86_BUILTIN_UCOMIEQSS,
15940 IX86_BUILTIN_UCOMILTSS,
15941 IX86_BUILTIN_UCOMILESS,
15942 IX86_BUILTIN_UCOMIGTSS,
15943 IX86_BUILTIN_UCOMIGESS,
15944 IX86_BUILTIN_UCOMINEQSS,
15946 IX86_BUILTIN_CVTPI2PS,
15947 IX86_BUILTIN_CVTPS2PI,
15948 IX86_BUILTIN_CVTSI2SS,
15949 IX86_BUILTIN_CVTSI642SS,
15950 IX86_BUILTIN_CVTSS2SI,
15951 IX86_BUILTIN_CVTSS2SI64,
15952 IX86_BUILTIN_CVTTPS2PI,
15953 IX86_BUILTIN_CVTTSS2SI,
15954 IX86_BUILTIN_CVTTSS2SI64,
15956 IX86_BUILTIN_MAXPS,
15957 IX86_BUILTIN_MAXSS,
15958 IX86_BUILTIN_MINPS,
15959 IX86_BUILTIN_MINSS,
15961 IX86_BUILTIN_LOADUPS,
15962 IX86_BUILTIN_STOREUPS,
15963 IX86_BUILTIN_MOVSS,
15965 IX86_BUILTIN_MOVHLPS,
15966 IX86_BUILTIN_MOVLHPS,
15967 IX86_BUILTIN_LOADHPS,
15968 IX86_BUILTIN_LOADLPS,
15969 IX86_BUILTIN_STOREHPS,
15970 IX86_BUILTIN_STORELPS,
15972 IX86_BUILTIN_MASKMOVQ,
15973 IX86_BUILTIN_MOVMSKPS,
15974 IX86_BUILTIN_PMOVMSKB,
15976 IX86_BUILTIN_MOVNTPS,
15977 IX86_BUILTIN_MOVNTQ,
15979 IX86_BUILTIN_LOADDQU,
15980 IX86_BUILTIN_STOREDQU,
15982 IX86_BUILTIN_PACKSSWB,
15983 IX86_BUILTIN_PACKSSDW,
15984 IX86_BUILTIN_PACKUSWB,
15986 IX86_BUILTIN_PADDB,
15987 IX86_BUILTIN_PADDW,
15988 IX86_BUILTIN_PADDD,
15989 IX86_BUILTIN_PADDQ,
15990 IX86_BUILTIN_PADDSB,
15991 IX86_BUILTIN_PADDSW,
15992 IX86_BUILTIN_PADDUSB,
15993 IX86_BUILTIN_PADDUSW,
15994 IX86_BUILTIN_PSUBB,
15995 IX86_BUILTIN_PSUBW,
15996 IX86_BUILTIN_PSUBD,
15997 IX86_BUILTIN_PSUBQ,
15998 IX86_BUILTIN_PSUBSB,
15999 IX86_BUILTIN_PSUBSW,
16000 IX86_BUILTIN_PSUBUSB,
16001 IX86_BUILTIN_PSUBUSW,
16004 IX86_BUILTIN_PANDN,
16008 IX86_BUILTIN_PAVGB,
16009 IX86_BUILTIN_PAVGW,
16011 IX86_BUILTIN_PCMPEQB,
16012 IX86_BUILTIN_PCMPEQW,
16013 IX86_BUILTIN_PCMPEQD,
16014 IX86_BUILTIN_PCMPGTB,
16015 IX86_BUILTIN_PCMPGTW,
16016 IX86_BUILTIN_PCMPGTD,
16018 IX86_BUILTIN_PMADDWD,
16020 IX86_BUILTIN_PMAXSW,
16021 IX86_BUILTIN_PMAXUB,
16022 IX86_BUILTIN_PMINSW,
16023 IX86_BUILTIN_PMINUB,
16025 IX86_BUILTIN_PMULHUW,
16026 IX86_BUILTIN_PMULHW,
16027 IX86_BUILTIN_PMULLW,
16029 IX86_BUILTIN_PSADBW,
16030 IX86_BUILTIN_PSHUFW,
16032 IX86_BUILTIN_PSLLW,
16033 IX86_BUILTIN_PSLLD,
16034 IX86_BUILTIN_PSLLQ,
16035 IX86_BUILTIN_PSRAW,
16036 IX86_BUILTIN_PSRAD,
16037 IX86_BUILTIN_PSRLW,
16038 IX86_BUILTIN_PSRLD,
16039 IX86_BUILTIN_PSRLQ,
16040 IX86_BUILTIN_PSLLWI,
16041 IX86_BUILTIN_PSLLDI,
16042 IX86_BUILTIN_PSLLQI,
16043 IX86_BUILTIN_PSRAWI,
16044 IX86_BUILTIN_PSRADI,
16045 IX86_BUILTIN_PSRLWI,
16046 IX86_BUILTIN_PSRLDI,
16047 IX86_BUILTIN_PSRLQI,
16049 IX86_BUILTIN_PUNPCKHBW,
16050 IX86_BUILTIN_PUNPCKHWD,
16051 IX86_BUILTIN_PUNPCKHDQ,
16052 IX86_BUILTIN_PUNPCKLBW,
16053 IX86_BUILTIN_PUNPCKLWD,
16054 IX86_BUILTIN_PUNPCKLDQ,
16056 IX86_BUILTIN_SHUFPS,
16058 IX86_BUILTIN_RCPPS,
16059 IX86_BUILTIN_RCPSS,
16060 IX86_BUILTIN_RSQRTPS,
16061 IX86_BUILTIN_RSQRTSS,
16062 IX86_BUILTIN_SQRTPS,
16063 IX86_BUILTIN_SQRTSS,
16065 IX86_BUILTIN_UNPCKHPS,
16066 IX86_BUILTIN_UNPCKLPS,
16068 IX86_BUILTIN_ANDPS,
16069 IX86_BUILTIN_ANDNPS,
16071 IX86_BUILTIN_XORPS,
16074 IX86_BUILTIN_LDMXCSR,
16075 IX86_BUILTIN_STMXCSR,
16076 IX86_BUILTIN_SFENCE,
16078 /* 3DNow! Original */
16079 IX86_BUILTIN_FEMMS,
16080 IX86_BUILTIN_PAVGUSB,
16081 IX86_BUILTIN_PF2ID,
16082 IX86_BUILTIN_PFACC,
16083 IX86_BUILTIN_PFADD,
16084 IX86_BUILTIN_PFCMPEQ,
16085 IX86_BUILTIN_PFCMPGE,
16086 IX86_BUILTIN_PFCMPGT,
16087 IX86_BUILTIN_PFMAX,
16088 IX86_BUILTIN_PFMIN,
16089 IX86_BUILTIN_PFMUL,
16090 IX86_BUILTIN_PFRCP,
16091 IX86_BUILTIN_PFRCPIT1,
16092 IX86_BUILTIN_PFRCPIT2,
16093 IX86_BUILTIN_PFRSQIT1,
16094 IX86_BUILTIN_PFRSQRT,
16095 IX86_BUILTIN_PFSUB,
16096 IX86_BUILTIN_PFSUBR,
16097 IX86_BUILTIN_PI2FD,
16098 IX86_BUILTIN_PMULHRW,
16100 /* 3DNow! Athlon Extensions */
16101 IX86_BUILTIN_PF2IW,
16102 IX86_BUILTIN_PFNACC,
16103 IX86_BUILTIN_PFPNACC,
16104 IX86_BUILTIN_PI2FW,
16105 IX86_BUILTIN_PSWAPDSI,
16106 IX86_BUILTIN_PSWAPDSF,
16109 IX86_BUILTIN_ADDPD,
16110 IX86_BUILTIN_ADDSD,
16111 IX86_BUILTIN_DIVPD,
16112 IX86_BUILTIN_DIVSD,
16113 IX86_BUILTIN_MULPD,
16114 IX86_BUILTIN_MULSD,
16115 IX86_BUILTIN_SUBPD,
16116 IX86_BUILTIN_SUBSD,
16118 IX86_BUILTIN_CMPEQPD,
16119 IX86_BUILTIN_CMPLTPD,
16120 IX86_BUILTIN_CMPLEPD,
16121 IX86_BUILTIN_CMPGTPD,
16122 IX86_BUILTIN_CMPGEPD,
16123 IX86_BUILTIN_CMPNEQPD,
16124 IX86_BUILTIN_CMPNLTPD,
16125 IX86_BUILTIN_CMPNLEPD,
16126 IX86_BUILTIN_CMPNGTPD,
16127 IX86_BUILTIN_CMPNGEPD,
16128 IX86_BUILTIN_CMPORDPD,
16129 IX86_BUILTIN_CMPUNORDPD,
16130 IX86_BUILTIN_CMPEQSD,
16131 IX86_BUILTIN_CMPLTSD,
16132 IX86_BUILTIN_CMPLESD,
16133 IX86_BUILTIN_CMPNEQSD,
16134 IX86_BUILTIN_CMPNLTSD,
16135 IX86_BUILTIN_CMPNLESD,
16136 IX86_BUILTIN_CMPORDSD,
16137 IX86_BUILTIN_CMPUNORDSD,
16139 IX86_BUILTIN_COMIEQSD,
16140 IX86_BUILTIN_COMILTSD,
16141 IX86_BUILTIN_COMILESD,
16142 IX86_BUILTIN_COMIGTSD,
16143 IX86_BUILTIN_COMIGESD,
16144 IX86_BUILTIN_COMINEQSD,
16145 IX86_BUILTIN_UCOMIEQSD,
16146 IX86_BUILTIN_UCOMILTSD,
16147 IX86_BUILTIN_UCOMILESD,
16148 IX86_BUILTIN_UCOMIGTSD,
16149 IX86_BUILTIN_UCOMIGESD,
16150 IX86_BUILTIN_UCOMINEQSD,
16152 IX86_BUILTIN_MAXPD,
16153 IX86_BUILTIN_MAXSD,
16154 IX86_BUILTIN_MINPD,
16155 IX86_BUILTIN_MINSD,
16157 IX86_BUILTIN_ANDPD,
16158 IX86_BUILTIN_ANDNPD,
16160 IX86_BUILTIN_XORPD,
16162 IX86_BUILTIN_SQRTPD,
16163 IX86_BUILTIN_SQRTSD,
16165 IX86_BUILTIN_UNPCKHPD,
16166 IX86_BUILTIN_UNPCKLPD,
16168 IX86_BUILTIN_SHUFPD,
16170 IX86_BUILTIN_LOADUPD,
16171 IX86_BUILTIN_STOREUPD,
16172 IX86_BUILTIN_MOVSD,
16174 IX86_BUILTIN_LOADHPD,
16175 IX86_BUILTIN_LOADLPD,
16177 IX86_BUILTIN_CVTDQ2PD,
16178 IX86_BUILTIN_CVTDQ2PS,
16180 IX86_BUILTIN_CVTPD2DQ,
16181 IX86_BUILTIN_CVTPD2PI,
16182 IX86_BUILTIN_CVTPD2PS,
16183 IX86_BUILTIN_CVTTPD2DQ,
16184 IX86_BUILTIN_CVTTPD2PI,
16186 IX86_BUILTIN_CVTPI2PD,
16187 IX86_BUILTIN_CVTSI2SD,
16188 IX86_BUILTIN_CVTSI642SD,
16190 IX86_BUILTIN_CVTSD2SI,
16191 IX86_BUILTIN_CVTSD2SI64,
16192 IX86_BUILTIN_CVTSD2SS,
16193 IX86_BUILTIN_CVTSS2SD,
16194 IX86_BUILTIN_CVTTSD2SI,
16195 IX86_BUILTIN_CVTTSD2SI64,
16197 IX86_BUILTIN_CVTPS2DQ,
16198 IX86_BUILTIN_CVTPS2PD,
16199 IX86_BUILTIN_CVTTPS2DQ,
16201 IX86_BUILTIN_MOVNTI,
16202 IX86_BUILTIN_MOVNTPD,
16203 IX86_BUILTIN_MOVNTDQ,
16206 IX86_BUILTIN_MASKMOVDQU,
16207 IX86_BUILTIN_MOVMSKPD,
16208 IX86_BUILTIN_PMOVMSKB128,
16210 IX86_BUILTIN_PACKSSWB128,
16211 IX86_BUILTIN_PACKSSDW128,
16212 IX86_BUILTIN_PACKUSWB128,
16214 IX86_BUILTIN_PADDB128,
16215 IX86_BUILTIN_PADDW128,
16216 IX86_BUILTIN_PADDD128,
16217 IX86_BUILTIN_PADDQ128,
16218 IX86_BUILTIN_PADDSB128,
16219 IX86_BUILTIN_PADDSW128,
16220 IX86_BUILTIN_PADDUSB128,
16221 IX86_BUILTIN_PADDUSW128,
16222 IX86_BUILTIN_PSUBB128,
16223 IX86_BUILTIN_PSUBW128,
16224 IX86_BUILTIN_PSUBD128,
16225 IX86_BUILTIN_PSUBQ128,
16226 IX86_BUILTIN_PSUBSB128,
16227 IX86_BUILTIN_PSUBSW128,
16228 IX86_BUILTIN_PSUBUSB128,
16229 IX86_BUILTIN_PSUBUSW128,
16231 IX86_BUILTIN_PAND128,
16232 IX86_BUILTIN_PANDN128,
16233 IX86_BUILTIN_POR128,
16234 IX86_BUILTIN_PXOR128,
16236 IX86_BUILTIN_PAVGB128,
16237 IX86_BUILTIN_PAVGW128,
16239 IX86_BUILTIN_PCMPEQB128,
16240 IX86_BUILTIN_PCMPEQW128,
16241 IX86_BUILTIN_PCMPEQD128,
16242 IX86_BUILTIN_PCMPGTB128,
16243 IX86_BUILTIN_PCMPGTW128,
16244 IX86_BUILTIN_PCMPGTD128,
16246 IX86_BUILTIN_PMADDWD128,
16248 IX86_BUILTIN_PMAXSW128,
16249 IX86_BUILTIN_PMAXUB128,
16250 IX86_BUILTIN_PMINSW128,
16251 IX86_BUILTIN_PMINUB128,
16253 IX86_BUILTIN_PMULUDQ,
16254 IX86_BUILTIN_PMULUDQ128,
16255 IX86_BUILTIN_PMULHUW128,
16256 IX86_BUILTIN_PMULHW128,
16257 IX86_BUILTIN_PMULLW128,
16259 IX86_BUILTIN_PSADBW128,
16260 IX86_BUILTIN_PSHUFHW,
16261 IX86_BUILTIN_PSHUFLW,
16262 IX86_BUILTIN_PSHUFD,
16264 IX86_BUILTIN_PSLLDQI128,
16265 IX86_BUILTIN_PSLLWI128,
16266 IX86_BUILTIN_PSLLDI128,
16267 IX86_BUILTIN_PSLLQI128,
16268 IX86_BUILTIN_PSRAWI128,
16269 IX86_BUILTIN_PSRADI128,
16270 IX86_BUILTIN_PSRLDQI128,
16271 IX86_BUILTIN_PSRLWI128,
16272 IX86_BUILTIN_PSRLDI128,
16273 IX86_BUILTIN_PSRLQI128,
16275 IX86_BUILTIN_PSLLDQ128,
16276 IX86_BUILTIN_PSLLW128,
16277 IX86_BUILTIN_PSLLD128,
16278 IX86_BUILTIN_PSLLQ128,
16279 IX86_BUILTIN_PSRAW128,
16280 IX86_BUILTIN_PSRAD128,
16281 IX86_BUILTIN_PSRLW128,
16282 IX86_BUILTIN_PSRLD128,
16283 IX86_BUILTIN_PSRLQ128,
16285 IX86_BUILTIN_PUNPCKHBW128,
16286 IX86_BUILTIN_PUNPCKHWD128,
16287 IX86_BUILTIN_PUNPCKHDQ128,
16288 IX86_BUILTIN_PUNPCKHQDQ128,
16289 IX86_BUILTIN_PUNPCKLBW128,
16290 IX86_BUILTIN_PUNPCKLWD128,
16291 IX86_BUILTIN_PUNPCKLDQ128,
16292 IX86_BUILTIN_PUNPCKLQDQ128,
16294 IX86_BUILTIN_CLFLUSH,
16295 IX86_BUILTIN_MFENCE,
16296 IX86_BUILTIN_LFENCE,
16298 /* Prescott New Instructions. */
16299 IX86_BUILTIN_ADDSUBPS,
16300 IX86_BUILTIN_HADDPS,
16301 IX86_BUILTIN_HSUBPS,
16302 IX86_BUILTIN_MOVSHDUP,
16303 IX86_BUILTIN_MOVSLDUP,
16304 IX86_BUILTIN_ADDSUBPD,
16305 IX86_BUILTIN_HADDPD,
16306 IX86_BUILTIN_HSUBPD,
16307 IX86_BUILTIN_LDDQU,
16309 IX86_BUILTIN_MONITOR,
16310 IX86_BUILTIN_MWAIT,
16313 IX86_BUILTIN_PHADDW,
16314 IX86_BUILTIN_PHADDD,
16315 IX86_BUILTIN_PHADDSW,
16316 IX86_BUILTIN_PHSUBW,
16317 IX86_BUILTIN_PHSUBD,
16318 IX86_BUILTIN_PHSUBSW,
16319 IX86_BUILTIN_PMADDUBSW,
16320 IX86_BUILTIN_PMULHRSW,
16321 IX86_BUILTIN_PSHUFB,
16322 IX86_BUILTIN_PSIGNB,
16323 IX86_BUILTIN_PSIGNW,
16324 IX86_BUILTIN_PSIGND,
16325 IX86_BUILTIN_PALIGNR,
16326 IX86_BUILTIN_PABSB,
16327 IX86_BUILTIN_PABSW,
16328 IX86_BUILTIN_PABSD,
16330 IX86_BUILTIN_PHADDW128,
16331 IX86_BUILTIN_PHADDD128,
16332 IX86_BUILTIN_PHADDSW128,
16333 IX86_BUILTIN_PHSUBW128,
16334 IX86_BUILTIN_PHSUBD128,
16335 IX86_BUILTIN_PHSUBSW128,
16336 IX86_BUILTIN_PMADDUBSW128,
16337 IX86_BUILTIN_PMULHRSW128,
16338 IX86_BUILTIN_PSHUFB128,
16339 IX86_BUILTIN_PSIGNB128,
16340 IX86_BUILTIN_PSIGNW128,
16341 IX86_BUILTIN_PSIGND128,
16342 IX86_BUILTIN_PALIGNR128,
16343 IX86_BUILTIN_PABSB128,
16344 IX86_BUILTIN_PABSW128,
16345 IX86_BUILTIN_PABSD128,
16347 /* AMDFAM10 - SSE4A New Instructions. */
16348 IX86_BUILTIN_MOVNTSD,
16349 IX86_BUILTIN_MOVNTSS,
16350 IX86_BUILTIN_EXTRQI,
16351 IX86_BUILTIN_EXTRQ,
16352 IX86_BUILTIN_INSERTQI,
16353 IX86_BUILTIN_INSERTQ,
16355 IX86_BUILTIN_VEC_INIT_V2SI,
16356 IX86_BUILTIN_VEC_INIT_V4HI,
16357 IX86_BUILTIN_VEC_INIT_V8QI,
16358 IX86_BUILTIN_VEC_EXT_V2DF,
16359 IX86_BUILTIN_VEC_EXT_V2DI,
16360 IX86_BUILTIN_VEC_EXT_V4SF,
16361 IX86_BUILTIN_VEC_EXT_V4SI,
16362 IX86_BUILTIN_VEC_EXT_V8HI,
16363 IX86_BUILTIN_VEC_EXT_V2SI,
16364 IX86_BUILTIN_VEC_EXT_V4HI,
16365 IX86_BUILTIN_VEC_SET_V8HI,
16366 IX86_BUILTIN_VEC_SET_V4HI,
16371 /* Table for the ix86 builtin decls. */
16372 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
16374 /* Add an ix86 target builtin function with CODE, NAME and TYPE. Do so
16375 only if the target_flags include one of MASK. Store the function decl
16376 in the ix86_builtins array.
16377 Return the function decl, or NULL_TREE if the builtin was not added. */
16380 def_builtin (int mask, const char *name, tree type, enum ix86_builtins code)
16382 tree decl = NULL_TREE;
16384 if (mask & target_flags
16385 && (!(mask & MASK_64BIT) || TARGET_64BIT))
16387 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
16389 ix86_builtins[(int) code] = decl;
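/* Typical use (a hypothetical call shown for illustration; the real
   registrations are table-driven, in ix86_init_mmx_sse_builtins below):

     def_builtin (MASK_SSE, "__builtin_ia32_addps",
		  v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ADDPS);

   With -msse in target_flags this records the decl so the expander can
   look it up by IX86_BUILTIN_ADDPS; otherwise it returns NULL_TREE.  */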
16395 /* Like def_builtin, but also marks the function decl "const". */
16398 def_builtin_const (int mask, const char *name, tree type,
16399 enum ix86_builtins code)
16401 tree decl = def_builtin (mask, name, type, code);
16403 if (decl)
  TREE_READONLY (decl) = 1;
16407 /* Bits for builtin_description.flag. */
16409 /* Set when we don't support the comparison natively, and should
16410 swap the comparison operands in order to support it. */
16411 #define BUILTIN_DESC_SWAP_OPERANDS 1
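/* For example, in bdesc_2arg below __builtin_ia32_cmpgtps is listed with
   comparison LT plus BUILTIN_DESC_SWAP_OPERANDS: only the LT/LE mask-compare
   forms exist natively, so a > b is emitted as b < a.  */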
16413 struct builtin_description
16415 const unsigned int mask;
16416 const enum insn_code icode;
16417 const char *const name;
16418 const enum ix86_builtins code;
16419 const enum rtx_code comparison;
16420 const unsigned int flag;
16423 static const struct builtin_description bdesc_comi[] =
16425 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
16426 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
16427 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
16428 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
16429 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
16430 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
16431 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
16432 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
16433 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
16434 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
16435 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
16436 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
16437 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
16438 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
16439 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
16440 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
16441 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
16442 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
16443 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
16444 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
16445 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
16446 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
16447 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
16448 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
16451 static const struct builtin_description bdesc_2arg[] =
16454 { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
16455 { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
16456 { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
16457 { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
16458 { MASK_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
16459 { MASK_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
16460 { MASK_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
16461 { MASK_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
16463 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
16464 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
16465 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
16466 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT,
16467 BUILTIN_DESC_SWAP_OPERANDS },
16468 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE,
16469 BUILTIN_DESC_SWAP_OPERANDS },
16470 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
16471 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, 0 },
16472 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, 0 },
16473 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, 0 },
16474 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE,
16475 BUILTIN_DESC_SWAP_OPERANDS },
16476 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT,
16477 BUILTIN_DESC_SWAP_OPERANDS },
16478 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, 0 },
16479 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
16480 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
16481 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
16482 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
16483 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, 0 },
16484 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, 0 },
16485 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, 0 },
16486 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE,
16487 BUILTIN_DESC_SWAP_OPERANDS },
16488 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT,
16489 BUILTIN_DESC_SWAP_OPERANDS },
16490 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
16492 { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
16493 { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
16494 { MASK_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
16495 { MASK_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
16497 { MASK_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
16498 { MASK_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
16499 { MASK_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
16500 { MASK_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
16502 { MASK_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
16503 { MASK_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
16504 { MASK_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
16505 { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
16506 { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
16509 { MASK_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
16510 { MASK_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
16511 { MASK_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
16512 { MASK_SSE2, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, 0, 0 },
16513 { MASK_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
16514 { MASK_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
16515 { MASK_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
16516 { MASK_SSE2, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, 0, 0 },
16518 { MASK_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
16519 { MASK_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
16520 { MASK_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
16521 { MASK_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
16522 { MASK_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
16523 { MASK_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
16524 { MASK_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
16525 { MASK_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
16527 { MASK_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
16528 { MASK_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
16529 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
16531 { MASK_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
16532 { MASK_MMX, CODE_FOR_mmx_nandv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
16533 { MASK_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
16534 { MASK_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
16536 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
16537 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
16539 { MASK_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
16540 { MASK_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
16541 { MASK_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
16542 { MASK_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
16543 { MASK_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
16544 { MASK_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
16546 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
16547 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
16548 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
16549 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
16551 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
16552 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
16553 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
16554 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
16555 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
16556 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
16559 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
16560 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
16561 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
16563 { MASK_SSE, CODE_FOR_sse_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
16564 { MASK_SSE, CODE_FOR_sse_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
16565 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 },
16567 { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
16568 { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
16569 { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
16570 { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
16571 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
16572 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
16574 { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
16575 { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
16576 { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
16577 { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
16578 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
16579 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
16581 { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
16582 { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
16583 { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
16584 { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
16586 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
16587 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
16590 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
16591 { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
16592 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
16593 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
16594 { MASK_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
16595 { MASK_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
16596 { MASK_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
16597 { MASK_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
16599 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
16600 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
16601 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
16602 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT,
16603 BUILTIN_DESC_SWAP_OPERANDS },
16604 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE,
16605 BUILTIN_DESC_SWAP_OPERANDS },
16606 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
16607 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, 0 },
16608 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, 0 },
16609 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, 0 },
16610 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE,
16611 BUILTIN_DESC_SWAP_OPERANDS },
16612 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT,
16613 BUILTIN_DESC_SWAP_OPERANDS },
16614 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, 0 },
16615 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
16616 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
16617 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
16618 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
16619 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, 0 },
16620 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, 0 },
16621 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, 0 },
16622 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, 0 },
16624 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
16625 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
16626 { MASK_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
16627 { MASK_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
16629 { MASK_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
16630 { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
16631 { MASK_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
16632 { MASK_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
16634 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
16635 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
16636 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
16639 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
16640 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
16641 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
16642 { MASK_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
16643 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
16644 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
16645 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
16646 { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
16648 { MASK_MMX, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
16649 { MASK_MMX, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
16650 { MASK_MMX, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
16651 { MASK_MMX, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
16652 { MASK_MMX, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
16653 { MASK_MMX, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
16654 { MASK_MMX, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
16655 { MASK_MMX, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
16657 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
16658 { MASK_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
16660 { MASK_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
16661 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
16662 { MASK_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
16663 { MASK_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
16665 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
16666 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
16668 { MASK_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
16669 { MASK_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
16670 { MASK_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
16671 { MASK_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
16672 { MASK_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
16673 { MASK_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
16675 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
16676 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
16677 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
16678 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
16680 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
16681 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
16682 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
16683 { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
16684 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
16685 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
16686 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
16687 { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },
16689 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
16690 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
16691 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
16693 { MASK_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
16694 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
16696 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, 0, IX86_BUILTIN_PMULUDQ, 0, 0 },
16697 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, 0, IX86_BUILTIN_PMULUDQ128, 0, 0 },
16699 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
16700 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
16701 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
16703 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
16704 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
16705 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
16707 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
16708 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
16710 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
16712 { MASK_SSE2, CODE_FOR_sse2_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
16713 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 },
16714 { MASK_SSE2, CODE_FOR_sse2_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
16715 { MASK_SSE2, CODE_FOR_sse2_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 },
16718 { MASK_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, 0, 0 },
16719 { MASK_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, 0, 0 },
16720 { MASK_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, 0, 0 },
16721 { MASK_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, 0, 0 },
16722 { MASK_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, 0, 0 },
16723 { MASK_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, 0, 0 },
16726 { MASK_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, 0, 0 },
16727 { MASK_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, 0, 0 },
16728 { MASK_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, 0, 0 },
16729 { MASK_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, 0, 0 },
16730 { MASK_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, 0, 0 },
16731 { MASK_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, 0, 0 },
16732 { MASK_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, 0, 0 },
16733 { MASK_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, 0, 0 },
16734 { MASK_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, 0, 0 },
16735 { MASK_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, 0, 0 },
16736 { MASK_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, 0, 0 },
16737 { MASK_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, 0, 0 },
16738 { MASK_SSSE3, CODE_FOR_ssse3_pmaddubswv8hi3, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, 0, 0 },
16739 { MASK_SSSE3, CODE_FOR_ssse3_pmaddubswv4hi3, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, 0, 0 },
16740 { MASK_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, 0, 0 },
16741 { MASK_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, 0, 0 },
16742 { MASK_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, 0, 0 },
16743 { MASK_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, 0, 0 },
16744 { MASK_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, 0, 0 },
16745 { MASK_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, 0, 0 },
16746 { MASK_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, 0, 0 },
16747 { MASK_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, 0, 0 },
16748 { MASK_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, 0, 0 },
16749 { MASK_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, 0, 0 }
16752 static const struct builtin_description bdesc_1arg[] =
16754 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
16755 { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
16757 { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
16758 { MASK_SSE, CODE_FOR_sse_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
16759 { MASK_SSE, CODE_FOR_sse_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
16761 { MASK_SSE, CODE_FOR_sse_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
16762 { MASK_SSE, CODE_FOR_sse_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
16763 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 },
16764 { MASK_SSE, CODE_FOR_sse_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
16765 { MASK_SSE, CODE_FOR_sse_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
16766 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 },
16768 { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
16769 { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
16771 { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
16773 { MASK_SSE2, CODE_FOR_sse2_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
16774 { MASK_SSE2, CODE_FOR_sse2_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
16776 { MASK_SSE2, CODE_FOR_sse2_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
16777 { MASK_SSE2, CODE_FOR_sse2_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
16778 { MASK_SSE2, CODE_FOR_sse2_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
16779 { MASK_SSE2, CODE_FOR_sse2_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
16780 { MASK_SSE2, CODE_FOR_sse2_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
16782 { MASK_SSE2, CODE_FOR_sse2_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
16784 { MASK_SSE2, CODE_FOR_sse2_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
16785 { MASK_SSE2, CODE_FOR_sse2_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
16786 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 },
16787 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 },
16789 { MASK_SSE2, CODE_FOR_sse2_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
16790 { MASK_SSE2, CODE_FOR_sse2_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
16791 { MASK_SSE2, CODE_FOR_sse2_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },
16794 { MASK_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, 0, 0 },
16795 { MASK_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, 0, 0 },
16798 { MASK_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, 0, 0 },
16799 { MASK_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, 0, 0 },
16800 { MASK_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, 0, 0 },
16801 { MASK_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, 0, 0 },
16802 { MASK_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, 0, 0 },
16803 { MASK_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, 0, 0 },
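/* A sketch (assumed, matching the table-driven style of this file) of how
   the descriptor arrays get registered during target initialization:

     const struct builtin_description *d;
     size_t i;

     for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
       if (d->name)
	 def_builtin (d->mask, d->name, <ftype matching d->icode>, d->code);

   Entries with a zero name (the shift and cvt rows above) are registered
   separately under hand-picked prototypes.  */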
16806 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
16807 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
part of the machine description. */
16810 ix86_init_mmx_sse_builtins (void)
16812 const struct builtin_description * d;
16815 tree V16QI_type_node = build_vector_type_for_mode (char_type_node, V16QImode);
16816 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
16817 tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
16818 tree V2DI_type_node
16819 = build_vector_type_for_mode (long_long_integer_type_node, V2DImode);
16820 tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
16821 tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
16822 tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
16823 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
16824 tree V8QI_type_node = build_vector_type_for_mode (char_type_node, V8QImode);
16825 tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);
16827 tree pchar_type_node = build_pointer_type (char_type_node);
16828 tree pcchar_type_node = build_pointer_type (
16829 build_type_variant (char_type_node, 1, 0));
16830 tree pfloat_type_node = build_pointer_type (float_type_node);
16831 tree pcfloat_type_node = build_pointer_type (
16832 build_type_variant (float_type_node, 1, 0));
16833 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
16834 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
16835 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
16838 tree int_ftype_v4sf_v4sf
16839 = build_function_type_list (integer_type_node,
16840 V4SF_type_node, V4SF_type_node, NULL_TREE);
16841 tree v4si_ftype_v4sf_v4sf
16842 = build_function_type_list (V4SI_type_node,
16843 V4SF_type_node, V4SF_type_node, NULL_TREE);
16844 /* MMX/SSE/integer conversions. */
16845 tree int_ftype_v4sf
16846 = build_function_type_list (integer_type_node,
16847 V4SF_type_node, NULL_TREE);
16848 tree int64_ftype_v4sf
16849 = build_function_type_list (long_long_integer_type_node,
16850 V4SF_type_node, NULL_TREE);
16851 tree int_ftype_v8qi
16852 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
16853 tree v4sf_ftype_v4sf_int
16854 = build_function_type_list (V4SF_type_node,
16855 V4SF_type_node, integer_type_node, NULL_TREE);
16856 tree v4sf_ftype_v4sf_int64
16857 = build_function_type_list (V4SF_type_node,
16858 V4SF_type_node, long_long_integer_type_node,
16860 tree v4sf_ftype_v4sf_v2si
16861 = build_function_type_list (V4SF_type_node,
				V4SF_type_node, V2SI_type_node, NULL_TREE);

  /* Miscellaneous.  */
  tree v8qi_ftype_v4hi_v4hi
    = build_function_type_list (V8QI_type_node,
				V4HI_type_node, V4HI_type_node, NULL_TREE);
  tree v4hi_ftype_v2si_v2si
    = build_function_type_list (V4HI_type_node,
				V2SI_type_node, V2SI_type_node, NULL_TREE);
  tree v4sf_ftype_v4sf_v4sf_int
    = build_function_type_list (V4SF_type_node,
				V4SF_type_node, V4SF_type_node,
				integer_type_node, NULL_TREE);
  tree v2si_ftype_v4hi_v4hi
    = build_function_type_list (V2SI_type_node,
				V4HI_type_node, V4HI_type_node, NULL_TREE);
  tree v4hi_ftype_v4hi_int
    = build_function_type_list (V4HI_type_node,
				V4HI_type_node, integer_type_node, NULL_TREE);
  tree v4hi_ftype_v4hi_di
    = build_function_type_list (V4HI_type_node,
				V4HI_type_node, long_long_unsigned_type_node,
				NULL_TREE);
  tree v2si_ftype_v2si_di
    = build_function_type_list (V2SI_type_node,
				V2SI_type_node, long_long_unsigned_type_node,
				NULL_TREE);
  tree void_ftype_void
    = build_function_type (void_type_node, void_list_node);
  tree void_ftype_unsigned
    = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
  tree void_ftype_unsigned_unsigned
    = build_function_type_list (void_type_node, unsigned_type_node,
				unsigned_type_node, NULL_TREE);
  tree void_ftype_pcvoid_unsigned_unsigned
    = build_function_type_list (void_type_node, const_ptr_type_node,
				unsigned_type_node, unsigned_type_node,
				NULL_TREE);
  tree unsigned_ftype_void
    = build_function_type (unsigned_type_node, void_list_node);
  tree v2si_ftype_v4sf
    = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
  /* Loads/stores.  */
  tree void_ftype_v8qi_v8qi_pchar
    = build_function_type_list (void_type_node,
				V8QI_type_node, V8QI_type_node,
				pchar_type_node, NULL_TREE);
  tree v4sf_ftype_pcfloat
    = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
  /* @@@ the type is bogus */
  tree v4sf_ftype_v4sf_pv2si
    = build_function_type_list (V4SF_type_node,
				V4SF_type_node, pv2si_type_node, NULL_TREE);
  tree void_ftype_pv2si_v4sf
    = build_function_type_list (void_type_node,
				pv2si_type_node, V4SF_type_node, NULL_TREE);
  tree void_ftype_pfloat_v4sf
    = build_function_type_list (void_type_node,
				pfloat_type_node, V4SF_type_node, NULL_TREE);
  tree void_ftype_pdi_di
    = build_function_type_list (void_type_node,
				pdi_type_node, long_long_unsigned_type_node,
				NULL_TREE);
  tree void_ftype_pv2di_v2di
    = build_function_type_list (void_type_node,
				pv2di_type_node, V2DI_type_node, NULL_TREE);
  /* Normal vector unops.  */
  tree v4sf_ftype_v4sf
    = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
  tree v16qi_ftype_v16qi
    = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE);
  tree v8hi_ftype_v8hi
    = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE);
  tree v4si_ftype_v4si
    = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
  tree v8qi_ftype_v8qi
    = build_function_type_list (V8QI_type_node, V8QI_type_node, NULL_TREE);
  tree v4hi_ftype_v4hi
    = build_function_type_list (V4HI_type_node, V4HI_type_node, NULL_TREE);

  /* Normal vector binops.  */
  tree v4sf_ftype_v4sf_v4sf
    = build_function_type_list (V4SF_type_node,
				V4SF_type_node, V4SF_type_node, NULL_TREE);
  tree v8qi_ftype_v8qi_v8qi
    = build_function_type_list (V8QI_type_node,
				V8QI_type_node, V8QI_type_node, NULL_TREE);
  tree v4hi_ftype_v4hi_v4hi
    = build_function_type_list (V4HI_type_node,
				V4HI_type_node, V4HI_type_node, NULL_TREE);
  tree v2si_ftype_v2si_v2si
    = build_function_type_list (V2SI_type_node,
				V2SI_type_node, V2SI_type_node, NULL_TREE);
  tree di_ftype_di_di
    = build_function_type_list (long_long_unsigned_type_node,
				long_long_unsigned_type_node,
				long_long_unsigned_type_node, NULL_TREE);

  tree di_ftype_di_di_int
    = build_function_type_list (long_long_unsigned_type_node,
				long_long_unsigned_type_node,
				long_long_unsigned_type_node,
				integer_type_node, NULL_TREE);

  tree v2si_ftype_v2sf
    = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
  tree v2sf_ftype_v2si
    = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
  tree v2si_ftype_v2si
    = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
  tree v2sf_ftype_v2sf
    = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
  tree v2sf_ftype_v2sf_v2sf
    = build_function_type_list (V2SF_type_node,
				V2SF_type_node, V2SF_type_node, NULL_TREE);
  tree v2si_ftype_v2sf_v2sf
    = build_function_type_list (V2SI_type_node,
				V2SF_type_node, V2SF_type_node, NULL_TREE);
  tree pint_type_node = build_pointer_type (integer_type_node);
  tree pdouble_type_node = build_pointer_type (double_type_node);
  tree pcdouble_type_node = build_pointer_type (
				build_type_variant (double_type_node, 1, 0));
  tree int_ftype_v2df_v2df
    = build_function_type_list (integer_type_node,
				V2DF_type_node, V2DF_type_node, NULL_TREE);

  tree void_ftype_pcvoid
    = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
  tree v4sf_ftype_v4si
    = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
  tree v4si_ftype_v4sf
    = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
  tree v2df_ftype_v4si
    = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
  tree v4si_ftype_v2df
    = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
  tree v2si_ftype_v2df
    = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
  tree v4sf_ftype_v2df
    = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
  tree v2df_ftype_v2si
    = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
  tree v2df_ftype_v4sf
    = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
  tree int_ftype_v2df
    = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
  tree int64_ftype_v2df
    = build_function_type_list (long_long_integer_type_node,
				V2DF_type_node, NULL_TREE);
  tree v2df_ftype_v2df_int
    = build_function_type_list (V2DF_type_node,
				V2DF_type_node, integer_type_node, NULL_TREE);
  tree v2df_ftype_v2df_int64
    = build_function_type_list (V2DF_type_node,
				V2DF_type_node, long_long_integer_type_node,
				NULL_TREE);
  tree v4sf_ftype_v4sf_v2df
    = build_function_type_list (V4SF_type_node,
				V4SF_type_node, V2DF_type_node, NULL_TREE);
  tree v2df_ftype_v2df_v4sf
    = build_function_type_list (V2DF_type_node,
				V2DF_type_node, V4SF_type_node, NULL_TREE);
  tree v2df_ftype_v2df_v2df_int
    = build_function_type_list (V2DF_type_node,
				V2DF_type_node, V2DF_type_node,
				integer_type_node, NULL_TREE);
  tree v2df_ftype_v2df_pcdouble
    = build_function_type_list (V2DF_type_node,
				V2DF_type_node, pcdouble_type_node, NULL_TREE);
  tree void_ftype_pdouble_v2df
    = build_function_type_list (void_type_node,
				pdouble_type_node, V2DF_type_node, NULL_TREE);
  tree void_ftype_pint_int
    = build_function_type_list (void_type_node,
				pint_type_node, integer_type_node, NULL_TREE);
  tree void_ftype_v16qi_v16qi_pchar
    = build_function_type_list (void_type_node,
				V16QI_type_node, V16QI_type_node,
				pchar_type_node, NULL_TREE);
  tree v2df_ftype_pcdouble
    = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
  tree v2df_ftype_v2df_v2df
    = build_function_type_list (V2DF_type_node,
				V2DF_type_node, V2DF_type_node, NULL_TREE);
  tree v16qi_ftype_v16qi_v16qi
    = build_function_type_list (V16QI_type_node,
				V16QI_type_node, V16QI_type_node, NULL_TREE);
  tree v8hi_ftype_v8hi_v8hi
    = build_function_type_list (V8HI_type_node,
				V8HI_type_node, V8HI_type_node, NULL_TREE);
  tree v4si_ftype_v4si_v4si
    = build_function_type_list (V4SI_type_node,
				V4SI_type_node, V4SI_type_node, NULL_TREE);
  tree v2di_ftype_v2di_v2di
    = build_function_type_list (V2DI_type_node,
				V2DI_type_node, V2DI_type_node, NULL_TREE);
  tree v2di_ftype_v2df_v2df
    = build_function_type_list (V2DI_type_node,
				V2DF_type_node, V2DF_type_node, NULL_TREE);
  tree v2df_ftype_v2df
    = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
  tree v2di_ftype_v2di_int
    = build_function_type_list (V2DI_type_node,
				V2DI_type_node, integer_type_node, NULL_TREE);
  tree v2di_ftype_v2di_v2di_int
    = build_function_type_list (V2DI_type_node, V2DI_type_node,
				V2DI_type_node, integer_type_node, NULL_TREE);
  tree v4si_ftype_v4si_int
    = build_function_type_list (V4SI_type_node,
				V4SI_type_node, integer_type_node, NULL_TREE);
  tree v8hi_ftype_v8hi_int
    = build_function_type_list (V8HI_type_node,
				V8HI_type_node, integer_type_node, NULL_TREE);
  tree v4si_ftype_v8hi_v8hi
    = build_function_type_list (V4SI_type_node,
				V8HI_type_node, V8HI_type_node, NULL_TREE);
  tree di_ftype_v8qi_v8qi
    = build_function_type_list (long_long_unsigned_type_node,
				V8QI_type_node, V8QI_type_node, NULL_TREE);
  tree di_ftype_v2si_v2si
    = build_function_type_list (long_long_unsigned_type_node,
				V2SI_type_node, V2SI_type_node, NULL_TREE);
  tree v2di_ftype_v16qi_v16qi
    = build_function_type_list (V2DI_type_node,
				V16QI_type_node, V16QI_type_node, NULL_TREE);
  tree v2di_ftype_v4si_v4si
    = build_function_type_list (V2DI_type_node,
				V4SI_type_node, V4SI_type_node, NULL_TREE);
  tree int_ftype_v16qi
    = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
  tree v16qi_ftype_pcchar
    = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
  tree void_ftype_pchar_v16qi
    = build_function_type_list (void_type_node,
				pchar_type_node, V16QI_type_node, NULL_TREE);

  tree v2di_ftype_v2di_unsigned_unsigned
    = build_function_type_list (V2DI_type_node, V2DI_type_node,
				unsigned_type_node, unsigned_type_node,
				NULL_TREE);
  tree v2di_ftype_v2di_v2di_unsigned_unsigned
    = build_function_type_list (V2DI_type_node, V2DI_type_node, V2DI_type_node,
				unsigned_type_node, unsigned_type_node,
				NULL_TREE);
  tree v2di_ftype_v2di_v16qi
    = build_function_type_list (V2DI_type_node, V2DI_type_node, V16QI_type_node,
				NULL_TREE);
  tree float80_type;
  tree float128_type;
  tree ftype;

  /* The __float80 type.  */
  if (TYPE_MODE (long_double_type_node) == XFmode)
    (*lang_hooks.types.register_builtin_type) (long_double_type_node,
					       "__float80");
  else
    {
      /* The __float80 type.  */
      float80_type = make_node (REAL_TYPE);
      TYPE_PRECISION (float80_type) = 80;
      layout_type (float80_type);
      (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
    }

  if (TARGET_64BIT)
    {
      float128_type = make_node (REAL_TYPE);
      TYPE_PRECISION (float128_type) = 128;
      layout_type (float128_type);
      (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
    }
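  /* Illustrative only: once registered, these names are visible to user
     code, so a 64-bit compilation could plausibly declare
	 __float80  e = 1.0;
	 __float128 q = 2.0;
     relying on the registrations above.  */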
  /* Add all builtins that are more or less simple operations on two
     operands.  */
  for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
    {
      /* Use one of the operands; the target can have a different mode for
	 mask-generating compares.  */
      enum machine_mode mode;
      tree type;

      if (d->name == 0)
	continue;
      mode = insn_data[d->icode].operand[1].mode;

      switch (mode)
	{
	case V16QImode:
	  type = v16qi_ftype_v16qi_v16qi;
	  break;
	case V8HImode:
	  type = v8hi_ftype_v8hi_v8hi;
	  break;
	case V4SImode:
	  type = v4si_ftype_v4si_v4si;
	  break;
	case V2DImode:
	  type = v2di_ftype_v2di_v2di;
	  break;
	case V2DFmode:
	  type = v2df_ftype_v2df_v2df;
	  break;
	case V4SFmode:
	  type = v4sf_ftype_v4sf_v4sf;
	  break;
	case V8QImode:
	  type = v8qi_ftype_v8qi_v8qi;
	  break;
	case V4HImode:
	  type = v4hi_ftype_v4hi_v4hi;
	  break;
	case V2SImode:
	  type = v2si_ftype_v2si_v2si;
	  break;
	case DImode:
	  type = di_ftype_di_di;
	  break;

	default:
	  gcc_unreachable ();
	}

      /* Override for comparisons.  */
      if (d->icode == CODE_FOR_sse_maskcmpv4sf3
	  || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3)
	type = v4si_ftype_v4sf_v4sf;

      if (d->icode == CODE_FOR_sse2_maskcmpv2df3
	  || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
	type = v2di_ftype_v2df_v2df;

      def_builtin (d->mask, d->name, type, d->code);
    }
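  /* For a typical bdesc_2arg entry the loop above boils down to a single
     call such as (a sketch, not a line copied from the table):

	 def_builtin (MASK_SSE, "__builtin_ia32_addps",
		      v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ADDPS);

     i.e. the operand mode of the insn pattern selects the function type.  */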
  /* Add all builtins that are more or less simple operations on 1 operand.  */
  for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
    {
      enum machine_mode mode;
      tree type;

      if (d->name == 0)
	continue;
      mode = insn_data[d->icode].operand[1].mode;

      switch (mode)
	{
	case V16QImode:
	  type = v16qi_ftype_v16qi;
	  break;
	case V8HImode:
	  type = v8hi_ftype_v8hi;
	  break;
	case V4SImode:
	  type = v4si_ftype_v4si;
	  break;
	case V2DFmode:
	  type = v2df_ftype_v2df;
	  break;
	case V4SFmode:
	  type = v4sf_ftype_v4sf;
	  break;
	case V8QImode:
	  type = v8qi_ftype_v8qi;
	  break;
	case V4HImode:
	  type = v4hi_ftype_v4hi;
	  break;
	case V2SImode:
	  type = v2si_ftype_v2si;
	  break;

	default:
	  gcc_unreachable ();
	}

      def_builtin (d->mask, d->name, type, d->code);
    }
  /* Add the remaining MMX insns with somewhat more complicated types.  */
  def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
  def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
  def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
  def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);

  def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
  def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
  def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);

  def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
  def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);

  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
  def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
  /* comi/ucomi insns.  */
  for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
    if (d->mask == MASK_SSE2)
      def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
    else
      def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
17265 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
17266 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
17267 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
17269 def_builtin (MASK_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
17270 def_builtin (MASK_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
17271 def_builtin_const (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
17272 def_builtin_const (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
17273 def_builtin_const (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
17274 def_builtin_const (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
17275 def_builtin_const (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
17276 def_builtin_const (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
17277 def_builtin_const (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
17278 def_builtin_const (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
17279 def_builtin_const (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);
17281 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
17283 def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
17284 def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
17286 def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
17287 def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
17288 def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
17289 def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
17291 def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
17292 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
17293 def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
17294 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
17296 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
17298 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
17300 def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
17301 def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
17302 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
17303 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
17304 def_builtin_const (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
17305 def_builtin_const (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
17307 def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
  /* Original 3DNow!  */
  def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);

  /* 3DNow! extension as used in the Athlon CPU.  */
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
17340 def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
17342 def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
17343 def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
17345 def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADHPD);
17346 def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADLPD);
17348 def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
17349 def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
17350 def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
17351 def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
17352 def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
17354 def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
17355 def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
17356 def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
17357 def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
17359 def_builtin_const (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
17360 def_builtin_const (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
17362 def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
17364 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
17365 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
17367 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
17368 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
17369 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
17370 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
17371 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
17373 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
17375 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
17376 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
17377 def_builtin_const (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
17378 def_builtin_const (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);
17380 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
17381 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
17382 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
17384 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
17385 def_builtin_const (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
17386 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
17387 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
17389 def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
17390 def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
17391 def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
17393 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
17394 def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
17396 def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq", di_ftype_v2si_v2si, IX86_BUILTIN_PMULUDQ);
17397 def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq128", v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULUDQ128);
17399 def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
17400 def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
17401 def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
17402 def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
17403 def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSLLW128);
17404 def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSLLD128);
17405 def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
17407 def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
17408 def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
17409 def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
17410 def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
17411 def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSRLW128);
17412 def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSRLD128);
17413 def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
17415 def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
17416 def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
17417 def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSRAW128);
17418 def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSRAD128);
17420 def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
  /* Prescott New Instructions.  */
  def_builtin (MASK_SSE3, "__builtin_ia32_monitor",
	       void_ftype_pcvoid_unsigned_unsigned,
	       IX86_BUILTIN_MONITOR);
  def_builtin (MASK_SSE3, "__builtin_ia32_mwait",
	       void_ftype_unsigned_unsigned,
	       IX86_BUILTIN_MWAIT);
  def_builtin (MASK_SSE3, "__builtin_ia32_lddqu",
	       v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);

  /* SSSE3.  */
  def_builtin (MASK_SSSE3, "__builtin_ia32_palignr128",
	       v2di_ftype_v2di_v2di_int, IX86_BUILTIN_PALIGNR128);
  def_builtin (MASK_SSSE3, "__builtin_ia32_palignr", di_ftype_di_di_int,
	       IX86_BUILTIN_PALIGNR);
  /* AMDFAM10 SSE4A New built-ins  */
  def_builtin (MASK_SSE4A, "__builtin_ia32_movntsd",
	       void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTSD);
  def_builtin (MASK_SSE4A, "__builtin_ia32_movntss",
	       void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTSS);
  def_builtin (MASK_SSE4A, "__builtin_ia32_extrqi",
	       v2di_ftype_v2di_unsigned_unsigned, IX86_BUILTIN_EXTRQI);
  def_builtin (MASK_SSE4A, "__builtin_ia32_extrq",
	       v2di_ftype_v2di_v16qi, IX86_BUILTIN_EXTRQ);
  def_builtin (MASK_SSE4A, "__builtin_ia32_insertqi",
	       v2di_ftype_v2di_v2di_unsigned_unsigned, IX86_BUILTIN_INSERTQI);
  def_builtin (MASK_SSE4A, "__builtin_ia32_insertq",
	       v2di_ftype_v2di_v2di, IX86_BUILTIN_INSERTQ);
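  /* Hedged usage sketch: given void_ftype_pdouble_v2df above, a direct
     call to the movntsd builtin would look roughly like
	 __builtin_ia32_movntsd (d, x);
     with D a double pointer and X a two-double vector; user code normally
     reaches these through the SSE4A intrinsics header instead.  */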
  /* Access to the vec_init patterns.  */
  ftype = build_function_type_list (V2SI_type_node, integer_type_node,
				    integer_type_node, NULL_TREE);
  def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v2si",
	       ftype, IX86_BUILTIN_VEC_INIT_V2SI);

  ftype = build_function_type_list (V4HI_type_node, short_integer_type_node,
				    short_integer_type_node,
				    short_integer_type_node,
				    short_integer_type_node, NULL_TREE);
  def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v4hi",
	       ftype, IX86_BUILTIN_VEC_INIT_V4HI);

  ftype = build_function_type_list (V8QI_type_node, char_type_node,
				    char_type_node, char_type_node,
				    char_type_node, char_type_node,
				    char_type_node, char_type_node,
				    char_type_node, NULL_TREE);
  def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v8qi",
	       ftype, IX86_BUILTIN_VEC_INIT_V8QI);
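  /* Sketch of a direct use (normally hidden behind the _mm_set_* wrappers
     in mmintrin.h): __builtin_ia32_vec_init_v4hi (1, 2, 3, 4) builds a
     four-element V4HI vector from scalar arguments.  */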
  /* Access to the vec_extract patterns.  */
  ftype = build_function_type_list (double_type_node, V2DF_type_node,
				    integer_type_node, NULL_TREE);
  def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v2df",
	       ftype, IX86_BUILTIN_VEC_EXT_V2DF);

  ftype = build_function_type_list (long_long_integer_type_node,
				    V2DI_type_node, integer_type_node,
				    NULL_TREE);
  def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v2di",
	       ftype, IX86_BUILTIN_VEC_EXT_V2DI);

  ftype = build_function_type_list (float_type_node, V4SF_type_node,
				    integer_type_node, NULL_TREE);
  def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v4sf",
	       ftype, IX86_BUILTIN_VEC_EXT_V4SF);

  ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
				    integer_type_node, NULL_TREE);
  def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v4si",
	       ftype, IX86_BUILTIN_VEC_EXT_V4SI);

  ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
				    integer_type_node, NULL_TREE);
  def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v8hi",
	       ftype, IX86_BUILTIN_VEC_EXT_V8HI);

  ftype = build_function_type_list (intHI_type_node, V4HI_type_node,
				    integer_type_node, NULL_TREE);
  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_ext_v4hi",
	       ftype, IX86_BUILTIN_VEC_EXT_V4HI);

  ftype = build_function_type_list (intSI_type_node, V2SI_type_node,
				    integer_type_node, NULL_TREE);
  def_builtin (MASK_MMX, "__builtin_ia32_vec_ext_v2si",
	       ftype, IX86_BUILTIN_VEC_EXT_V2SI);
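  /* Sketch: __builtin_ia32_vec_ext_v4sf (v, 0) yields element 0 of a V4SF
     vector; the element number must be a constant, as enforced by
     get_element_number below.  */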
  /* Access to the vec_set patterns.  */
  ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
				    intHI_type_node,
				    integer_type_node, NULL_TREE);
  def_builtin (MASK_SSE, "__builtin_ia32_vec_set_v8hi",
	       ftype, IX86_BUILTIN_VEC_SET_V8HI);

  ftype = build_function_type_list (V4HI_type_node, V4HI_type_node,
				    intHI_type_node,
				    integer_type_node, NULL_TREE);
  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_set_v4hi",
	       ftype, IX86_BUILTIN_VEC_SET_V4HI);
}
static void
ix86_init_builtins (void)
{
  if (TARGET_MMX)
    ix86_init_mmx_sse_builtins ();
}
/* Errors in the source file can cause expand_expr to return const0_rtx
   where we expect a vector.  To avoid crashing, use one of the vector
   clear instructions.  */
static rtx
safe_vector_operand (rtx x, enum machine_mode mode)
{
  if (x == const0_rtx)
    x = CONST0_RTX (mode);
  return x;
}
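/* E.g. a caller whose operand folded to const0_rtx after a front-end error
   gets CONST0_RTX (V4SFmode) instead, which the vector predicates accept.  */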
/* Subroutine of ix86_expand_builtin to take care of binop insns.  */

static rtx
ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
{
  rtx pat, xops[3];
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[icode].operand[2].mode;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  if (optimize || !target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  if (GET_MODE (op1) == SImode && mode1 == TImode)
    {
      rtx x = gen_reg_rtx (V4SImode);
      emit_insn (gen_sse2_loadd (x, op1));
      op1 = gen_lowpart (TImode, x);
    }

  /* The insn must want input operands in the same modes as the
     result.  */
  gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
	      && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode));

  if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  /* ??? Using ix86_fixup_binary_operands is problematic when
     we've got mismatched modes.  Fake it.  */

  xops[0] = target;
  xops[1] = op0;
  xops[2] = op1;

  if (tmode == mode0 && tmode == mode1)
    {
      target = ix86_fixup_binary_operands (UNKNOWN, tmode, xops);
      op0 = xops[1];
      op1 = xops[2];
    }
  else if (optimize || !ix86_binary_operator_ok (UNKNOWN, tmode, xops))
    {
      op0 = force_reg (mode0, op0);
      op1 = force_reg (mode1, op1);
      target = gen_reg_rtx (tmode);
    }

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
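/* A typical call, as made from the bdesc_2arg walk at the bottom of
   ix86_expand_builtin (sketch):

       return ix86_expand_binop_builtin (CODE_FOR_addv4sf3, exp, target);  */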
/* Subroutine of ix86_expand_builtin to take care of stores.  */

static rtx
ix86_expand_store_builtin (enum insn_code icode, tree exp)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  enum machine_mode mode0 = insn_data[icode].operand[0].mode;
  enum machine_mode mode1 = insn_data[icode].operand[1].mode;

  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
  op1 = copy_to_mode_reg (mode1, op1);

  pat = GEN_FCN (icode) (op0, op1);
  if (pat)
    emit_insn (pat);
  return op0;
}
/* Subroutine of ix86_expand_builtin to take care of unop insns.  */

static rtx
ix86_expand_unop_builtin (enum insn_code icode, tree exp,
			  rtx target, int do_load)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  rtx op0 = expand_normal (arg0);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;

  if (optimize || !target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);
  if (do_load)
    op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
  else
    {
      if (VECTOR_MODE_P (mode0))
	op0 = safe_vector_operand (op0, mode0);

      if ((optimize && !register_operand (op0, mode0))
	  || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
    }

  pat = GEN_FCN (icode) (target, op0);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
/* Subroutine of ix86_expand_builtin to take care of three special unop insns:
   sqrtss, rsqrtss, rcpss.  */

static rtx
ix86_expand_unop1_builtin (enum insn_code icode, tree exp, rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  rtx op1, op0 = expand_normal (arg0);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;

  if (optimize || !target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);

  if ((optimize && !register_operand (op0, mode0))
      || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);

  op1 = op0;
  if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
    op1 = copy_to_mode_reg (mode0, op1);

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
/* Subroutine of ix86_expand_builtin to take care of comparison insns.  */

static rtx
ix86_expand_sse_compare (const struct builtin_description *d, tree exp,
			 rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  rtx op2;
  enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
  enum rtx_code comparison = d->comparison;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  /* Swap operands if we have a comparison that isn't available in
     hardware.  */
  if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
    {
      rtx tmp = gen_reg_rtx (mode1);
      emit_move_insn (tmp, op1);
      op1 = op0;
      op0 = tmp;
    }

  if (optimize || !target
      || GET_MODE (target) != tmode
      || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  if ((optimize && !register_operand (op0, mode0))
      || ! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if ((optimize && !register_operand (op1, mode1))
      || ! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
  pat = GEN_FCN (d->icode) (target, op0, op1, op2);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
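/* The operand swap covers predicates the hardware lacks; e.g. the cmpgt
   builtins are table entries marked BUILTIN_DESC_SWAP_OPERANDS that use
   an LT compare with the operands reversed.  */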
/* Subroutine of ix86_expand_builtin to take care of comi insns.  */

static rtx
ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
		      rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  rtx op2;
  enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
  enum rtx_code comparison = d->comparison;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  /* Swap operands if we have a comparison that isn't available in
     hardware.  */
  if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
    {
      rtx tmp = op1;
      op1 = op0;
      op0 = tmp;
    }

  target = gen_reg_rtx (SImode);
  emit_move_insn (target, const0_rtx);
  target = gen_rtx_SUBREG (QImode, target, 0);

  if ((optimize && !register_operand (op0, mode0))
      || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if ((optimize && !register_operand (op1, mode1))
      || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
  pat = GEN_FCN (d->icode) (op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  emit_insn (gen_rtx_SET (VOIDmode,
			  gen_rtx_STRICT_LOW_PART (VOIDmode, target),
			  gen_rtx_fmt_ee (comparison, QImode,
					  SET_DEST (pat),
					  const0_rtx)));

  return SUBREG_REG (target);
}
/* Return the integer constant in ARG.  Constrain it to be in the range
   of the subparts of VEC_TYPE; issue an error if not.  */

static unsigned HOST_WIDE_INT
get_element_number (tree vec_type, tree arg)
{
  unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;

  if (!host_integerp (arg, 1)
      || (elt = tree_low_cst (arg, 1), elt > max))
    {
      error ("selector must be an integer constant in the range 0..%wi", max);
      return 0;
    }

  return elt;
}
/* A subroutine of ix86_expand_builtin.  These builtins are a wrapper around
   ix86_expand_vector_init.  We DO have language-level syntax for this, in
   the form of (type){ init-list }.  Except that since we can't place emms
   instructions from inside the compiler, we can't allow the use of MMX
   registers unless the user explicitly asks for it.  So we do *not* define
   vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md.  Instead
   we have builtins invoked by mmintrin.h that give us license to emit
   these sorts of instructions.  */

static rtx
ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
{
  enum machine_mode tmode = TYPE_MODE (type);
  enum machine_mode inner_mode = GET_MODE_INNER (tmode);
  int i, n_elt = GET_MODE_NUNITS (tmode);
  rtvec v = rtvec_alloc (n_elt);

  gcc_assert (VECTOR_MODE_P (tmode));
  gcc_assert (call_expr_nargs (exp) == n_elt);

  for (i = 0; i < n_elt; ++i)
    {
      rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
      RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
    }

  if (!target || !register_operand (target, tmode))
    target = gen_reg_rtx (tmode);

  ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
  return target;
}
/* A subroutine of ix86_expand_builtin.  These builtins are a wrapper around
   ix86_expand_vector_extract.  They would be redundant (for non-MMX) if we
   had a language-level syntax for referencing vector elements.  */

static rtx
ix86_expand_vec_ext_builtin (tree exp, rtx target)
{
  enum machine_mode tmode, mode0;
  tree arg0, arg1;
  int elt;
  rtx op0;

  arg0 = CALL_EXPR_ARG (exp, 0);
  arg1 = CALL_EXPR_ARG (exp, 1);

  op0 = expand_normal (arg0);
  elt = get_element_number (TREE_TYPE (arg0), arg1);

  tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
  mode0 = TYPE_MODE (TREE_TYPE (arg0));
  gcc_assert (VECTOR_MODE_P (mode0));

  op0 = force_reg (mode0, op0);

  if (optimize || !target || !register_operand (target, tmode))
    target = gen_reg_rtx (tmode);

  ix86_expand_vector_extract (true, target, op0, elt);

  return target;
}
/* A subroutine of ix86_expand_builtin.  These builtins are a wrapper around
   ix86_expand_vector_set.  They would be redundant (for non-MMX) if we had
   a language-level syntax for referencing vector elements.  */

static rtx
ix86_expand_vec_set_builtin (tree exp)
{
  enum machine_mode tmode, mode1;
  tree arg0, arg1, arg2;
  int elt;
  rtx op0, op1;

  arg0 = CALL_EXPR_ARG (exp, 0);
  arg1 = CALL_EXPR_ARG (exp, 1);
  arg2 = CALL_EXPR_ARG (exp, 2);

  tmode = TYPE_MODE (TREE_TYPE (arg0));
  mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
  gcc_assert (VECTOR_MODE_P (tmode));

  op0 = expand_expr (arg0, NULL_RTX, tmode, 0);
  op1 = expand_expr (arg1, NULL_RTX, mode1, 0);
  elt = get_element_number (TREE_TYPE (arg0), arg2);

  if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
    op1 = convert_modes (mode1, GET_MODE (op1), op1, true);

  op0 = force_reg (tmode, op0);
  op1 = force_reg (mode1, op1);

  ix86_expand_vector_set (true, op0, op1, elt);

  return op0;
}
/* Expand an expression EXP that calls a built-in function,
   with result going to TARGET if that's convenient
   (and in mode MODE if that's convenient).
   SUBTARGET may be used as the target for computing one of EXP's operands.
   IGNORE is nonzero if the value is to be ignored.  */

static rtx
ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
		     enum machine_mode mode ATTRIBUTE_UNUSED,
		     int ignore ATTRIBUTE_UNUSED)
{
  const struct builtin_description *d;
  size_t i;
  enum insn_code icode;
  tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
  tree arg0, arg1, arg2, arg3;
  rtx op0, op1, op2, op3, pat;
  enum machine_mode tmode, mode0, mode1, mode2, mode3, mode4;
  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);

  switch (fcode)
    {
    case IX86_BUILTIN_EMMS:
      emit_insn (gen_mmx_emms ());
      return 0;

    case IX86_BUILTIN_SFENCE:
      emit_insn (gen_sse_sfence ());
      return 0;
    case IX86_BUILTIN_MASKMOVQ:
    case IX86_BUILTIN_MASKMOVDQU:
      icode = (fcode == IX86_BUILTIN_MASKMOVQ
	       ? CODE_FOR_mmx_maskmovq
	       : CODE_FOR_sse2_maskmovdqu);
      /* Note the arg order is different from the operand order.  */
      arg1 = CALL_EXPR_ARG (exp, 0);
      arg2 = CALL_EXPR_ARG (exp, 1);
      arg0 = CALL_EXPR_ARG (exp, 2);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      op2 = expand_normal (arg2);
      mode0 = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[2].mode;

      op0 = force_reg (Pmode, op0);
      op0 = gen_rtx_MEM (mode1, op0);

      if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
	op1 = copy_to_mode_reg (mode1, op1);
      if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
	op2 = copy_to_mode_reg (mode2, op2);
      pat = GEN_FCN (icode) (op0, op1, op2);
      if (! pat)
	return 0;
      emit_insn (pat);
      return 0;
    case IX86_BUILTIN_SQRTSS:
      return ix86_expand_unop1_builtin (CODE_FOR_sse_vmsqrtv4sf2, exp, target);
    case IX86_BUILTIN_RSQRTSS:
      return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrsqrtv4sf2, exp, target);
    case IX86_BUILTIN_RCPSS:
      return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrcpv4sf2, exp, target);

    case IX86_BUILTIN_LOADUPS:
      return ix86_expand_unop_builtin (CODE_FOR_sse_movups, exp, target, 1);

    case IX86_BUILTIN_STOREUPS:
      return ix86_expand_store_builtin (CODE_FOR_sse_movups, exp);
    case IX86_BUILTIN_LOADHPS:
    case IX86_BUILTIN_LOADLPS:
    case IX86_BUILTIN_LOADHPD:
    case IX86_BUILTIN_LOADLPD:
      icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_loadhps
	       : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_loadlps
	       : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_loadhpd
	       : CODE_FOR_sse2_loadlpd);
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;

      op0 = force_reg (mode0, op0);
      op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
      if (optimize || target == 0
	  || GET_MODE (target) != tmode
	  || !register_operand (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;
    case IX86_BUILTIN_STOREHPS:
    case IX86_BUILTIN_STORELPS:
      icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_storehps
	       : CODE_FOR_sse_storelps);
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      mode0 = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;

      op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
      op1 = force_reg (mode1, op1);

      pat = GEN_FCN (icode) (op0, op1);
      if (! pat)
	return 0;
      emit_insn (pat);
      return const0_rtx;
    case IX86_BUILTIN_MOVNTPS:
      return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, exp);
    case IX86_BUILTIN_MOVNTQ:
      return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, exp);
    case IX86_BUILTIN_LDMXCSR:
      op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
      target = assign_386_stack_local (SImode, SLOT_TEMP);
      emit_move_insn (target, op0);
      emit_insn (gen_sse_ldmxcsr (target));
      return 0;

    case IX86_BUILTIN_STMXCSR:
      target = assign_386_stack_local (SImode, SLOT_TEMP);
      emit_insn (gen_sse_stmxcsr (target));
      return copy_to_mode_reg (SImode, target);
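      /* Both cases round-trip through a stack slot because the ldmxcsr and
	 stmxcsr patterns want a memory operand; e.g. xmmintrin.h's
	 _mm_getcsr is essentially __builtin_ia32_stmxcsr () (illustrative).  */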
    case IX86_BUILTIN_SHUFPS:
    case IX86_BUILTIN_SHUFPD:
      icode = (fcode == IX86_BUILTIN_SHUFPS
	       ? CODE_FOR_sse_shufps
	       : CODE_FOR_sse2_shufpd);
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      arg2 = CALL_EXPR_ARG (exp, 2);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      op2 = expand_normal (arg2);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;
      mode2 = insn_data[icode].operand[3].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      if ((optimize && !register_operand (op1, mode1))
	  || !(*insn_data[icode].operand[2].predicate) (op1, mode1))
	op1 = copy_to_mode_reg (mode1, op1);
      if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
	{
	  /* @@@ better error message */
	  error ("mask must be an immediate");
	  return gen_reg_rtx (tmode);
	}
      if (optimize || target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1, op2);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;
    case IX86_BUILTIN_PSHUFW:
    case IX86_BUILTIN_PSHUFD:
    case IX86_BUILTIN_PSHUFHW:
    case IX86_BUILTIN_PSHUFLW:
      icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
	       : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
	       : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
	       : CODE_FOR_mmx_pshufw);
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      tmode = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
	op0 = copy_to_mode_reg (mode1, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
	{
	  /* @@@ better error message */
	  error ("mask must be an immediate");
	  return const0_rtx;
	}
      if (target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;
    case IX86_BUILTIN_PSLLWI128:
      icode = CODE_FOR_ashlv8hi3;
      goto do_pshifti;
    case IX86_BUILTIN_PSLLDI128:
      icode = CODE_FOR_ashlv4si3;
      goto do_pshifti;
    case IX86_BUILTIN_PSLLQI128:
      icode = CODE_FOR_ashlv2di3;
      goto do_pshifti;
    case IX86_BUILTIN_PSRAWI128:
      icode = CODE_FOR_ashrv8hi3;
      goto do_pshifti;
    case IX86_BUILTIN_PSRADI128:
      icode = CODE_FOR_ashrv4si3;
      goto do_pshifti;
    case IX86_BUILTIN_PSRLWI128:
      icode = CODE_FOR_lshrv8hi3;
      goto do_pshifti;
    case IX86_BUILTIN_PSRLDI128:
      icode = CODE_FOR_lshrv4si3;
      goto do_pshifti;
    case IX86_BUILTIN_PSRLQI128:
      icode = CODE_FOR_lshrv2di3;
      goto do_pshifti;
    do_pshifti:
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);

      if (!CONST_INT_P (op1))
	{
	  error ("shift must be an immediate");
	  return const0_rtx;
	}
      if (INTVAL (op1) < 0 || INTVAL (op1) > 255)
	op1 = GEN_INT (255);

      tmode = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
	op0 = copy_to_reg (op0);

      target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
      if (!pat)
	return 0;
      emit_insn (pat);
      return target;
    case IX86_BUILTIN_PSLLW128:
      icode = CODE_FOR_ashlv8hi3;
      goto do_pshift;
    case IX86_BUILTIN_PSLLD128:
      icode = CODE_FOR_ashlv4si3;
      goto do_pshift;
    case IX86_BUILTIN_PSLLQ128:
      icode = CODE_FOR_ashlv2di3;
      goto do_pshift;
    case IX86_BUILTIN_PSRAW128:
      icode = CODE_FOR_ashrv8hi3;
      goto do_pshift;
    case IX86_BUILTIN_PSRAD128:
      icode = CODE_FOR_ashrv4si3;
      goto do_pshift;
    case IX86_BUILTIN_PSRLW128:
      icode = CODE_FOR_lshrv8hi3;
      goto do_pshift;
    case IX86_BUILTIN_PSRLD128:
      icode = CODE_FOR_lshrv4si3;
      goto do_pshift;
    case IX86_BUILTIN_PSRLQ128:
      icode = CODE_FOR_lshrv2di3;
      goto do_pshift;
    do_pshift:
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);

      tmode = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
	op0 = copy_to_reg (op0);

      op1 = simplify_gen_subreg (TImode, op1, GET_MODE (op1), 0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, TImode))
	op1 = copy_to_reg (op1);

      target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
      if (!pat)
	return 0;
      emit_insn (pat);
      return target;
    case IX86_BUILTIN_PSLLDQI128:
    case IX86_BUILTIN_PSRLDQI128:
      icode = (fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
	       : CODE_FOR_sse2_lshrti3);
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      tmode = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
	{
	  op0 = copy_to_reg (op0);
	  op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
	}
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
	{
	  error ("shift must be an immediate");
	  return const0_rtx;
	}
      target = gen_reg_rtx (V2DImode);
      pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0),
			     op0, op1);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;
    case IX86_BUILTIN_FEMMS:
      emit_insn (gen_mmx_femms ());
      return NULL_RTX;
    case IX86_BUILTIN_PAVGUSB:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_uavgv8qi3, exp, target);

    case IX86_BUILTIN_PF2ID:
      return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2id, exp, target, 0);

    case IX86_BUILTIN_PFACC:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_haddv2sf3, exp, target);

    case IX86_BUILTIN_PFADD:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_addv2sf3, exp, target);

    case IX86_BUILTIN_PFCMPEQ:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_eqv2sf3, exp, target);

    case IX86_BUILTIN_PFCMPGE:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_gev2sf3, exp, target);

    case IX86_BUILTIN_PFCMPGT:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_gtv2sf3, exp, target);

    case IX86_BUILTIN_PFMAX:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_smaxv2sf3, exp, target);

    case IX86_BUILTIN_PFMIN:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_sminv2sf3, exp, target);

    case IX86_BUILTIN_PFMUL:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_mulv2sf3, exp, target);

    case IX86_BUILTIN_PFRCP:
      return ix86_expand_unop_builtin (CODE_FOR_mmx_rcpv2sf2, exp, target, 0);

    case IX86_BUILTIN_PFRCPIT1:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit1v2sf3, exp, target);

    case IX86_BUILTIN_PFRCPIT2:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit2v2sf3, exp, target);

    case IX86_BUILTIN_PFRSQIT1:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_rsqit1v2sf3, exp, target);

    case IX86_BUILTIN_PFRSQRT:
      return ix86_expand_unop_builtin (CODE_FOR_mmx_rsqrtv2sf2, exp, target, 0);

    case IX86_BUILTIN_PFSUB:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_subv2sf3, exp, target);

    case IX86_BUILTIN_PFSUBR:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_subrv2sf3, exp, target);

    case IX86_BUILTIN_PI2FD:
      return ix86_expand_unop_builtin (CODE_FOR_mmx_floatv2si2, exp, target, 0);

    case IX86_BUILTIN_PMULHRW:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_pmulhrwv4hi3, exp, target);

    case IX86_BUILTIN_PF2IW:
      return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2iw, exp, target, 0);

    case IX86_BUILTIN_PFNACC:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_hsubv2sf3, exp, target);

    case IX86_BUILTIN_PFPNACC:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_addsubv2sf3, exp, target);

    case IX86_BUILTIN_PI2FW:
      return ix86_expand_unop_builtin (CODE_FOR_mmx_pi2fw, exp, target, 0);

    case IX86_BUILTIN_PSWAPDSI:
      return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2si2, exp, target, 0);

    case IX86_BUILTIN_PSWAPDSF:
      return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2sf2, exp, target, 0);
    case IX86_BUILTIN_SQRTSD:
      return ix86_expand_unop1_builtin (CODE_FOR_sse2_vmsqrtv2df2, exp, target);
    case IX86_BUILTIN_LOADUPD:
      return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, exp, target, 1);
    case IX86_BUILTIN_STOREUPD:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, exp);
    case IX86_BUILTIN_MFENCE:
      emit_insn (gen_sse2_mfence ());
      return 0;
    case IX86_BUILTIN_LFENCE:
      emit_insn (gen_sse2_lfence ());
      return 0;

    case IX86_BUILTIN_CLFLUSH:
      arg0 = CALL_EXPR_ARG (exp, 0);
      op0 = expand_normal (arg0);
      icode = CODE_FOR_sse2_clflush;
      if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
	op0 = copy_to_mode_reg (Pmode, op0);

      emit_insn (gen_sse2_clflush (op0));
      return 0;
    case IX86_BUILTIN_MOVNTPD:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, exp);
    case IX86_BUILTIN_MOVNTDQ:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, exp);
    case IX86_BUILTIN_MOVNTI:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, exp);

    case IX86_BUILTIN_LOADDQU:
      return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, exp, target, 1);
    case IX86_BUILTIN_STOREDQU:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, exp);
    case IX86_BUILTIN_MONITOR:
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      arg2 = CALL_EXPR_ARG (exp, 2);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      op2 = expand_normal (arg2);
      if (!REG_P (op0))
	op0 = copy_to_mode_reg (Pmode, op0);
      if (!REG_P (op1))
	op1 = copy_to_mode_reg (SImode, op1);
      if (!REG_P (op2))
	op2 = copy_to_mode_reg (SImode, op2);
      if (!TARGET_64BIT)
	emit_insn (gen_sse3_monitor (op0, op1, op2));
      else
	emit_insn (gen_sse3_monitor64 (op0, op1, op2));
      return 0;

    case IX86_BUILTIN_MWAIT:
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      if (!REG_P (op0))
	op0 = copy_to_mode_reg (SImode, op0);
      if (!REG_P (op1))
	op1 = copy_to_mode_reg (SImode, op1);
      emit_insn (gen_sse3_mwait (op0, op1));
      return 0;
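      /* Illustrative: the pmmintrin.h wrappers _mm_monitor and _mm_mwait
	 land in the two cases above; the address argument is forced into a
	 Pmode register and the extension/hint arguments into SImode
	 registers, matching the sse3_monitor and sse3_mwait patterns.  */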
    case IX86_BUILTIN_LDDQU:
      return ix86_expand_unop_builtin (CODE_FOR_sse3_lddqu, exp,
				       target, 1);
    case IX86_BUILTIN_PALIGNR:
    case IX86_BUILTIN_PALIGNR128:
      if (fcode == IX86_BUILTIN_PALIGNR)
	{
	  icode = CODE_FOR_ssse3_palignrdi;
	  mode = DImode;
	}
      else
	{
	  icode = CODE_FOR_ssse3_palignrti;
	  mode = V2DImode;
	}
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      arg2 = CALL_EXPR_ARG (exp, 2);
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[2].mode;
      mode3 = insn_data[icode].operand[3].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
	{
	  op0 = copy_to_reg (op0);
	  op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
	}
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
	{
	  op1 = copy_to_reg (op1);
	  op1 = simplify_gen_subreg (mode2, op1, GET_MODE (op1), 0);
	}
      if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
	{
	  error ("shift must be an immediate");
	  return const0_rtx;
	}
      target = gen_reg_rtx (mode);
      pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, mode, 0),
			     op0, op1, op2);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;
    case IX86_BUILTIN_MOVNTSD:
      return ix86_expand_store_builtin (CODE_FOR_sse4a_vmmovntv2df, exp);

    case IX86_BUILTIN_MOVNTSS:
      return ix86_expand_store_builtin (CODE_FOR_sse4a_vmmovntv4sf, exp);
    case IX86_BUILTIN_INSERTQ:
    case IX86_BUILTIN_EXTRQ:
      icode = (fcode == IX86_BUILTIN_EXTRQ
	       ? CODE_FOR_sse4a_extrq
	       : CODE_FOR_sse4a_insertq);
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      tmode = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[2].mode;
      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
	op0 = copy_to_mode_reg (mode1, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
	op1 = copy_to_mode_reg (mode2, op1);
      if (optimize || target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
      if (! pat)
	return NULL_RTX;
      emit_insn (pat);
      return target;
18495 case IX86_BUILTIN_EXTRQI:
18496 icode = CODE_FOR_sse4a_extrqi;
18497 arg0 = CALL_EXPR_ARG (exp, 0);
18498 arg1 = CALL_EXPR_ARG (exp, 1);
18499 arg2 = CALL_EXPR_ARG (exp, 2);
18500 op0 = expand_normal (arg0);
18501 op1 = expand_normal (arg1);
18502 op2 = expand_normal (arg2);
18503 tmode = insn_data[icode].operand[0].mode;
18504 mode1 = insn_data[icode].operand[1].mode;
18505 mode2 = insn_data[icode].operand[2].mode;
18506 mode3 = insn_data[icode].operand[3].mode;
18507 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
18508 op0 = copy_to_mode_reg (mode1, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
        {
          error ("index mask must be an immediate");
          return gen_reg_rtx (tmode);
        }
      if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
        {
          error ("length mask must be an immediate");
          return gen_reg_rtx (tmode);
        }
      if (optimize || target == 0
          || GET_MODE (target) != tmode
          || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
        target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1, op2);
      if (! pat)
        return NULL_RTX;
      emit_insn (pat);
      return target;
18529 case IX86_BUILTIN_INSERTQI:
18530 icode = CODE_FOR_sse4a_insertqi;
18531 arg0 = CALL_EXPR_ARG (exp, 0);
18532 arg1 = CALL_EXPR_ARG (exp, 1);
18533 arg2 = CALL_EXPR_ARG (exp, 2);
18534 arg3 = CALL_EXPR_ARG (exp, 3);
18535 op0 = expand_normal (arg0);
18536 op1 = expand_normal (arg1);
18537 op2 = expand_normal (arg2);
18538 op3 = expand_normal (arg3);
18539 tmode = insn_data[icode].operand[0].mode;
18540 mode1 = insn_data[icode].operand[1].mode;
18541 mode2 = insn_data[icode].operand[2].mode;
18542 mode3 = insn_data[icode].operand[3].mode;
18543 mode4 = insn_data[icode].operand[4].mode;
18545 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
18546 op0 = copy_to_mode_reg (mode1, op0);
18548 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
18549 op1 = copy_to_mode_reg (mode2, op1);
      if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
        {
          error ("index mask must be an immediate");
          return gen_reg_rtx (tmode);
        }
      if (! (*insn_data[icode].operand[4].predicate) (op3, mode4))
        {
          error ("length mask must be an immediate");
          return gen_reg_rtx (tmode);
        }
      if (optimize || target == 0
          || GET_MODE (target) != tmode
          || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
        target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1, op2, op3);
      if (! pat)
        return NULL_RTX;
      emit_insn (pat);
      return target;
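    /* Illustrative usage sketch, assuming the SSE4a intrinsics from
       ammintrin.h map onto these builtins as usual: with -msse4a

         #include <ammintrin.h>

         __m128i
         extract_field (__m128i x)
         {
           return _mm_extracti_si64 (x, 8, 4);   (8-bit field at bit 4)
         }

       Both mask operands must fold to compile-time constants; a variable
       length or index argument trips the "must be an immediate" errors
       above.  */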
18571 case IX86_BUILTIN_VEC_INIT_V2SI:
18572 case IX86_BUILTIN_VEC_INIT_V4HI:
18573 case IX86_BUILTIN_VEC_INIT_V8QI:
18574 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
18576 case IX86_BUILTIN_VEC_EXT_V2DF:
18577 case IX86_BUILTIN_VEC_EXT_V2DI:
18578 case IX86_BUILTIN_VEC_EXT_V4SF:
18579 case IX86_BUILTIN_VEC_EXT_V4SI:
18580 case IX86_BUILTIN_VEC_EXT_V8HI:
18581 case IX86_BUILTIN_VEC_EXT_V2SI:
18582 case IX86_BUILTIN_VEC_EXT_V4HI:
18583 return ix86_expand_vec_ext_builtin (exp, target);
18585 case IX86_BUILTIN_VEC_SET_V8HI:
18586 case IX86_BUILTIN_VEC_SET_V4HI:
      return ix86_expand_vec_set_builtin (exp);

    default:
      break;
    }

  for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
    if (d->code == fcode)
      {
        /* Compares are treated specially.  */
        if (d->icode == CODE_FOR_sse_maskcmpv4sf3
            || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3
            || d->icode == CODE_FOR_sse2_maskcmpv2df3
            || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
          return ix86_expand_sse_compare (d, exp, target);

        return ix86_expand_binop_builtin (d->icode, exp, target);
      }
18606 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
18607 if (d->code == fcode)
18608 return ix86_expand_unop_builtin (d->icode, exp, target, 0);
18610 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
18611 if (d->code == fcode)
18612 return ix86_expand_sse_comi (d, exp, target);
  gcc_unreachable ();
}

/* Returns a function decl for a vectorized version of the builtin function
   with builtin function code FN and the result vector type TYPE, or NULL_TREE
   if it is not available.  */

static tree
ix86_builtin_vectorized_function (enum built_in_function fn, tree type_out,
                                  tree type_in)
{
  enum machine_mode in_mode, out_mode;
  int in_n, out_n;

  if (TREE_CODE (type_out) != VECTOR_TYPE
      || TREE_CODE (type_in) != VECTOR_TYPE)
    return NULL_TREE;

  out_mode = TYPE_MODE (TREE_TYPE (type_out));
  out_n = TYPE_VECTOR_SUBPARTS (type_out);
  in_mode = TYPE_MODE (TREE_TYPE (type_in));
  in_n = TYPE_VECTOR_SUBPARTS (type_in);

  switch (fn)
    {
    case BUILT_IN_SQRT:
      if (out_mode == DFmode && out_n == 2
          && in_mode == DFmode && in_n == 2)
        return ix86_builtins[IX86_BUILTIN_SQRTPD];
      break;

    case BUILT_IN_SQRTF:
      if (out_mode == SFmode && out_n == 4
          && in_mode == SFmode && in_n == 4)
        return ix86_builtins[IX86_BUILTIN_SQRTPS];
      break;

    case BUILT_IN_LRINTF:
      if (out_mode == SImode && out_n == 4
          && in_mode == SFmode && in_n == 4)
        return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
      break;

    default:
      break;
    }

  return NULL_TREE;
}
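/* Illustrative sketch, not from the GCC sources: for a loop such as

     double a[256], b[256];
     for (i = 0; i < 256; i++)
       a[i] = __builtin_sqrt (b[i]);

   the vectorizer calls this hook with FN == BUILT_IN_SQRT and V2DF
   vector types, and the IX86_BUILTIN_SQRTPD decl returned above lets
   it use SQRTPD on two elements per iteration.  */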
/* Returns a decl of a function that implements conversion of the
   input vector of type TYPE, or NULL_TREE if it is not available.  */

static tree
ix86_builtin_conversion (enum tree_code code, tree type)
{
  if (TREE_CODE (type) != VECTOR_TYPE)
    return NULL_TREE;

  switch (code)
    {
    case FLOAT_EXPR:
      switch (TYPE_MODE (type))
        {
        case V4SImode:
          return ix86_builtins[IX86_BUILTIN_CVTDQ2PS];
        default:
          return NULL_TREE;
        }

    case FIX_TRUNC_EXPR:
      switch (TYPE_MODE (type))
        {
        case V4SImode:
          return ix86_builtins[IX86_BUILTIN_CVTTPS2DQ];
        default:
          return NULL_TREE;
        }

    default:
      return NULL_TREE;
    }
}
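/* Illustrative sketch: an int -> float conversion loop like

     for (i = 0; i < n; i++)
       dst[i] = (float) src[i];

   reaches this hook as FLOAT_EXPR on a V4SImode vector; the CVTDQ2PS
   builtin returned above converts four elements at a time.  */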
/* Store OPERAND to the memory after reload is completed.  This means
   that we can't easily use assign_stack_local.  */
rtx
ix86_force_to_memory (enum machine_mode mode, rtx operand)
{
  rtx result;

  gcc_assert (reload_completed);
  if (TARGET_RED_ZONE)
    {
      result = gen_rtx_MEM (mode,
                            gen_rtx_PLUS (Pmode,
                                          stack_pointer_rtx,
                                          GEN_INT (-RED_ZONE_SIZE)));
      emit_move_insn (result, operand);
    }
  else if (!TARGET_RED_ZONE && TARGET_64BIT)
    {
      switch (mode)
        {
        case HImode:
        case SImode:
          operand = gen_lowpart (DImode, operand);
          /* FALLTHRU */
        case DImode:
          emit_insn (gen_rtx_SET (VOIDmode,
                                  gen_rtx_MEM (DImode,
                                               gen_rtx_PRE_DEC (DImode,
                                                        stack_pointer_rtx)),
                                  operand));
          break;
        default:
          gcc_unreachable ();
        }
      result = gen_rtx_MEM (mode, stack_pointer_rtx);
    }
  else
    {
      switch (mode)
        {
        case DImode:
          {
            rtx operands[2];
            split_di (&operand, 1, operands, operands + 1);
            emit_insn (gen_rtx_SET (VOIDmode,
                                    gen_rtx_MEM (SImode,
                                                 gen_rtx_PRE_DEC (Pmode,
                                                        stack_pointer_rtx)),
                                    operands[1]));
            emit_insn (gen_rtx_SET (VOIDmode,
                                    gen_rtx_MEM (SImode,
                                                 gen_rtx_PRE_DEC (Pmode,
                                                        stack_pointer_rtx)),
                                    operands[0]));
          }
          break;
        case HImode:
          /* Store HImodes as SImodes.  */
          operand = gen_lowpart (SImode, operand);
          /* FALLTHRU */
        case SImode:
          emit_insn (gen_rtx_SET (VOIDmode,
                                  gen_rtx_MEM (GET_MODE (operand),
                                               gen_rtx_PRE_DEC (SImode,
                                                        stack_pointer_rtx)),
                                  operand));
          break;
        default:
          gcc_unreachable ();
        }
      result = gen_rtx_MEM (mode, stack_pointer_rtx);
    }
  return result;
}
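/* For example (a sketch; the exact rtl depends on the target flags): on
   32-bit without a red zone, forcing a DImode register pair to memory
   emits the equivalent of

     pushl %edx
     pushl %eax

   and the returned (mem:DI (reg:SI sp)) then addresses the fresh slot.  */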
/* Free operand from the memory.  */
void
ix86_free_from_memory (enum machine_mode mode)
{
  if (!TARGET_RED_ZONE)
    {
      int size;

      if (mode == DImode || TARGET_64BIT)
        size = 8;
      else
        size = 4;
      /* Use LEA to deallocate stack space.  In peephole2 it will be converted
         to pop or add instruction if registers are available.  */
      emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
                              gen_rtx_PLUS (Pmode, stack_pointer_rtx,
                                            GEN_INT (size))));
    }
}
18797 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
18798 QImode must go into class Q_REGS.
18799 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
   movdf to do mem-to-mem moves through integer regs.  */

enum reg_class
ix86_preferred_reload_class (rtx x, enum reg_class class)
{
  enum machine_mode mode = GET_MODE (x);

  /* We're only allowed to return a subclass of CLASS.  Many of the
     following checks fail for NO_REGS, so eliminate that early.  */
  if (class == NO_REGS)
    return NO_REGS;

  /* All classes can load zeros.  */
  if (x == CONST0_RTX (mode))
    return class;

  /* Force constants into memory if we are loading a (nonzero) constant into
     an MMX or SSE register.  This is because there are no MMX/SSE instructions
     to load from a constant.  */
  if (CONSTANT_P (x)
      && (MAYBE_MMX_CLASS_P (class) || MAYBE_SSE_CLASS_P (class)))
    return NO_REGS;
18822 /* Prefer SSE regs only, if we can use them for math. */
18823 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
18824 return SSE_CLASS_P (class) ? class : NO_REGS;
  /* Floating-point constants need more complex checks.  */
  if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
    {
      /* General regs can load everything.  */
      if (reg_class_subset_p (class, GENERAL_REGS))
        return class;

      /* Floats can load 0 and 1 plus some others.  Note that we eliminated
         zero above.  We only want to wind up preferring 80387 registers if
         we plan on doing computation with them.  */
      if (TARGET_80387
          && standard_80387_constant_p (x))
        {
          /* Limit class to non-sse.  */
          if (class == FLOAT_SSE_REGS)
            return FLOAT_REGS;
          if (class == FP_TOP_SSE_REGS)
            return FP_TOP_REG;
          if (class == FP_SECOND_SSE_REGS)
            return FP_SECOND_REG;
          if (class == FLOAT_INT_REGS || class == FLOAT_REGS)
            return class;
        }

      return NO_REGS;
    }

  /* Generally when we see PLUS here, it's the function invariant
     (plus soft-fp const_int).  Which can only be computed into general
     regs.  */
  if (GET_CODE (x) == PLUS)
    return reg_class_subset_p (class, GENERAL_REGS) ? class : NO_REGS;
  /* QImode constants are easy to load, but non-constant QImode data
     must go into Q_REGS.  */
  if (GET_MODE (x) == QImode && !CONSTANT_P (x))
    {
      if (reg_class_subset_p (class, Q_REGS))
        return class;
      if (reg_class_subset_p (Q_REGS, class))
        return Q_REGS;
      return NO_REGS;
    }

  return class;
}
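/* For example: reloading a non-constant QImode pseudo with CLASS ==
   GENERAL_REGS narrows the class to Q_REGS here, so reload only
   considers registers with an addressable low byte (%eax..%ebx in
   32-bit mode).  */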
/* Discourage putting floating-point values in SSE registers unless
   SSE math is being used, and likewise for the 387 registers.  */

enum reg_class
ix86_preferred_output_reload_class (rtx x, enum reg_class class)
{
  enum machine_mode mode = GET_MODE (x);

  /* Restrict the output reload class to the register bank that we are doing
     math on.  If we would like not to return a subset of CLASS, reject this
     alternative: if reload cannot do this, it will still use its choice.  */
  if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
    return MAYBE_SSE_CLASS_P (class) ? SSE_REGS : NO_REGS;

  if (X87_FLOAT_MODE_P (mode))
    {
      if (class == FP_TOP_SSE_REGS)
        return FP_TOP_REG;
      else if (class == FP_SECOND_SSE_REGS)
        return FP_SECOND_REG;
      else
        return FLOAT_CLASS_P (class) ? class : NO_REGS;
    }

  return class;
}
18900 /* If we are copying between general and FP registers, we need a memory
18901 location. The same is true for SSE and MMX registers.
18903 The macro can't work reliably when one of the CLASSES is class containing
18904 registers from multiple units (SSE, MMX, integer). We avoid this by never
18905 combining those units in single alternative in the machine description.
18906 Ensure that this constraint holds to avoid unexpected surprises.
18908 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
   enforce these sanity checks.  */

int
ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
                              enum machine_mode mode, int strict)
{
18915 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
18916 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
18917 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
18918 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
18919 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
18920 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
    {
      gcc_assert (!strict);
      return true;
    }

  if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
    return true;

  /* ??? This is a lie.  We do have moves between mmx/general, and for
     mmx/sse2.  But by saying we need secondary memory we discourage the
     register allocator from using the mmx registers unless needed.  */
  if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
    return true;

  if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
    {
      /* SSE1 doesn't have any direct moves from other classes.  */
      if (!TARGET_SSE2)
        return true;

      /* If the target says that inter-unit moves are more expensive
         than moving through memory, then don't generate them.  */
      if (!TARGET_INTER_UNIT_MOVES)
        return true;

      /* Between SSE and general, we have moves no larger than word size.  */
      if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
        return true;
    }

  return false;
}
/* Return true if the registers in CLASS cannot represent the change from
   modes FROM to TO.  */

bool
ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
                               enum reg_class class)
{
  if (from == to)
    return false;

  /* x87 registers can't do subreg at all, as all values are reformatted
     to extended precision.  */
  if (MAYBE_FLOAT_CLASS_P (class))
    return true;

  if (MAYBE_SSE_CLASS_P (class) || MAYBE_MMX_CLASS_P (class))
    {
      /* Vector registers do not support QI or HImode loads.  If we don't
         disallow a change to these modes, reload will assume it's ok to
         drop the subreg from (subreg:SI (reg:HI 100) 0).  This affects
         the vec_dupv4hi pattern.  */
      if (GET_MODE_SIZE (from) < 4)
        return true;

      /* Vector registers do not support subreg with nonzero offsets, which
         are otherwise valid for integer registers.  Since we can't see
         whether we have a nonzero offset from here, prohibit all
         nonparadoxical subregs changing size.  */
      if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
        return true;
    }

  return false;
}
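/* For example: (subreg:SI (reg:HI n) 0) is rejected here for SSE classes
   because GET_MODE_SIZE (HImode) < 4, forcing reload to copy through a
   general register or memory instead of dropping the subreg.  */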
18989 /* Return the cost of moving data from a register in class CLASS1 to
18990 one in class CLASS2.
18992 It is not required that the cost always equal 2 when FROM is the same as TO;
18993 on some machines it is expensive to move between registers if they are not
18994 general registers. */
int
ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
                         enum reg_class class2)
{
  /* In case we require secondary memory, compute cost of the store followed
     by load.  In order to avoid bad register allocation choices, we need
     for this to be *at least* as high as the symmetric MEMORY_MOVE_COST.  */
  if (ix86_secondary_memory_needed (class1, class2, mode, 0))
    {
      int cost = 1;

      cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
                   MEMORY_MOVE_COST (mode, class1, 1));
      cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
                   MEMORY_MOVE_COST (mode, class2, 1));

      /* In case of copying from general_purpose_register we may emit multiple
         stores followed by single load causing memory size mismatch stall.
         Count this as arbitrarily high cost of 20.  */
      if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
        cost += 20;

      /* In the case of FP/MMX moves, the registers actually overlap, and we
         have to switch modes in order to treat them differently.  */
      if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
          || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
        cost += 20;

      return cost;
    }
19028 /* Moves between SSE/MMX and integer unit are expensive. */
19029 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
19030 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
19031 return ix86_cost->mmxsse_to_integer;
19032 if (MAYBE_FLOAT_CLASS_P (class1))
19033 return ix86_cost->fp_move;
19034 if (MAYBE_SSE_CLASS_P (class1))
19035 return ix86_cost->sse_move;
  if (MAYBE_MMX_CLASS_P (class1))
    return ix86_cost->mmx_move;
  return 2;
}
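/* Worked example with made-up numbers: for a GENERAL_REGS <-> SSE_REGS
   DImode copy that needs secondary memory, if MEMORY_MOVE_COST is 4 for
   both classes in both directions, the result is 1 + 4 + 4 = 9, which is
   intentionally no cheaper than going through memory itself.  */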
19041 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
19044 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
19046 /* Flags and only flags can only hold CCmode values. */
19047 if (CC_REGNO_P (regno))
19048 return GET_MODE_CLASS (mode) == MODE_CC;
  if (GET_MODE_CLASS (mode) == MODE_CC
      || GET_MODE_CLASS (mode) == MODE_RANDOM
      || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
    return 0;
19053 if (FP_REGNO_P (regno))
19054 return VALID_FP_MODE_P (mode);
19055 if (SSE_REGNO_P (regno))
19057 /* We implement the move patterns for all vector modes into and
19058 out of SSE registers, even when no operation instructions
19060 return (VALID_SSE_REG_MODE (mode)
19061 || VALID_SSE2_REG_MODE (mode)
19062 || VALID_MMX_REG_MODE (mode)
19063 || VALID_MMX_REG_MODE_3DNOW (mode));
19065 if (MMX_REGNO_P (regno))
19067 /* We implement the move patterns for 3DNOW modes even in MMX mode,
19068 so if the register is available at all, then we can move data of
19069 the given mode into or out of it. */
19070 return (VALID_MMX_REG_MODE (mode)
19071 || VALID_MMX_REG_MODE_3DNOW (mode));
  if (mode == QImode)
    {
      /* Take care for QImode values - they can be in non-QI regs,
         but then they do cause partial register stalls.  */
      if (regno < 4 || TARGET_64BIT)
        return 1;
      if (!TARGET_PARTIAL_REG_STALL)
        return 1;
      return reload_in_progress || reload_completed;
    }
  /* We handle both integer and floats in the general purpose registers.  */
  else if (VALID_INT_MODE_P (mode))
    return 1;
  else if (VALID_FP_MODE_P (mode))
    return 1;
  /* Lots of MMX code casts 8 byte vector modes to DImode.  If we then go
     on to use that value in smaller contexts, this can easily force a
     pseudo to be allocated to GENERAL_REGS.  Since this is no worse than
     supporting DImode, allow it.  */
  else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
    return 1;

  return 0;
}
/* A subroutine of ix86_modes_tieable_p.  Return true if MODE is a
   tieable integer mode.  */

static bool
ix86_tieable_integer_mode_p (enum machine_mode mode)
{
  switch (mode)
    {
    case HImode:
    case SImode:
      return true;

    case QImode:
      return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;

    case DImode:
      return TARGET_64BIT;

    default:
      return false;
    }
}
19122 /* Return true if MODE1 is accessible in a register that can hold MODE2
19123 without copying. That is, all register classes that can hold MODE2
19124 can also hold MODE1. */
19127 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
19129 if (mode1 == mode2)
19132 if (ix86_tieable_integer_mode_p (mode1)
19133 && ix86_tieable_integer_mode_p (mode2))
19136 /* MODE2 being XFmode implies fp stack or general regs, which means we
19137 can tie any smaller floating point modes to it. Note that we do not
19138 tie this with TFmode. */
19139 if (mode2 == XFmode)
19140 return mode1 == SFmode || mode1 == DFmode;
19142 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
19143 that we can tie it with SFmode. */
19144 if (mode2 == DFmode)
19145 return mode1 == SFmode;
19147 /* If MODE2 is only appropriate for an SSE register, then tie with
19148 any other mode acceptable to SSE registers. */
19149 if (GET_MODE_SIZE (mode2) == 16
19150 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
19151 return (GET_MODE_SIZE (mode1) == 16
19152 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
19154 /* If MODE2 is appropriate for an MMX register, then tie
19155 with any other mode acceptable to MMX registers. */
  if (GET_MODE_SIZE (mode2) == 8
      && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
    return (GET_MODE_SIZE (mode1) == 8
            && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));

  return false;
}
/* Return the cost of moving data of mode M between a
   register and memory.  A value of 2 is the default; this cost is
   relative to those in `REGISTER_MOVE_COST'.

   If moving between registers and memory is more expensive than
   between two registers, you should define this macro to express the
   relative cost.

   Model also increased moving costs of QImode registers in non
   Q_REGS classes.  */

int
ix86_memory_move_cost (enum machine_mode mode, enum reg_class class, int in)
{
  if (FLOAT_CLASS_P (class))
    {
      int index;
      switch (mode)
        {
        case SFmode:
          index = 0;
          break;
        case DFmode:
          index = 1;
          break;
        case XFmode:
          index = 2;
          break;
        default:
          return 100;
        }
      return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
    }
  if (SSE_CLASS_P (class))
    {
      int index;
      switch (GET_MODE_SIZE (mode))
        {
        case 4:
          index = 0;
          break;
        case 8:
          index = 1;
          break;
        case 16:
          index = 2;
          break;
        default:
          return 100;
        }
      return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
    }
  if (MMX_CLASS_P (class))
    {
      int index;
      switch (GET_MODE_SIZE (mode))
        {
        case 4:
          index = 0;
          break;
        case 8:
          index = 1;
          break;
        default:
          return 100;
        }
      return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
    }
  switch (GET_MODE_SIZE (mode))
    {
    case 1:
      if (in)
        return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
                : ix86_cost->movzbl_load);
      else
        return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
                : ix86_cost->int_store[0] + 4);
    case 2:
      return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
    default:
      /* Compute number of 32bit moves needed.  TFmode is moved as XFmode.  */
      if (mode == TFmode)
        mode = XFmode;
      return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
              * (((int) GET_MODE_SIZE (mode)
                  + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
    }
}
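/* Worked example: on 32-bit targets TFmode is moved as XFmode, typically
   12 bytes, so the general-register path above charges int_load[2] (or
   int_store[2]) times (12 + 3) / 4 = 3 word moves.  */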
19254 /* Compute a (partial) cost for rtx X. Return true if the complete
19255 cost has been computed, and false if subexpressions should be
19256 scanned. In either case, *TOTAL contains the cost result. */
19259 ix86_rtx_costs (rtx x, int code, int outer_code, int *total)
19261 enum machine_mode mode = GET_MODE (x);
      if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
        *total = 3;
      else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
        *total = 2;
      else if (flag_pic && SYMBOLIC_CONST (x)
               && (!TARGET_64BIT
                   || (GET_CODE (x) != LABEL_REF
                       && (GET_CODE (x) != SYMBOL_REF
                           || !SYMBOL_REF_LOCAL_P (x)))))
        *total = 1;
      else
        *total = 0;
      break;

    case CONST_DOUBLE:
      if (mode == VOIDmode)
        *total = 0;
      else
        switch (standard_80387_constant_p (x))
          {
          case 1: /* 0.0 */
            *total = 1;
            break;
          default: /* Other constants */
            *total = 2;
            break;
          case 0:
          case -1:
            /* Start with (MEM (SYMBOL_REF)), since that's where
               it'll probably end up.  Add a penalty for size.  */
            *total = (COSTS_N_INSNS (1)
                      + (flag_pic != 0 && !TARGET_64BIT)
                      + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
            break;
          }
      break;
    case ZERO_EXTEND:
      /* The zero extension is often completely free on x86_64, so make
         it as cheap as possible.  */
      if (TARGET_64BIT && mode == DImode
          && GET_MODE (XEXP (x, 0)) == SImode)
        *total = 1;
      else if (TARGET_ZERO_EXTEND_WITH_AND)
        *total = ix86_cost->add;
      else
        *total = ix86_cost->movzx;
      break;

    case SIGN_EXTEND:
      *total = ix86_cost->movsx;
      break;

    case ASHIFT:
19323 if (CONST_INT_P (XEXP (x, 1))
19324 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
19326 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
19329 *total = ix86_cost->add;
19332 if ((value == 2 || value == 3)
19333 && ix86_cost->lea <= ix86_cost->shift_const)
19335 *total = ix86_cost->lea;
19345 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
19347 if (CONST_INT_P (XEXP (x, 1)))
19349 if (INTVAL (XEXP (x, 1)) > 32)
19350 *total = ix86_cost->shift_const + COSTS_N_INSNS (2);
19352 *total = ix86_cost->shift_const * 2;
19356 if (GET_CODE (XEXP (x, 1)) == AND)
19357 *total = ix86_cost->shift_var * 2;
19359 *total = ix86_cost->shift_var * 6 + COSTS_N_INSNS (2);
19364 if (CONST_INT_P (XEXP (x, 1)))
19365 *total = ix86_cost->shift_const;
19367 *total = ix86_cost->shift_var;
19372 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
19374 /* ??? SSE scalar cost should be used here. */
19375 *total = ix86_cost->fmul;
19378 else if (X87_FLOAT_MODE_P (mode))
19380 *total = ix86_cost->fmul;
19383 else if (FLOAT_MODE_P (mode))
19385 /* ??? SSE vector cost should be used here. */
19386 *total = ix86_cost->fmul;
19391 rtx op0 = XEXP (x, 0);
19392 rtx op1 = XEXP (x, 1);
        if (CONST_INT_P (XEXP (x, 1)))
          {
            unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
            for (nbits = 0; value != 0; value &= value - 1)
              nbits++;
          }
        else
          /* This is arbitrary.  */
          nbits = 7;
19404 /* Compute costs correctly for widening multiplication. */
19405 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op1) == ZERO_EXTEND)
19406 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
19407 == GET_MODE_SIZE (mode))
19409 int is_mulwiden = 0;
19410 enum machine_mode inner_mode = GET_MODE (op0);
19412 if (GET_CODE (op0) == GET_CODE (op1))
19413 is_mulwiden = 1, op1 = XEXP (op1, 0);
19414 else if (CONST_INT_P (op1))
19416 if (GET_CODE (op0) == SIGN_EXTEND)
19417 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
19420 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
19424 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
19427 *total = (ix86_cost->mult_init[MODE_INDEX (mode)]
19428 + nbits * ix86_cost->mult_bit
19429 + rtx_cost (op0, outer_code) + rtx_cost (op1, outer_code));
19438 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
19439 /* ??? SSE cost should be used here. */
19440 *total = ix86_cost->fdiv;
19441 else if (X87_FLOAT_MODE_P (mode))
19442 *total = ix86_cost->fdiv;
19443 else if (FLOAT_MODE_P (mode))
19444 /* ??? SSE vector cost should be used here. */
19445 *total = ix86_cost->fdiv;
19447 *total = ix86_cost->divide[MODE_INDEX (mode)];
19451 if (GET_MODE_CLASS (mode) == MODE_INT
19452 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
19454 if (GET_CODE (XEXP (x, 0)) == PLUS
19455 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
19456 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
19457 && CONSTANT_P (XEXP (x, 1)))
19459 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
19460 if (val == 2 || val == 4 || val == 8)
19462 *total = ix86_cost->lea;
19463 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
19464 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
19466 *total += rtx_cost (XEXP (x, 1), outer_code);
19470 else if (GET_CODE (XEXP (x, 0)) == MULT
19471 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
19473 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
19474 if (val == 2 || val == 4 || val == 8)
19476 *total = ix86_cost->lea;
19477 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
19478 *total += rtx_cost (XEXP (x, 1), outer_code);
19482 else if (GET_CODE (XEXP (x, 0)) == PLUS)
19484 *total = ix86_cost->lea;
19485 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
19486 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
19487 *total += rtx_cost (XEXP (x, 1), outer_code);
19494 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
19496 /* ??? SSE cost should be used here. */
19497 *total = ix86_cost->fadd;
19500 else if (X87_FLOAT_MODE_P (mode))
19502 *total = ix86_cost->fadd;
19505 else if (FLOAT_MODE_P (mode))
19507 /* ??? SSE vector cost should be used here. */
19508 *total = ix86_cost->fadd;
19516 if (!TARGET_64BIT && mode == DImode)
19518 *total = (ix86_cost->add * 2
19519 + (rtx_cost (XEXP (x, 0), outer_code)
19520 << (GET_MODE (XEXP (x, 0)) != DImode))
19521 + (rtx_cost (XEXP (x, 1), outer_code)
19522 << (GET_MODE (XEXP (x, 1)) != DImode)));
19528 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
19530 /* ??? SSE cost should be used here. */
19531 *total = ix86_cost->fchs;
19534 else if (X87_FLOAT_MODE_P (mode))
19536 *total = ix86_cost->fchs;
19539 else if (FLOAT_MODE_P (mode))
19541 /* ??? SSE vector cost should be used here. */
19542 *total = ix86_cost->fchs;
19548 if (!TARGET_64BIT && mode == DImode)
19549 *total = ix86_cost->add * 2;
19551 *total = ix86_cost->add;
19555 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
19556 && XEXP (XEXP (x, 0), 1) == const1_rtx
19557 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
19558 && XEXP (x, 1) == const0_rtx)
19560 /* This kind of construct is implemented using test[bwl].
19561 Treat it as if we had an AND. */
19562 *total = (ix86_cost->add
19563 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code)
19564 + rtx_cost (const1_rtx, outer_code));
19570 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
19575 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
19576 /* ??? SSE cost should be used here. */
19577 *total = ix86_cost->fabs;
19578 else if (X87_FLOAT_MODE_P (mode))
19579 *total = ix86_cost->fabs;
19580 else if (FLOAT_MODE_P (mode))
19581 /* ??? SSE vector cost should be used here. */
19582 *total = ix86_cost->fabs;
19586 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
19587 /* ??? SSE cost should be used here. */
19588 *total = ix86_cost->fsqrt;
19589 else if (X87_FLOAT_MODE_P (mode))
19590 *total = ix86_cost->fsqrt;
19591 else if (FLOAT_MODE_P (mode))
19592 /* ??? SSE vector cost should be used here. */
19593 *total = ix86_cost->fsqrt;
19597 if (XINT (x, 1) == UNSPEC_TP)
19608 static int current_machopic_label_num;
19610 /* Given a symbol name and its associated stub, write out the
19611 definition of the stub. */
19614 machopic_output_stub (FILE *file, const char *symb, const char *stub)
19616 unsigned int length;
19617 char *binder_name, *symbol_name, lazy_ptr_name[32];
19618 int label = ++current_machopic_label_num;
19620 /* For 64-bit we shouldn't get here. */
19621 gcc_assert (!TARGET_64BIT);
19623 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
19624 symb = (*targetm.strip_name_encoding) (symb);
19626 length = strlen (stub);
19627 binder_name = alloca (length + 32);
19628 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
19630 length = strlen (symb);
19631 symbol_name = alloca (length + 32);
19632 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
19634 sprintf (lazy_ptr_name, "L%d$lz", label);
19637 switch_to_section (darwin_sections[machopic_picsymbol_stub_section]);
19639 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
19641 fprintf (file, "%s:\n", stub);
19642 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
19646 fprintf (file, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label, label);
19647 fprintf (file, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
19648 fprintf (file, "\tjmp\t*%%edx\n");
19651 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
19653 fprintf (file, "%s:\n", binder_name);
19657 fprintf (file, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
19658 fprintf (file, "\tpushl\t%%eax\n");
19661 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
19663 fprintf (file, "\tjmp\tdyld_stub_binding_helper\n");
19665 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
19666 fprintf (file, "%s:\n", lazy_ptr_name);
19667 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
19668 fprintf (file, "\t.long %s\n", binder_name);
19672 darwin_x86_file_end (void)
19674 darwin_file_end ();
19677 #endif /* TARGET_MACHO */
19679 /* Order the registers for register allocator. */
19682 x86_order_regs_for_local_alloc (void)
19687 /* First allocate the local general purpose registers. */
19688 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
19689 if (GENERAL_REGNO_P (i) && call_used_regs[i])
19690 reg_alloc_order [pos++] = i;
19692 /* Global general purpose registers. */
19693 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
19694 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
19695 reg_alloc_order [pos++] = i;
  /* x87 registers come first in case we are doing FP math
     using them.  */
  if (!TARGET_SSE_MATH)
19700 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
19701 reg_alloc_order [pos++] = i;
19703 /* SSE registers. */
19704 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
19705 reg_alloc_order [pos++] = i;
19706 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
19707 reg_alloc_order [pos++] = i;
19709 /* x87 registers. */
19710 if (TARGET_SSE_MATH)
19711 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
19712 reg_alloc_order [pos++] = i;
19714 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
19715 reg_alloc_order [pos++] = i;
19717 /* Initialize the rest of array as we do not allocate some registers
19719 while (pos < FIRST_PSEUDO_REGISTER)
19720 reg_alloc_order [pos++] = 0;
19723 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
19724 struct attribute_spec.handler. */
19726 ix86_handle_struct_attribute (tree *node, tree name,
19727 tree args ATTRIBUTE_UNUSED,
19728 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
19731 if (DECL_P (*node))
19733 if (TREE_CODE (*node) == TYPE_DECL)
19734 type = &TREE_TYPE (*node);
19739 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
19740 || TREE_CODE (*type) == UNION_TYPE)))
19742 warning (OPT_Wattributes, "%qs attribute ignored",
19743 IDENTIFIER_POINTER (name));
19744 *no_add_attrs = true;
19747 else if ((is_attribute_p ("ms_struct", name)
19748 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
19749 || ((is_attribute_p ("gcc_struct", name)
19750 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
19752 warning (OPT_Wattributes, "%qs incompatible attribute ignored",
19753 IDENTIFIER_POINTER (name));
19754 *no_add_attrs = true;
19761 ix86_ms_bitfield_layout_p (tree record_type)
19763 return (TARGET_MS_BITFIELD_LAYOUT &&
19764 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
19765 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
19768 /* Returns an expression indicating where the this parameter is
19769 located on entry to the FUNCTION. */
19772 x86_this_parameter (tree function)
19774 tree type = TREE_TYPE (function);
19775 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
19779 const int *parm_regs;
19781 if (TARGET_64BIT_MS_ABI)
19782 parm_regs = x86_64_ms_abi_int_parameter_registers;
19784 parm_regs = x86_64_int_parameter_registers;
19785 return gen_rtx_REG (DImode, parm_regs[aggr]);
19788 if (ix86_function_regparm (type, function) > 0
19789 && !type_has_variadic_args_p (type))
19792 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
19794 return gen_rtx_REG (SImode, regno);
19797 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, aggr ? 8 : 4));
19800 /* Determine whether x86_output_mi_thunk can succeed. */
19803 x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED,
19804 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
19805 HOST_WIDE_INT vcall_offset, tree function)
19807 /* 64-bit can handle anything. */
19811 /* For 32-bit, everything's fine if we have one free register. */
19812 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
19815 /* Need a free register for vcall_offset. */
19819 /* Need a free register for GOT references. */
19820 if (flag_pic && !(*targetm.binds_local_p) (function))
19823 /* Otherwise ok. */
19827 /* Output the assembler code for a thunk function. THUNK_DECL is the
19828 declaration for the thunk function itself, FUNCTION is the decl for
19829 the target function. DELTA is an immediate constant offset to be
19830 added to THIS. If VCALL_OFFSET is nonzero, the word at
19831 *(*this + vcall_offset) should be added to THIS. */
19834 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
19835 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
19836 HOST_WIDE_INT vcall_offset, tree function)
19839 rtx this = x86_this_parameter (function);
19842 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
19843 pull it in now and let DELTA benefit. */
19846 else if (vcall_offset)
19848 /* Put the this parameter into %eax. */
19850 xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
19851 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
19854 this_reg = NULL_RTX;
19856 /* Adjust the this parameter by a fixed constant. */
19859 xops[0] = GEN_INT (delta);
19860 xops[1] = this_reg ? this_reg : this;
19863 if (!x86_64_general_operand (xops[0], DImode))
19865 tmp = gen_rtx_REG (DImode, R10_REG);
19867 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
19871 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
19874 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
19877 /* Adjust the this parameter by a value stored in the vtable. */
19881 tmp = gen_rtx_REG (DImode, R10_REG);
19884 int tmp_regno = 2 /* ECX */;
19885 if (lookup_attribute ("fastcall",
19886 TYPE_ATTRIBUTES (TREE_TYPE (function))))
19887 tmp_regno = 0 /* EAX */;
19888 tmp = gen_rtx_REG (SImode, tmp_regno);
19891 xops[0] = gen_rtx_MEM (Pmode, this_reg);
19894 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
19896 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
19898 /* Adjust the this parameter. */
19899 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
19900 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
19902 rtx tmp2 = gen_rtx_REG (DImode, R11_REG);
19903 xops[0] = GEN_INT (vcall_offset);
19905 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
19906 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
19908 xops[1] = this_reg;
19910 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
19912 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
19915 /* If necessary, drop THIS back to its stack slot. */
19916 if (this_reg && this_reg != this)
19918 xops[0] = this_reg;
19920 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
19923 xops[0] = XEXP (DECL_RTL (function), 0);
19926 if (!flag_pic || (*targetm.binds_local_p) (function))
19927 output_asm_insn ("jmp\t%P0", xops);
19928 /* All thunks should be in the same object as their target,
19929 and thus binds_local_p should be true. */
19930 else if (TARGET_64BIT_MS_ABI)
19931 gcc_unreachable ();
19934 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
19935 tmp = gen_rtx_CONST (Pmode, tmp);
19936 tmp = gen_rtx_MEM (QImode, tmp);
19938 output_asm_insn ("jmp\t%A0", xops);
19943 if (!flag_pic || (*targetm.binds_local_p) (function))
19944 output_asm_insn ("jmp\t%P0", xops);
19949 rtx sym_ref = XEXP (DECL_RTL (function), 0);
19950 tmp = (gen_rtx_SYMBOL_REF
19952 machopic_indirection_name (sym_ref, /*stub_p=*/true)));
19953 tmp = gen_rtx_MEM (QImode, tmp);
19955 output_asm_insn ("jmp\t%0", xops);
19958 #endif /* TARGET_MACHO */
19960 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
19961 output_set_got (tmp, NULL_RTX);
19964 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
19965 output_asm_insn ("jmp\t{*}%1", xops);
19971 x86_file_start (void)
19973 default_file_start ();
19975 darwin_file_start ();
19977 if (X86_FILE_START_VERSION_DIRECTIVE)
19978 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
19979 if (X86_FILE_START_FLTUSED)
19980 fputs ("\t.global\t__fltused\n", asm_out_file);
19981 if (ix86_asm_dialect == ASM_INTEL)
19982 fputs ("\t.intel_syntax\n", asm_out_file);
19986 x86_field_alignment (tree field, int computed)
19988 enum machine_mode mode;
19989 tree type = TREE_TYPE (field);
19991 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
19993 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
19994 ? get_inner_array_type (type) : type);
19995 if (mode == DFmode || mode == DCmode
19996 || GET_MODE_CLASS (mode) == MODE_INT
19997 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
19998 return MIN (32, computed);
20002 /* Output assembler code to FILE to increment profiler label # LABELNO
20003 for profiling a function entry. */
20005 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
20009 #ifndef NO_PROFILE_COUNTERS
20010 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
20013 if (!TARGET_64BIT_MS_ABI && flag_pic)
20014 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
20016 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
20020 #ifndef NO_PROFILE_COUNTERS
20021 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
20022 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
20024 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
20028 #ifndef NO_PROFILE_COUNTERS
20029 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
20030 PROFILE_COUNT_REGISTER);
20032 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
/* We don't have exact information about the insn sizes, but we may assume
   quite safely that we are informed about all 1 byte insns and memory
   address sizes.  This is enough to eliminate unnecessary padding in
   99% of cases.  */

static int
min_insn_size (rtx insn)
{
  int l = 0;

  if (!INSN_P (insn) || !active_insn_p (insn))
    return 0;

  /* Discard alignments we've emitted and jump instructions.  */
  if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
      && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
    return 0;
  if (JUMP_P (insn)
      && (GET_CODE (PATTERN (insn)) == ADDR_VEC
          || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
    return 0;

  /* Important case - calls are always 5 bytes.
     It is common to have many calls in the row.  */
  if (CALL_P (insn)
      && symbolic_reference_mentioned_p (PATTERN (insn))
      && !SIBLING_CALL_P (insn))
    return 5;
  if (get_attr_length (insn) <= 1)
    return 1;

  /* For normal instructions we may rely on the sizes of addresses
     and the presence of symbol to require 4 bytes of encoding.
     This is not the case for jumps where references are PC relative.  */
  if (!JUMP_P (insn))
    {
      l = get_attr_length_address (insn);
      if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
        l = 4;
    }
  if (l)
    return 1 + l;
  else
    return 2;
}
/* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
   window.  */

static void
ix86_avoid_jump_misspredicts (void)
{
  rtx insn, start = get_insns ();
  int nbytes = 0, njumps = 0;
  int isjump = 0;

20092 /* Look for all minimal intervals of instructions containing 4 jumps.
20093 The intervals are bounded by START and INSN. NBYTES is the total
20094 size of instructions in the interval including INSN and not including
20095 START. When the NBYTES is smaller than 16 bytes, it is possible
20096 that the end of START and INSN ends up in the same 16byte page.
20098 The smallest offset in the page INSN can start is the case where START
20099 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
20100 We add p2align to 16byte window with maxskip 17 - NBYTES + sizeof (INSN).
  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
    {
      nbytes += min_insn_size (insn);
      if (dump_file)
        fprintf (dump_file, "Insn %i estimated to %i bytes\n",
                 INSN_UID (insn), min_insn_size (insn));
      if ((JUMP_P (insn)
           && GET_CODE (PATTERN (insn)) != ADDR_VEC
           && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
          || CALL_P (insn))
        njumps++;
      else
        continue;

      while (njumps > 3)
        {
          start = NEXT_INSN (start);
          if ((JUMP_P (start)
               && GET_CODE (PATTERN (start)) != ADDR_VEC
               && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
              || CALL_P (start))
            njumps--, isjump = 1;
          else
            isjump = 0;
          nbytes -= min_insn_size (start);
        }
      gcc_assert (njumps >= 0);
      if (dump_file)
        fprintf (dump_file, "Interval %i to %i has %i bytes\n",
                 INSN_UID (start), INSN_UID (insn), nbytes);

      if (njumps == 3 && isjump && nbytes < 16)
        {
          int padsize = 15 - nbytes + min_insn_size (insn);

          if (dump_file)
            fprintf (dump_file, "Padding insn %i by %i bytes!\n",
                     INSN_UID (insn), padsize);
          emit_insn_before (gen_align (GEN_INT (padsize)), insn);
        }
    }
}
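/* Worked example: four 4-byte jumps in a row span exactly 16 bytes, so no
   padding is emitted; if the same four jumps fit in 15 bytes or fewer,
   padsize = 15 - nbytes + min_insn_size (insn) is intended to push the
   last jump past the current 16-byte window, keeping at most three jumps
   per window.  */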
/* AMD Athlon works faster
   when RET is not destination of conditional jump or directly preceded
   by other jump instruction.  We avoid the penalty by inserting NOP just
   before the RET instructions in such cases.  */

static void
ix86_pad_returns (void)
{
  edge e;
  edge_iterator ei;

20156 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
20158 basic_block bb = e->src;
20159 rtx ret = BB_END (bb);
20161 bool replace = false;
20163 if (!JUMP_P (ret) || GET_CODE (PATTERN (ret)) != RETURN
20164 || !maybe_hot_bb_p (bb))
20166 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
20167 if (active_insn_p (prev) || LABEL_P (prev))
20169 if (prev && LABEL_P (prev))
20174 FOR_EACH_EDGE (e, ei, bb->preds)
20175 if (EDGE_FREQUENCY (e) && e->src->index >= 0
20176 && !(e->flags & EDGE_FALLTHRU))
      prev = prev_active_insn (ret);
      if (prev
          && ((JUMP_P (prev) && any_condjump_p (prev))
              || CALL_P (prev)))
        replace = true;
      /* Empty functions get branch mispredict even when the jump destination
         is not visible to us.  */
      if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
        replace = true;

      if (replace)
        {
          emit_insn_before (gen_return_internal_long (), ret);
          delete_insn (ret);
        }
    }
}
/* Implement machine specific optimizations.  We implement padding of returns
   for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window.  */

static void
ix86_reorg (void)
{
  if (TARGET_PAD_RETURNS && optimize && !optimize_size)
    ix86_pad_returns ();
  if (TARGET_FOUR_JUMP_LIMIT && optimize && !optimize_size)
    ix86_avoid_jump_misspredicts ();
}
/* Return nonzero when QImode register that must be represented via REX prefix
   is used.  */
bool
x86_extended_QIreg_mentioned_p (rtx insn)
{
  int i;

  extract_insn_cached (insn);
  for (i = 0; i < recog_data.n_operands; i++)
    if (REG_P (recog_data.operand[i])
        && REGNO (recog_data.operand[i]) >= 4)
      return true;
  return false;
}
20224 /* Return nonzero when P points to register encoded via REX prefix.
20225 Called via for_each_rtx. */
20227 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
20229 unsigned int regno;
20232 regno = REGNO (*p);
20233 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
20236 /* Return true when INSN mentions register that must be encoded using REX
20239 x86_extended_reg_mentioned_p (rtx insn)
20241 return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
20244 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
20245 optabs would emit if we didn't have TFmode patterns. */
20248 x86_emit_floatuns (rtx operands[2])
20250 rtx neglab, donelab, i0, i1, f0, in, out;
20251 enum machine_mode mode, inmode;
20253 inmode = GET_MODE (operands[1]);
  gcc_assert (inmode == SImode || inmode == DImode);

  out = operands[0];
  in = force_reg (inmode, operands[1]);
  mode = GET_MODE (out);
20259 neglab = gen_label_rtx ();
20260 donelab = gen_label_rtx ();
20261 f0 = gen_reg_rtx (mode);
20263 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
20265 expand_float (out, in, 0);
  emit_jump_insn (gen_jump (donelab));
  emit_barrier ();
20270 emit_label (neglab);
  i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
                            1, OPTAB_DIRECT);
  i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
                            1, OPTAB_DIRECT);
  i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
20278 expand_float (f0, i0, 0);
20280 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
  emit_label (donelab);
}
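/* Worked numeric example for the negative path: converting 0xffffffff
   (SImode input), i0 = (x >> 1) | (x & 1) = 0x7fffffff is nonnegative as
   a signed value, so the signed expand_float is safe; f0 + f0 then gives
   4294967296.0, the correctly rounded single-precision result.  ORing in
   the shifted-out low bit is what keeps round-to-nearest exact.  */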
20285 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
20286 with all elements equal to VAR. Return true if successful. */
20289 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
20290 rtx target, rtx val)
20292 enum machine_mode smode, wsmode, wvmode;
20307 val = force_reg (GET_MODE_INNER (mode), val);
20308 x = gen_rtx_VEC_DUPLICATE (mode, val);
20309 emit_insn (gen_rtx_SET (VOIDmode, target, x));
      if (TARGET_SSE || TARGET_3DNOW_A)
        {
          val = gen_lowpart (SImode, val);
          x = gen_rtx_TRUNCATE (HImode, val);
          x = gen_rtx_VEC_DUPLICATE (mode, x);
          emit_insn (gen_rtx_SET (VOIDmode, target, x));
          return true;
        }
20342 /* Extend HImode to SImode using a paradoxical SUBREG. */
20343 tmp1 = gen_reg_rtx (SImode);
20344 emit_move_insn (tmp1, gen_lowpart (SImode, val));
20345 /* Insert the SImode value as low element of V4SImode vector. */
20346 tmp2 = gen_reg_rtx (V4SImode);
20347 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
20348 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
20349 CONST0_RTX (V4SImode),
20351 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
20352 /* Cast the V4SImode vector back to a V8HImode vector. */
20353 tmp1 = gen_reg_rtx (V8HImode);
20354 emit_move_insn (tmp1, gen_lowpart (V8HImode, tmp2));
20355 /* Duplicate the low short through the whole low SImode word. */
20356 emit_insn (gen_sse2_punpcklwd (tmp1, tmp1, tmp1));
20357 /* Cast the V8HImode vector back to a V4SImode vector. */
20358 tmp2 = gen_reg_rtx (V4SImode);
20359 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
20360 /* Replicate the low element of the V4SImode vector. */
20361 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
          /* Cast the V4SImode back to V8HImode, and store in target.  */
          emit_move_insn (target, gen_lowpart (V8HImode, tmp2));
          return true;
        }
20374 /* Extend QImode to SImode using a paradoxical SUBREG. */
20375 tmp1 = gen_reg_rtx (SImode);
20376 emit_move_insn (tmp1, gen_lowpart (SImode, val));
20377 /* Insert the SImode value as low element of V4SImode vector. */
20378 tmp2 = gen_reg_rtx (V4SImode);
20379 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
20380 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
20381 CONST0_RTX (V4SImode),
20383 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
20384 /* Cast the V4SImode vector back to a V16QImode vector. */
20385 tmp1 = gen_reg_rtx (V16QImode);
20386 emit_move_insn (tmp1, gen_lowpart (V16QImode, tmp2));
20387 /* Duplicate the low byte through the whole low SImode word. */
20388 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
20389 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
20390 /* Cast the V16QImode vector back to a V4SImode vector. */
20391 tmp2 = gen_reg_rtx (V4SImode);
20392 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
20393 /* Replicate the low element of the V4SImode vector. */
20394 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
          /* Cast the V4SImode back to V16QImode, and store in target.  */
          emit_move_insn (target, gen_lowpart (V16QImode, tmp2));
          return true;
        }
    widen:
      /* Replicate the value once into the next wider mode and recurse.  */
      val = convert_modes (wsmode, smode, val, true);
      x = expand_simple_binop (wsmode, ASHIFT, val,
                               GEN_INT (GET_MODE_BITSIZE (smode)),
                               NULL_RTX, 1, OPTAB_LIB_WIDEN);
      val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);

      x = gen_reg_rtx (wvmode);
      if (!ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val))
        gcc_unreachable ();
      emit_move_insn (target, gen_lowpart (mode, x));
      return true;

    default:
      return false;
    }
}
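/* Worked example of the widening recursion: broadcasting a QImode value
   without SSE2 first forms val = (val << 8) | val in HImode, recurses to
   broadcast that HImode pair, which in turn widens to SImode, and so on
   until a mode with a direct VEC_DUPLICATE pattern is reached; every step
   doubles the number of copies of the original byte.  */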
20422 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
20423 whose ONE_VAR element is VAR, and other elements are zero. Return true
20427 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
20428 rtx target, rtx var, int one_var)
20430 enum machine_mode vsimode;
20446 var = force_reg (GET_MODE_INNER (mode), var);
20447 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
20448 emit_insn (gen_rtx_SET (VOIDmode, target, x));
20453 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
20454 new_target = gen_reg_rtx (mode);
20456 new_target = target;
20457 var = force_reg (GET_MODE_INNER (mode), var);
20458 x = gen_rtx_VEC_DUPLICATE (mode, var);
20459 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
20460 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
20463 /* We need to shuffle the value to the correct position, so
20464 create a new pseudo to store the intermediate result. */
20466 /* With SSE2, we can use the integer shuffle insns. */
20467 if (mode != V4SFmode && TARGET_SSE2)
20469 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
20471 GEN_INT (one_var == 1 ? 0 : 1),
20472 GEN_INT (one_var == 2 ? 0 : 1),
20473 GEN_INT (one_var == 3 ? 0 : 1)));
20474 if (target != new_target)
20475 emit_move_insn (target, new_target);
20479 /* Otherwise convert the intermediate result to V4SFmode and
20480 use the SSE1 shuffle instructions. */
20481 if (mode != V4SFmode)
20483 tmp = gen_reg_rtx (V4SFmode);
20484 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
20489 emit_insn (gen_sse_shufps_1 (tmp, tmp, tmp,
20491 GEN_INT (one_var == 1 ? 0 : 1),
20492 GEN_INT (one_var == 2 ? 0+4 : 1+4),
20493 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
20495 if (mode != V4SFmode)
20496 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
20497 else if (tmp != target)
20498 emit_move_insn (target, tmp);
20500 else if (target != new_target)
20501 emit_move_insn (target, new_target);
20506 vsimode = V4SImode;
20512 vsimode = V2SImode;
20518 /* Zero extend the variable element to SImode and recurse. */
20519 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
20521 x = gen_reg_rtx (vsimode);
20522 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
20524 gcc_unreachable ();
20526 emit_move_insn (target, gen_lowpart (mode, x));
20534 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
20535 consisting of the values in VALS. It is known that all elements
20536 except ONE_VAR are constants. Return true if successful. */
20539 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
20540 rtx target, rtx vals, int one_var)
20542 rtx var = XVECEXP (vals, 0, one_var);
20543 enum machine_mode wmode;
20546 const_vec = copy_rtx (vals);
20547 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
20548 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
20556 /* For the two element vectors, it's just as easy to use
20557 the general case. */
20573 /* There's no way to set one QImode entry easily. Combine
20574 the variable value with its adjacent constant value, and
20575 promote to an HImode set. */
20576 x = XVECEXP (vals, 0, one_var ^ 1);
20579 var = convert_modes (HImode, QImode, var, true);
20580 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
20581 NULL_RTX, 1, OPTAB_LIB_WIDEN);
20582 x = GEN_INT (INTVAL (x) & 0xff);
20586 var = convert_modes (HImode, QImode, var, true);
20587 x = gen_int_mode (INTVAL (x) << 8, HImode);
20589 if (x != const0_rtx)
20590 var = expand_simple_binop (HImode, IOR, var, x, var,
20591 1, OPTAB_LIB_WIDEN);
20593 x = gen_reg_rtx (wmode);
20594 emit_move_insn (x, gen_lowpart (wmode, const_vec));
20595 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
20597 emit_move_insn (target, gen_lowpart (mode, x));
20604 emit_move_insn (target, const_vec);
20605 ix86_expand_vector_set (mmx_ok, target, var, one_var);
20609 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
20610 all values variable, and none identical. */
20613 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
20614 rtx target, rtx vals)
20616 enum machine_mode half_mode = GET_MODE_INNER (mode);
20617 rtx op0 = NULL, op1 = NULL;
20618 bool use_vec_concat = false;
20624 if (!mmx_ok && !TARGET_SSE)
20630 /* For the two element vectors, we always implement VEC_CONCAT. */
20631 op0 = XVECEXP (vals, 0, 0);
20632 op1 = XVECEXP (vals, 0, 1);
20633 use_vec_concat = true;
20637 half_mode = V2SFmode;
20640 half_mode = V2SImode;
20646 /* For V4SF and V4SI, we implement a concat of two V2 vectors.
20647 Recurse to load the two halves. */
20649 op0 = gen_reg_rtx (half_mode);
20650 v = gen_rtvec (2, XVECEXP (vals, 0, 0), XVECEXP (vals, 0, 1));
20651 ix86_expand_vector_init (false, op0, gen_rtx_PARALLEL (half_mode, v));
20653 op1 = gen_reg_rtx (half_mode);
20654 v = gen_rtvec (2, XVECEXP (vals, 0, 2), XVECEXP (vals, 0, 3));
20655 ix86_expand_vector_init (false, op1, gen_rtx_PARALLEL (half_mode, v));
20657 use_vec_concat = true;
20668 gcc_unreachable ();
20671 if (use_vec_concat)
20673 if (!register_operand (op0, half_mode))
20674 op0 = force_reg (half_mode, op0);
20675 if (!register_operand (op1, half_mode))
20676 op1 = force_reg (half_mode, op1);
20678 emit_insn (gen_rtx_SET (VOIDmode, target,
20679 gen_rtx_VEC_CONCAT (mode, op0, op1)));
20683 int i, j, n_elts, n_words, n_elt_per_word;
20684 enum machine_mode inner_mode;
20685 rtx words[4], shift;
20687 inner_mode = GET_MODE_INNER (mode);
20688 n_elts = GET_MODE_NUNITS (mode);
20689 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
20690 n_elt_per_word = n_elts / n_words;
20691 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
20693 for (i = 0; i < n_words; ++i)
20695 rtx word = NULL_RTX;
20697 for (j = 0; j < n_elt_per_word; ++j)
20699 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
20700 elt = convert_modes (word_mode, inner_mode, elt, true);
20706 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
20707 word, 1, OPTAB_LIB_WIDEN);
20708 word = expand_simple_binop (word_mode, IOR, word, elt,
20709 word, 1, OPTAB_LIB_WIDEN);
20717 emit_move_insn (target, gen_lowpart (mode, words[0]));
20718 else if (n_words == 2)
20720 rtx tmp = gen_reg_rtx (mode);
20721 emit_insn (gen_rtx_CLOBBER (VOIDmode, tmp));
20722 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
20723 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
20724 emit_move_insn (target, tmp);
20726 else if (n_words == 4)
20728 rtx tmp = gen_reg_rtx (V4SImode);
20729 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
20730 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
20731 emit_move_insn (target, gen_lowpart (mode, tmp));
20734 gcc_unreachable ();
20738 /* Initialize vector TARGET via VALS. Suppress the use of MMX
20739 instructions unless MMX_OK is true. */
20742 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
20744 enum machine_mode mode = GET_MODE (target);
20745 enum machine_mode inner_mode = GET_MODE_INNER (mode);
20746 int n_elts = GET_MODE_NUNITS (mode);
20747 int n_var = 0, one_var = -1;
20748 bool all_same = true, all_const_zero = true;
20752 for (i = 0; i < n_elts; ++i)
20754 x = XVECEXP (vals, 0, i);
20755 if (!CONSTANT_P (x))
20756 n_var++, one_var = i;
20757 else if (x != CONST0_RTX (inner_mode))
20758 all_const_zero = false;
20759 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
20763 /* Constants are best loaded from the constant pool. */
20766 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
20770 /* If all values are identical, broadcast the value. */
20772 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
20773 XVECEXP (vals, 0, 0)))
20776 /* Values where only one field is non-constant are best loaded from
20777 the pool and overwritten via move later. */
20781 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
20782 XVECEXP (vals, 0, one_var),
20786 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
20790 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
20794 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
20796 enum machine_mode mode = GET_MODE (target);
20797 enum machine_mode inner_mode = GET_MODE_INNER (mode);
20798 bool use_vec_merge = false;
20807 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
20808 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
20810 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
20812 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
20813 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
20823 /* For the two element vectors, we implement a VEC_CONCAT with
20824 the extraction of the other element. */
20826 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
20827 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
20830 op0 = val, op1 = tmp;
20832 op0 = tmp, op1 = val;
20834 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
20835 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
20843 use_vec_merge = true;
20847 /* tmp = target = A B C D */
20848 tmp = copy_to_reg (target);
20849 /* target = A A B B */
20850 emit_insn (gen_sse_unpcklps (target, target, target));
20851 /* target = X A B B */
20852 ix86_expand_vector_set (false, target, val, 0);
20853 /* target = A X C D */
20854 emit_insn (gen_sse_shufps_1 (target, target, tmp,
20855 GEN_INT (1), GEN_INT (0),
20856 GEN_INT (2+4), GEN_INT (3+4)));
20860 /* tmp = target = A B C D */
20861 tmp = copy_to_reg (target);
20862 /* tmp = X B C D */
20863 ix86_expand_vector_set (false, tmp, val, 0);
20864 /* target = A B X D */
20865 emit_insn (gen_sse_shufps_1 (target, target, tmp,
20866 GEN_INT (0), GEN_INT (1),
20867 GEN_INT (0+4), GEN_INT (3+4)));
20871 /* tmp = target = A B C D */
20872 tmp = copy_to_reg (target);
20873 /* tmp = X B C D */
20874 ix86_expand_vector_set (false, tmp, val, 0);
20875 /* target = A B C X */
20876 emit_insn (gen_sse_shufps_1 (target, target, tmp,
20877 GEN_INT (0), GEN_INT (1),
20878 GEN_INT (2+4), GEN_INT (0+4)));
20882 gcc_unreachable ();
20887 /* Element 0 handled by vec_merge below. */
20890 use_vec_merge = true;
20896 /* With SSE2, use integer shuffles to swap element 0 and ELT,
20897 store into element 0, then shuffle them back. */
20901 order[0] = GEN_INT (elt);
20902 order[1] = const1_rtx;
20903 order[2] = const2_rtx;
20904 order[3] = GEN_INT (3);
20905 order[elt] = const0_rtx;
20907 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
20908 order[1], order[2], order[3]));
20910 ix86_expand_vector_set (false, target, val, 0);
20912 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
20913 order[1], order[2], order[3]));
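/* Editor's note: a standalone sketch (not part of this file; helper names
   are illustrative) of the pshufd trick above, using SSE2 intrinsics with
   ELT fixed to 2 so the shuffle immediates are compile-time constants:
   swap lanes 0 and ELT, merge the new value into lane 0, swap back.  */
#if 0
#include <emmintrin.h>

static __m128i
insert_lane0 (__m128i v, int x)
{
  /* movss merges the low lane of the source into the destination,
     i.e. the "store into element 0" step.  */
  return _mm_castps_si128 (_mm_move_ss (_mm_castsi128_ps (v),
					_mm_castsi128_ps (_mm_cvtsi32_si128 (x))));
}

static __m128i
set_lane2_v4si (__m128i v, int x)
{
  v = _mm_shuffle_epi32 (v, _MM_SHUFFLE (3, 0, 1, 2)); /* swap lanes 0 and 2 */
  v = insert_lane0 (v, x);
  return _mm_shuffle_epi32 (v, _MM_SHUFFLE (3, 0, 1, 2)); /* swap back */
}
#endif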
20917 /* For SSE1, we have to reuse the V4SF code. */
20918 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
20919 gen_lowpart (SFmode, val), elt);
20924 use_vec_merge = TARGET_SSE2;
20927 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
20938 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
20939 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
20940 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
20944 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
20946 emit_move_insn (mem, target);
20948 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
20949 emit_move_insn (tmp, val);
20951 emit_move_insn (target, mem);
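/* Editor's note: a standalone C sketch (not part of this file; the helper
   name is illustrative) of the stack-temporary fallback above: spill the
   vector, overwrite one element in memory, reload the whole vector.  */
#if 0
#include <string.h>

static void
set_element_via_stack (float vec[4], int elt, float val)
{
  float tmp[4];
  memcpy (tmp, vec, sizeof tmp);  /* emit_move_insn (mem, target) */
  tmp[elt] = val;                 /* emit_move_insn (tmp, val)    */
  memcpy (vec, tmp, sizeof tmp);  /* emit_move_insn (target, mem) */
}
#endif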
20956 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
20958 enum machine_mode mode = GET_MODE (vec);
20959 enum machine_mode inner_mode = GET_MODE_INNER (mode);
20960 bool use_vec_extr = false;
20973 use_vec_extr = true;
20985 tmp = gen_reg_rtx (mode);
20986 emit_insn (gen_sse_shufps_1 (tmp, vec, vec,
20987 GEN_INT (elt), GEN_INT (elt),
20988 GEN_INT (elt+4), GEN_INT (elt+4)));
20992 tmp = gen_reg_rtx (mode);
20993 emit_insn (gen_sse_unpckhps (tmp, vec, vec));
20997 gcc_unreachable ();
21000 use_vec_extr = true;
21015 tmp = gen_reg_rtx (mode);
21016 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
21017 GEN_INT (elt), GEN_INT (elt),
21018 GEN_INT (elt), GEN_INT (elt)));
21022 tmp = gen_reg_rtx (mode);
21023 emit_insn (gen_sse2_punpckhdq (tmp, vec, vec));
21027 gcc_unreachable ();
21030 use_vec_extr = true;
21035 /* For SSE1, we have to reuse the V4SF code. */
21036 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
21037 gen_lowpart (V4SFmode, vec), elt);
21043 use_vec_extr = TARGET_SSE2;
21046 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
21051 /* ??? Could extract the appropriate HImode element and shift. */
21058 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
21059 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
21061 /* Let the rtl optimizers know about the zero extension performed. */
21062 if (inner_mode == HImode)
21064 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
21065 target = gen_lowpart (SImode, target);
21068 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
21072 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
21074 emit_move_insn (mem, vec);
21076 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
21077 emit_move_insn (target, tmp);
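/* Editor's note: an illustrative sketch (not part of this file; the helper
   name is an assumption), with SSE1 intrinsics, of the shuffle-based
   extract above: replicate the wanted lane everywhere, then read the
   scalar from lane 0, mirroring
   gen_sse_shufps_1 (tmp, vec, vec, elt, elt, elt+4, elt+4).  */
#if 0
#include <xmmintrin.h>

static float
extract_lane2 (__m128 v)
{
  v = _mm_shuffle_ps (v, v, _MM_SHUFFLE (2, 2, 2, 2));
  return _mm_cvtss_f32 (v);
}
#endif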
21081 /* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
21082 pattern to reduce; DEST is the destination; IN is the input vector. */
21085 ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
21087 rtx tmp1, tmp2, tmp3;
21089 tmp1 = gen_reg_rtx (V4SFmode);
21090 tmp2 = gen_reg_rtx (V4SFmode);
21091 tmp3 = gen_reg_rtx (V4SFmode);
21093 emit_insn (gen_sse_movhlps (tmp1, in, in));
21094 emit_insn (fn (tmp2, tmp1, in));
21096 emit_insn (gen_sse_shufps_1 (tmp3, tmp2, tmp2,
21097 GEN_INT (1), GEN_INT (1),
21098 GEN_INT (1+4), GEN_INT (1+4)));
21099 emit_insn (fn (dest, tmp2, tmp3));
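/* Editor's note: a standalone sketch (not part of this file; the helper
   name is illustrative) of the reduction above, using SSE1 intrinsics with
   addition standing in for the binary pattern FN.  Lane 0 of the result
   holds (in0 + in2) + (in1 + in3).  */
#if 0
#include <xmmintrin.h>

static float
reduce_add_v4sf (__m128 in)
{
  __m128 tmp1 = _mm_movehl_ps (in, in);  /* { in2, in3, in2, in3 } */
  __m128 tmp2 = _mm_add_ps (tmp1, in);   /* lane0 = in0+in2, lane1 = in1+in3 */
  __m128 tmp3 = _mm_shuffle_ps (tmp2, tmp2, _MM_SHUFFLE (1, 1, 1, 1));
  return _mm_cvtss_f32 (_mm_add_ss (tmp2, tmp3));
}
#endif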
21102 /* Target hook for scalar_mode_supported_p. */
21104 ix86_scalar_mode_supported_p (enum machine_mode mode)
21106 if (DECIMAL_FLOAT_MODE_P (mode))
21109 return default_scalar_mode_supported_p (mode);
21112 /* Implements target hook vector_mode_supported_p. */
21114 ix86_vector_mode_supported_p (enum machine_mode mode)
21116 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
21118 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
21120 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
21122 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
21127 /* Worker function for TARGET_MD_ASM_CLOBBERS.
21129 We do this in the new i386 backend to maintain source compatibility
21130 with the old cc0-based compiler. */
21133 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
21134 tree inputs ATTRIBUTE_UNUSED,
21137 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
21139 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
21144 /* Implements the target hook targetm.encode_section_info.  This
21145 is not used by NetWare. */
21147 static void ATTRIBUTE_UNUSED
21148 ix86_encode_section_info (tree decl, rtx rtl, int first)
21150 default_encode_section_info (decl, rtl, first);
21152 if (TREE_CODE (decl) == VAR_DECL
21153 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
21154 && ix86_in_large_data_p (decl))
21155 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
21158 /* Worker function for REVERSE_CONDITION. */
21161 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
21163 return (mode != CCFPmode && mode != CCFPUmode
21164 ? reverse_condition (code)
21165 : reverse_condition_maybe_unordered (code));
21168 /* Output code to perform an x87 FP register move, from OPERANDS[1]
21172 output_387_reg_move (rtx insn, rtx *operands)
21174 if (REG_P (operands[0]))
21176 if (REG_P (operands[1])
21177 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
21179 if (REGNO (operands[0]) == FIRST_STACK_REG)
21180 return output_387_ffreep (operands, 0);
21181 return "fstp\t%y0";
21183 if (STACK_TOP_P (operands[0]))
21184 return "fld%z1\t%y1";
21187 else if (MEM_P (operands[0]))
21189 gcc_assert (REG_P (operands[1]));
21190 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
21191 return "fstp%z0\t%y0";
21194 /* There is no non-popping store to memory for XFmode.
21195 So if we need one, follow the store with a load. */
21196 if (GET_MODE (operands[0]) == XFmode)
21197 return "fstp%z0\t%y0\n\tfld%z0\t%y0";
21199 return "fst%z0\t%y0";
21206 /* Output code to perform a conditional jump to LABEL, if the C2 flag
21207 in the FP status register is set. */
21210 ix86_emit_fp_unordered_jump (rtx label)
21212 rtx reg = gen_reg_rtx (HImode);
21215 emit_insn (gen_x86_fnstsw_1 (reg));
21217 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_size))
21219 emit_insn (gen_x86_sahf_1 (reg));
21221 temp = gen_rtx_REG (CCmode, FLAGS_REG);
21222 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
21226 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
21228 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
21229 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
21232 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
21233 gen_rtx_LABEL_REF (VOIDmode, label),
21235 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
21237 emit_jump_insn (temp);
21238 predict_jump (REG_BR_PROB_BASE * 10 / 100);
21241 /* Output code to perform a log1p XFmode calculation. */
21243 void
21244 ix86_emit_i387_log1p (rtx op0, rtx op1)
21245 rtx label1 = gen_label_rtx ();
21246 rtx label2 = gen_label_rtx ();
21248 rtx tmp = gen_reg_rtx (XFmode);
21249 rtx tmp2 = gen_reg_rtx (XFmode);
21251 emit_insn (gen_absxf2 (tmp, op1));
21252 emit_insn (gen_cmpxf (tmp,
21253 CONST_DOUBLE_FROM_REAL_VALUE (
21254 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
21255 XFmode)));
21256 emit_jump_insn (gen_bge (label1));
21258 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
21259 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
21260 emit_jump (label2);
21262 emit_label (label1);
21263 emit_move_insn (tmp, CONST1_RTX (XFmode));
21264 emit_insn (gen_addxf3 (tmp, op1, tmp));
21265 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
21266 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
21268 emit_label (label2);
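/* Editor's note: a standalone C demo (not part of this file) of why the
   expansion above branches on |x| < 1 - sqrt(2)/2 ~= 0.29289...: for small
   arguments, computing 1.0 + x first loses the low bits of x, which is
   exactly the cancellation the fyl2xp1 path avoids.  Build with -lm.  */
#if 0
#include <math.h>
#include <stdio.h>

int
main (void)
{
  double x = 1e-17;
  /* 1.0 + x rounds to 1.0 in double precision, so the naive form
     collapses to log (1.0) == 0.  */
  printf ("log (1 + x) = %g\n", log (1.0 + x)); /* 0 */
  printf ("log1p (x)   = %g\n", log1p (x));     /* ~1e-17 */
  return 0;
}
#endif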
21271 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
21273 static void ATTRIBUTE_UNUSED
21274 i386_solaris_elf_named_section (const char *name, unsigned int flags,
21277 /* With Binutils 2.15, the "@unwind" marker must be specified on
21278 every occurrence of the ".eh_frame" section, not just the first
21279 one. */
21280 if (TARGET_64BIT
21281 && strcmp (name, ".eh_frame") == 0)
21283 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
21284 flags & SECTION_WRITE ? "aw" : "a");
21287 default_elf_asm_named_section (name, flags, decl);
21290 /* Return the mangling of TYPE if it is an extended fundamental type. */
21292 static const char *
21293 ix86_mangle_fundamental_type (tree type)
21295 switch (TYPE_MODE (type))
21298 /* __float128 is "g". */
21301 /* "long double" or __float80 is "e". */
21308 /* For 32-bit code we can save PIC register setup by using
21309 the hidden function __stack_chk_fail_local instead of calling
21310 __stack_chk_fail directly.  64-bit code doesn't need to set up any PIC
21311 register, so it is better to call __stack_chk_fail directly. */
21314 ix86_stack_protect_fail (void)
21316 return TARGET_64BIT
21317 ? default_external_stack_protect_fail ()
21318 : default_hidden_stack_protect_fail ();
21321 /* Select a format to encode pointers in exception handling data. CODE
21322 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
21323 true if the symbol may be affected by dynamic relocations.
21325 ??? All x86 object file formats are capable of representing this.
21326 After all, the relocation needed is the same as for the call insn.
21327 Whether or not a particular assembler allows us to enter such, I
21328 guess we'll have to see. */
21330 asm_preferred_eh_data_format (int code, int global)
21332 if (flag_pic)
21334 int type = DW_EH_PE_sdata8;
21336 || ix86_cmodel == CM_SMALL_PIC
21337 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
21338 type = DW_EH_PE_sdata4;
21339 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
21341 if (ix86_cmodel == CM_SMALL
21342 || (ix86_cmodel == CM_MEDIUM && code))
21343 return DW_EH_PE_udata4;
21344 return DW_EH_PE_absptr;
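/* Editor's note: for reference, the DW_EH_PE_* values (from dwarf2.h)
   combine by simple bitwise OR, so the encodings returned above are:
     DW_EH_PE_pcrel (0x10) | DW_EH_PE_sdata4 (0x0b)              = 0x1b
     DW_EH_PE_indirect (0x80) | DW_EH_PE_pcrel | DW_EH_PE_sdata4 = 0x9b
     DW_EH_PE_udata4                                             = 0x03
     DW_EH_PE_absptr                                             = 0x00  */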
21347 /* Expand copysign from SIGN to the positive value ABS_VALUE
21348 storing in RESULT. If MASK is non-null, it shall be a mask to mask out
21349 the sign bit. */
21350 static void
21351 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
21353 enum machine_mode mode = GET_MODE (sign);
21354 rtx sgn = gen_reg_rtx (mode);
21355 if (mask == NULL_RTX)
21357 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), false);
21358 if (!VECTOR_MODE_P (mode))
21360 /* We need to generate a scalar mode mask in this case. */
21361 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
21362 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
21363 mask = gen_reg_rtx (mode);
21364 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
21368 mask = gen_rtx_NOT (mode, mask);
21369 emit_insn (gen_rtx_SET (VOIDmode, sgn,
21370 gen_rtx_AND (mode, mask, sign)));
21371 emit_insn (gen_rtx_SET (VOIDmode, result,
21372 gen_rtx_IOR (mode, abs_value, sgn)));
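/* Editor's note: a standalone C sketch (not part of this file; the helper
   name is illustrative) of the AND/IOR combination above: take only the
   sign bit of SIGN and OR it into the already-positive ABS_VALUE.  */
#if 0
#include <stdint.h>
#include <string.h>

static double
copysign_bits (double abs_value, double sign)
{
  uint64_t a, s;
  memcpy (&a, &abs_value, sizeof a);
  memcpy (&s, &sign, sizeof s);
  a |= s & 0x8000000000000000ull;  /* sgn = mask & sign; result |= sgn */
  memcpy (&abs_value, &a, sizeof a);
  return abs_value;
}
#endif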
21375 /* Expand fabs (OP0) and return a new rtx that holds the result. The
21376 mask for masking out the sign-bit is stored in *SMASK, if that is
21379 ix86_expand_sse_fabs (rtx op0, rtx *smask)
21381 enum machine_mode mode = GET_MODE (op0);
21384 xa = gen_reg_rtx (mode);
21385 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), true);
21386 if (!VECTOR_MODE_P (mode))
21388 /* We need to generate a scalar mode mask in this case. */
21389 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
21390 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
21391 mask = gen_reg_rtx (mode);
21392 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
21394 emit_insn (gen_rtx_SET (VOIDmode, xa,
21395 gen_rtx_AND (mode, op0, mask)));
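/* Editor's note: a standalone C sketch (not part of this file; the helper
   name is illustrative) of the fabs expansion above: ANDing with the
   inverted sign-bit mask clears the sign without touching any other
   bits.  */
#if 0
#include <stdint.h>
#include <string.h>

static double
fabs_bits (double x)
{
  uint64_t u;
  memcpy (&u, &x, sizeof u);
  u &= ~0x8000000000000000ull;  /* xa = op0 & ~signbit */
  memcpy (&x, &u, sizeof u);
  return x;
}
#endif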
21403 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
21404 swapping the operands if SWAP_OPERANDS is true. The expanded
21405 code is a forward jump to a newly created label in case the
21406 comparison is true. The generated label rtx is returned. */
21408 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
21409 bool swap_operands)
21420 label = gen_label_rtx ();
21421 tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
21422 emit_insn (gen_rtx_SET (VOIDmode, tmp,
21423 gen_rtx_COMPARE (CCFPUmode, op0, op1)));
21424 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
21425 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
21426 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
21427 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
21428 JUMP_LABEL (tmp) = label;
21433 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
21434 using comparison code CODE. Operands are swapped for the comparison if
21435 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
21437 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
21438 bool swap_operands)
21440 enum machine_mode mode = GET_MODE (op0);
21441 rtx mask = gen_reg_rtx (mode);
21450 if (mode == DFmode)
21451 emit_insn (gen_sse2_maskcmpdf3 (mask, op0, op1,
21452 gen_rtx_fmt_ee (code, mode, op0, op1)));
21454 emit_insn (gen_sse_maskcmpsf3 (mask, op0, op1,
21455 gen_rtx_fmt_ee (code, mode, op0, op1)));
21460 /* Generate and return a rtx of mode MODE for 2**n where n is the number
21461 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
21463 ix86_gen_TWO52 (enum machine_mode mode)
21465 REAL_VALUE_TYPE TWO52r;
21468 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
21469 TWO52 = const_double_from_real_value (TWO52r, mode);
21470 TWO52 = force_reg (mode, TWO52);
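/* Editor's note: a standalone C demo (not part of this file) of the TWO52
   trick used by the expanders below: once a value reaches 2**52, the ulp
   of a double is 1.0, so the addition itself performs the
   round-to-nearest-even.  The volatiles block constant folding and x87
   excess precision, either of which would defeat the trick.  */
#if 0
#include <stdio.h>

int
main (void)
{
  volatile double two52 = 4503599627370496.0; /* 2**52 */
  volatile double a = 3.7, b = 2.5;
  printf ("%g\n", (a + two52) - two52); /* 4 */
  printf ("%g\n", (b + two52) - two52); /* 2, ties go to even */
  return 0;
}
#endif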
21475 /* Expand SSE sequence for computing lround from OP1 storing
21478 ix86_expand_lround (rtx op0, rtx op1)
21480 /* C code for the stuff we're doing below:
21481 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
21484 enum machine_mode mode = GET_MODE (op1);
21485 const struct real_format *fmt;
21486 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
21489 /* load nextafter (0.5, 0.0) */
21490 fmt = REAL_MODE_FORMAT (mode);
21491 real_2expN (&half_minus_pred_half, -(fmt->p) - 1);
21492 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
21494 /* adj = copysign (0.5, op1) */
21495 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
21496 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
21498 /* adj = op1 + adj */
21499 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
21501 /* op0 = (imode)adj */
21502 expand_fix (op0, adj, 0);
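/* Editor's note: a standalone C sketch (not part of this file; the helper
   name is illustrative) of the sequence above.  nextafter (0.5, 0.0) is
   used instead of 0.5 so that 0.49999999999999994 (the largest double
   below 0.5) is not dragged up to 1 when the addition rounds.  */
#if 0
#include <math.h>

static long
lround_like (double x)
{
  double adj = copysign (nextafter (0.5, 0.0), x);
  return (long) (x + adj);  /* truncating conversion, like expand_fix */
}
#endif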
21505 /* Expand SSE2 sequence for computing lfloor or lceil from OPERAND1 storing
21508 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
21510 /* C code for the stuff we're doing below (for do_floor):
21512 xi -= (double)xi > op1 ? 1 : 0;
21515 enum machine_mode fmode = GET_MODE (op1);
21516 enum machine_mode imode = GET_MODE (op0);
21517 rtx ireg, freg, label, tmp;
21519 /* reg = (long)op1 */
21520 ireg = gen_reg_rtx (imode);
21521 expand_fix (ireg, op1, 0);
21523 /* freg = (double)reg */
21524 freg = gen_reg_rtx (fmode);
21525 expand_float (freg, ireg, 0);
21527 /* ireg = (freg > op1) ? ireg - 1 : ireg */
21528 label = ix86_expand_sse_compare_and_jump (UNLE,
21529 freg, op1, !do_floor);
21530 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
21531 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
21532 emit_move_insn (ireg, tmp);
21534 emit_label (label);
21535 LABEL_NUSES (label) = 1;
21537 emit_move_insn (op0, ireg);
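/* Editor's note: a standalone C sketch (not part of this file; the helper
   name is illustrative) of the do_floor case above: the conversion
   truncates toward zero, so for negative non-integers the reconverted
   value compares greater than the input and 1 is subtracted.  */
#if 0
static long
lfloor_like (double x)
{
  long i = (long) x;   /* expand_fix: truncate toward zero */
  if ((double) i > x)  /* only true for negative non-integers */
    i -= 1;
  return i;
}
#endif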
21540 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
21541 result in OPERAND0. */
21543 ix86_expand_rint (rtx operand0, rtx operand1)
21545 /* C code for the stuff we're doing below:
21546 xa = fabs (operand1);
21547 if (!isless (xa, 2**52))
21549 xa = xa + 2**52 - 2**52;
21550 return copysign (xa, operand1);
21552 enum machine_mode mode = GET_MODE (operand0);
21553 rtx res, xa, label, TWO52, mask;
21555 res = gen_reg_rtx (mode);
21556 emit_move_insn (res, operand1);
21558 /* xa = abs (operand1) */
21559 xa = ix86_expand_sse_fabs (res, &mask);
21561 /* if (!isless (xa, TWO52)) goto label; */
21562 TWO52 = ix86_gen_TWO52 (mode);
21563 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
21565 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
21566 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
21568 ix86_sse_copysign_to_positive (res, xa, res, mask);
21570 emit_label (label);
21571 LABEL_NUSES (label) = 1;
21573 emit_move_insn (operand0, res);
21576 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
21579 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
21581 /* C code for the stuff we expand below.
21582 double xa = fabs (x), x2;
21583 if (!isless (xa, TWO52))
21585 xa = xa + TWO52 - TWO52;
21586 x2 = copysign (xa, x);
21595 enum machine_mode mode = GET_MODE (operand0);
21596 rtx xa, TWO52, tmp, label, one, res, mask;
21598 TWO52 = ix86_gen_TWO52 (mode);
21600 /* Temporary for holding the result, initialized to the input
21601 operand to ease control flow. */
21602 res = gen_reg_rtx (mode);
21603 emit_move_insn (res, operand1);
21605 /* xa = abs (operand1) */
21606 xa = ix86_expand_sse_fabs (res, &mask);
21608 /* if (!isless (xa, TWO52)) goto label; */
21609 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
21611 /* xa = xa + TWO52 - TWO52; */
21612 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
21613 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
21615 /* xa = copysign (xa, operand1) */
21616 ix86_sse_copysign_to_positive (xa, xa, res, mask);
21618 /* generate 1.0 or -1.0 */
21619 one = force_reg (mode,
21620 const_double_from_real_value (do_floor
21621 ? dconst1 : dconstm1, mode));
21623 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
21624 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
21625 emit_insn (gen_rtx_SET (VOIDmode, tmp,
21626 gen_rtx_AND (mode, one, tmp)));
21627 /* We always need to subtract here to preserve signed zero. */
21628 tmp = expand_simple_binop (mode, MINUS,
21629 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
21630 emit_move_insn (res, tmp);
21632 emit_label (label);
21633 LABEL_NUSES (label) = 1;
21635 emit_move_insn (operand0, res);
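/* Editor's note: an illustrative SSE2-intrinsics sketch (not part of this
   file; the helper name and simplified sign handling are assumptions, and
   the ordered cmpgt stands in for the UNGT comparison) of the branchless
   compensation above: the compare yields an all-ones mask, AND with 1.0
   turns it into exactly 0.0 or 1.0, which is then subtracted.  */
#if 0
#include <emmintrin.h>

static double
floor_compensate (double x2 /* x rounded to nearest */, double x)
{
  __m128d xa  = _mm_set_sd (x2);
  __m128d xv  = _mm_set_sd (x);
  __m128d one = _mm_set_sd (1.0);
  __m128d tmp = _mm_and_pd (_mm_cmpgt_sd (xa, xv), one);
  return _mm_cvtsd_f64 (_mm_sub_sd (xa, tmp));
}
#endif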
21638 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
21641 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
21643 /* C code for the stuff we expand below.
21644 double xa = fabs (x), x2;
21645 if (!isless (xa, TWO52))
21647 x2 = (double)(long)x;
21654 if (HONOR_SIGNED_ZEROS (mode))
21655 return copysign (x2, x);
21658 enum machine_mode mode = GET_MODE (operand0);
21659 rtx xa, xi, TWO52, tmp, label, one, res, mask;
21661 TWO52 = ix86_gen_TWO52 (mode);
21663 /* Temporary for holding the result, initialized to the input
21664 operand to ease control flow. */
21665 res = gen_reg_rtx (mode);
21666 emit_move_insn (res, operand1);
21668 /* xa = abs (operand1) */
21669 xa = ix86_expand_sse_fabs (res, &mask);
21671 /* if (!isless (xa, TWO52)) goto label; */
21672 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
21674 /* xa = (double)(long)x */
21675 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
21676 expand_fix (xi, res, 0);
21677 expand_float (xa, xi, 0);
21680 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
21682 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
21683 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
21684 emit_insn (gen_rtx_SET (VOIDmode, tmp,
21685 gen_rtx_AND (mode, one, tmp)));
21686 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
21687 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
21688 emit_move_insn (res, tmp);
21690 if (HONOR_SIGNED_ZEROS (mode))
21691 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
21693 emit_label (label);
21694 LABEL_NUSES (label) = 1;
21696 emit_move_insn (operand0, res);
21699 /* Expand SSE sequence for computing round from OPERAND1 storing
21700 into OPERAND0.  The sequence avoids relying on DImode truncation
21701 via cvttsd2siq, which is only available on 64-bit targets. */
21703 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
21705 /* C code for the stuff we expand below.
21706 double xa = fabs (x), xa2, x2;
21707 if (!isless (xa, TWO52))
21709 Using the absolute value and copying back sign makes
21710 -0.0 -> -0.0 correct.
21711 xa2 = xa + TWO52 - TWO52;
21716 else if (dxa > 0.5)
21718 x2 = copysign (xa2, x);
21721 enum machine_mode mode = GET_MODE (operand0);
21722 rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;
21724 TWO52 = ix86_gen_TWO52 (mode);
21726 /* Temporary for holding the result, initialized to the input
21727 operand to ease control flow. */
21728 res = gen_reg_rtx (mode);
21729 emit_move_insn (res, operand1);
21731 /* xa = abs (operand1) */
21732 xa = ix86_expand_sse_fabs (res, &mask);
21734 /* if (!isless (xa, TWO52)) goto label; */
21735 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
21737 /* xa2 = xa + TWO52 - TWO52; */
21738 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
21739 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
21741 /* dxa = xa2 - xa; */
21742 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
21744 /* generate 0.5, 1.0 and -0.5 */
21745 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
21746 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
21747 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
21751 tmp = gen_reg_rtx (mode);
21752 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
21753 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
21754 emit_insn (gen_rtx_SET (VOIDmode, tmp,
21755 gen_rtx_AND (mode, one, tmp)));
21756 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
21757 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
21758 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
21759 emit_insn (gen_rtx_SET (VOIDmode, tmp,
21760 gen_rtx_AND (mode, one, tmp)));
21761 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
21763 /* res = copysign (xa2, operand1) */
21764 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
21766 emit_label (label);
21767 LABEL_NUSES (label) = 1;
21769 emit_move_insn (operand0, res);
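/* Editor's note: a standalone C version (not part of this file; the helper
   name is illustrative) of the whole sequence above, including the
   half-ulp compensation that turns round-to-nearest-even into
   round-half-away-from-zero.  */
#if 0
#include <math.h>

static double
round_like (double x)
{
  const double two52 = 4503599627370496.0; /* 2**52 */
  double xa = fabs (x), xa2, dxa;

  if (!(xa < two52))          /* huge, infinite or NaN: already integral */
    return x;
  xa2 = xa + two52 - two52;   /* round to nearest even */
  dxa = xa2 - xa;
  if (dxa > 0.5)              /* went up by more than half: undo */
    xa2 -= 1.0;
  else if (dxa <= -0.5)       /* went down by half or more: a tie, round up */
    xa2 += 1.0;
  return copysign (xa2, x);   /* restore sign; keeps -0.0 -> -0.0 */
}
#endif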
21772 /* Expand SSE sequence for computing trunc from OPERAND1 storing
21775 ix86_expand_trunc (rtx operand0, rtx operand1)
21777 /* C code for SSE variant we expand below.
21778 double xa = fabs (x), x2;
21779 if (!isless (xa, TWO52))
21781 x2 = (double)(long)x;
21782 if (HONOR_SIGNED_ZEROS (mode))
21783 return copysign (x2, x);
21786 enum machine_mode mode = GET_MODE (operand0);
21787 rtx xa, xi, TWO52, label, res, mask;
21789 TWO52 = ix86_gen_TWO52 (mode);
21791 /* Temporary for holding the result, initialized to the input
21792 operand to ease control flow. */
21793 res = gen_reg_rtx (mode);
21794 emit_move_insn (res, operand1);
21796 /* xa = abs (operand1) */
21797 xa = ix86_expand_sse_fabs (res, &mask);
21799 /* if (!isless (xa, TWO52)) goto label; */
21800 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
21802 /* x = (double)(long)x */
21803 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
21804 expand_fix (xi, res, 0);
21805 expand_float (res, xi, 0);
21807 if (HONOR_SIGNED_ZEROS (mode))
21808 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
21810 emit_label (label);
21811 LABEL_NUSES (label) = 1;
21813 emit_move_insn (operand0, res);
21816 /* Expand SSE sequence for computing trunc from OPERAND1 storing
21819 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
21821 enum machine_mode mode = GET_MODE (operand0);
21822 rtx xa, mask, TWO52, label, one, res, smask, tmp;
21824 /* C code for SSE variant we expand below.
21825 double xa = fabs (x), x2;
21826 if (!isless (xa, TWO52))
21828 xa2 = xa + TWO52 - TWO52;
21832 x2 = copysign (xa2, x);
21836 TWO52 = ix86_gen_TWO52 (mode);
21838 /* Temporary for holding the result, initialized to the input
21839 operand to ease control flow. */
21840 res = gen_reg_rtx (mode);
21841 emit_move_insn (res, operand1);
21843 /* xa = abs (operand1) */
21844 xa = ix86_expand_sse_fabs (res, &smask);
21846 /* if (!isless (xa, TWO52)) goto label; */
21847 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
21849 /* res = xa + TWO52 - TWO52; */
21850 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
21851 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
21852 emit_move_insn (res, tmp);
21855 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
21857 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
21858 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
21859 emit_insn (gen_rtx_SET (VOIDmode, mask,
21860 gen_rtx_AND (mode, mask, one)));
21861 tmp = expand_simple_binop (mode, MINUS,
21862 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
21863 emit_move_insn (res, tmp);
21865 /* res = copysign (res, operand1) */
21866 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
21868 emit_label (label);
21869 LABEL_NUSES (label) = 1;
21871 emit_move_insn (operand0, res);
21874 /* Expand SSE sequence for computing round from OPERAND1 storing
21877 ix86_expand_round (rtx operand0, rtx operand1)
21879 /* C code for the stuff we're doing below:
21880 double xa = fabs (x);
21881 if (!isless (xa, TWO52))
21883 xa = (double)(long)(xa + nextafter (0.5, 0.0));
21884 return copysign (xa, x);
21886 enum machine_mode mode = GET_MODE (operand0);
21887 rtx res, TWO52, xa, label, xi, half, mask;
21888 const struct real_format *fmt;
21889 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
21891 /* Temporary for holding the result, initialized to the input
21892 operand to ease control flow. */
21893 res = gen_reg_rtx (mode);
21894 emit_move_insn (res, operand1);
21896 TWO52 = ix86_gen_TWO52 (mode);
21897 xa = ix86_expand_sse_fabs (res, &mask);
21898 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
21900 /* load nextafter (0.5, 0.0) */
21901 fmt = REAL_MODE_FORMAT (mode);
21902 real_2expN (&half_minus_pred_half, -(fmt->p) - 1);
21903 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
21905 /* xa = xa + 0.5 */
21906 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
21907 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
21909 /* xa = (double)(int64_t)xa */
21910 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
21911 expand_fix (xi, xa, 0);
21912 expand_float (xa, xi, 0);
21914 /* res = copysign (xa, operand1) */
21915 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
21917 emit_label (label);
21918 LABEL_NUSES (label) = 1;
21920 emit_move_insn (operand0, res);
21924 /* Table of valid machine attributes. */
21925 static const struct attribute_spec ix86_attribute_table[] =
21927 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
21928 /* Stdcall attribute says callee is responsible for popping arguments
21929 if they are not variable. */
21930 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
21931 /* Fastcall attribute says callee is responsible for popping arguments
21932 if they are not variable. */
21933 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
21934 /* Cdecl attribute says the callee is a normal C declaration */
21935 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute },
21936 /* Regparm attribute specifies how many integer arguments are to be
21937 passed in registers. */
21938 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute },
21939 /* Sseregparm attribute says we are using x86_64 calling conventions
21940 for FP arguments. */
21941 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
21942 /* force_align_arg_pointer says this function realigns the stack at entry. */
21943 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
21944 false, true, true, ix86_handle_cconv_attribute },
21945 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
21946 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
21947 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
21948 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
21950 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
21951 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
21952 #ifdef SUBTARGET_ATTRIBUTE_TABLE
21953 SUBTARGET_ATTRIBUTE_TABLE,
21955 { NULL, 0, 0, false, false, false, NULL }
21958 /* Initialize the GCC target structure. */
21959 #undef TARGET_ATTRIBUTE_TABLE
21960 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
21961 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
21962 # undef TARGET_MERGE_DECL_ATTRIBUTES
21963 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
21966 #undef TARGET_COMP_TYPE_ATTRIBUTES
21967 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
21969 #undef TARGET_INIT_BUILTINS
21970 #define TARGET_INIT_BUILTINS ix86_init_builtins
21971 #undef TARGET_EXPAND_BUILTIN
21972 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
21974 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
21975 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION ix86_builtin_vectorized_function
21976 #undef TARGET_VECTORIZE_BUILTIN_CONVERSION
21977 #define TARGET_VECTORIZE_BUILTIN_CONVERSION ix86_builtin_conversion
21979 #undef TARGET_ASM_FUNCTION_EPILOGUE
21980 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
21982 #undef TARGET_ENCODE_SECTION_INFO
21983 #ifndef SUBTARGET_ENCODE_SECTION_INFO
21984 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
21986 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
21989 #undef TARGET_ASM_OPEN_PAREN
21990 #define TARGET_ASM_OPEN_PAREN ""
21991 #undef TARGET_ASM_CLOSE_PAREN
21992 #define TARGET_ASM_CLOSE_PAREN ""
21994 #undef TARGET_ASM_ALIGNED_HI_OP
21995 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
21996 #undef TARGET_ASM_ALIGNED_SI_OP
21997 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
21999 #undef TARGET_ASM_ALIGNED_DI_OP
22000 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
22003 #undef TARGET_ASM_UNALIGNED_HI_OP
22004 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
22005 #undef TARGET_ASM_UNALIGNED_SI_OP
22006 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
22007 #undef TARGET_ASM_UNALIGNED_DI_OP
22008 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
22010 #undef TARGET_SCHED_ADJUST_COST
22011 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
22012 #undef TARGET_SCHED_ISSUE_RATE
22013 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
22014 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
22015 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
22016 ia32_multipass_dfa_lookahead
22018 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
22019 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
22022 #undef TARGET_HAVE_TLS
22023 #define TARGET_HAVE_TLS true
22025 #undef TARGET_CANNOT_FORCE_CONST_MEM
22026 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
22027 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
22028 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_rtx_true
22030 #undef TARGET_DELEGITIMIZE_ADDRESS
22031 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
22033 #undef TARGET_MS_BITFIELD_LAYOUT_P
22034 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
22037 #undef TARGET_BINDS_LOCAL_P
22038 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
22040 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
22041 #undef TARGET_BINDS_LOCAL_P
22042 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
22045 #undef TARGET_ASM_OUTPUT_MI_THUNK
22046 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
22047 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
22048 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
22050 #undef TARGET_ASM_FILE_START
22051 #define TARGET_ASM_FILE_START x86_file_start
22053 #undef TARGET_DEFAULT_TARGET_FLAGS
22054 #define TARGET_DEFAULT_TARGET_FLAGS \
22056 | TARGET_64BIT_DEFAULT \
22057 | TARGET_SUBTARGET_DEFAULT \
22058 | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
22060 #undef TARGET_HANDLE_OPTION
22061 #define TARGET_HANDLE_OPTION ix86_handle_option
22063 #undef TARGET_RTX_COSTS
22064 #define TARGET_RTX_COSTS ix86_rtx_costs
22065 #undef TARGET_ADDRESS_COST
22066 #define TARGET_ADDRESS_COST ix86_address_cost
22068 #undef TARGET_FIXED_CONDITION_CODE_REGS
22069 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
22070 #undef TARGET_CC_MODES_COMPATIBLE
22071 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
22073 #undef TARGET_MACHINE_DEPENDENT_REORG
22074 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
22076 #undef TARGET_BUILD_BUILTIN_VA_LIST
22077 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
22079 #undef TARGET_MD_ASM_CLOBBERS
22080 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
22082 #undef TARGET_PROMOTE_PROTOTYPES
22083 #define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
22084 #undef TARGET_STRUCT_VALUE_RTX
22085 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
22086 #undef TARGET_SETUP_INCOMING_VARARGS
22087 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
22088 #undef TARGET_MUST_PASS_IN_STACK
22089 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
22090 #undef TARGET_PASS_BY_REFERENCE
22091 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
22092 #undef TARGET_INTERNAL_ARG_POINTER
22093 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
22094 #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
22095 #define TARGET_DWARF_HANDLE_FRAME_UNSPEC ix86_dwarf_handle_frame_unspec
22096 #undef TARGET_STRICT_ARGUMENT_NAMING
22097 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
22099 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
22100 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
22102 #undef TARGET_SCALAR_MODE_SUPPORTED_P
22103 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
22105 #undef TARGET_VECTOR_MODE_SUPPORTED_P
22106 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
22109 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
22110 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
22113 #ifdef SUBTARGET_INSERT_ATTRIBUTES
22114 #undef TARGET_INSERT_ATTRIBUTES
22115 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
22118 #undef TARGET_MANGLE_FUNDAMENTAL_TYPE
22119 #define TARGET_MANGLE_FUNDAMENTAL_TYPE ix86_mangle_fundamental_type
22121 #undef TARGET_STACK_PROTECT_FAIL
22122 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
22124 #undef TARGET_FUNCTION_VALUE
22125 #define TARGET_FUNCTION_VALUE ix86_function_value
22127 struct gcc_target targetm = TARGET_INITIALIZER;
22129 #include "gt-i386.h"