/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
   2002, 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to
the Free Software Foundation, 51 Franklin Street, Fifth Floor,
Boston, MA 02110-1301, USA.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "conditions.h"
#include "insn-codes.h"
#include "insn-attr.h"
#include "basic-block.h"
#include "target-def.h"
#include "langhooks.h"
#include "tree-gimple.h"
#include "tm-constrs.h"
#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif
/* Return index of given mode in mult and division cost tables.  */
#define MODE_INDEX(mode)			\
  ((mode) == QImode ? 0				\
   : (mode) == HImode ? 1			\
   : (mode) == SImode ? 2			\
   : (mode) == DImode ? 3			\
   : 4)
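
/* For illustration: MODE_INDEX selects the per-mode slot in the
   five-entry multiply and divide cost arrays of struct processor_costs,
   with index 4 ("other") covering anything wider than DImode.  Assuming
   the mult_init and divide field names from the struct definition in
   i386.h, a lookup would be written as

       int mul_cost = ix86_cost->mult_init[MODE_INDEX (SImode)];

   which picks the SImode (index 2) entry of the table.  */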
/* Processor costs (relative to an add) */
/* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes.  */
#define COSTS_N_BYTES(N) ((N) * 2)
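
/* For illustration: under the assumption above, a plain 2-byte add costs
   COSTS_N_BYTES (2) == 4 == COSTS_N_INSNS (1), so the byte-based scale
   used by size_cost below stays commensurable with the COSTS_N_INSNS
   scale used by the per-processor speed tables.  */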
#define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
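
/* A note on the stringop descriptors at the tail of each cost table
   below: each one is a stringop_algs initializer of the form
   {alg_for_unknown_size, {{max_size, alg}, ..., {-1, alg}}}, where the
   {-1, alg} entry is a sentinel covering all remaining block sizes.
   Every processor carries one such descriptor pair for memcpy and one
   for memset; assuming the usual layout of struct processor_costs, the
   first element of each pair is used when compiling 32-bit code and the
   second when compiling 64-bit code, so DUMMY_STRINGOP_ALGS simply
   fills the 64-bit slot of tunings that only matter in 32-bit mode.  */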
static const
struct processor_costs size_cost = {	/* costs for tuning for size */
  COSTS_N_BYTES (2),		/* cost of an add instruction */
  COSTS_N_BYTES (3),		/* cost of a lea instruction */
  COSTS_N_BYTES (2),		/* variable shift costs */
  COSTS_N_BYTES (3),		/* constant shift costs */
  {COSTS_N_BYTES (3),		/* cost of starting multiply for QI */
   COSTS_N_BYTES (3),		/* HI */
   COSTS_N_BYTES (3),		/* SI */
   COSTS_N_BYTES (3),		/* DI */
   COSTS_N_BYTES (5)},		/* other */
  0,				/* cost of multiply per each bit set */
  {COSTS_N_BYTES (3),		/* cost of a divide/mod for QI */
   COSTS_N_BYTES (3),		/* HI */
   COSTS_N_BYTES (3),		/* SI */
   COSTS_N_BYTES (3),		/* DI */
   COSTS_N_BYTES (5)},		/* other */
  COSTS_N_BYTES (3),		/* cost of movsx */
  COSTS_N_BYTES (3),		/* cost of movzx */
  2,				/* cost for loading QImode using movzbl */
  {2, 2, 2},			/* cost of loading integer registers
				   in QImode, HImode and SImode.
				   Relative to reg-reg move (2).  */
  {2, 2, 2},			/* cost of storing integer registers */
  2,				/* cost of reg,reg fld/fst */
  {2, 2, 2},			/* cost of loading fp registers
				   in SFmode, DFmode and XFmode */
  {2, 2, 2},			/* cost of storing fp registers
				   in SFmode, DFmode and XFmode */
  3,				/* cost of moving MMX register */
  {3, 3},			/* cost of loading MMX registers
				   in SImode and DImode */
  {3, 3},			/* cost of storing MMX registers
				   in SImode and DImode */
  3,				/* cost of moving SSE register */
  {3, 3, 3},			/* cost of loading SSE registers
				   in SImode, DImode and TImode */
  {3, 3, 3},			/* cost of storing SSE registers
				   in SImode, DImode and TImode */
  3,				/* MMX or SSE register to integer */
  0,				/* size of prefetch block */
  0,				/* number of parallel prefetches */
  COSTS_N_BYTES (2),		/* cost of FADD and FSUB insns.  */
  COSTS_N_BYTES (2),		/* cost of FMUL instruction.  */
  COSTS_N_BYTES (2),		/* cost of FDIV instruction.  */
  COSTS_N_BYTES (2),		/* cost of FABS instruction.  */
  COSTS_N_BYTES (2),		/* cost of FCHS instruction.  */
  COSTS_N_BYTES (2),		/* cost of FSQRT instruction.  */
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}}
};
/* Processor costs (relative to an add) */
static const
struct processor_costs i386_cost = {	/* 386 specific costs */
  COSTS_N_INSNS (1),		/* cost of an add instruction */
  COSTS_N_INSNS (1),		/* cost of a lea instruction */
  COSTS_N_INSNS (3),		/* variable shift costs */
  COSTS_N_INSNS (2),		/* constant shift costs */
  {COSTS_N_INSNS (6),		/* cost of starting multiply for QI */
   COSTS_N_INSNS (6),		/* HI */
   COSTS_N_INSNS (6),		/* SI */
   COSTS_N_INSNS (6),		/* DI */
   COSTS_N_INSNS (6)},		/* other */
  COSTS_N_INSNS (1),		/* cost of multiply per each bit set */
  {COSTS_N_INSNS (23),		/* cost of a divide/mod for QI */
   COSTS_N_INSNS (23),		/* HI */
   COSTS_N_INSNS (23),		/* SI */
   COSTS_N_INSNS (23),		/* DI */
   COSTS_N_INSNS (23)},		/* other */
  COSTS_N_INSNS (3),		/* cost of movsx */
  COSTS_N_INSNS (2),		/* cost of movzx */
  15,				/* "large" insn */
  4,				/* cost for loading QImode using movzbl */
  {2, 4, 2},			/* cost of loading integer registers
				   in QImode, HImode and SImode.
				   Relative to reg-reg move (2).  */
  {2, 4, 2},			/* cost of storing integer registers */
  2,				/* cost of reg,reg fld/fst */
  {8, 8, 8},			/* cost of loading fp registers
				   in SFmode, DFmode and XFmode */
  {8, 8, 8},			/* cost of storing fp registers
				   in SFmode, DFmode and XFmode */
  2,				/* cost of moving MMX register */
  {4, 8},			/* cost of loading MMX registers
				   in SImode and DImode */
  {4, 8},			/* cost of storing MMX registers
				   in SImode and DImode */
  2,				/* cost of moving SSE register */
  {4, 8, 16},			/* cost of loading SSE registers
				   in SImode, DImode and TImode */
  {4, 8, 16},			/* cost of storing SSE registers
				   in SImode, DImode and TImode */
  3,				/* MMX or SSE register to integer */
  0,				/* size of prefetch block */
  0,				/* number of parallel prefetches */
  COSTS_N_INSNS (23),		/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (27),		/* cost of FMUL instruction.  */
  COSTS_N_INSNS (88),		/* cost of FDIV instruction.  */
  COSTS_N_INSNS (22),		/* cost of FABS instruction.  */
  COSTS_N_INSNS (24),		/* cost of FCHS instruction.  */
  COSTS_N_INSNS (122),		/* cost of FSQRT instruction.  */
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
};
static const
struct processor_costs i486_cost = {	/* 486 specific costs */
  COSTS_N_INSNS (1),		/* cost of an add instruction */
  COSTS_N_INSNS (1),		/* cost of a lea instruction */
  COSTS_N_INSNS (3),		/* variable shift costs */
  COSTS_N_INSNS (2),		/* constant shift costs */
  {COSTS_N_INSNS (12),		/* cost of starting multiply for QI */
   COSTS_N_INSNS (12),		/* HI */
   COSTS_N_INSNS (12),		/* SI */
   COSTS_N_INSNS (12),		/* DI */
   COSTS_N_INSNS (12)},		/* other */
  1,				/* cost of multiply per each bit set */
  {COSTS_N_INSNS (40),		/* cost of a divide/mod for QI */
   COSTS_N_INSNS (40),		/* HI */
   COSTS_N_INSNS (40),		/* SI */
   COSTS_N_INSNS (40),		/* DI */
   COSTS_N_INSNS (40)},		/* other */
  COSTS_N_INSNS (3),		/* cost of movsx */
  COSTS_N_INSNS (2),		/* cost of movzx */
  15,				/* "large" insn */
  4,				/* cost for loading QImode using movzbl */
  {2, 4, 2},			/* cost of loading integer registers
				   in QImode, HImode and SImode.
				   Relative to reg-reg move (2).  */
  {2, 4, 2},			/* cost of storing integer registers */
  2,				/* cost of reg,reg fld/fst */
  {8, 8, 8},			/* cost of loading fp registers
				   in SFmode, DFmode and XFmode */
  {8, 8, 8},			/* cost of storing fp registers
				   in SFmode, DFmode and XFmode */
  2,				/* cost of moving MMX register */
  {4, 8},			/* cost of loading MMX registers
				   in SImode and DImode */
  {4, 8},			/* cost of storing MMX registers
				   in SImode and DImode */
  2,				/* cost of moving SSE register */
  {4, 8, 16},			/* cost of loading SSE registers
				   in SImode, DImode and TImode */
  {4, 8, 16},			/* cost of storing SSE registers
				   in SImode, DImode and TImode */
  3,				/* MMX or SSE register to integer */
  0,				/* size of prefetch block */
  0,				/* number of parallel prefetches */
  COSTS_N_INSNS (8),		/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (16),		/* cost of FMUL instruction.  */
  COSTS_N_INSNS (73),		/* cost of FDIV instruction.  */
  COSTS_N_INSNS (3),		/* cost of FABS instruction.  */
  COSTS_N_INSNS (3),		/* cost of FCHS instruction.  */
  COSTS_N_INSNS (83),		/* cost of FSQRT instruction.  */
  {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
};
static const
struct processor_costs pentium_cost = {
  COSTS_N_INSNS (1),		/* cost of an add instruction */
  COSTS_N_INSNS (1),		/* cost of a lea instruction */
  COSTS_N_INSNS (4),		/* variable shift costs */
  COSTS_N_INSNS (1),		/* constant shift costs */
  {COSTS_N_INSNS (11),		/* cost of starting multiply for QI */
   COSTS_N_INSNS (11),		/* HI */
   COSTS_N_INSNS (11),		/* SI */
   COSTS_N_INSNS (11),		/* DI */
   COSTS_N_INSNS (11)},		/* other */
  0,				/* cost of multiply per each bit set */
  {COSTS_N_INSNS (25),		/* cost of a divide/mod for QI */
   COSTS_N_INSNS (25),		/* HI */
   COSTS_N_INSNS (25),		/* SI */
   COSTS_N_INSNS (25),		/* DI */
   COSTS_N_INSNS (25)},		/* other */
  COSTS_N_INSNS (3),		/* cost of movsx */
  COSTS_N_INSNS (2),		/* cost of movzx */
  8,				/* "large" insn */
  6,				/* cost for loading QImode using movzbl */
  {2, 4, 2},			/* cost of loading integer registers
				   in QImode, HImode and SImode.
				   Relative to reg-reg move (2).  */
  {2, 4, 2},			/* cost of storing integer registers */
  2,				/* cost of reg,reg fld/fst */
  {2, 2, 6},			/* cost of loading fp registers
				   in SFmode, DFmode and XFmode */
  {4, 4, 6},			/* cost of storing fp registers
				   in SFmode, DFmode and XFmode */
  8,				/* cost of moving MMX register */
  {8, 8},			/* cost of loading MMX registers
				   in SImode and DImode */
  {8, 8},			/* cost of storing MMX registers
				   in SImode and DImode */
  2,				/* cost of moving SSE register */
  {4, 8, 16},			/* cost of loading SSE registers
				   in SImode, DImode and TImode */
  {4, 8, 16},			/* cost of storing SSE registers
				   in SImode, DImode and TImode */
  3,				/* MMX or SSE register to integer */
  0,				/* size of prefetch block */
  0,				/* number of parallel prefetches */
  COSTS_N_INSNS (3),		/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (3),		/* cost of FMUL instruction.  */
  COSTS_N_INSNS (39),		/* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),		/* cost of FABS instruction.  */
  COSTS_N_INSNS (1),		/* cost of FCHS instruction.  */
  COSTS_N_INSNS (70),		/* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
};
static const
struct processor_costs pentiumpro_cost = {
  COSTS_N_INSNS (1),		/* cost of an add instruction */
  COSTS_N_INSNS (1),		/* cost of a lea instruction */
  COSTS_N_INSNS (1),		/* variable shift costs */
  COSTS_N_INSNS (1),		/* constant shift costs */
  {COSTS_N_INSNS (4),		/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),		/* HI */
   COSTS_N_INSNS (4),		/* SI */
   COSTS_N_INSNS (4),		/* DI */
   COSTS_N_INSNS (4)},		/* other */
  0,				/* cost of multiply per each bit set */
  {COSTS_N_INSNS (17),		/* cost of a divide/mod for QI */
   COSTS_N_INSNS (17),		/* HI */
   COSTS_N_INSNS (17),		/* SI */
   COSTS_N_INSNS (17),		/* DI */
   COSTS_N_INSNS (17)},		/* other */
  COSTS_N_INSNS (1),		/* cost of movsx */
  COSTS_N_INSNS (1),		/* cost of movzx */
  8,				/* "large" insn */
  2,				/* cost for loading QImode using movzbl */
  {4, 4, 4},			/* cost of loading integer registers
				   in QImode, HImode and SImode.
				   Relative to reg-reg move (2).  */
  {2, 2, 2},			/* cost of storing integer registers */
  2,				/* cost of reg,reg fld/fst */
  {2, 2, 6},			/* cost of loading fp registers
				   in SFmode, DFmode and XFmode */
  {4, 4, 6},			/* cost of storing fp registers
				   in SFmode, DFmode and XFmode */
  2,				/* cost of moving MMX register */
  {2, 2},			/* cost of loading MMX registers
				   in SImode and DImode */
  {2, 2},			/* cost of storing MMX registers
				   in SImode and DImode */
  2,				/* cost of moving SSE register */
  {2, 2, 8},			/* cost of loading SSE registers
				   in SImode, DImode and TImode */
  {2, 2, 8},			/* cost of storing SSE registers
				   in SImode, DImode and TImode */
  3,				/* MMX or SSE register to integer */
  32,				/* size of prefetch block */
  6,				/* number of parallel prefetches */
  COSTS_N_INSNS (3),		/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (5),		/* cost of FMUL instruction.  */
  COSTS_N_INSNS (56),		/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),		/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),		/* cost of FCHS instruction.  */
  COSTS_N_INSNS (56),		/* cost of FSQRT instruction.  */
  /* PentiumPro has optimized rep instructions for blocks aligned to 8 bytes
     (we ensure the alignment).  For small blocks an inline loop is still a
     noticeable win; for bigger blocks either rep movsl or rep movsb is the
     way to go.  Rep movsb apparently has a more expensive startup time in
     the CPU, but after 4K the difference is down in the noise.  */
  {{rep_prefix_4_byte, {{128, loop}, {1024, unrolled_loop},
			{8192, rep_prefix_4_byte}, {-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_4_byte, {{1024, unrolled_loop},
			{8192, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
};
static const
struct processor_costs geode_cost = {
  COSTS_N_INSNS (1),		/* cost of an add instruction */
  COSTS_N_INSNS (1),		/* cost of a lea instruction */
  COSTS_N_INSNS (2),		/* variable shift costs */
  COSTS_N_INSNS (1),		/* constant shift costs */
  {COSTS_N_INSNS (3),		/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),		/* HI */
   COSTS_N_INSNS (7),		/* SI */
   COSTS_N_INSNS (7),		/* DI */
   COSTS_N_INSNS (7)},		/* other */
  0,				/* cost of multiply per each bit set */
  {COSTS_N_INSNS (15),		/* cost of a divide/mod for QI */
   COSTS_N_INSNS (23),		/* HI */
   COSTS_N_INSNS (39),		/* SI */
   COSTS_N_INSNS (39),		/* DI */
   COSTS_N_INSNS (39)},		/* other */
  COSTS_N_INSNS (1),		/* cost of movsx */
  COSTS_N_INSNS (1),		/* cost of movzx */
  8,				/* "large" insn */
  1,				/* cost for loading QImode using movzbl */
  {1, 1, 1},			/* cost of loading integer registers
				   in QImode, HImode and SImode.
				   Relative to reg-reg move (2).  */
  {1, 1, 1},			/* cost of storing integer registers */
  1,				/* cost of reg,reg fld/fst */
  {1, 1, 1},			/* cost of loading fp registers
				   in SFmode, DFmode and XFmode */
  {4, 6, 6},			/* cost of storing fp registers
				   in SFmode, DFmode and XFmode */

  1,				/* cost of moving MMX register */
  {1, 1},			/* cost of loading MMX registers
				   in SImode and DImode */
  {1, 1},			/* cost of storing MMX registers
				   in SImode and DImode */
  1,				/* cost of moving SSE register */
  {1, 1, 1},			/* cost of loading SSE registers
				   in SImode, DImode and TImode */
  {1, 1, 1},			/* cost of storing SSE registers
				   in SImode, DImode and TImode */
  1,				/* MMX or SSE register to integer */
  32,				/* size of prefetch block */
  1,				/* number of parallel prefetches */
  COSTS_N_INSNS (6),		/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (11),		/* cost of FMUL instruction.  */
  COSTS_N_INSNS (47),		/* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),		/* cost of FABS instruction.  */
  COSTS_N_INSNS (1),		/* cost of FCHS instruction.  */
  COSTS_N_INSNS (54),		/* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
};
static const
struct processor_costs k6_cost = {
  COSTS_N_INSNS (1),		/* cost of an add instruction */
  COSTS_N_INSNS (2),		/* cost of a lea instruction */
  COSTS_N_INSNS (1),		/* variable shift costs */
  COSTS_N_INSNS (1),		/* constant shift costs */
  {COSTS_N_INSNS (3),		/* cost of starting multiply for QI */
   COSTS_N_INSNS (3),		/* HI */
   COSTS_N_INSNS (3),		/* SI */
   COSTS_N_INSNS (3),		/* DI */
   COSTS_N_INSNS (3)},		/* other */
  0,				/* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),		/* cost of a divide/mod for QI */
   COSTS_N_INSNS (18),		/* HI */
   COSTS_N_INSNS (18),		/* SI */
   COSTS_N_INSNS (18),		/* DI */
   COSTS_N_INSNS (18)},		/* other */
  COSTS_N_INSNS (2),		/* cost of movsx */
  COSTS_N_INSNS (2),		/* cost of movzx */
  8,				/* "large" insn */
  3,				/* cost for loading QImode using movzbl */
  {4, 5, 4},			/* cost of loading integer registers
				   in QImode, HImode and SImode.
				   Relative to reg-reg move (2).  */
  {2, 3, 2},			/* cost of storing integer registers */
  4,				/* cost of reg,reg fld/fst */
  {6, 6, 6},			/* cost of loading fp registers
				   in SFmode, DFmode and XFmode */
  {4, 4, 4},			/* cost of storing fp registers
				   in SFmode, DFmode and XFmode */
  2,				/* cost of moving MMX register */
  {2, 2},			/* cost of loading MMX registers
				   in SImode and DImode */
  {2, 2},			/* cost of storing MMX registers
				   in SImode and DImode */
  2,				/* cost of moving SSE register */
  {2, 2, 8},			/* cost of loading SSE registers
				   in SImode, DImode and TImode */
  {2, 2, 8},			/* cost of storing SSE registers
				   in SImode, DImode and TImode */
  6,				/* MMX or SSE register to integer */
  32,				/* size of prefetch block */
  1,				/* number of parallel prefetches */
  COSTS_N_INSNS (2),		/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (2),		/* cost of FMUL instruction.  */
  COSTS_N_INSNS (56),		/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),		/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),		/* cost of FCHS instruction.  */
  COSTS_N_INSNS (56),		/* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
};
static const
struct processor_costs athlon_cost = {
  COSTS_N_INSNS (1),		/* cost of an add instruction */
  COSTS_N_INSNS (2),		/* cost of a lea instruction */
  COSTS_N_INSNS (1),		/* variable shift costs */
  COSTS_N_INSNS (1),		/* constant shift costs */
  {COSTS_N_INSNS (5),		/* cost of starting multiply for QI */
   COSTS_N_INSNS (5),		/* HI */
   COSTS_N_INSNS (5),		/* SI */
   COSTS_N_INSNS (5),		/* DI */
   COSTS_N_INSNS (5)},		/* other */
  0,				/* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),		/* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),		/* HI */
   COSTS_N_INSNS (42),		/* SI */
   COSTS_N_INSNS (74),		/* DI */
   COSTS_N_INSNS (74)},		/* other */
  COSTS_N_INSNS (1),		/* cost of movsx */
  COSTS_N_INSNS (1),		/* cost of movzx */
  8,				/* "large" insn */
  4,				/* cost for loading QImode using movzbl */
  {3, 4, 3},			/* cost of loading integer registers
				   in QImode, HImode and SImode.
				   Relative to reg-reg move (2).  */
  {3, 4, 3},			/* cost of storing integer registers */
  4,				/* cost of reg,reg fld/fst */
  {4, 4, 12},			/* cost of loading fp registers
				   in SFmode, DFmode and XFmode */
  {6, 6, 8},			/* cost of storing fp registers
				   in SFmode, DFmode and XFmode */
  2,				/* cost of moving MMX register */
  {4, 4},			/* cost of loading MMX registers
				   in SImode and DImode */
  {4, 4},			/* cost of storing MMX registers
				   in SImode and DImode */
  2,				/* cost of moving SSE register */
  {4, 4, 6},			/* cost of loading SSE registers
				   in SImode, DImode and TImode */
  {4, 4, 5},			/* cost of storing SSE registers
				   in SImode, DImode and TImode */
  5,				/* MMX or SSE register to integer */
  64,				/* size of prefetch block */
  6,				/* number of parallel prefetches */
  COSTS_N_INSNS (4),		/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),		/* cost of FMUL instruction.  */
  COSTS_N_INSNS (24),		/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),		/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),		/* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),		/* cost of FSQRT instruction.  */
  /* For some reason, Athlon deals better with the REP prefix (relative to
     loops) compared to K8.  Alignment becomes important after 8 bytes for
     memcpy and 128 bytes for memset.  */
  {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
};
static const
struct processor_costs k8_cost = {
  COSTS_N_INSNS (1),		/* cost of an add instruction */
  COSTS_N_INSNS (2),		/* cost of a lea instruction */
  COSTS_N_INSNS (1),		/* variable shift costs */
  COSTS_N_INSNS (1),		/* constant shift costs */
  {COSTS_N_INSNS (3),		/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),		/* HI */
   COSTS_N_INSNS (3),		/* SI */
   COSTS_N_INSNS (4),		/* DI */
   COSTS_N_INSNS (5)},		/* other */
  0,				/* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),		/* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),		/* HI */
   COSTS_N_INSNS (42),		/* SI */
   COSTS_N_INSNS (74),		/* DI */
   COSTS_N_INSNS (74)},		/* other */
  COSTS_N_INSNS (1),		/* cost of movsx */
  COSTS_N_INSNS (1),		/* cost of movzx */
  8,				/* "large" insn */
  4,				/* cost for loading QImode using movzbl */
  {3, 4, 3},			/* cost of loading integer registers
				   in QImode, HImode and SImode.
				   Relative to reg-reg move (2).  */
  {3, 4, 3},			/* cost of storing integer registers */
  4,				/* cost of reg,reg fld/fst */
  {4, 4, 12},			/* cost of loading fp registers
				   in SFmode, DFmode and XFmode */
  {6, 6, 8},			/* cost of storing fp registers
				   in SFmode, DFmode and XFmode */
  2,				/* cost of moving MMX register */
  {3, 3},			/* cost of loading MMX registers
				   in SImode and DImode */
  {4, 4},			/* cost of storing MMX registers
				   in SImode and DImode */
  2,				/* cost of moving SSE register */
  {4, 3, 6},			/* cost of loading SSE registers
				   in SImode, DImode and TImode */
  {4, 4, 5},			/* cost of storing SSE registers
				   in SImode, DImode and TImode */
  5,				/* MMX or SSE register to integer */
  64,				/* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set the number of simultaneous
     prefetches to a large constant to reflect this (it probably is not a
     good idea not to limit the number of prefetches at all, as their
     execution also takes some time).  */
  100,				/* number of parallel prefetches */
  COSTS_N_INSNS (4),		/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),		/* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),		/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),		/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),		/* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),		/* cost of FSQRT instruction.  */
  /* K8 has an optimized REP instruction for medium-sized blocks, but for
     very small blocks it is better to use a loop.  For large blocks, a
     libcall can do nontemporal accesses and beat inline code considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
	      {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}}
};
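
/* For illustration, decoding the k8_cost memcpy descriptor above: in
   32-bit code, copies of at most 6 bytes use an inline loop, copies of
   at most 14 bytes an unrolled loop, and anything larger
   rep_prefix_4_byte; in 64-bit code, copies of at most 16 bytes use a
   loop, copies of at most 8192 bytes rep_prefix_8_byte, and anything
   larger a libcall.  */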
static const
struct processor_costs amdfam10_cost = {
  COSTS_N_INSNS (1),		/* cost of an add instruction */
  COSTS_N_INSNS (2),		/* cost of a lea instruction */
  COSTS_N_INSNS (1),		/* variable shift costs */
  COSTS_N_INSNS (1),		/* constant shift costs */
  {COSTS_N_INSNS (3),		/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),		/* HI */
   COSTS_N_INSNS (3),		/* SI */
   COSTS_N_INSNS (4),		/* DI */
   COSTS_N_INSNS (5)},		/* other */
  0,				/* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),		/* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),		/* HI */
   COSTS_N_INSNS (51),		/* SI */
   COSTS_N_INSNS (83),		/* DI */
   COSTS_N_INSNS (83)},		/* other */
  COSTS_N_INSNS (1),		/* cost of movsx */
  COSTS_N_INSNS (1),		/* cost of movzx */
  8,				/* "large" insn */
  4,				/* cost for loading QImode using movzbl */
  {3, 4, 3},			/* cost of loading integer registers
				   in QImode, HImode and SImode.
				   Relative to reg-reg move (2).  */
  {3, 4, 3},			/* cost of storing integer registers */
  4,				/* cost of reg,reg fld/fst */
  {4, 4, 12},			/* cost of loading fp registers
				   in SFmode, DFmode and XFmode */
  {6, 6, 8},			/* cost of storing fp registers
				   in SFmode, DFmode and XFmode */
  2,				/* cost of moving MMX register */
  {3, 3},			/* cost of loading MMX registers
				   in SImode and DImode */
  {4, 4},			/* cost of storing MMX registers
				   in SImode and DImode */
  2,				/* cost of moving SSE register */
  {4, 4, 3},			/* cost of loading SSE registers
				   in SImode, DImode and TImode */
  {4, 4, 5},			/* cost of storing SSE registers
				   in SImode, DImode and TImode */
  3,				/* MMX or SSE register to integer */
				/* On K8:
				    MOVD reg64, xmmreg  Double  FSTORE 4
				    MOVD reg32, xmmreg  Double  FSTORE 4
				   On AMDFAM10:
				    MOVD reg64, xmmreg  Double  FADD 3
				    MOVD reg32, xmmreg  Double  FADD 3  */
  64,				/* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set the number of simultaneous
     prefetches to a large constant to reflect this (it probably is not a
     good idea not to limit the number of prefetches at all, as their
     execution also takes some time).  */
  100,				/* number of parallel prefetches */
  COSTS_N_INSNS (4),		/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),		/* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),		/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),		/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),		/* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),		/* cost of FSQRT instruction.  */

  /* AMDFAM10 has an optimized REP instruction for medium-sized blocks, but
     for very small blocks it is better to use a loop.  For large blocks, a
     libcall can do nontemporal accesses and beat inline code considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
	      {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}}
};
static const
struct processor_costs pentium4_cost = {
  COSTS_N_INSNS (1),		/* cost of an add instruction */
  COSTS_N_INSNS (3),		/* cost of a lea instruction */
  COSTS_N_INSNS (4),		/* variable shift costs */
  COSTS_N_INSNS (4),		/* constant shift costs */
  {COSTS_N_INSNS (15),		/* cost of starting multiply for QI */
   COSTS_N_INSNS (15),		/* HI */
   COSTS_N_INSNS (15),		/* SI */
   COSTS_N_INSNS (15),		/* DI */
   COSTS_N_INSNS (15)},		/* other */
  0,				/* cost of multiply per each bit set */
  {COSTS_N_INSNS (56),		/* cost of a divide/mod for QI */
   COSTS_N_INSNS (56),		/* HI */
   COSTS_N_INSNS (56),		/* SI */
   COSTS_N_INSNS (56),		/* DI */
   COSTS_N_INSNS (56)},		/* other */
  COSTS_N_INSNS (1),		/* cost of movsx */
  COSTS_N_INSNS (1),		/* cost of movzx */
  16,				/* "large" insn */
  2,				/* cost for loading QImode using movzbl */
  {4, 5, 4},			/* cost of loading integer registers
				   in QImode, HImode and SImode.
				   Relative to reg-reg move (2).  */
  {2, 3, 2},			/* cost of storing integer registers */
  2,				/* cost of reg,reg fld/fst */
  {2, 2, 6},			/* cost of loading fp registers
				   in SFmode, DFmode and XFmode */
  {4, 4, 6},			/* cost of storing fp registers
				   in SFmode, DFmode and XFmode */
  2,				/* cost of moving MMX register */
  {2, 2},			/* cost of loading MMX registers
				   in SImode and DImode */
  {2, 2},			/* cost of storing MMX registers
				   in SImode and DImode */
  12,				/* cost of moving SSE register */
  {12, 12, 12},			/* cost of loading SSE registers
				   in SImode, DImode and TImode */
  {2, 2, 8},			/* cost of storing SSE registers
				   in SImode, DImode and TImode */
  10,				/* MMX or SSE register to integer */
  64,				/* size of prefetch block */
  6,				/* number of parallel prefetches */
  COSTS_N_INSNS (5),		/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (7),		/* cost of FMUL instruction.  */
  COSTS_N_INSNS (43),		/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),		/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),		/* cost of FCHS instruction.  */
  COSTS_N_INSNS (43),		/* cost of FSQRT instruction.  */
  {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
	      {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
};
static const
struct processor_costs nocona_cost = {
  COSTS_N_INSNS (1),		/* cost of an add instruction */
  COSTS_N_INSNS (1),		/* cost of a lea instruction */
  COSTS_N_INSNS (1),		/* variable shift costs */
  COSTS_N_INSNS (1),		/* constant shift costs */
  {COSTS_N_INSNS (10),		/* cost of starting multiply for QI */
   COSTS_N_INSNS (10),		/* HI */
   COSTS_N_INSNS (10),		/* SI */
   COSTS_N_INSNS (10),		/* DI */
   COSTS_N_INSNS (10)},		/* other */
  0,				/* cost of multiply per each bit set */
  {COSTS_N_INSNS (66),		/* cost of a divide/mod for QI */
   COSTS_N_INSNS (66),		/* HI */
   COSTS_N_INSNS (66),		/* SI */
   COSTS_N_INSNS (66),		/* DI */
   COSTS_N_INSNS (66)},		/* other */
  COSTS_N_INSNS (1),		/* cost of movsx */
  COSTS_N_INSNS (1),		/* cost of movzx */
  16,				/* "large" insn */
  4,				/* cost for loading QImode using movzbl */
  {4, 4, 4},			/* cost of loading integer registers
				   in QImode, HImode and SImode.
				   Relative to reg-reg move (2).  */
  {4, 4, 4},			/* cost of storing integer registers */
  3,				/* cost of reg,reg fld/fst */
  {12, 12, 12},			/* cost of loading fp registers
				   in SFmode, DFmode and XFmode */
  {4, 4, 4},			/* cost of storing fp registers
				   in SFmode, DFmode and XFmode */
  6,				/* cost of moving MMX register */
  {12, 12},			/* cost of loading MMX registers
				   in SImode and DImode */
  {12, 12},			/* cost of storing MMX registers
				   in SImode and DImode */
  6,				/* cost of moving SSE register */
  {12, 12, 12},			/* cost of loading SSE registers
				   in SImode, DImode and TImode */
  {12, 12, 12},			/* cost of storing SSE registers
				   in SImode, DImode and TImode */
  8,				/* MMX or SSE register to integer */
  128,				/* size of prefetch block */
  8,				/* number of parallel prefetches */
  COSTS_N_INSNS (6),		/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),		/* cost of FMUL instruction.  */
  COSTS_N_INSNS (40),		/* cost of FDIV instruction.  */
  COSTS_N_INSNS (3),		/* cost of FABS instruction.  */
  COSTS_N_INSNS (3),		/* cost of FCHS instruction.  */
  COSTS_N_INSNS (44),		/* cost of FSQRT instruction.  */
  {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
   {libcall, {{32, loop}, {20000, rep_prefix_8_byte},
	      {100000, unrolled_loop}, {-1, libcall}}}},
  {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
	      {-1, libcall}}},
   {libcall, {{24, loop}, {64, unrolled_loop},
	      {8192, rep_prefix_8_byte}, {-1, libcall}}}}
};
static const
struct processor_costs core2_cost = {
  COSTS_N_INSNS (1),		/* cost of an add instruction */
  COSTS_N_INSNS (1) + 1,	/* cost of a lea instruction */
  COSTS_N_INSNS (1),		/* variable shift costs */
  COSTS_N_INSNS (1),		/* constant shift costs */
  {COSTS_N_INSNS (3),		/* cost of starting multiply for QI */
   COSTS_N_INSNS (3),		/* HI */
   COSTS_N_INSNS (3),		/* SI */
   COSTS_N_INSNS (3),		/* DI */
   COSTS_N_INSNS (3)},		/* other */
  0,				/* cost of multiply per each bit set */
  {COSTS_N_INSNS (22),		/* cost of a divide/mod for QI */
   COSTS_N_INSNS (22),		/* HI */
   COSTS_N_INSNS (22),		/* SI */
   COSTS_N_INSNS (22),		/* DI */
   COSTS_N_INSNS (22)},		/* other */
  COSTS_N_INSNS (1),		/* cost of movsx */
  COSTS_N_INSNS (1),		/* cost of movzx */
  8,				/* "large" insn */
  2,				/* cost for loading QImode using movzbl */
  {6, 6, 6},			/* cost of loading integer registers
				   in QImode, HImode and SImode.
				   Relative to reg-reg move (2).  */
  {4, 4, 4},			/* cost of storing integer registers */
  2,				/* cost of reg,reg fld/fst */
  {6, 6, 6},			/* cost of loading fp registers
				   in SFmode, DFmode and XFmode */
  {4, 4, 4},			/* cost of storing fp registers
				   in SFmode, DFmode and XFmode */
  2,				/* cost of moving MMX register */
  {6, 6},			/* cost of loading MMX registers
				   in SImode and DImode */
  {4, 4},			/* cost of storing MMX registers
				   in SImode and DImode */
  2,				/* cost of moving SSE register */
  {6, 6, 6},			/* cost of loading SSE registers
				   in SImode, DImode and TImode */
  {4, 4, 4},			/* cost of storing SSE registers
				   in SImode, DImode and TImode */
  2,				/* MMX or SSE register to integer */
  128,				/* size of prefetch block */
  8,				/* number of parallel prefetches */
  COSTS_N_INSNS (3),		/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (5),		/* cost of FMUL instruction.  */
  COSTS_N_INSNS (32),		/* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),		/* cost of FABS instruction.  */
  COSTS_N_INSNS (1),		/* cost of FCHS instruction.  */
  COSTS_N_INSNS (58),		/* cost of FSQRT instruction.  */
  {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{32, loop}, {64, rep_prefix_4_byte},
	      {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {15, unrolled_loop},
	      {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{24, loop}, {32, unrolled_loop},
	      {8192, rep_prefix_8_byte}, {-1, libcall}}}}
};
/* Generic64 should produce code tuned for Nocona and K8.  */
static const
struct processor_costs generic64_cost = {
  COSTS_N_INSNS (1),		/* cost of an add instruction */
  /* On all chips taken into consideration, lea is 2 cycles or more.  With
     this cost, however, our current implementation of synth_mult results
     in the use of unnecessary temporary registers, causing regressions on
     several SPECfp benchmarks.  */
  COSTS_N_INSNS (1) + 1,	/* cost of a lea instruction */
  COSTS_N_INSNS (1),		/* variable shift costs */
  COSTS_N_INSNS (1),		/* constant shift costs */
  {COSTS_N_INSNS (3),		/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),		/* HI */
   COSTS_N_INSNS (3),		/* SI */
   COSTS_N_INSNS (4),		/* DI */
   COSTS_N_INSNS (2)},		/* other */
  0,				/* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),		/* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),		/* HI */
   COSTS_N_INSNS (42),		/* SI */
   COSTS_N_INSNS (74),		/* DI */
   COSTS_N_INSNS (74)},		/* other */
  COSTS_N_INSNS (1),		/* cost of movsx */
  COSTS_N_INSNS (1),		/* cost of movzx */
  8,				/* "large" insn */
  4,				/* cost for loading QImode using movzbl */
  {4, 4, 4},			/* cost of loading integer registers
				   in QImode, HImode and SImode.
				   Relative to reg-reg move (2).  */
  {4, 4, 4},			/* cost of storing integer registers */
  4,				/* cost of reg,reg fld/fst */
  {12, 12, 12},			/* cost of loading fp registers
				   in SFmode, DFmode and XFmode */
  {6, 6, 8},			/* cost of storing fp registers
				   in SFmode, DFmode and XFmode */
  2,				/* cost of moving MMX register */
  {8, 8},			/* cost of loading MMX registers
				   in SImode and DImode */
  {8, 8},			/* cost of storing MMX registers
				   in SImode and DImode */
  2,				/* cost of moving SSE register */
  {8, 8, 8},			/* cost of loading SSE registers
				   in SImode, DImode and TImode */
  {8, 8, 8},			/* cost of storing SSE registers
				   in SImode, DImode and TImode */
  5,				/* MMX or SSE register to integer */
  64,				/* size of prefetch block */
  6,				/* number of parallel prefetches */
  /* Benchmarks show large regressions on the K8 sixtrack benchmark when
     this value is increased to the perhaps more appropriate value of 5.  */
  COSTS_N_INSNS (8),		/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),		/* cost of FMUL instruction.  */
  COSTS_N_INSNS (20),		/* cost of FDIV instruction.  */
  COSTS_N_INSNS (8),		/* cost of FABS instruction.  */
  COSTS_N_INSNS (8),		/* cost of FCHS instruction.  */
  COSTS_N_INSNS (40),		/* cost of FSQRT instruction.  */
  {DUMMY_STRINGOP_ALGS,
   {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {DUMMY_STRINGOP_ALGS,
   {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}}
};
/* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona
   and K8.  */
static const
struct processor_costs generic32_cost = {
  COSTS_N_INSNS (1),		/* cost of an add instruction */
  COSTS_N_INSNS (1) + 1,	/* cost of a lea instruction */
  COSTS_N_INSNS (1),		/* variable shift costs */
  COSTS_N_INSNS (1),		/* constant shift costs */
  {COSTS_N_INSNS (3),		/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),		/* HI */
   COSTS_N_INSNS (3),		/* SI */
   COSTS_N_INSNS (4),		/* DI */
   COSTS_N_INSNS (2)},		/* other */
  0,				/* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),		/* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),		/* HI */
   COSTS_N_INSNS (42),		/* SI */
   COSTS_N_INSNS (74),		/* DI */
   COSTS_N_INSNS (74)},		/* other */
  COSTS_N_INSNS (1),		/* cost of movsx */
  COSTS_N_INSNS (1),		/* cost of movzx */
  8,				/* "large" insn */
  4,				/* cost for loading QImode using movzbl */
  {4, 4, 4},			/* cost of loading integer registers
				   in QImode, HImode and SImode.
				   Relative to reg-reg move (2).  */
  {4, 4, 4},			/* cost of storing integer registers */
  4,				/* cost of reg,reg fld/fst */
  {12, 12, 12},			/* cost of loading fp registers
				   in SFmode, DFmode and XFmode */
  {6, 6, 8},			/* cost of storing fp registers
				   in SFmode, DFmode and XFmode */
  2,				/* cost of moving MMX register */
  {8, 8},			/* cost of loading MMX registers
				   in SImode and DImode */
  {8, 8},			/* cost of storing MMX registers
				   in SImode and DImode */
  2,				/* cost of moving SSE register */
  {8, 8, 8},			/* cost of loading SSE registers
				   in SImode, DImode and TImode */
  {8, 8, 8},			/* cost of storing SSE registers
				   in SImode, DImode and TImode */
  5,				/* MMX or SSE register to integer */
  64,				/* size of prefetch block */
  6,				/* number of parallel prefetches */
  COSTS_N_INSNS (8),		/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),		/* cost of FMUL instruction.  */
  COSTS_N_INSNS (20),		/* cost of FDIV instruction.  */
  COSTS_N_INSNS (8),		/* cost of FABS instruction.  */
  COSTS_N_INSNS (8),		/* cost of FCHS instruction.  */
  COSTS_N_INSNS (40),		/* cost of FSQRT instruction.  */
  {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
};
const struct processor_costs *ix86_cost = &pentium_cost;
/* Processor feature/optimization bitmasks.  */
#define m_386 (1<<PROCESSOR_I386)
#define m_486 (1<<PROCESSOR_I486)
#define m_PENT (1<<PROCESSOR_PENTIUM)
#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
#define m_PENT4 (1<<PROCESSOR_PENTIUM4)
#define m_NOCONA (1<<PROCESSOR_NOCONA)
#define m_CORE2 (1<<PROCESSOR_CORE2)

#define m_GEODE (1<<PROCESSOR_GEODE)
#define m_K6 (1<<PROCESSOR_K6)
#define m_K6_GEODE (m_K6 | m_GEODE)
#define m_K8 (1<<PROCESSOR_K8)
#define m_ATHLON (1<<PROCESSOR_ATHLON)
#define m_ATHLON_K8 (m_K8 | m_ATHLON)
#define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
#define m_ATHLON_K8_AMDFAM10 (m_K8 | m_ATHLON | m_AMDFAM10)

#define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
#define m_GENERIC64 (1<<PROCESSOR_GENERIC64)

/* Generic instruction choice should be a common subset of supported CPUs
   (PPro/PENT4/NOCONA/CORE2/Athlon/K8).  */
#define m_GENERIC (m_GENERIC32 | m_GENERIC64)
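
/* For illustration: each m_* macro above is a one-bit mask keyed by a
   PROCESSOR_* enum value, so an entry such as m_PPRO | m_CORE2 | m_GENERIC
   in the tables below enables a tuning for all of those CPUs at once.
   Assuming the convention used elsewhere in this file, a tuning bit is
   tested against the active -mtune selection roughly as

       ix86_tune_features[X86_TUNE_USE_LEAVE] & (1 << ix86_tune)

   where ix86_tune holds the PROCESSOR_* value chosen by -mtune.  */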
/* Feature tests against the various tunings.  */
unsigned int ix86_tune_features[X86_TUNE_LAST] = {
  /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
     negatively, so enabling it for Generic64 seems like a good code-size
     tradeoff.  We can't enable it for 32-bit generic because it does not
     work well with PPro-based chips.  */
  m_386 | m_K6_GEODE | m_ATHLON_K8_AMDFAM10 | m_CORE2 | m_GENERIC64,

  /* X86_TUNE_PUSH_MEMORY */
  m_386 | m_K6_GEODE | m_ATHLON_K8_AMDFAM10 | m_PENT4
  | m_NOCONA | m_CORE2 | m_GENERIC,

  /* X86_TUNE_ZERO_EXTEND_WITH_AND */

  /* X86_TUNE_USE_BIT_TEST */

  /* X86_TUNE_UNROLL_STRLEN */
  m_486 | m_PENT | m_PPRO | m_ATHLON_K8_AMDFAM10 | m_K6 | m_CORE2 | m_GENERIC,

  /* X86_TUNE_DEEP_BRANCH_PREDICTION */
  m_PPRO | m_K6_GEODE | m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_GENERIC,

  /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
     on simulation results.  But after P4 was made, no performance benefit
     was observed with branch hints; they also increase code size.  As a
     result, icc never generates branch hints.  */

  /* X86_TUNE_DOUBLE_WITH_ADD */

  /* X86_TUNE_USE_SAHF */
  m_PPRO | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_PENT4
  | m_NOCONA | m_CORE2 | m_GENERIC,

  /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
     partial dependencies.  */
  m_ATHLON_K8_AMDFAM10 | m_PPRO | m_PENT4 | m_NOCONA
  | m_CORE2 | m_GENERIC | m_GEODE /* m_386 | m_K6 */,

  /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
     register stalls on the Generic32 compilation setting as well.  However,
     in the current implementation the partial register stalls are not
     eliminated very well: they can be introduced via subregs synthesized
     by combine and can happen in caller/callee saving sequences.  Because
     this option pays back little on PPro-based chips and conflicts with
     the partial-register dependencies used by Athlon/P4-based chips, it is
     better to leave it off for generic32 for now.  */

  /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
  m_CORE2 | m_GENERIC,

  /* X86_TUNE_USE_HIMODE_FIOP */
  m_386 | m_486 | m_K6_GEODE,

  /* X86_TUNE_USE_SIMODE_FIOP */
  ~(m_PPRO | m_ATHLON_K8_AMDFAM10 | m_PENT | m_CORE2 | m_GENERIC),

  /* X86_TUNE_USE_MOV0 */

  /* X86_TUNE_USE_CLTD */
  ~(m_PENT | m_K6 | m_CORE2 | m_GENERIC),

  /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx.  */

  /* X86_TUNE_SPLIT_LONG_MOVES */

  /* X86_TUNE_READ_MODIFY_WRITE */

  /* X86_TUNE_READ_MODIFY */

  /* X86_TUNE_PROMOTE_QIMODE */
  m_K6_GEODE | m_PENT | m_386 | m_486 | m_ATHLON_K8_AMDFAM10 | m_CORE2
  | m_GENERIC /* | m_PENT4 ? */,

  /* X86_TUNE_FAST_PREFIX */
  ~(m_PENT | m_486 | m_386),

  /* X86_TUNE_SINGLE_STRINGOP */
  m_386 | m_PENT4 | m_NOCONA,

  /* X86_TUNE_QIMODE_MATH */

  /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
     register stalls.  Just like X86_TUNE_PARTIAL_REG_STALL this option
     might be considered for Generic32 if our scheme for avoiding partial
     stalls were more effective.  */

  /* X86_TUNE_PROMOTE_QI_REGS */

  /* X86_TUNE_PROMOTE_HI_REGS */

  /* X86_TUNE_ADD_ESP_4: Enable if add/sub is preferred over 1/2 push/pop.  */
  m_ATHLON_K8_AMDFAM10 | m_K6_GEODE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,

  /* X86_TUNE_ADD_ESP_8 */
  m_ATHLON_K8_AMDFAM10 | m_PPRO | m_K6_GEODE | m_386
  | m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,

  /* X86_TUNE_SUB_ESP_4 */
  m_ATHLON_K8_AMDFAM10 | m_PPRO | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,

  /* X86_TUNE_SUB_ESP_8 */
  m_ATHLON_K8_AMDFAM10 | m_PPRO | m_386 | m_486
  | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,

  /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
     for DFmode copies.  */
  ~(m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
    | m_GENERIC | m_GEODE),

  /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
  m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,

  /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
     conflict here between PPro/Pentium4-based chips that treat 128-bit
     SSE registers as single units and K8-based chips that divide SSE
     registers into two 64-bit halves.  This knob promotes all store
     destinations to be 128-bit, to allow register renaming on 128-bit
     SSE units, but it usually results in one extra microop on 64-bit
     SSE units.  Experimental results show that disabling this option on
     P4 brings over a 20% SPECfp regression, while enabling it on K8
     brings roughly a 2.4% regression that can be partly masked by
     careful scheduling of moves.  */
  m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC | m_AMDFAM10,
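
  /* For illustration (an assumption based on the description above, not a
     statement of exactly what the move patterns emit): with this tuning
     bit set, a scalar copy that could be emitted as

	 movsd  %xmm1, %xmm0	; writes only the low 64 bits

     is emitted as the full-register

	 movapd %xmm1, %xmm0	; rewrites all 128 bits

     so the destination no longer depends on the old contents of %xmm0
     and register renaming can treat it as a fresh register.  */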

  /* X86_TUNE_SSE_UNALIGNED_MOVE_OPTIMAL */

  /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
     are resolved on SSE register parts instead of whole registers, so we may
     maintain just the lower part of scalar values in proper format, leaving
     the upper part undefined.  */

  /* X86_TUNE_SSE_TYPELESS_STORES */
  m_ATHLON_K8_AMDFAM10,

  /* X86_TUNE_SSE_LOAD0_BY_PXOR */
  m_PPRO | m_PENT4 | m_NOCONA,

  /* X86_TUNE_MEMORY_MISMATCH_STALL */
  m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,

  /* X86_TUNE_PROLOGUE_USING_MOVE */
  m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC,

  /* X86_TUNE_EPILOGUE_USING_MOVE */
  m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC,

  /* X86_TUNE_SHIFT1 */

  /* X86_TUNE_USE_FFREEP */
  m_ATHLON_K8_AMDFAM10,

  /* X86_TUNE_INTER_UNIT_MOVES */
  ~(m_ATHLON_K8_AMDFAM10 | m_GENERIC),

  /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
     than 4 branch instructions in the 16-byte window.  */
  m_PPRO | m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,

  /* X86_TUNE_SCHEDULE */
  m_PPRO | m_ATHLON_K8_AMDFAM10 | m_K6_GEODE | m_PENT | m_CORE2 | m_GENERIC,

  /* X86_TUNE_USE_BT */
  m_ATHLON_K8_AMDFAM10,

  /* X86_TUNE_USE_INCDEC */
  ~(m_PENT4 | m_NOCONA | m_GENERIC),

  /* X86_TUNE_PAD_RETURNS */
  m_ATHLON_K8_AMDFAM10 | m_CORE2 | m_GENERIC,

  /* X86_TUNE_EXT_80387_CONSTANTS */
  m_K6_GEODE | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC,

  /* X86_TUNE_SHORTEN_X87_SSE */

  /* X86_TUNE_AVOID_VECTOR_DECODE */

  /* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have the same latency for
     HImode and SImode multiply, but 386 and 486 do HImode multiply
     faster.  */

  /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of 32-bit constant and memory is
     vector path on AMD machines.  */
  m_K8 | m_GENERIC64 | m_AMDFAM10,

  /* X86_TUNE_SLOW_IMUL_IMM8: Imul of 8-bit constant is vector path on AMD
     machines.  */
  m_K8 | m_GENERIC64 | m_AMDFAM10,

  /* X86_TUNE_MOVE_M1_VIA_OR: On pentiums, it is faster to load -1 via OR
     than a MOV.  */

  /* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is,
     but one byte longer.  */

  /* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector decoded with memory
     operand that cannot be represented using a modRM byte.  The XOR
     replacement is long decoded, so this split helps here as well.  */
};

/* Feature tests against the various architecture variations.  */
unsigned int ix86_arch_features[X86_ARCH_LAST] = {
  /* X86_ARCH_CMOVE */
  m_PPRO | m_GEODE | m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA,

  /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486.  */

  /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium.  */

  /* X86_ARCH_XADD: Exchange and add was added for 80486.  */

  /* X86_ARCH_BSWAP: Byteswap was added for 80486.  */
};

static const unsigned int x86_accumulate_outgoing_args
  = m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC;

static const unsigned int x86_arch_always_fancy_math_387
  = m_PENT | m_PPRO | m_ATHLON_K8_AMDFAM10 | m_PENT4
    | m_NOCONA | m_CORE2 | m_GENERIC;

static enum stringop_alg stringop_alg = no_stringop;

/* In case the average insn count for single function invocation is
   lower than this constant, emit fast (but longer) prologue and
   epilogue code.  */
#define FAST_PROLOGUE_INSN_COUNT 20

/* Names for 8 (low), 8 (high), and 16-bit registers, respectively.  */
static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
static const char *const hi_reg_name[] = HI_REGISTER_NAMES;

/* Array of the smallest class containing reg number REGNO, indexed by
   REGNO.  Used by REGNO_REG_CLASS in i386.h.  */

enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
{
  /* ax, dx, cx, bx */
  AREG, DREG, CREG, BREG,
  /* si, di, bp, sp */
  SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
  /* FP registers */
  FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
  FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
  /* arg pointer */
  NON_Q_REGS,
  /* flags, fpsr, fpcr, frame */
  NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
  /* SSE registers */
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  SSE_REGS, SSE_REGS,
  /* MMX registers */
  MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
  MMX_REGS, MMX_REGS,
  /* REX registers */
  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
  /* SSE REX registers */
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  SSE_REGS, SSE_REGS,
};
1291 /* The "default" register map used in 32bit mode. */
1293 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
1295 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
1296 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
1297 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1298 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
1299 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
1300 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1301 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */

static int const x86_64_int_parameter_registers[6] =
{
  5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
  FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
};

static int const x86_64_ms_abi_int_parameter_registers[4] =
{
  2 /*RCX*/, 1 /*RDX*/,
  FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
};

static int const x86_64_int_return_registers[4] =
{
  0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
};
1321 /* The "default" register map used in 64bit mode. */
1322 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
1324 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
1325 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
1326 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1327 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
1328 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
1329 8,9,10,11,12,13,14,15, /* extended integer registers */
1330 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */

/* Define the register numbers to be used in Dwarf debugging information.
   The SVR4 reference port C compiler uses the following register numbers
   in its Dwarf output code:
	0 for %eax (gcc regno = 0)
	1 for %ecx (gcc regno = 2)
	2 for %edx (gcc regno = 1)
	3 for %ebx (gcc regno = 3)
	4 for %esp (gcc regno = 7)
	5 for %ebp (gcc regno = 6)
	6 for %esi (gcc regno = 4)
	7 for %edi (gcc regno = 5)
   The following three DWARF register numbers are never generated by
   the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
   believes these numbers have these meanings.
	8  for %eip    (no gcc equivalent)
	9  for %eflags (gcc regno = 17)
	10 for %trapno (no gcc equivalent)
   It is not at all clear how we should number the FP stack registers
   for the x86 architecture.  If the version of SDB on x86/svr4 were
   a bit less brain dead with respect to floating-point then we would
   have a precedent to follow with respect to DWARF register numbers
   for x86 FP registers, but the SDB on x86/svr4 is so completely
   broken with respect to FP registers that it is hardly worth thinking
   of it as something to strive for compatibility with.

   The version of x86/svr4 SDB I have at the moment does (partially)
   seem to believe that DWARF register number 11 is associated with
   the x86 register %st(0), but that's about all.  Higher DWARF
   register numbers don't seem to be associated with anything in
   particular, and even for DWARF regno 11, SDB only seems to understand
   that it should say that a variable lives in %st(0) (when asked via an
   `=' command) if we said it was in DWARF regno 11, but SDB still
   prints garbage when asked for the value of the variable in question
   (via a `/' command).
   (Also note that the labels SDB prints for various FP stack regs
   when doing an `x' command are all wrong.)

   Note that these problems generally don't affect the native SVR4
   C compiler because it doesn't allow the use of -O with -g and
   because when it is *not* optimizing, it allocates a memory
   location for each floating-point variable, and the memory
   location is what gets described in the DWARF AT_location
   attribute for the variable in question.

   Regardless of the severe mental illness of the x86/svr4 SDB, we
   do something sensible here and we use the following DWARF
   register numbers.  Note that these are all stack-top-relative
   numbers.
	11 for %st(0) (gcc regno = 8)
	12 for %st(1) (gcc regno = 9)
	13 for %st(2) (gcc regno = 10)
	14 for %st(3) (gcc regno = 11)
	15 for %st(4) (gcc regno = 12)
	16 for %st(5) (gcc regno = 13)
	17 for %st(6) (gcc regno = 14)
	18 for %st(7) (gcc regno = 15)
*/
int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 2, 1, 3, 6, 7, 5, 4,		/* general regs */
  11, 12, 13, 14, 15, 16, 17, 18,	/* fp regs */
  -1, 9, -1, -1, -1,			/* arg, flags, fpsr, fpcr, frame */
  21, 22, 23, 24, 25, 26, 27, 28,	/* SSE registers */
  29, 30, 31, 32, 33, 34, 35, 36,	/* MMX registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended integer registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended SSE registers */
};
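
/* For illustration: svr4_dbx_register_map is indexed by gcc register
   number, so entry 6 holding 5 encodes the "5 for %ebp (gcc regno = 6)"
   line of the comment above, and entry 4 holding 6 encodes "6 for %esi
   (gcc regno = 4)".  */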

/* Test and compare insns in i386.md store the information needed to
   generate branch and scc insns here.  */

rtx ix86_compare_op0 = NULL_RTX;
rtx ix86_compare_op1 = NULL_RTX;
rtx ix86_compare_emitted = NULL_RTX;

/* Size of the register save area.  */
#define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
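
/* For illustration: with the usual 64-bit ABI values REGPARM_MAX == 6
   integer registers of UNITS_PER_WORD == 8 bytes each and
   SSE_REGPARM_MAX == 8 vector registers of 16 bytes each, the save area
   works out to 6*8 + 8*16 == 176 bytes.  */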

/* Define the structure for the machine field in struct function.  */

struct stack_local_entry GTY(())
{
  unsigned short mode;
  unsigned short n;
  rtx rtl;
  struct stack_local_entry *next;
};

/* Structure describing stack frame layout.
   Stack grows downward:

   [arguments]
					      <- ARG_POINTER
   saved pc

   saved frame pointer if frame_pointer_needed
					      <- HARD_FRAME_POINTER
   [saved regs]

   [padding1]          \
			)
   [va_arg registers]  (
			> to_allocate	      <- FRAME_POINTER
   [frame]	       (
			)
   [padding2]	       /
  */
struct ix86_frame
{
  int nregs;
  int padding1;
  int va_arg_size;
  HOST_WIDE_INT frame;
  int padding2;
  int outgoing_arguments_size;
  int red_zone_size;

  HOST_WIDE_INT to_allocate;
  /* The offsets relative to ARG_POINTER.  */
  HOST_WIDE_INT frame_pointer_offset;
  HOST_WIDE_INT hard_frame_pointer_offset;
  HOST_WIDE_INT stack_pointer_offset;

  /* When save_regs_using_mov is set, emit prologue using
     move instead of push instructions.  */
  bool save_regs_using_mov;
};

/* Code model option.  */
enum cmodel ix86_cmodel;
/* Asm dialect.  */
enum asm_dialect ix86_asm_dialect = ASM_ATT;
/* TLS dialects.  */
enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;

/* Which unit we are generating floating point math for.  */
enum fpmath_unit ix86_fpmath;

/* Which CPU we are scheduling for.  */
enum processor_type ix86_tune;

/* Which instruction set architecture to use.  */
enum processor_type ix86_arch;

/* True if the SSE prefetch instruction is not a NOOP.  */
int x86_prefetch_sse;

/* ix86_regparm_string as a number */
static int ix86_regparm;

/* -mstackrealign option */
extern int ix86_force_align_arg_pointer;
static const char ix86_force_align_arg_pointer_string[]
  = "force_align_arg_pointer";

/* Preferred alignment for stack boundary in bits.  */
unsigned int ix86_preferred_stack_boundary;

/* Values 1-5: see jump.c */
int ix86_branch_cost;

/* Variables which are this size or smaller are put in the data/bss
   or ldata/lbss sections.  */

int ix86_section_threshold = 65536;

/* Prefix built by ASM_GENERATE_INTERNAL_LABEL.  */
char internal_label_prefix[16];
int internal_label_prefix_len;

/* Register class used for passing a given 64-bit part of the argument.
   These represent classes as documented by the PS ABI, with the exception
   of the SSESF and SSEDF classes, which are basically the SSE class: gcc
   will just use an SF or DFmode move instead of a DImode one to avoid
   reformatting penalties.

   Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
   whenever possible (the upper half does contain padding).  */
enum x86_64_reg_class
  {
    X86_64_NO_CLASS,
    X86_64_INTEGER_CLASS,
    X86_64_INTEGERSI_CLASS,
    X86_64_SSE_CLASS,
    X86_64_SSESF_CLASS,
    X86_64_SSEDF_CLASS,
    X86_64_SSEUP_CLASS,
    X86_64_X87_CLASS,
    X86_64_X87UP_CLASS,
    X86_64_COMPLEX_X87_CLASS,
    X86_64_MEMORY_CLASS
  };
static const char * const x86_64_reg_class_name[] =
{
  "no", "integer", "integerSI", "sse", "sseSF", "sseDF",
  "sseup", "x87", "x87up", "cplx87", "no"
};

#define MAX_CLASSES 4
/* Table of constants used by fldpi, fldln2, etc.  */
1529 static REAL_VALUE_TYPE ext_80387_constants_table [5];
1530 static bool ext_80387_constants_init = 0;
1533 static struct machine_function * ix86_init_machine_status (void);
1534 static rtx ix86_function_value (tree, tree, bool);
1535 static int ix86_function_regparm (tree, tree);
1536 static void ix86_compute_frame_layout (struct ix86_frame *);
1537 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
/* The svr4 ABI for the i386 says that records and unions are returned
   in memory.  */
1543 #ifndef DEFAULT_PCC_STRUCT_RETURN
1544 #define DEFAULT_PCC_STRUCT_RETURN 1
1547 /* Implement TARGET_HANDLE_OPTION. */
1550 ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
1557 target_flags &= ~MASK_3DNOW_A;
1558 target_flags_explicit |= MASK_3DNOW_A;
1565 target_flags &= ~(MASK_3DNOW | MASK_3DNOW_A);
1566 target_flags_explicit |= MASK_3DNOW | MASK_3DNOW_A;
1573 target_flags &= ~(MASK_SSE2 | MASK_SSE3 | MASK_SSSE3
1575 target_flags_explicit |= (MASK_SSE2 | MASK_SSE3 | MASK_SSSE3
1583 target_flags &= ~(MASK_SSE3 | MASK_SSSE3 | MASK_SSE4A);
1584 target_flags_explicit |= MASK_SSE3 | MASK_SSSE3 | MASK_SSE4A;
1591 target_flags &= ~(MASK_SSSE3 | MASK_SSE4A);
1592 target_flags_explicit |= MASK_SSSE3 | MASK_SSE4A;
1599 target_flags &= ~MASK_SSE4A;
1600 target_flags_explicit |= MASK_SSE4A;
/* Sometimes certain combinations of command options do not make
   sense on a particular target machine.  You can define a macro
   `OVERRIDE_OPTIONS' to take account of this.  This macro, if
   defined, is executed once just after all the command options have
   been parsed.

   Don't use this macro to turn on various extra optimizations for
   `-O'.  That is what `OPTIMIZATION_OPTIONS' is for.  */
1619 override_options (void)
1622 int ix86_tune_defaulted = 0;
1623 unsigned int ix86_arch_mask, ix86_tune_mask;
1625 /* Comes from final.c -- no real reason to change it. */
1626 #define MAX_CODE_ALIGN 16
1630 const struct processor_costs *cost; /* Processor costs */
1631 const int target_enable; /* Target flags to enable. */
1632 const int target_disable; /* Target flags to disable. */
1633 const int align_loop; /* Default alignments. */
1634 const int align_loop_max_skip;
1635 const int align_jump;
1636 const int align_jump_max_skip;
1637 const int align_func;
1639 const processor_target_table[PROCESSOR_max] =
1641 {&i386_cost, 0, 0, 4, 3, 4, 3, 4},
1642 {&i486_cost, 0, 0, 16, 15, 16, 15, 16},
1643 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
1644 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
1645 {&geode_cost, 0, 0, 0, 0, 0, 0, 0},
1646 {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
1647 {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
1648 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
1649 {&k8_cost, 0, 0, 16, 7, 16, 7, 16},
1650 {&nocona_cost, 0, 0, 0, 0, 0, 0, 0},
1651 {&core2_cost, 0, 0, 16, 7, 16, 7, 16},
1652 {&generic32_cost, 0, 0, 16, 7, 16, 7, 16},
1653 {&generic64_cost, 0, 0, 16, 7, 16, 7, 16},
1654 {&amdfam10_cost, 0, 0, 32, 7, 32, 7, 32}
1657 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
1660 const char *const name; /* processor name or nickname. */
1661 const enum processor_type processor;
1662 const enum pta_flags
1668 PTA_PREFETCH_SSE = 1 << 4,
1670 PTA_3DNOW_A = 1 << 6,
1674 PTA_POPCNT = 1 << 10,
1676 PTA_SSE4A = 1 << 12,
1677 PTA_NO_SAHF = 1 << 13
1680 const processor_alias_table[] =
1682 {"i386", PROCESSOR_I386, 0},
1683 {"i486", PROCESSOR_I486, 0},
1684 {"i586", PROCESSOR_PENTIUM, 0},
1685 {"pentium", PROCESSOR_PENTIUM, 0},
1686 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
1687 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
1688 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1689 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1690 {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_PREFETCH_SSE | PTA_SSE},
1691 {"i686", PROCESSOR_PENTIUMPRO, 0},
1692 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
1693 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
1694 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1695 {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1696 {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE | PTA_SSE2},
1697 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1698 | PTA_MMX | PTA_PREFETCH_SSE},
1699 {"pentium4m", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1700 | PTA_MMX | PTA_PREFETCH_SSE},
1701 {"prescott", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3
1702 | PTA_MMX | PTA_PREFETCH_SSE},
1703 {"nocona", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_64BIT
1704 | PTA_MMX | PTA_PREFETCH_SSE
1705 | PTA_CX16 | PTA_NO_SAHF},
1706 {"core2", PROCESSOR_CORE2, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_SSSE3
1707 | PTA_64BIT | PTA_MMX
1708 | PTA_PREFETCH_SSE | PTA_CX16},
1709 {"geode", PROCESSOR_GEODE, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1711 {"k6", PROCESSOR_K6, PTA_MMX},
1712 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1713 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1714 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1716 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
1717 | PTA_3DNOW | PTA_3DNOW_A},
1718 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1719 | PTA_3DNOW_A | PTA_SSE},
1720 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1721 | PTA_3DNOW_A | PTA_SSE},
1722 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1723 | PTA_3DNOW_A | PTA_SSE},
1724 {"x86-64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_64BIT
1725 | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF},
1726 {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1727 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
1729 {"opteron", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1730 | PTA_64BIT | PTA_3DNOW_A | PTA_SSE
1731 | PTA_SSE2 | PTA_NO_SAHF},
1732 {"athlon64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1733 | PTA_64BIT | PTA_3DNOW_A | PTA_SSE
1734 | PTA_SSE2 | PTA_NO_SAHF},
1735 {"athlon-fx", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1736 | PTA_64BIT | PTA_3DNOW_A | PTA_SSE
1737 | PTA_SSE2 | PTA_NO_SAHF},
1738 {"amdfam10", PROCESSOR_AMDFAM10, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1739 | PTA_64BIT | PTA_3DNOW_A | PTA_SSE
1740 | PTA_SSE2 | PTA_SSE3 | PTA_POPCNT
1741 | PTA_ABM | PTA_SSE4A | PTA_CX16},
1742 {"barcelona", PROCESSOR_AMDFAM10, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1743 | PTA_64BIT | PTA_3DNOW_A | PTA_SSE
1744 | PTA_SSE2 | PTA_SSE3 | PTA_POPCNT
1745 | PTA_ABM | PTA_SSE4A | PTA_CX16},
1746 {"generic32", PROCESSOR_GENERIC32, 0 /* flags are only used for -march switch. */ },
1747 {"generic64", PROCESSOR_GENERIC64, PTA_64BIT /* flags are only used for -march switch. */ },
1750 int const pta_size = ARRAY_SIZE (processor_alias_table);
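
  /* To illustrate how the alias table above is consumed by the loops
     below: -march=k8 matches the "k8" row, so ix86_arch becomes
     PROCESSOR_K8 and, unless the user explicitly said otherwise, the
     PTA_MMX, PTA_3DNOW, PTA_3DNOW_A, PTA_SSE and PTA_SSE2 bits switch
     on the corresponding MASK_* target flags.  */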
1752 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1753 SUBTARGET_OVERRIDE_OPTIONS;
1756 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
1757 SUBSUBTARGET_OVERRIDE_OPTIONS;
  /* -fPIC is the default for x86_64 Darwin.  */
1761 if (TARGET_MACHO && TARGET_64BIT)
  /* Set the default values for switches whose default depends on
     TARGET_64BIT, in case they weren't overridden by command line
     options.  */
1768 /* Mach-O doesn't support omitting the frame pointer for now. */
1769 if (flag_omit_frame_pointer == 2)
1770 flag_omit_frame_pointer = (TARGET_MACHO ? 0 : 1);
1771 if (flag_asynchronous_unwind_tables == 2)
1772 flag_asynchronous_unwind_tables = 1;
1773 if (flag_pcc_struct_return == 2)
1774 flag_pcc_struct_return = 0;
1778 if (flag_omit_frame_pointer == 2)
1779 flag_omit_frame_pointer = 0;
1780 if (flag_asynchronous_unwind_tables == 2)
1781 flag_asynchronous_unwind_tables = 0;
1782 if (flag_pcc_struct_return == 2)
1783 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
1786 /* Need to check -mtune=generic first. */
1787 if (ix86_tune_string)
1789 if (!strcmp (ix86_tune_string, "generic")
1790 || !strcmp (ix86_tune_string, "i686")
	  /* As special support for cross compilers we read -mtune=native
	     as -mtune=generic.  With native compilers we won't see
	     -mtune=native, as it will have been rewritten by the driver.  */
1794 || !strcmp (ix86_tune_string, "native"))
1797 ix86_tune_string = "generic64";
1799 ix86_tune_string = "generic32";
1801 else if (!strncmp (ix86_tune_string, "generic", 7))
1802 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
1806 if (ix86_arch_string)
1807 ix86_tune_string = ix86_arch_string;
1808 if (!ix86_tune_string)
1810 ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT];
1811 ix86_tune_defaulted = 1;
1814 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
1815 need to use a sensible tune option. */
1816 if (!strcmp (ix86_tune_string, "generic")
1817 || !strcmp (ix86_tune_string, "x86-64")
1818 || !strcmp (ix86_tune_string, "i686"))
1821 ix86_tune_string = "generic64";
1823 ix86_tune_string = "generic32";
1826 if (ix86_stringop_string)
1828 if (!strcmp (ix86_stringop_string, "rep_byte"))
1829 stringop_alg = rep_prefix_1_byte;
1830 else if (!strcmp (ix86_stringop_string, "libcall"))
1831 stringop_alg = libcall;
1832 else if (!strcmp (ix86_stringop_string, "rep_4byte"))
1833 stringop_alg = rep_prefix_4_byte;
1834 else if (!strcmp (ix86_stringop_string, "rep_8byte"))
1835 stringop_alg = rep_prefix_8_byte;
1836 else if (!strcmp (ix86_stringop_string, "byte_loop"))
1837 stringop_alg = loop_1_byte;
1838 else if (!strcmp (ix86_stringop_string, "loop"))
1839 stringop_alg = loop;
1840 else if (!strcmp (ix86_stringop_string, "unrolled_loop"))
1841 stringop_alg = unrolled_loop;
1843 error ("bad value (%s) for -mstringop-strategy= switch", ix86_stringop_string);
1845 if (!strcmp (ix86_tune_string, "x86-64"))
1846 warning (OPT_Wdeprecated, "-mtune=x86-64 is deprecated. Use -mtune=k8 or "
1847 "-mtune=generic instead as appropriate.");
1849 if (!ix86_arch_string)
1850 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
1851 if (!strcmp (ix86_arch_string, "generic"))
1852 error ("generic CPU can be used only for -mtune= switch");
1853 if (!strncmp (ix86_arch_string, "generic", 7))
1854 error ("bad value (%s) for -march= switch", ix86_arch_string);
1856 if (ix86_cmodel_string != 0)
1858 if (!strcmp (ix86_cmodel_string, "small"))
1859 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1860 else if (!strcmp (ix86_cmodel_string, "medium"))
1861 ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
1862 else if (!strcmp (ix86_cmodel_string, "large"))
1863 ix86_cmodel = flag_pic ? CM_LARGE_PIC : CM_LARGE;
1865 error ("code model %s does not support PIC mode", ix86_cmodel_string);
1866 else if (!strcmp (ix86_cmodel_string, "32"))
1867 ix86_cmodel = CM_32;
1868 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
1869 ix86_cmodel = CM_KERNEL;
1871 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
1875 /* For TARGET_64BIT_MS_ABI, force pic on, in order to enable the
1876 use of rip-relative addressing. This eliminates fixups that
1877 would otherwise be needed if this object is to be placed in a
1878 DLL, and is essentially just as efficient as direct addressing. */
1879 if (TARGET_64BIT_MS_ABI)
1880 ix86_cmodel = CM_SMALL_PIC, flag_pic = 1;
1881 else if (TARGET_64BIT)
1882 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1884 ix86_cmodel = CM_32;
1886 if (ix86_asm_string != 0)
1889 && !strcmp (ix86_asm_string, "intel"))
1890 ix86_asm_dialect = ASM_INTEL;
1891 else if (!strcmp (ix86_asm_string, "att"))
1892 ix86_asm_dialect = ASM_ATT;
1894 error ("bad value (%s) for -masm= switch", ix86_asm_string);
1896 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
1897 error ("code model %qs not supported in the %s bit mode",
1898 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
1899 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
1900 sorry ("%i-bit mode not compiled in",
1901 (target_flags & MASK_64BIT) ? 64 : 32);
1903 for (i = 0; i < pta_size; i++)
1904 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
1906 ix86_arch = processor_alias_table[i].processor;
1907 /* Default cpu tuning to the architecture. */
1908 ix86_tune = ix86_arch;
1909 if (processor_alias_table[i].flags & PTA_MMX
1910 && !(target_flags_explicit & MASK_MMX))
1911 target_flags |= MASK_MMX;
1912 if (processor_alias_table[i].flags & PTA_3DNOW
1913 && !(target_flags_explicit & MASK_3DNOW))
1914 target_flags |= MASK_3DNOW;
1915 if (processor_alias_table[i].flags & PTA_3DNOW_A
1916 && !(target_flags_explicit & MASK_3DNOW_A))
1917 target_flags |= MASK_3DNOW_A;
1918 if (processor_alias_table[i].flags & PTA_SSE
1919 && !(target_flags_explicit & MASK_SSE))
1920 target_flags |= MASK_SSE;
1921 if (processor_alias_table[i].flags & PTA_SSE2
1922 && !(target_flags_explicit & MASK_SSE2))
1923 target_flags |= MASK_SSE2;
1924 if (processor_alias_table[i].flags & PTA_SSE3
1925 && !(target_flags_explicit & MASK_SSE3))
1926 target_flags |= MASK_SSE3;
1927 if (processor_alias_table[i].flags & PTA_SSSE3
1928 && !(target_flags_explicit & MASK_SSSE3))
1929 target_flags |= MASK_SSSE3;
1930 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1931 x86_prefetch_sse = true;
1932 if (processor_alias_table[i].flags & PTA_CX16)
1933 x86_cmpxchg16b = true;
1934 if (processor_alias_table[i].flags & PTA_POPCNT
1935 && !(target_flags_explicit & MASK_POPCNT))
1936 target_flags |= MASK_POPCNT;
1937 if (processor_alias_table[i].flags & PTA_ABM
1938 && !(target_flags_explicit & MASK_ABM))
1939 target_flags |= MASK_ABM;
1940 if (processor_alias_table[i].flags & PTA_SSE4A
1941 && !(target_flags_explicit & MASK_SSE4A))
1942 target_flags |= MASK_SSE4A;
1943 if (!(TARGET_64BIT && (processor_alias_table[i].flags & PTA_NO_SAHF)))
1945 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1946 error ("CPU you selected does not support x86-64 "
1952 error ("bad value (%s) for -march= switch", ix86_arch_string);
1954 ix86_arch_mask = 1u << ix86_arch;
1955 for (i = 0; i < X86_ARCH_LAST; ++i)
1956 ix86_arch_features[i] &= ix86_arch_mask;
1958 for (i = 0; i < pta_size; i++)
1959 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
1961 ix86_tune = processor_alias_table[i].processor;
1962 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1964 if (ix86_tune_defaulted)
1966 ix86_tune_string = "x86-64";
1967 for (i = 0; i < pta_size; i++)
1968 if (! strcmp (ix86_tune_string,
1969 processor_alias_table[i].name))
1971 ix86_tune = processor_alias_table[i].processor;
1974 error ("CPU you selected does not support x86-64 "
1977 /* Intel CPUs have always interpreted SSE prefetch instructions as
1978 NOPs; so, we can enable SSE prefetch instructions even when
1979 -mtune (rather than -march) points us to a processor that has them.
1980 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
1981 higher processors. */
1982 if (TARGET_CMOVE && (processor_alias_table[i].flags & PTA_PREFETCH_SSE))
1983 x86_prefetch_sse = true;
1987 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
1989 ix86_tune_mask = 1u << ix86_tune;
1990 for (i = 0; i < X86_TUNE_LAST; ++i)
1991 ix86_tune_features[i] &= ix86_tune_mask;
1994 ix86_cost = &size_cost;
1996 ix86_cost = processor_target_table[ix86_tune].cost;
1997 target_flags |= processor_target_table[ix86_tune].target_enable;
1998 target_flags &= ~processor_target_table[ix86_tune].target_disable;
2000 /* Arrange to set up i386_stack_locals for all functions. */
2001 init_machine_status = ix86_init_machine_status;
2003 /* Validate -mregparm= value. */
2004 if (ix86_regparm_string)
2007 warning (0, "-mregparm is ignored in 64-bit mode");
2008 i = atoi (ix86_regparm_string);
2009 if (i < 0 || i > REGPARM_MAX)
2010 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
2015 ix86_regparm = REGPARM_MAX;
2017 /* If the user has provided any of the -malign-* options,
2018 warn and use that value only if -falign-* is not set.
2019 Remove this code in GCC 3.2 or later. */
2020 if (ix86_align_loops_string)
2022 warning (0, "-malign-loops is obsolete, use -falign-loops");
2023 if (align_loops == 0)
2025 i = atoi (ix86_align_loops_string);
2026 if (i < 0 || i > MAX_CODE_ALIGN)
2027 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2029 align_loops = 1 << i;
2033 if (ix86_align_jumps_string)
2035 warning (0, "-malign-jumps is obsolete, use -falign-jumps");
2036 if (align_jumps == 0)
2038 i = atoi (ix86_align_jumps_string);
2039 if (i < 0 || i > MAX_CODE_ALIGN)
	    error ("-malign-jumps=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2042 align_jumps = 1 << i;
2046 if (ix86_align_funcs_string)
2048 warning (0, "-malign-functions is obsolete, use -falign-functions");
2049 if (align_functions == 0)
2051 i = atoi (ix86_align_funcs_string);
2052 if (i < 0 || i > MAX_CODE_ALIGN)
	    error ("-malign-functions=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2055 align_functions = 1 << i;
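	  /* Note that the -malign-* values are log2 exponents; e.g.
	     -malign-functions=4 yields align_functions = 1 << 4,
	     i.e. 16-byte alignment.  */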
2059 /* Default align_* from the processor table. */
2060 if (align_loops == 0)
2062 align_loops = processor_target_table[ix86_tune].align_loop;
2063 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
2065 if (align_jumps == 0)
2067 align_jumps = processor_target_table[ix86_tune].align_jump;
2068 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
2070 if (align_functions == 0)
2072 align_functions = processor_target_table[ix86_tune].align_func;
2075 /* Validate -mbranch-cost= value, or provide default. */
2076 ix86_branch_cost = ix86_cost->branch_cost;
2077 if (ix86_branch_cost_string)
2079 i = atoi (ix86_branch_cost_string);
2081 error ("-mbranch-cost=%d is not between 0 and 5", i);
2083 ix86_branch_cost = i;
2085 if (ix86_section_threshold_string)
2087 i = atoi (ix86_section_threshold_string);
2089 error ("-mlarge-data-threshold=%d is negative", i);
2091 ix86_section_threshold = i;
2094 if (ix86_tls_dialect_string)
2096 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
2097 ix86_tls_dialect = TLS_DIALECT_GNU;
2098 else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
2099 ix86_tls_dialect = TLS_DIALECT_GNU2;
2100 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
2101 ix86_tls_dialect = TLS_DIALECT_SUN;
2103 error ("bad value (%s) for -mtls-dialect= switch",
2104 ix86_tls_dialect_string);
2107 if (ix87_precision_string)
2109 i = atoi (ix87_precision_string);
2110 if (i != 32 && i != 64 && i != 80)
	error ("pc%d is not a valid precision setting (32, 64 or 80)", i);
2114 /* Keep nonleaf frame pointers. */
2115 if (flag_omit_frame_pointer)
2116 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
2117 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
2118 flag_omit_frame_pointer = 1;
2120 /* If we're doing fast math, we don't care about comparison order
2121 wrt NaNs. This lets us use a shorter comparison sequence. */
2122 if (flag_finite_math_only)
2123 target_flags &= ~MASK_IEEE_FP;
2125 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
2126 since the insns won't need emulation. */
2127 if (x86_arch_always_fancy_math_387 & ix86_arch_mask)
2128 target_flags &= ~MASK_NO_FANCY_MATH_387;
2130 /* Likewise, if the target doesn't have a 387, or we've specified
2131 software floating point, don't use 387 inline intrinsics. */
2133 target_flags |= MASK_NO_FANCY_MATH_387;
2135 /* Turn on SSE3 builtins for -mssse3. */
2137 target_flags |= MASK_SSE3;
2139 /* Turn on SSE3 builtins for -msse4a. */
2141 target_flags |= MASK_SSE3;
2143 /* Turn on SSE2 builtins for -msse3. */
2145 target_flags |= MASK_SSE2;
2147 /* Turn on SSE builtins for -msse2. */
2149 target_flags |= MASK_SSE;
2151 /* Turn on MMX builtins for -msse. */
2154 target_flags |= MASK_MMX & ~target_flags_explicit;
2155 x86_prefetch_sse = true;
2158 /* Turn on MMX builtins for 3Dnow. */
2160 target_flags |= MASK_MMX;
2162 /* Turn on POPCNT builtins for -mabm. */
2164 target_flags |= MASK_POPCNT;
    warning (0, "-mrtd is ignored in 64-bit mode");
2171 /* Enable by default the SSE and MMX builtins. Do allow the user to
2172 explicitly disable any of these. In particular, disabling SSE and
2173 MMX for kernel code is extremely useful. */
2175 |= ((MASK_SSE2 | MASK_SSE | MASK_MMX | TARGET_SUBTARGET64_DEFAULT)
2176 & ~target_flags_explicit);
      /* The i386 ABI does not specify a red zone.  It still makes sense
	 to use one when the programmer takes care to keep the stack from
	 being destroyed.  */
2182 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
2183 target_flags |= MASK_NO_RED_ZONE;
2186 /* Validate -mpreferred-stack-boundary= value, or provide default.
2187 The default of 128 bits is for Pentium III's SSE __m128. We can't
2188 change it because of optimize_size. Otherwise, we can't mix object
2189 files compiled with -Os and -On. */
2190 ix86_preferred_stack_boundary = 128;
2191 if (ix86_preferred_stack_boundary_string)
2193 i = atoi (ix86_preferred_stack_boundary_string);
2194 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
2195 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
2196 TARGET_64BIT ? 4 : 2);
2198 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
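      /* The option value is a log2 exponent in bytes; e.g.
	 -mpreferred-stack-boundary=4 gives (1 << 4) * 8 = 128 bits,
	 i.e. 16-byte stack alignment.  */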
2201 /* Accept -msseregparm only if at least SSE support is enabled. */
2202 if (TARGET_SSEREGPARM
2204 error ("-msseregparm used without SSE enabled");
2206 ix86_fpmath = TARGET_FPMATH_DEFAULT;
2207 if (ix86_fpmath_string != 0)
2209 if (! strcmp (ix86_fpmath_string, "387"))
2210 ix86_fpmath = FPMATH_387;
2211 else if (! strcmp (ix86_fpmath_string, "sse"))
	      warning (0, "SSE instruction set disabled, using 387 arithmetic");
2216 ix86_fpmath = FPMATH_387;
2219 ix86_fpmath = FPMATH_SSE;
2221 else if (! strcmp (ix86_fpmath_string, "387,sse")
2222 || ! strcmp (ix86_fpmath_string, "sse,387"))
	      warning (0, "SSE instruction set disabled, using 387 arithmetic");
2227 ix86_fpmath = FPMATH_387;
2229 else if (!TARGET_80387)
	      warning (0, "387 instruction set disabled, using SSE arithmetic");
2232 ix86_fpmath = FPMATH_SSE;
2235 ix86_fpmath = FPMATH_SSE | FPMATH_387;
2238 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
2241 /* If the i387 is disabled, then do not return values in it. */
2243 target_flags &= ~MASK_FLOAT_RETURNS;
2245 if ((x86_accumulate_outgoing_args & ix86_tune_mask)
2246 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
2248 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
  /* ??? Unwind info is not correct around the CFG unless either a frame
     pointer is present or M_A_O_A is set.  Fixing this requires rewriting
     unwind info generation to be aware of the CFG and propagating states
     around edges.  */
2254 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
2255 || flag_exceptions || flag_non_call_exceptions)
2256 && flag_omit_frame_pointer
2257 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
2259 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
2260 warning (0, "unwind tables currently require either a frame pointer "
2261 "or -maccumulate-outgoing-args for correctness");
2262 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
  /* For sane SSE instruction set generation we need the fcomi instruction.
     It is safe to enable all CMOVE instructions.  */
2270 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
2273 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
2274 p = strchr (internal_label_prefix, 'X');
2275 internal_label_prefix_len = p - internal_label_prefix;
  /* When the scheduling description is not available, disable the
     scheduler pass so it won't slow down compilation and make x87 code
     slower.  */
2281 if (!TARGET_SCHEDULE)
2282 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
2284 if (!PARAM_SET_P (PARAM_SIMULTANEOUS_PREFETCHES))
2285 set_param_value ("simultaneous-prefetches",
2286 ix86_cost->simultaneous_prefetches);
2287 if (!PARAM_SET_P (PARAM_L1_CACHE_LINE_SIZE))
2288 set_param_value ("l1-cache-line-size", ix86_cost->prefetch_block);
2291 /* Return true if this goes in large data/bss. */
2294 ix86_in_large_data_p (tree exp)
2296 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
2299 /* Functions are never large data. */
2300 if (TREE_CODE (exp) == FUNCTION_DECL)
2303 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
2305 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
2306 if (strcmp (section, ".ldata") == 0
2307 || strcmp (section, ".lbss") == 0)
2313 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
2315 /* If this is an incomplete type with size 0, then we can't put it
2316 in data because it might be too big when completed. */
2317 if (!size || size > ix86_section_threshold)
2324 /* Switch to the appropriate section for output of DECL.
2325 DECL is either a `VAR_DECL' node or a constant of some sort.
2326 RELOC indicates whether forming the initial value of DECL requires
2327 link-time relocations. */
static section *x86_64_elf_select_section (tree, int,
					   unsigned HOST_WIDE_INT)
						ATTRIBUTE_UNUSED;
2333 x86_64_elf_select_section (tree decl, int reloc,
2334 unsigned HOST_WIDE_INT align)
2336 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2337 && ix86_in_large_data_p (decl))
2339 const char *sname = NULL;
2340 unsigned int flags = SECTION_WRITE;
2341 switch (categorize_decl_for_section (decl, reloc))
2346 case SECCAT_DATA_REL:
2347 sname = ".ldata.rel";
2349 case SECCAT_DATA_REL_LOCAL:
2350 sname = ".ldata.rel.local";
2352 case SECCAT_DATA_REL_RO:
2353 sname = ".ldata.rel.ro";
2355 case SECCAT_DATA_REL_RO_LOCAL:
2356 sname = ".ldata.rel.ro.local";
2360 flags |= SECTION_BSS;
2363 case SECCAT_RODATA_MERGE_STR:
2364 case SECCAT_RODATA_MERGE_STR_INIT:
2365 case SECCAT_RODATA_MERGE_CONST:
2369 case SECCAT_SRODATA:
	  /* We don't split these for the medium model.  Place them into
	     default sections and hope for the best.  */
2382 /* We might get called with string constants, but get_named_section
2383 doesn't like them as they are not DECLs. Also, we need to set
2384 flags in that case. */
2386 return get_section (sname, flags, NULL);
2387 return get_named_section (decl, sname, reloc);
2390 return default_elf_select_section (decl, reloc, align);
2393 /* Build up a unique section name, expressed as a
2394 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
2395 RELOC indicates whether the initial value of EXP requires
2396 link-time relocations. */
2398 static void ATTRIBUTE_UNUSED
2399 x86_64_elf_unique_section (tree decl, int reloc)
2401 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2402 && ix86_in_large_data_p (decl))
2404 const char *prefix = NULL;
2405 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
2406 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
2408 switch (categorize_decl_for_section (decl, reloc))
2411 case SECCAT_DATA_REL:
2412 case SECCAT_DATA_REL_LOCAL:
2413 case SECCAT_DATA_REL_RO:
2414 case SECCAT_DATA_REL_RO_LOCAL:
2415 prefix = one_only ? ".gnu.linkonce.ld." : ".ldata.";
2418 prefix = one_only ? ".gnu.linkonce.lb." : ".lbss.";
2421 case SECCAT_RODATA_MERGE_STR:
2422 case SECCAT_RODATA_MERGE_STR_INIT:
2423 case SECCAT_RODATA_MERGE_CONST:
2424 prefix = one_only ? ".gnu.linkonce.lr." : ".lrodata.";
2426 case SECCAT_SRODATA:
	  /* We don't split these for the medium model.  Place them into
	     default sections and hope for the best.  */
2442 plen = strlen (prefix);
2444 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
2445 name = targetm.strip_name_encoding (name);
2446 nlen = strlen (name);
2448 string = alloca (nlen + plen + 1);
2449 memcpy (string, prefix, plen);
2450 memcpy (string + plen, name, nlen + 1);
2452 DECL_SECTION_NAME (decl) = build_string (nlen + plen, string);
2456 default_unique_section (decl, reloc);
2459 #ifdef COMMON_ASM_OP
/* This says how to output assembler code to declare an
   uninitialized external linkage data object.

   For medium model x86-64 we need to use the .largecomm opcode for
   large objects.  */
2466 x86_elf_aligned_common (FILE *file,
2467 const char *name, unsigned HOST_WIDE_INT size,
2470 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2471 && size > (unsigned int)ix86_section_threshold)
2472 fprintf (file, ".largecomm\t");
2474 fprintf (file, "%s", COMMON_ASM_OP);
2475 assemble_name (file, name);
2476 fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
2477 size, align / BITS_PER_UNIT);
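  /* For a hypothetical 400-byte object "foo" aligned to 32 bytes the
     code above emits either
	.comm	foo,400,32
     or, for large medium-model objects,
	.largecomm	foo,400,32  */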
2481 /* Utility function for targets to use in implementing
2482 ASM_OUTPUT_ALIGNED_BSS. */
2485 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
2486 const char *name, unsigned HOST_WIDE_INT size,
2489 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2490 && size > (unsigned int)ix86_section_threshold)
2491 switch_to_section (get_named_section (decl, ".lbss", 0));
2493 switch_to_section (bss_section);
2494 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
2495 #ifdef ASM_DECLARE_OBJECT_NAME
2496 last_assemble_variable_decl = decl;
2497 ASM_DECLARE_OBJECT_NAME (file, name, decl);
  /* The standard thing is just to output a label for the object.  */
2500 ASM_OUTPUT_LABEL (file, name);
2501 #endif /* ASM_DECLARE_OBJECT_NAME */
2502 ASM_OUTPUT_SKIP (file, size ? size : 1);
2506 optimization_options (int level, int size ATTRIBUTE_UNUSED)
2508 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
2509 make the problem with not enough registers even worse. */
2510 #ifdef INSN_SCHEDULING
2512 flag_schedule_insns = 0;
2516 /* The Darwin libraries never set errno, so we might as well
2517 avoid calling them when that's the only reason we would. */
2518 flag_errno_math = 0;
  /* The default values of these switches depend on TARGET_64BIT, which
     is not known at this point.  Mark these values with 2 and let the
     user override them.  If no command line option specifies them, we
     will set the defaults in override_options.  */
2525 flag_omit_frame_pointer = 2;
2526 flag_pcc_struct_return = 2;
2527 flag_asynchronous_unwind_tables = 2;
2528 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
2529 SUBTARGET_OPTIMIZATION_OPTIONS;
2533 /* Decide whether we can make a sibling call to a function. DECL is the
2534 declaration of the function being targeted by the call and EXP is the
2535 CALL_EXPR representing the call. */
2538 ix86_function_ok_for_sibcall (tree decl, tree exp)
2543 /* If we are generating position-independent code, we cannot sibcall
2544 optimize any indirect call, or a direct call to a global function,
2545 as the PLT requires %ebx be live. */
2546 if (!TARGET_64BIT && flag_pic && (!decl || !targetm.binds_local_p (decl)))
2553 func = TREE_TYPE (CALL_EXPR_FN (exp));
2554 if (POINTER_TYPE_P (func))
2555 func = TREE_TYPE (func);
  /* Check that the return value locations are the same.  For example,
     if we are returning floats on the 80387 register stack, we cannot
     make a sibcall from a function that doesn't return a float to a
     function that does or, conversely, from a function that does return
     a float to a function that doesn't; the necessary stack adjustment
     would not be executed.  This is also the place where we notice
     differences in the return value ABI.  Note that it is OK for one
     of the functions to have void return type as long as the return
     value of the other is passed in a register.  */
2567 a = ix86_function_value (TREE_TYPE (exp), func, false);
2568 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
2570 if (STACK_REG_P (a) || STACK_REG_P (b))
2572 if (!rtx_equal_p (a, b))
2575 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
2577 else if (!rtx_equal_p (a, b))
2580 /* If this call is indirect, we'll need to be able to use a call-clobbered
2581 register for the address of the target function. Make sure that all
2582 such registers are not used for passing parameters. */
2583 if (!decl && !TARGET_64BIT)
2587 /* We're looking at the CALL_EXPR, we need the type of the function. */
2588 type = CALL_EXPR_FN (exp); /* pointer expression */
2589 type = TREE_TYPE (type); /* pointer type */
2590 type = TREE_TYPE (type); /* function type */
2592 if (ix86_function_regparm (type, NULL) >= 3)
2594 /* ??? Need to count the actual number of registers to be used,
2595 not the possible number of registers. Fix later. */
2600 /* Dllimport'd functions are also called indirectly. */
2601 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
2602 && decl && DECL_DLLIMPORT_P (decl)
2603 && ix86_function_regparm (TREE_TYPE (decl), NULL) >= 3)
  /* If we force-aligned the stack, then sibcalling would unalign the
     stack, which may break the called function.  */
2608 if (cfun->machine->force_align_arg_pointer)
2611 /* Otherwise okay. That also includes certain types of indirect calls. */
2615 /* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm"
2616 calling convention attributes;
2617 arguments as in struct attribute_spec.handler. */
2620 ix86_handle_cconv_attribute (tree *node, tree name,
2622 int flags ATTRIBUTE_UNUSED,
2625 if (TREE_CODE (*node) != FUNCTION_TYPE
2626 && TREE_CODE (*node) != METHOD_TYPE
2627 && TREE_CODE (*node) != FIELD_DECL
2628 && TREE_CODE (*node) != TYPE_DECL)
2630 warning (OPT_Wattributes, "%qs attribute only applies to functions",
2631 IDENTIFIER_POINTER (name));
2632 *no_add_attrs = true;
2636 /* Can combine regparm with all attributes but fastcall. */
2637 if (is_attribute_p ("regparm", name))
2641 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2643 error ("fastcall and regparm attributes are not compatible");
2646 cst = TREE_VALUE (args);
2647 if (TREE_CODE (cst) != INTEGER_CST)
2649 warning (OPT_Wattributes,
2650 "%qs attribute requires an integer constant argument",
2651 IDENTIFIER_POINTER (name));
2652 *no_add_attrs = true;
2654 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
2656 warning (OPT_Wattributes, "argument to %qs attribute larger than %d",
2657 IDENTIFIER_POINTER (name), REGPARM_MAX);
2658 *no_add_attrs = true;
2662 && lookup_attribute (ix86_force_align_arg_pointer_string,
2663 TYPE_ATTRIBUTES (*node))
2664 && compare_tree_int (cst, REGPARM_MAX-1))
2666 error ("%s functions limited to %d register parameters",
2667 ix86_force_align_arg_pointer_string, REGPARM_MAX-1);
2675 /* Do not warn when emulating the MS ABI. */
2676 if (!TARGET_64BIT_MS_ABI)
2677 warning (OPT_Wattributes, "%qs attribute ignored",
2678 IDENTIFIER_POINTER (name));
2679 *no_add_attrs = true;
2683 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
2684 if (is_attribute_p ("fastcall", name))
2686 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
2688 error ("fastcall and cdecl attributes are not compatible");
2690 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
2692 error ("fastcall and stdcall attributes are not compatible");
2694 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
2696 error ("fastcall and regparm attributes are not compatible");
2700 /* Can combine stdcall with fastcall (redundant), regparm and
2702 else if (is_attribute_p ("stdcall", name))
2704 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
2706 error ("stdcall and cdecl attributes are not compatible");
2708 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2710 error ("stdcall and fastcall attributes are not compatible");
2714 /* Can combine cdecl with regparm and sseregparm. */
2715 else if (is_attribute_p ("cdecl", name))
2717 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
2719 error ("stdcall and cdecl attributes are not compatible");
2721 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2723 error ("fastcall and cdecl attributes are not compatible");
2727 /* Can combine sseregparm with all attributes. */
2732 /* Return 0 if the attributes for two types are incompatible, 1 if they
2733 are compatible, and 2 if they are nearly compatible (which causes a
2734 warning to be generated). */
2737 ix86_comp_type_attributes (tree type1, tree type2)
2739 /* Check for mismatch of non-default calling convention. */
2740 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
2742 if (TREE_CODE (type1) != FUNCTION_TYPE)
2745 /* Check for mismatched fastcall/regparm types. */
2746 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
2747 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
2748 || (ix86_function_regparm (type1, NULL)
2749 != ix86_function_regparm (type2, NULL)))
2752 /* Check for mismatched sseregparm types. */
2753 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
2754 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
2757 /* Check for mismatched return types (cdecl vs stdcall). */
2758 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
2759 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
2765 /* Return the regparm value for a function with the indicated TYPE and DECL.
2766 DECL may be NULL when calling function indirectly
2767 or considering a libcall. */
2770 ix86_function_regparm (tree type, tree decl)
2773 int regparm = ix86_regparm;
2778 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
2780 return TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
2782 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
2785 /* Use register calling convention for local functions when possible. */
2786 if (decl && flag_unit_at_a_time && !profile_flag)
2788 struct cgraph_local_info *i = cgraph_local_info (decl);
2791 int local_regparm, globals = 0, regno;
2794 /* Make sure no regparm register is taken by a
2795 global register variable. */
2796 for (local_regparm = 0; local_regparm < 3; local_regparm++)
2797 if (global_regs[local_regparm])
	  /* We can't use regparm(3) for nested functions, as these use the
	     static chain pointer in the third argument.  */
2802 if (local_regparm == 3
2803 && decl_function_context (decl)
2804 && !DECL_NO_STATIC_CHAIN (decl))
	  /* If the function realigns its stack pointer, the prologue will
	     clobber %ecx.  If we've already generated code for the callee,
	     the callee's DECL_STRUCT_FUNCTION is gone, so we fall back to
	     scanning the attributes for the self-realigning property.  */
2811 f = DECL_STRUCT_FUNCTION (decl);
2812 if (local_regparm == 3
2813 && (f ? !!f->machine->force_align_arg_pointer
2814 : !!lookup_attribute (ix86_force_align_arg_pointer_string,
2815 TYPE_ATTRIBUTES (TREE_TYPE (decl)))))
	  /* Each global register variable increases register pressure, so
	     the more global register variables there are, the less useful
	     the regparm optimization is, unless explicitly requested by
	     the user.  */
2821 for (regno = 0; regno < 6; regno++)
2822 if (global_regs[regno])
2825 = globals < local_regparm ? local_regparm - globals : 0;
2827 if (local_regparm > regparm)
2828 regparm = local_regparm;
2835 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
2836 DFmode (2) arguments in SSE registers for a function with the
2837 indicated TYPE and DECL. DECL may be NULL when calling function
2838 indirectly or considering a libcall. Otherwise return 0. */
2841 ix86_function_sseregparm (tree type, tree decl)
2843 gcc_assert (!TARGET_64BIT);
2845 /* Use SSE registers to pass SFmode and DFmode arguments if requested
2846 by the sseregparm attribute. */
2847 if (TARGET_SSEREGPARM
2848 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
2853 error ("Calling %qD with attribute sseregparm without "
2854 "SSE/SSE2 enabled", decl);
2856 error ("Calling %qT with attribute sseregparm without "
2857 "SSE/SSE2 enabled", type);
2864 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
2865 (and DFmode for SSE2) arguments in SSE registers. */
2866 if (decl && TARGET_SSE_MATH && flag_unit_at_a_time && !profile_flag)
2868 struct cgraph_local_info *i = cgraph_local_info (decl);
2870 return TARGET_SSE2 ? 2 : 1;
2876 /* Return true if EAX is live at the start of the function. Used by
2877 ix86_expand_prologue to determine if we need special help before
2878 calling allocate_stack_worker. */
2881 ix86_eax_live_at_start_p (void)
2883 /* Cheat. Don't bother working forward from ix86_function_regparm
2884 to the function type to whether an actual argument is located in
2885 eax. Instead just look at cfg info, which is still close enough
2886 to correct at this point. This gives false positives for broken
2887 functions that might use uninitialized data that happens to be
2888 allocated in eax, but who cares? */
2889 return REGNO_REG_SET_P (ENTRY_BLOCK_PTR->il.rtl->global_live_at_end, 0);
2892 /* Return true if TYPE has a variable argument list. */
2895 type_has_variadic_args_p (tree type)
2897 tree n, t = TYPE_ARG_TYPES (type);
2902 while ((n = TREE_CHAIN (t)) != NULL)
2905 return TREE_VALUE (t) != void_type_node;
2908 /* Value is the number of bytes of arguments automatically
2909 popped when returning from a subroutine call.
2910 FUNDECL is the declaration node of the function (as a tree),
2911 FUNTYPE is the data type of the function (as a tree),
2912 or for a library call it is an identifier node for the subroutine name.
2913 SIZE is the number of bytes of arguments passed on the stack.
2915 On the 80386, the RTD insn may be used to pop them if the number
2916 of args is fixed, but if the number is variable then the caller
2917 must pop them all. RTD can't be used for library calls now
2918 because the library is compiled with the Unix compiler.
2919 Use of RTD is a selectable option, since it is incompatible with
2920 standard Unix calling sequences. If the option is not selected,
2921 the caller must always pop the args.
2923 The attribute stdcall is equivalent to RTD on a per module basis. */
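
/* As a sketch (not from this file): a function declared

     void __attribute__ ((stdcall)) f (int a, int b, int c);

   has 12 bytes of stack arguments, so this hook returns 12 and the
   callee pops them with "ret $12", while a plain cdecl function
   returns 0 here and leaves the popping to the caller.  */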
2926 ix86_return_pops_args (tree fundecl, tree funtype, int size)
2930 /* None of the 64-bit ABIs pop arguments. */
2934 rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
2936 /* Cdecl functions override -mrtd, and never pop the stack. */
2937 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype)))
  /* Stdcall and fastcall functions will pop the stack if not variable args.  */
2941 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
2942 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
2945 if (rtd && ! type_has_variadic_args_p (funtype))
2949 /* Lose any fake structure return argument if it is passed on the stack. */
2950 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
2951 && !KEEP_AGGREGATE_RETURN_POINTER)
2953 int nregs = ix86_function_regparm (funtype, fundecl);
2955 return GET_MODE_SIZE (Pmode);
2961 /* Argument support functions. */
/* Return true when a register may be used to pass function parameters.  */
2965 ix86_function_arg_regno_p (int regno)
2968 const int *parm_regs;
2973 return (regno < REGPARM_MAX
2974 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
2976 return (regno < REGPARM_MAX
2977 || (TARGET_MMX && MMX_REGNO_P (regno)
2978 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
2979 || (TARGET_SSE && SSE_REGNO_P (regno)
2980 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
2985 if (SSE_REGNO_P (regno) && TARGET_SSE)
2990 if (TARGET_SSE && SSE_REGNO_P (regno)
2991 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
  /* RAX is used as a hidden argument to va_arg functions.  */
2996 if (!TARGET_64BIT_MS_ABI && regno == 0)
2999 if (TARGET_64BIT_MS_ABI)
3000 parm_regs = x86_64_ms_abi_int_parameter_registers;
3002 parm_regs = x86_64_int_parameter_registers;
3003 for (i = 0; i < REGPARM_MAX; i++)
3004 if (regno == parm_regs[i])
/* Return true if we do not know how to pass TYPE solely in registers.  */
3012 ix86_must_pass_in_stack (enum machine_mode mode, tree type)
3014 if (must_pass_in_stack_var_size_or_pad (mode, type))
3017 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
3018 The layout_type routine is crafty and tries to trick us into passing
3019 currently unsupported vector types on the stack by using TImode. */
3020 return (!TARGET_64BIT && mode == TImode
3021 && type && TREE_CODE (type) != VECTOR_TYPE);
3024 /* Initialize a variable CUM of type CUMULATIVE_ARGS
3025 for a call to a function whose data type is FNTYPE.
3026 For a library call, FNTYPE is 0. */
3029 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
3030 tree fntype, /* tree ptr for function decl */
3031 rtx libname, /* SYMBOL_REF of library name or 0 */
3034 memset (cum, 0, sizeof (*cum));
3036 /* Set up the number of registers to use for passing arguments. */
3037 cum->nregs = ix86_regparm;
3039 cum->sse_nregs = SSE_REGPARM_MAX;
3041 cum->mmx_nregs = MMX_REGPARM_MAX;
3042 cum->warn_sse = true;
3043 cum->warn_mmx = true;
3044 cum->maybe_vaarg = (fntype ? type_has_variadic_args_p (fntype) : !libname);
3048 /* If there are variable arguments, then we won't pass anything
3049 in registers in 32-bit mode. */
3050 if (cum->maybe_vaarg)
3060 /* Use ecx and edx registers if function has fastcall attribute,
3061 else look for regparm information. */
3064 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
3070 cum->nregs = ix86_function_regparm (fntype, fndecl);
3073 /* Set up the number of SSE registers used for passing SFmode
3074 and DFmode arguments. Warn for mismatching ABI. */
3075 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl);
/* Return the "natural" mode for TYPE.  In most cases, this is just
   TYPE_MODE.  But in the case of vector types, it is some vector mode.

   When we have only some of our vector ISA extensions enabled, then there
   are some modes for which vector_mode_supported_p is false.  For these
   modes, the generic vector support in GCC will choose some non-vector mode
   in order to implement the type.  By computing the natural mode, we'll
   select the proper ABI location for the operand and not depend on whatever
   the middle-end decides to do with these vector types.  */
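
/* As a hypothetical example, a GNU C vector type such as

     typedef float v4sf __attribute__ ((vector_size (16)));

   gets V4SFmode as its natural mode even when V4SFmode is not directly
   supported and the middle-end would otherwise fall back to a
   non-vector mode for it.  */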
3089 static enum machine_mode
3090 type_natural_mode (tree type)
3092 enum machine_mode mode = TYPE_MODE (type);
3094 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
3096 HOST_WIDE_INT size = int_size_in_bytes (type);
3097 if ((size == 8 || size == 16)
3098 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
3099 && TYPE_VECTOR_SUBPARTS (type) > 1)
3101 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
3103 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
3104 mode = MIN_MODE_VECTOR_FLOAT;
3106 mode = MIN_MODE_VECTOR_INT;
3108 /* Get the mode which has this inner mode and number of units. */
3109 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
3110 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
3111 && GET_MODE_INNER (mode) == innermode)
3121 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
3122 this may not agree with the mode that the type system has chosen for the
3123 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
3124 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
3127 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
3132 if (orig_mode != BLKmode)
3133 tmp = gen_rtx_REG (orig_mode, regno);
3136 tmp = gen_rtx_REG (mode, regno);
3137 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
3138 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
/* x86-64 register passing implementation.  See the x86-64 ABI for details.
   The goal of this code is to classify each 8 bytes of an incoming argument
   by register class and assign registers accordingly.  */
3148 /* Return the union class of CLASS1 and CLASS2.
3149 See the x86-64 PS ABI for details. */
3151 static enum x86_64_reg_class
3152 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
3154 /* Rule #1: If both classes are equal, this is the resulting class. */
3155 if (class1 == class2)
  /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
     the other class.  */
3160 if (class1 == X86_64_NO_CLASS)
3162 if (class2 == X86_64_NO_CLASS)
3165 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
3166 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
3167 return X86_64_MEMORY_CLASS;
3169 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
3170 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
3171 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
3172 return X86_64_INTEGERSI_CLASS;
3173 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
3174 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
3175 return X86_64_INTEGER_CLASS;
  /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
     MEMORY is used.  */
3179 if (class1 == X86_64_X87_CLASS
3180 || class1 == X86_64_X87UP_CLASS
3181 || class1 == X86_64_COMPLEX_X87_CLASS
3182 || class2 == X86_64_X87_CLASS
3183 || class2 == X86_64_X87UP_CLASS
3184 || class2 == X86_64_COMPLEX_X87_CLASS)
3185 return X86_64_MEMORY_CLASS;
3187 /* Rule #6: Otherwise class SSE is used. */
3188 return X86_64_SSE_CLASS;
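
/* For instance, merging X86_64_INTEGERSI_CLASS with X86_64_SSEDF_CLASS
   falls through rules #1-#3 and yields X86_64_INTEGER_CLASS by rule #4,
   while merging X86_64_NO_CLASS with any class returns the other class
   by rule #2.  */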
/* Classify the argument of type TYPE and mode MODE.
   CLASSES will be filled by the register class used to pass each word
   of the operand.  The number of words is returned.  In case the parameter
   should be passed in memory, 0 is returned.  As a special case for zero
   sized containers, classes[0] will be NO_CLASS and 1 is returned.

   BIT_OFFSET is used internally for handling records; it specifies the
   offset in bits modulo 256 to avoid overflow cases.

   See the x86-64 PS ABI for details.  */
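
/* A worked example of the rules below: for a hypothetical 16-byte

     struct s { double d; long l; };

   the first eightbyte classifies as X86_64_SSEDF_CLASS and the second
   as X86_64_INTEGER_CLASS, so the struct is passed in one SSE register
   and one integer register.  */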
3204 classify_argument (enum machine_mode mode, tree type,
3205 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
3207 HOST_WIDE_INT bytes =
3208 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3209 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3211 /* Variable sized entities are always passed/returned in memory. */
3215 if (mode != VOIDmode
3216 && targetm.calls.must_pass_in_stack (mode, type))
3219 if (type && AGGREGATE_TYPE_P (type))
3223 enum x86_64_reg_class subclasses[MAX_CLASSES];
3225 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
3229 for (i = 0; i < words; i++)
3230 classes[i] = X86_64_NO_CLASS;
      /* Zero-sized arrays or structures are NO_CLASS.  We return 0 to
	 signal the memory class, so handle this as a special case.  */
3236 classes[0] = X86_64_NO_CLASS;
3240 /* Classify each field of record and merge classes. */
3241 switch (TREE_CODE (type))
	  /* And now merge the fields of the structure.  */
3245 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3247 if (TREE_CODE (field) == FIELD_DECL)
3251 if (TREE_TYPE (field) == error_mark_node)
3254 /* Bitfields are always classified as integer. Handle them
3255 early, since later code would consider them to be
3256 misaligned integers. */
3257 if (DECL_BIT_FIELD (field))
3259 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
3260 i < ((int_bit_position (field) + (bit_offset % 64))
3261 + tree_low_cst (DECL_SIZE (field), 0)
3264 merge_classes (X86_64_INTEGER_CLASS,
3269 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3270 TREE_TYPE (field), subclasses,
3271 (int_bit_position (field)
3272 + bit_offset) % 256);
3275 for (i = 0; i < num; i++)
3278 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
3280 merge_classes (subclasses[i], classes[i + pos]);
3288 /* Arrays are handled as small records. */
3291 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
3292 TREE_TYPE (type), subclasses, bit_offset);
3296 /* The partial classes are now full classes. */
3297 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
3298 subclasses[0] = X86_64_SSE_CLASS;
3299 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
3300 subclasses[0] = X86_64_INTEGER_CLASS;
3302 for (i = 0; i < words; i++)
3303 classes[i] = subclasses[i % num];
3308 case QUAL_UNION_TYPE:
	  /* Unions are similar to RECORD_TYPE, but the offset is always 0.  */
3311 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3313 if (TREE_CODE (field) == FIELD_DECL)
3317 if (TREE_TYPE (field) == error_mark_node)
3320 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3321 TREE_TYPE (field), subclasses,
3325 for (i = 0; i < num; i++)
3326 classes[i] = merge_classes (subclasses[i], classes[i]);
3335 /* Final merger cleanup. */
3336 for (i = 0; i < words; i++)
      /* If one class is MEMORY, everything should be passed in
	 memory.  */
      /* The X86_64_SSEUP_CLASS should always be preceded by
	 X86_64_SSE_CLASS.  */
3345 if (classes[i] == X86_64_SSEUP_CLASS
3346 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
3347 classes[i] = X86_64_SSE_CLASS;
3349 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
3350 if (classes[i] == X86_64_X87UP_CLASS
3351 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
3352 classes[i] = X86_64_SSE_CLASS;
  /* Compute the alignment needed.  We align all types to their natural
     boundaries, with the exception of XFmode, which is aligned to 64 bits.  */
3359 if (mode != VOIDmode && mode != BLKmode)
3361 int mode_alignment = GET_MODE_BITSIZE (mode);
3364 mode_alignment = 128;
3365 else if (mode == XCmode)
3366 mode_alignment = 256;
3367 if (COMPLEX_MODE_P (mode))
3368 mode_alignment /= 2;
3369 /* Misaligned fields are always returned in memory. */
3370 if (bit_offset % mode_alignment)
  /* For V1xx modes, just use the base mode.  */
3375 if (VECTOR_MODE_P (mode)
3376 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
3377 mode = GET_MODE_INNER (mode);
3379 /* Classification of atomic types. */
3384 classes[0] = X86_64_SSE_CLASS;
3387 classes[0] = X86_64_SSE_CLASS;
3388 classes[1] = X86_64_SSEUP_CLASS;
3397 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3398 classes[0] = X86_64_INTEGERSI_CLASS;
3400 classes[0] = X86_64_INTEGER_CLASS;
3404 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
3409 if (!(bit_offset % 64))
3410 classes[0] = X86_64_SSESF_CLASS;
3412 classes[0] = X86_64_SSE_CLASS;
3415 classes[0] = X86_64_SSEDF_CLASS;
3418 classes[0] = X86_64_X87_CLASS;
3419 classes[1] = X86_64_X87UP_CLASS;
3422 classes[0] = X86_64_SSE_CLASS;
3423 classes[1] = X86_64_SSEUP_CLASS;
3426 classes[0] = X86_64_SSE_CLASS;
3429 classes[0] = X86_64_SSEDF_CLASS;
3430 classes[1] = X86_64_SSEDF_CLASS;
3433 classes[0] = X86_64_COMPLEX_X87_CLASS;
      /* These modes are larger than 16 bytes.  */
3444 classes[0] = X86_64_SSE_CLASS;
3445 classes[1] = X86_64_SSEUP_CLASS;
3451 classes[0] = X86_64_SSE_CLASS;
3457 gcc_assert (VECTOR_MODE_P (mode));
3462 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
3464 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3465 classes[0] = X86_64_INTEGERSI_CLASS;
3467 classes[0] = X86_64_INTEGER_CLASS;
3468 classes[1] = X86_64_INTEGER_CLASS;
3469 return 1 + (bytes > 8);
/* Examine the argument and set the number of registers required in each
   class.  Return 0 iff the parameter should be passed in memory.  */
3476 examine_argument (enum machine_mode mode, tree type, int in_return,
3477 int *int_nregs, int *sse_nregs)
3479 enum x86_64_reg_class class[MAX_CLASSES];
3480 int n = classify_argument (mode, type, class, 0);
3486 for (n--; n >= 0; n--)
3489 case X86_64_INTEGER_CLASS:
3490 case X86_64_INTEGERSI_CLASS:
3493 case X86_64_SSE_CLASS:
3494 case X86_64_SSESF_CLASS:
3495 case X86_64_SSEDF_CLASS:
3498 case X86_64_NO_CLASS:
3499 case X86_64_SSEUP_CLASS:
3501 case X86_64_X87_CLASS:
3502 case X86_64_X87UP_CLASS:
3506 case X86_64_COMPLEX_X87_CLASS:
3507 return in_return ? 2 : 0;
3508 case X86_64_MEMORY_CLASS:
/* Construct a container for the argument used by the GCC interface.  See
   FUNCTION_ARG for the detailed description.  */
3518 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
3519 tree type, int in_return, int nintregs, int nsseregs,
3520 const int *intreg, int sse_regno)
3522 /* The following variables hold the static issued_error state. */
3523 static bool issued_sse_arg_error;
3524 static bool issued_sse_ret_error;
3525 static bool issued_x87_ret_error;
3527 enum machine_mode tmpmode;
3529 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3530 enum x86_64_reg_class class[MAX_CLASSES];
3534 int needed_sseregs, needed_intregs;
3535 rtx exp[MAX_CLASSES];
3538 n = classify_argument (mode, type, class, 0);
3541 if (!examine_argument (mode, type, in_return, &needed_intregs,
3544 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
3547 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
3548 some less clueful developer tries to use floating-point anyway. */
3549 if (needed_sseregs && !TARGET_SSE)
3553 if (!issued_sse_ret_error)
3555 error ("SSE register return with SSE disabled");
3556 issued_sse_ret_error = true;
3559 else if (!issued_sse_arg_error)
3561 error ("SSE register argument with SSE disabled");
3562 issued_sse_arg_error = true;
3567 /* Likewise, error if the ABI requires us to return values in the
3568 x87 registers and the user specified -mno-80387. */
3569 if (!TARGET_80387 && in_return)
3570 for (i = 0; i < n; i++)
3571 if (class[i] == X86_64_X87_CLASS
3572 || class[i] == X86_64_X87UP_CLASS
3573 || class[i] == X86_64_COMPLEX_X87_CLASS)
3575 if (!issued_x87_ret_error)
3577 error ("x87 register return with x87 disabled");
3578 issued_x87_ret_error = true;
3583 /* First construct simple cases. Avoid SCmode, since we want to use a
3584 single register to pass this type. */
3585 if (n == 1 && mode != SCmode)
3588 case X86_64_INTEGER_CLASS:
3589 case X86_64_INTEGERSI_CLASS:
3590 return gen_rtx_REG (mode, intreg[0]);
3591 case X86_64_SSE_CLASS:
3592 case X86_64_SSESF_CLASS:
3593 case X86_64_SSEDF_CLASS:
3594 return gen_reg_or_parallel (mode, orig_mode, SSE_REGNO (sse_regno));
3595 case X86_64_X87_CLASS:
3596 case X86_64_COMPLEX_X87_CLASS:
3597 return gen_rtx_REG (mode, FIRST_STACK_REG);
3598 case X86_64_NO_CLASS:
3599 /* Zero sized array, struct or class. */
3604 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS
3606 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
3609 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
3610 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
3611 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
3612 && class[1] == X86_64_INTEGER_CLASS
3613 && (mode == CDImode || mode == TImode || mode == TFmode)
3614 && intreg[0] + 1 == intreg[1])
3615 return gen_rtx_REG (mode, intreg[0]);
3617 /* Otherwise figure out the entries of the PARALLEL. */
3618 for (i = 0; i < n; i++)
3622 case X86_64_NO_CLASS:
3624 case X86_64_INTEGER_CLASS:
3625 case X86_64_INTEGERSI_CLASS:
3626 /* Merge TImodes on aligned occasions here too. */
3627 if (i * 8 + 8 > bytes)
3628 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
3629 else if (class[i] == X86_64_INTEGERSI_CLASS)
3633 /* We've requested 24 bytes for which we don't have a mode. Use DImode. */
3634 if (tmpmode == BLKmode)
3636 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3637 gen_rtx_REG (tmpmode, *intreg),
3641 case X86_64_SSESF_CLASS:
3642 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3643 gen_rtx_REG (SFmode,
3644 SSE_REGNO (sse_regno)),
3648 case X86_64_SSEDF_CLASS:
3649 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3650 gen_rtx_REG (DFmode,
3651 SSE_REGNO (sse_regno)),
3655 case X86_64_SSE_CLASS:
3656 if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
3660 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3661 gen_rtx_REG (tmpmode,
3662 SSE_REGNO (sse_regno)),
3664 if (tmpmode == TImode)
3673 /* Empty aligned struct, union or class. */
3677 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
3678 for (i = 0; i < nexps; i++)
3679 XVECEXP (ret, 0, i) = exp [i];
3683 /* Update the data in CUM to advance over an argument of mode MODE
3684 and data type TYPE. (TYPE is null for libcalls where that information
3685 may not be available.) */
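/* A sketch of the bookkeeping: on ia32 with -mregparm=2, two int
   arguments consume both register slots (cum->nregs goes 2 -> 0) and a
   third int goes to the stack; on x86-64 an int consumes one of the
   six integer registers and a double one of the eight SSE registers.  */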
3688 function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3689 tree type, HOST_WIDE_INT bytes, HOST_WIDE_INT words)
3705 cum->words += words;
3706 cum->nregs -= words;
3707 cum->regno += words;
3709 if (cum->nregs <= 0)
3717 if (cum->float_in_sse < 2)
3720 if (cum->float_in_sse < 1)
3731 if (!type || !AGGREGATE_TYPE_P (type))
3733 cum->sse_words += words;
3734 cum->sse_nregs -= 1;
3735 cum->sse_regno += 1;
3736 if (cum->sse_nregs <= 0)
3748 if (!type || !AGGREGATE_TYPE_P (type))
3750 cum->mmx_words += words;
3751 cum->mmx_nregs -= 1;
3752 cum->mmx_regno += 1;
3753 if (cum->mmx_nregs <= 0)
3764 function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3765 tree type, HOST_WIDE_INT words)
3767 int int_nregs, sse_nregs;
3769 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
3770 cum->words += words;
3771 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
3773 cum->nregs -= int_nregs;
3774 cum->sse_nregs -= sse_nregs;
3775 cum->regno += int_nregs;
3776 cum->sse_regno += sse_nregs;
3779 cum->words += words;
3783 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
3784 HOST_WIDE_INT words)
3786 /* Otherwise, this should be passed indirectly. */
3787 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
3789 cum->words += words;
3798 function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3799 tree type, int named ATTRIBUTE_UNUSED)
3801 HOST_WIDE_INT bytes, words;
3803 if (mode == BLKmode)
3804 bytes = int_size_in_bytes (type);
3806 bytes = GET_MODE_SIZE (mode);
3807 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3810 mode = type_natural_mode (type);
3812 if (TARGET_64BIT_MS_ABI)
3813 function_arg_advance_ms_64 (cum, bytes, words);
3814 else if (TARGET_64BIT)
3815 function_arg_advance_64 (cum, mode, type, words);
3817 function_arg_advance_32 (cum, mode, type, bytes, words);
3820 /* Define where to put the arguments to a function.
3821 Value is zero to push the argument on the stack,
3822 or a hard register in which to store the argument.
3824 MODE is the argument's machine mode.
3825 TYPE is the data type of the argument (as a tree).
3826 This is null for libcalls where that information may
3828 CUM is a variable of type CUMULATIVE_ARGS which gives info about
3829 the preceding args and about the function being called.
3830 NAMED is nonzero if this argument is a named parameter
3831 (otherwise it is an extra parameter matching an ellipsis). */
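/* Illustrative cases (not exhaustive):

     void __attribute__((fastcall)) f (int a, int b, int c);

   receives A in %ecx, B in %edx and C on the stack on ia32, while on
   x86-64

     void g (int a, double d);

   receives A in %edi and D in %xmm0.  */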
3834 function_arg_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3835 enum machine_mode orig_mode, tree type,
3836 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
3838 static bool warnedsse, warnedmmx;
3840 /* Avoid the AL settings for the Unix64 ABI. */
3841 if (mode == VOIDmode)
3857 if (words <= cum->nregs)
3859 int regno = cum->regno;
3861 /* Fastcall allocates the first two DWORD (SImode) or
3862 smaller arguments to ECX and EDX. */
3865 if (mode == BLKmode || mode == DImode)
3868 /* ECX, not EAX, is the first allocated register. */
3872 return gen_rtx_REG (mode, regno);
3877 if (cum->float_in_sse < 2)
3880 if (cum->float_in_sse < 1)
3890 if (!type || !AGGREGATE_TYPE_P (type))
3892 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
3895 warning (0, "SSE vector argument without SSE enabled "
3899 return gen_reg_or_parallel (mode, orig_mode,
3900 cum->sse_regno + FIRST_SSE_REG);
3908 if (!type || !AGGREGATE_TYPE_P (type))
3910 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
3913 warning (0, "MMX vector argument without MMX enabled "
3917 return gen_reg_or_parallel (mode, orig_mode,
3918 cum->mmx_regno + FIRST_MMX_REG);
3927 function_arg_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3928 enum machine_mode orig_mode, tree type)
3930 /* Handle a hidden AL argument containing the number of registers
3931 for varargs x86-64 functions. */
3932 if (mode == VOIDmode)
3933 return GEN_INT (cum->maybe_vaarg
3934 ? (cum->sse_nregs < 0
3939 return construct_container (mode, orig_mode, type, 0, cum->nregs,
3941 &x86_64_int_parameter_registers [cum->regno],
3946 function_arg_ms_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3947 enum machine_mode orig_mode, int named)
3951 /* Avoid the AL settings for the Unix64 ABI. */
3952 if (mode == VOIDmode)
3955 /* If we've run out of registers, it goes on the stack. */
3956 if (cum->nregs == 0)
3959 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
3961 /* Only floating point modes are passed in anything but integer regs. */
3962 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
3965 regno = cum->regno + FIRST_SSE_REG;
3970 /* Unnamed floating parameters are passed in both the
3971 SSE and integer registers. */
3972 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
3973 t2 = gen_rtx_REG (mode, regno);
3974 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
3975 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
3976 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
3980 return gen_reg_or_parallel (mode, orig_mode, regno);
3984 function_arg (CUMULATIVE_ARGS *cum, enum machine_mode omode,
3985 tree type, int named)
3987 enum machine_mode mode = omode;
3988 HOST_WIDE_INT bytes, words;
3990 if (mode == BLKmode)
3991 bytes = int_size_in_bytes (type);
3993 bytes = GET_MODE_SIZE (mode);
3994 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3996 /* To simplify the code below, represent vector types with a vector mode
3997 even if MMX/SSE are not active. */
3998 if (type && TREE_CODE (type) == VECTOR_TYPE)
3999 mode = type_natural_mode (type);
4001 if (TARGET_64BIT_MS_ABI)
4002 return function_arg_ms_64 (cum, mode, omode, named);
4003 else if (TARGET_64BIT)
4004 return function_arg_64 (cum, mode, omode, type);
4006 return function_arg_32 (cum, mode, omode, type, bytes, words);
4009 /* A C expression that indicates when an argument must be passed by
4010 reference. If nonzero for an argument, a copy of that argument is
4011 made in memory and a pointer to the argument is passed instead of
4012 the argument itself. The pointer is passed in whatever way is
4013 appropriate for passing a pointer to that type. */
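/* For example (a sketch): under the MS 64-bit ABI

     struct s3 { char c[3]; };

   has size 3, which is not 1, 2, 4 or 8, so it is passed by
   reference; under the Unix 64-bit ABI only variable-sized types
   take the by-reference path below.  */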
4016 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
4017 enum machine_mode mode ATTRIBUTE_UNUSED,
4018 tree type, bool named ATTRIBUTE_UNUSED)
4020 if (TARGET_64BIT_MS_ABI)
4024 /* Arrays are passed by reference. */
4025 if (TREE_CODE (type) == ARRAY_TYPE)
4028 if (AGGREGATE_TYPE_P (type))
4030 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
4031 are passed by reference. */
4032 int el2 = exact_log2 (int_size_in_bytes (type));
4033 return !(el2 >= 0 && el2 <= 3);
4037 /* __m128 is passed by reference. */
4038 /* ??? How to handle complex? For now treat them as structs,
4039 and pass them by reference if they're too large. */
4040 if (GET_MODE_SIZE (mode) > 8)
4043 else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
4049 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
4050 ABI. Only called if TARGET_SSE. */
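/* A sketch of what the recursion below detects: both

     struct direct { __m128 v; };
     struct nested { struct direct d; int tail; };

   contain a 128-bit aligned vector, the second one via the recursive
   walk over the structure fields.  */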
4052 contains_128bit_aligned_vector_p (tree type)
4054 enum machine_mode mode = TYPE_MODE (type);
4055 if (SSE_REG_MODE_P (mode)
4056 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
4058 if (TYPE_ALIGN (type) < 128)
4061 if (AGGREGATE_TYPE_P (type))
4063 /* Walk the aggregates recursively. */
4064 switch (TREE_CODE (type))
4068 case QUAL_UNION_TYPE:
4072 /* Walk all the structure fields. */
4073 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
4075 if (TREE_CODE (field) == FIELD_DECL
4076 && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
4083 /* Just for use if some languages pass arrays by value. */
4084 if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
4095 /* Gives the alignment boundary, in bits, of an argument with the
4096 specified mode and type. */
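/* Illustrative results on ia32 (assuming TARGET_SSE): an int argument
   gets the 32 bit PARM_BOUNDARY, while an __m128 argument, or an
   aggregate containing one, gets a 128 bit boundary.  */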
4099 ix86_function_arg_boundary (enum machine_mode mode, tree type)
4103 align = TYPE_ALIGN (type);
4105 align = GET_MODE_ALIGNMENT (mode);
4106 if (align < PARM_BOUNDARY)
4107 align = PARM_BOUNDARY;
4110 /* The i386 ABI defines all arguments to be 4 byte aligned. We have to
4111 make an exception for SSE modes since these require 128bit alignment.
4114 The handling here differs from field_alignment. ICC aligns MMX
4115 arguments to 4 byte boundaries, while structure fields are aligned
4116 to 8 byte boundaries. */
4118 align = PARM_BOUNDARY;
4121 if (!SSE_REG_MODE_P (mode))
4122 align = PARM_BOUNDARY;
4126 if (!contains_128bit_aligned_vector_p (type))
4127 align = PARM_BOUNDARY;
4135 /* Return true if N is a possible register number for a function value. */
4138 ix86_function_value_regno_p (int regno)
4145 case FIRST_FLOAT_REG:
4146 if (TARGET_64BIT_MS_ABI)
4148 return TARGET_FLOAT_RETURNS_IN_80387;
4154 if (TARGET_MACHO || TARGET_64BIT)
4162 /* Define how to find the value returned by a function.
4163 VALTYPE is the data type of the value (as a tree).
4164 If the precise function being called is known, FUNC is its FUNCTION_DECL;
4165 otherwise, FUNC is 0. */
4168 function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
4169 tree fntype, tree fn)
4173 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
4174 we normally prevent this case when mmx is not available. However
4175 some ABIs may require the result to be returned like DImode. */
4176 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
4177 regno = TARGET_MMX ? FIRST_MMX_REG : 0;
4179 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
4180 we prevent this case when sse is not available. However some ABIs
4181 may require the result to be returned like integer TImode. */
4182 else if (mode == TImode
4183 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
4184 regno = TARGET_SSE ? FIRST_SSE_REG : 0;
4186 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
4187 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
4188 regno = FIRST_FLOAT_REG;
4190 /* Most things go in %eax. */
4193 /* Override FP return register with %xmm0 for local functions when
4194 SSE math is enabled or for functions with sseregparm attribute. */
4195 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
4197 int sse_level = ix86_function_sseregparm (fntype, fn);
4198 if ((sse_level >= 1 && mode == SFmode)
4199 || (sse_level == 2 && mode == DFmode))
4200 regno = FIRST_SSE_REG;
4203 return gen_rtx_REG (orig_mode, regno);
4207 function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
4212 /* Handle libcalls, which don't provide a type node. */
4213 if (valtype == NULL)
4225 return gen_rtx_REG (mode, FIRST_SSE_REG);
4228 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
4232 return gen_rtx_REG (mode, 0);
4236 ret = construct_container (mode, orig_mode, valtype, 1,
4237 REGPARM_MAX, SSE_REGPARM_MAX,
4238 x86_64_int_return_registers, 0);
4240 /* For zero sized structures, construct_container returns NULL, but we
4241 need to keep the rest of the compiler happy by returning a meaningful value. */
4243 ret = gen_rtx_REG (orig_mode, 0);
4249 function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode)
4251 unsigned int regno = 0;
4255 if (mode == SFmode || mode == DFmode)
4256 regno = FIRST_SSE_REG;
4257 else if (VECTOR_MODE_P (mode) || GET_MODE_SIZE (mode) == 16)
4258 regno = FIRST_SSE_REG;
4261 return gen_rtx_REG (orig_mode, regno);
4265 ix86_function_value_1 (tree valtype, tree fntype_or_decl,
4266 enum machine_mode orig_mode, enum machine_mode mode)
4271 if (fntype_or_decl && DECL_P (fntype_or_decl))
4272 fn = fntype_or_decl;
4273 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
4275 if (TARGET_64BIT_MS_ABI)
4276 return function_value_ms_64 (orig_mode, mode);
4277 else if (TARGET_64BIT)
4278 return function_value_64 (orig_mode, mode, valtype);
4280 return function_value_32 (orig_mode, mode, fntype, fn);
4284 ix86_function_value (tree valtype, tree fntype_or_decl,
4285 bool outgoing ATTRIBUTE_UNUSED)
4287 enum machine_mode mode, orig_mode;
4289 orig_mode = TYPE_MODE (valtype);
4290 mode = type_natural_mode (valtype);
4291 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
4295 ix86_libcall_value (enum machine_mode mode)
4297 return ix86_function_value_1 (NULL, NULL, mode, mode);
4300 /* Return true iff type is returned in memory. */
4303 return_in_memory_32 (tree type, enum machine_mode mode)
4307 if (mode == BLKmode)
4310 size = int_size_in_bytes (type);
4312 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
4315 if (VECTOR_MODE_P (mode) || mode == TImode)
4317 /* User-created vectors small enough to fit in EAX. */
4321 /* MMX/3dNow values are returned in MM0,
4322 except when it doesn't exist. */
4324 return (TARGET_MMX ? 0 : 1);
4326 /* SSE values are returned in XMM0, except when it doesn't exist. */
4328 return (TARGET_SSE ? 0 : 1);
4343 return_in_memory_64 (tree type, enum machine_mode mode)
4345 int needed_intregs, needed_sseregs;
4346 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
4350 return_in_memory_ms_64 (tree type, enum machine_mode mode)
4352 HOST_WIDE_INT size = int_size_in_bytes (type);
4354 /* __m128 and friends are returned in xmm0. */
4355 if (size == 16 && VECTOR_MODE_P (mode))
4358 /* Otherwise, the size must be exactly 1, 2, 4 or 8 bytes. */
4359 return (size != 1 && size != 2 && size != 4 && size != 8);
4363 ix86_return_in_memory (tree type)
4365 enum machine_mode mode = type_natural_mode (type);
4367 if (TARGET_64BIT_MS_ABI)
4368 return return_in_memory_ms_64 (type, mode);
4369 else if (TARGET_64BIT)
4370 return return_in_memory_64 (type, mode);
4372 return return_in_memory_32 (type, mode);
4375 /* When returning SSE vector types, we have a choice of either
4376 (1) being ABI incompatible with a -march switch, or
4377 (2) generating an error.
4378 Given no good solution, I think the safest thing is one warning.
4379 The user won't be able to use -Werror, but....
4381 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
4382 called in response to actually generating a caller or callee that
4383 uses such a type. As opposed to RETURN_IN_MEMORY, which is called
4384 via aggregate_value_p for general type probing from tree-ssa. */
4387 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
4389 static bool warnedsse, warnedmmx;
4391 if (!TARGET_64BIT && type)
4393 /* Look at the return type of the function, not the function type. */
4394 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
4396 if (!TARGET_SSE && !warnedsse)
4399 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
4402 warning (0, "SSE vector return without SSE enabled "
4407 if (!TARGET_MMX && !warnedmmx)
4409 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
4412 warning (0, "MMX vector return without MMX enabled "
4422 /* Create the va_list data type. */
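/* For reference, the record built below matches the x86-64 psABI
   va_list layout; in C it would read (a sketch only):

     typedef struct __va_list_tag {
       unsigned int gp_offset;
       unsigned int fp_offset;
       void *overflow_arg_area;
       void *reg_save_area;
     } va_list[1];
*/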
4425 ix86_build_builtin_va_list (void)
4427 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
4429 /* For i386 we use plain pointer to argument area. */
4430 if (!TARGET_64BIT || TARGET_64BIT_MS_ABI)
4431 return build_pointer_type (char_type_node);
4433 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
4434 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
4436 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
4437 unsigned_type_node);
4438 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
4439 unsigned_type_node);
4440 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
4442 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
4445 va_list_gpr_counter_field = f_gpr;
4446 va_list_fpr_counter_field = f_fpr;
4448 DECL_FIELD_CONTEXT (f_gpr) = record;
4449 DECL_FIELD_CONTEXT (f_fpr) = record;
4450 DECL_FIELD_CONTEXT (f_ovf) = record;
4451 DECL_FIELD_CONTEXT (f_sav) = record;
4453 TREE_CHAIN (record) = type_decl;
4454 TYPE_NAME (record) = type_decl;
4455 TYPE_FIELDS (record) = f_gpr;
4456 TREE_CHAIN (f_gpr) = f_fpr;
4457 TREE_CHAIN (f_fpr) = f_ovf;
4458 TREE_CHAIN (f_ovf) = f_sav;
4460 layout_type (record);
4462 /* The correct type is an array type of one element. */
4463 return build_array_type (record, build_index_type (size_zero_node));
4466 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
4469 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
4479 if (! cfun->va_list_gpr_size && ! cfun->va_list_fpr_size)
4482 /* Indicate that we need to allocate space on the stack for the varargs save area. */
4483 ix86_save_varrargs_registers = 1;
4484 cfun->stack_alignment_needed = 128;
4486 save_area = frame_pointer_rtx;
4487 set = get_varargs_alias_set ();
4489 for (i = cum->regno;
4491 && i < cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
4494 mem = gen_rtx_MEM (Pmode,
4495 plus_constant (save_area, i * UNITS_PER_WORD));
4496 MEM_NOTRAP_P (mem) = 1;
4497 set_mem_alias_set (mem, set);
4498 emit_move_insn (mem, gen_rtx_REG (Pmode,
4499 x86_64_int_parameter_registers[i]));
4502 if (cum->sse_nregs && cfun->va_list_fpr_size)
4504 /* Now emit code to save SSE registers. The AX parameter contains the
4505 number of SSE parameter registers used to call this function. We use
4506 the sse_prologue_save insn template, which produces a computed jump
4507 across the SSE saves. We need some preparation work to get this working. */
4509 label = gen_label_rtx ();
4510 label_ref = gen_rtx_LABEL_REF (Pmode, label);
4512 /* Compute the address to jump to:
4513 label - 5*eax + nnamed_sse_arguments*5. */
4514 tmp_reg = gen_reg_rtx (Pmode);
4515 nsse_reg = gen_reg_rtx (Pmode);
4516 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
4517 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
4518 gen_rtx_MULT (Pmode, nsse_reg,
4523 gen_rtx_CONST (DImode,
4524 gen_rtx_PLUS (DImode,
4526 GEN_INT (cum->sse_regno * 4))));
4528 emit_move_insn (nsse_reg, label_ref);
4529 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
4531 /* Compute the address of the memory block we save into. We always use a
4532 pointer pointing 127 bytes after the first byte to store - the signed
4533 8-bit displacement range (-128..127) keeps each save insn within 4 bytes. */
4534 tmp_reg = gen_reg_rtx (Pmode);
4535 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
4536 plus_constant (save_area,
4537 8 * REGPARM_MAX + 127)));
4538 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
4539 MEM_NOTRAP_P (mem) = 1;
4540 set_mem_alias_set (mem, set);
4541 set_mem_align (mem, BITS_PER_WORD);
4543 /* And finally do the dirty job! */
4544 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
4545 GEN_INT (cum->sse_regno), label));
4550 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
4552 int set = get_varargs_alias_set ();
4555 for (i = cum->regno; i < REGPARM_MAX; i++)
4559 mem = gen_rtx_MEM (Pmode,
4560 plus_constant (virtual_incoming_args_rtx,
4561 i * UNITS_PER_WORD));
4562 MEM_NOTRAP_P (mem) = 1;
4563 set_mem_alias_set (mem, set);
4565 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
4566 emit_move_insn (mem, reg);
4571 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4572 tree type, int *pretend_size ATTRIBUTE_UNUSED,
4575 CUMULATIVE_ARGS next_cum;
4579 /* This argument doesn't appear to be used anymore. Which is good,
4580 because the old code here didn't suppress rtl generation. */
4581 gcc_assert (!no_rtl);
4586 fntype = TREE_TYPE (current_function_decl);
4587 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
4588 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
4589 != void_type_node));
4591 /* For varargs, we do not want to skip the dummy va_dcl argument.
4592 For stdargs, we do want to skip the last named argument. */
4595 function_arg_advance (&next_cum, mode, type, 1);
4597 if (TARGET_64BIT_MS_ABI)
4598 setup_incoming_varargs_ms_64 (&next_cum);
4600 setup_incoming_varargs_64 (&next_cum);
4603 /* Implement va_start. */
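/* In C terms the 64-bit expansion below amounts to (a sketch; the
   angle-bracketed quantities come from the incoming argument state):

     ap->gp_offset = <gp regs used by named args> * 8;
     ap->fp_offset = REGPARM_MAX * 8 + <sse regs used> * 16;
     ap->overflow_arg_area = <first stack-passed argument>;
     ap->reg_save_area = <register save area>;
*/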
4606 ix86_va_start (tree valist, rtx nextarg)
4608 HOST_WIDE_INT words, n_gpr, n_fpr;
4609 tree f_gpr, f_fpr, f_ovf, f_sav;
4610 tree gpr, fpr, ovf, sav, t;
4613 /* Only the 64-bit target needs something special. */
4614 if (!TARGET_64BIT || TARGET_64BIT_MS_ABI)
4616 std_expand_builtin_va_start (valist, nextarg);
4620 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4621 f_fpr = TREE_CHAIN (f_gpr);
4622 f_ovf = TREE_CHAIN (f_fpr);
4623 f_sav = TREE_CHAIN (f_ovf);
4625 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
4626 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
4627 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
4628 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
4629 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
4631 /* Count number of gp and fp argument registers used. */
4632 words = current_function_args_info.words;
4633 n_gpr = current_function_args_info.regno;
4634 n_fpr = current_function_args_info.sse_regno;
4636 if (cfun->va_list_gpr_size)
4638 type = TREE_TYPE (gpr);
4639 t = build2 (GIMPLE_MODIFY_STMT, type, gpr,
4640 build_int_cst (type, n_gpr * 8));
4641 TREE_SIDE_EFFECTS (t) = 1;
4642 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4645 if (cfun->va_list_fpr_size)
4647 type = TREE_TYPE (fpr);
4648 t = build2 (GIMPLE_MODIFY_STMT, type, fpr,
4649 build_int_cst (type, n_fpr * 16 + 8*REGPARM_MAX));
4650 TREE_SIDE_EFFECTS (t) = 1;
4651 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4654 /* Find the overflow area. */
4655 type = TREE_TYPE (ovf);
4656 t = make_tree (type, virtual_incoming_args_rtx);
4658 t = build2 (PLUS_EXPR, type, t,
4659 build_int_cst (type, words * UNITS_PER_WORD));
4660 t = build2 (GIMPLE_MODIFY_STMT, type, ovf, t);
4661 TREE_SIDE_EFFECTS (t) = 1;
4662 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4664 if (cfun->va_list_gpr_size || cfun->va_list_fpr_size)
4666 /* Find the register save area.
4667 The function prologue saves it right above the stack frame. */
4668 type = TREE_TYPE (sav);
4669 t = make_tree (type, frame_pointer_rtx);
4670 t = build2 (GIMPLE_MODIFY_STMT, type, sav, t);
4671 TREE_SIDE_EFFECTS (t) = 1;
4672 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4676 /* Implement va_arg. */
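/* A sketch of the expansion for a type needing a single integer
   register (the code below also handles SSE classes, mixed classes
   and alignment via a temporary):

     if (ap->gp_offset >= REGPARM_MAX * 8)
       {
         addr = ap->overflow_arg_area;
         ap->overflow_arg_area += 8;
       }
     else
       {
         addr = ap->reg_save_area + ap->gp_offset;
         ap->gp_offset += 8;
       }
     result = *(T *) addr;
*/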
4679 ix86_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p)
4681 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
4682 tree f_gpr, f_fpr, f_ovf, f_sav;
4683 tree gpr, fpr, ovf, sav, t;
4685 tree lab_false, lab_over = NULL_TREE;
4690 enum machine_mode nat_mode;
4692 /* Only the 64-bit target needs something special. */
4693 if (!TARGET_64BIT || TARGET_64BIT_MS_ABI)
4694 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
4696 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4697 f_fpr = TREE_CHAIN (f_gpr);
4698 f_ovf = TREE_CHAIN (f_fpr);
4699 f_sav = TREE_CHAIN (f_ovf);
4701 valist = build_va_arg_indirect_ref (valist);
4702 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
4703 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
4704 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
4705 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
4707 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
4709 type = build_pointer_type (type);
4710 size = int_size_in_bytes (type);
4711 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4713 nat_mode = type_natural_mode (type);
4714 container = construct_container (nat_mode, TYPE_MODE (type), type, 0,
4715 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
4717 /* Pull the value out of the saved registers. */
4719 addr = create_tmp_var (ptr_type_node, "addr");
4720 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
4724 int needed_intregs, needed_sseregs;
4726 tree int_addr, sse_addr;
4728 lab_false = create_artificial_label ();
4729 lab_over = create_artificial_label ();
4731 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
4733 need_temp = (!REG_P (container)
4734 && ((needed_intregs && TYPE_ALIGN (type) > 64)
4735 || TYPE_ALIGN (type) > 128));
4737 /* If we are passing a structure, verify that it is a consecutive block
4738 on the register save area. If not, we need to do moves. */
4739 if (!need_temp && !REG_P (container))
4741 /* Verify that all registers are strictly consecutive. */
4742 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
4746 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
4748 rtx slot = XVECEXP (container, 0, i);
4749 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
4750 || INTVAL (XEXP (slot, 1)) != i * 16)
4758 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
4760 rtx slot = XVECEXP (container, 0, i);
4761 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
4762 || INTVAL (XEXP (slot, 1)) != i * 8)
4774 int_addr = create_tmp_var (ptr_type_node, "int_addr");
4775 DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
4776 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
4777 DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();
4780 /* First ensure that we fit completely in registers. */
4783 t = build_int_cst (TREE_TYPE (gpr),
4784 (REGPARM_MAX - needed_intregs + 1) * 8);
4785 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
4786 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
4787 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
4788 gimplify_and_add (t, pre_p);
4792 t = build_int_cst (TREE_TYPE (fpr),
4793 (SSE_REGPARM_MAX - needed_sseregs + 1) * 16
4795 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
4796 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
4797 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
4798 gimplify_and_add (t, pre_p);
4801 /* Compute index to start of area used for integer regs. */
4804 /* int_addr = gpr + sav; */
4805 t = fold_convert (ptr_type_node, gpr);
4806 t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
4807 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, int_addr, t);
4808 gimplify_and_add (t, pre_p);
4812 /* sse_addr = fpr + sav; */
4813 t = fold_convert (ptr_type_node, fpr);
4814 t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
4815 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, sse_addr, t);
4816 gimplify_and_add (t, pre_p);
4821 tree temp = create_tmp_var (type, "va_arg_tmp");
4824 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
4825 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, addr, t);
4826 gimplify_and_add (t, pre_p);
4828 for (i = 0; i < XVECLEN (container, 0); i++)
4830 rtx slot = XVECEXP (container, 0, i);
4831 rtx reg = XEXP (slot, 0);
4832 enum machine_mode mode = GET_MODE (reg);
4833 tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
4834 tree addr_type = build_pointer_type (piece_type);
4837 tree dest_addr, dest;
4839 if (SSE_REGNO_P (REGNO (reg)))
4841 src_addr = sse_addr;
4842 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
4846 src_addr = int_addr;
4847 src_offset = REGNO (reg) * 8;
4849 src_addr = fold_convert (addr_type, src_addr);
4850 src_addr = fold_build2 (PLUS_EXPR, addr_type, src_addr,
4851 size_int (src_offset));
4852 src = build_va_arg_indirect_ref (src_addr);
4854 dest_addr = fold_convert (addr_type, addr);
4855 dest_addr = fold_build2 (PLUS_EXPR, addr_type, dest_addr,
4856 size_int (INTVAL (XEXP (slot, 1))));
4857 dest = build_va_arg_indirect_ref (dest_addr);
4859 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, dest, src);
4860 gimplify_and_add (t, pre_p);
4866 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
4867 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
4868 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (gpr), gpr, t);
4869 gimplify_and_add (t, pre_p);
4873 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
4874 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
4875 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (fpr), fpr, t);
4876 gimplify_and_add (t, pre_p);
4879 t = build1 (GOTO_EXPR, void_type_node, lab_over);
4880 gimplify_and_add (t, pre_p);
4882 t = build1 (LABEL_EXPR, void_type_node, lab_false);
4883 append_to_statement_list (t, pre_p);
4886 /* ... otherwise out of the overflow area. */
4888 /* Care for on-stack alignment if needed. */
4889 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64
4890 || integer_zerop (TYPE_SIZE (type)))
4894 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
4895 t = build2 (PLUS_EXPR, TREE_TYPE (ovf), ovf,
4896 build_int_cst (TREE_TYPE (ovf), align - 1));
4897 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
4898 build_int_cst (TREE_TYPE (t), -align));
4900 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
4902 t2 = build2 (GIMPLE_MODIFY_STMT, void_type_node, addr, t);
4903 gimplify_and_add (t2, pre_p);
4905 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
4906 build_int_cst (TREE_TYPE (t), rsize * UNITS_PER_WORD));
4907 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (ovf), ovf, t);
4908 gimplify_and_add (t, pre_p);
4912 t = build1 (LABEL_EXPR, void_type_node, lab_over);
4913 append_to_statement_list (t, pre_p);
4916 ptrtype = build_pointer_type (type);
4917 addr = fold_convert (ptrtype, addr);
4920 addr = build_va_arg_indirect_ref (addr);
4921 return build_va_arg_indirect_ref (addr);
4924 /* Return nonzero if OPNUM's MEM should be matched
4925 in movabs* patterns. */
4928 ix86_check_movabs (rtx insn, int opnum)
4932 set = PATTERN (insn);
4933 if (GET_CODE (set) == PARALLEL)
4934 set = XVECEXP (set, 0, 0);
4935 gcc_assert (GET_CODE (set) == SET);
4936 mem = XEXP (set, opnum);
4937 while (GET_CODE (mem) == SUBREG)
4938 mem = SUBREG_REG (mem);
4939 gcc_assert (MEM_P (mem));
4940 return (volatile_ok || !MEM_VOLATILE_P (mem));
4943 /* Initialize the table of extra 80387 mathematical constants. */
4946 init_ext_80387_constants (void)
4948 static const char * cst[5] =
4950 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
4951 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
4952 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
4953 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
4954 "3.1415926535897932385128089594061862044", /* 4: fldpi */
4958 for (i = 0; i < 5; i++)
4960 real_from_string (&ext_80387_constants_table[i], cst[i]);
4961 /* Ensure each constant is rounded to XFmode precision. */
4962 real_convert (&ext_80387_constants_table[i],
4963 XFmode, &ext_80387_constants_table[i]);
4966 ext_80387_constants_init = 1;
4969 /* Return true if the constant is something that can be loaded with
4970 a special instruction. */
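/* For example (illustrative): 0.0 is loaded with fldz and 1.0 with
   fld1; when TARGET_EXT_80387_CONSTANTS applies, values matching the
   table above load with fldlg2, fldln2, fldl2e, fldl2t or fldpi, and
   -0.0/-1.0 are split into fldz;fchs and fld1;fchs as noted below.  */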
4973 standard_80387_constant_p (rtx x)
4975 enum machine_mode mode = GET_MODE (x);
4979 if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
4982 if (x == CONST0_RTX (mode))
4984 if (x == CONST1_RTX (mode))
4987 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
4989 /* For XFmode constants, try to find a special 80387 instruction when
4990 optimizing for size or on those CPUs that benefit from them. */
4992 && (optimize_size || TARGET_EXT_80387_CONSTANTS))
4996 if (! ext_80387_constants_init)
4997 init_ext_80387_constants ();
4999 for (i = 0; i < 5; i++)
5000 if (real_identical (&r, &ext_80387_constants_table[i]))
5004 /* Load of the constant -0.0 or -1.0 will be split into the
5005 fldz;fchs or fld1;fchs sequence. */
5006 if (real_isnegzero (&r))
5008 if (real_identical (&r, &dconstm1))
5014 /* Return the opcode of the special instruction to be used to load the constant X. */
5018 standard_80387_constant_opcode (rtx x)
5020 switch (standard_80387_constant_p (x))
5044 /* Return the CONST_DOUBLE representing the 80387 constant that is
5045 loaded by the specified special instruction. The argument IDX
5046 matches the return value from standard_80387_constant_p. */
5049 standard_80387_constant_rtx (int idx)
5053 if (! ext_80387_constants_init)
5054 init_ext_80387_constants ();
5070 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
5074 /* Return 1 if mode is a valid mode for SSE. */
5076 standard_sse_mode_p (enum machine_mode mode)
5093 /* Return 1 if X is an FP constant we can load into an SSE register without using memory. */
5096 standard_sse_constant_p (rtx x)
5098 enum machine_mode mode = GET_MODE (x);
5100 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
5102 if (vector_all_ones_operand (x, mode)
5103 && standard_sse_mode_p (mode))
5104 return TARGET_SSE2 ? 2 : -1;
5109 /* Return the opcode of the special instruction to be used to load the SSE constant X. */
5113 standard_sse_constant_opcode (rtx insn, rtx x)
5115 switch (standard_sse_constant_p (x))
5118 if (get_attr_mode (insn) == MODE_V4SF)
5119 return "xorps\t%0, %0";
5120 else if (get_attr_mode (insn) == MODE_V2DF)
5121 return "xorpd\t%0, %0";
5123 return "pxor\t%0, %0";
5125 return "pcmpeqd\t%0, %0";
5130 /* Returns 1 if OP contains a symbol reference */
5133 symbolic_reference_mentioned_p (rtx op)
5138 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
5141 fmt = GET_RTX_FORMAT (GET_CODE (op));
5142 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
5148 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
5149 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
5153 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
5160 /* Return 1 if it is appropriate to emit `ret' instructions in the
5161 body of a function. Do this only if the epilogue is simple, needing a
5162 couple of insns. Prior to reloading, we can't tell how many registers
5163 must be saved, so return 0 then. Return 0 if there is no frame
5164 marker to de-allocate. */
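/* E.g. (a sketch): a stdcall function that pops 12 bytes of arguments
   can return with a single "ret $12"; the size check below refuses
   argument pops too large to handle this way.  */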
5167 ix86_can_use_return_insn_p (void)
5169 struct ix86_frame frame;
5171 if (! reload_completed || frame_pointer_needed)
5174 /* Don't allow more than 32768 bytes of arguments to be popped, since
5175 that's all we can do with one instruction. */
5176 if (current_function_pops_args
5177 && current_function_args_size >= 32768)
5180 ix86_compute_frame_layout (&frame);
5181 return frame.to_allocate == 0 && frame.nregs == 0;
5184 /* Value should be nonzero if functions must have frame pointers.
5185 Zero means the frame pointer need not be set up (and parms may
5186 be accessed via the stack pointer) in functions that seem suitable. */
5189 ix86_frame_pointer_required (void)
5191 /* If we accessed previous frames, then the generated code expects
5192 to be able to access the saved ebp value in our frame. */
5193 if (cfun->machine->accesses_prev_frame)
5196 /* Several x86 OSes need a frame pointer for other reasons,
5197 usually pertaining to setjmp. */
5198 if (SUBTARGET_FRAME_POINTER_REQUIRED)
5201 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
5202 the frame pointer by default. Turn it back on now if we've not
5203 got a leaf function. */
5204 if (TARGET_OMIT_LEAF_FRAME_POINTER
5205 && (!current_function_is_leaf
5206 || ix86_current_function_calls_tls_descriptor))
5209 if (current_function_profile)
5215 /* Record that the current function accesses previous call frames. */
5218 ix86_setup_frame_addresses (void)
5220 cfun->machine->accesses_prev_frame = 1;
5223 #if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
5224 # define USE_HIDDEN_LINKONCE 1
5226 # define USE_HIDDEN_LINKONCE 0
5229 static int pic_labels_used;
5231 /* Fills in the label name that should be used for a pc thunk for
5232 the given register. */
5235 get_pc_thunk_name (char name[32], unsigned int regno)
5237 gcc_assert (!TARGET_64BIT);
5239 if (USE_HIDDEN_LINKONCE)
5240 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
5242 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
5246 /* This function generates code for -fpic that loads %ebx with
5247 the return address of the caller and then returns. */
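/* The thunk body emitted below is, for %ebx (assembly sketch):

     __i686.get_pc_thunk.bx:
       movl (%esp), %ebx
       ret
*/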
5250 ix86_file_end (void)
5255 for (regno = 0; regno < 8; ++regno)
5259 if (! ((pic_labels_used >> regno) & 1))
5262 get_pc_thunk_name (name, regno);
5267 switch_to_section (darwin_sections[text_coal_section]);
5268 fputs ("\t.weak_definition\t", asm_out_file);
5269 assemble_name (asm_out_file, name);
5270 fputs ("\n\t.private_extern\t", asm_out_file);
5271 assemble_name (asm_out_file, name);
5272 fputs ("\n", asm_out_file);
5273 ASM_OUTPUT_LABEL (asm_out_file, name);
5277 if (USE_HIDDEN_LINKONCE)
5281 decl = build_decl (FUNCTION_DECL, get_identifier (name),
5283 TREE_PUBLIC (decl) = 1;
5284 TREE_STATIC (decl) = 1;
5285 DECL_ONE_ONLY (decl) = 1;
5287 (*targetm.asm_out.unique_section) (decl, 0);
5288 switch_to_section (get_named_section (decl, NULL, 0));
5290 (*targetm.asm_out.globalize_label) (asm_out_file, name);
5291 fputs ("\t.hidden\t", asm_out_file);
5292 assemble_name (asm_out_file, name);
5293 fputc ('\n', asm_out_file);
5294 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
5298 switch_to_section (text_section);
5299 ASM_OUTPUT_LABEL (asm_out_file, name);
5302 xops[0] = gen_rtx_REG (SImode, regno);
5303 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
5304 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
5305 output_asm_insn ("ret", xops);
5308 if (NEED_INDICATE_EXEC_STACK)
5309 file_end_indicate_exec_stack ();
5312 /* Emit code for the SET_GOT patterns. */
5315 output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
5321 if (TARGET_VXWORKS_RTP && flag_pic)
5323 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
5324 xops[2] = gen_rtx_MEM (Pmode,
5325 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
5326 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
5328 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
5329 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
5330 an unadorned address. */
5331 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
5332 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
5333 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
5337 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
5339 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
5341 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
5344 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
5346 output_asm_insn ("call\t%a2", xops);
5349 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
5350 is what will be referenced by the Mach-O PIC subsystem. */
5352 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
5355 (*targetm.asm_out.internal_label) (asm_out_file, "L",
5356 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
5359 output_asm_insn ("pop{l}\t%0", xops);
5364 get_pc_thunk_name (name, REGNO (dest));
5365 pic_labels_used |= 1 << REGNO (dest);
5367 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
5368 xops[2] = gen_rtx_MEM (QImode, xops[2]);
5369 output_asm_insn ("call\t%X2", xops);
5370 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
5371 is what will be referenced by the Mach-O PIC subsystem. */
5374 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
5376 targetm.asm_out.internal_label (asm_out_file, "L",
5377 CODE_LABEL_NUMBER (label));
5384 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
5385 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
5387 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
5392 /* Generate a "push" pattern for input ARG. */
5397 return gen_rtx_SET (VOIDmode,
5399 gen_rtx_PRE_DEC (Pmode,
5400 stack_pointer_rtx)),
5404 /* Return >= 0 if there is an unused call-clobbered register available
5405 for the entire function. */
5408 ix86_select_alt_pic_regnum (void)
5410 if (current_function_is_leaf && !current_function_profile
5411 && !ix86_current_function_calls_tls_descriptor)
5414 for (i = 2; i >= 0; --i)
5415 if (!regs_ever_live[i])
5419 return INVALID_REGNUM;
5422 /* Return 1 if we need to save REGNO. */
5424 ix86_save_reg (unsigned int regno, int maybe_eh_return)
5426 if (pic_offset_table_rtx
5427 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
5428 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
5429 || current_function_profile
5430 || current_function_calls_eh_return
5431 || current_function_uses_const_pool))
5433 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
5438 if (current_function_calls_eh_return && maybe_eh_return)
5443 unsigned test = EH_RETURN_DATA_REGNO (i);
5444 if (test == INVALID_REGNUM)
5451 if (cfun->machine->force_align_arg_pointer
5452 && regno == REGNO (cfun->machine->force_align_arg_pointer))
5455 return (regs_ever_live[regno]
5456 && !call_used_regs[regno]
5457 && !fixed_regs[regno]
5458 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
5461 /* Return number of registers to be saved on the stack. */
5464 ix86_nsaved_regs (void)
5469 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
5470 if (ix86_save_reg (regno, true))
5475 /* Return the offset between two registers, one to be eliminated, and the other
5476 its replacement, at the start of a routine. */
5479 ix86_initial_elimination_offset (int from, int to)
5481 struct ix86_frame frame;
5482 ix86_compute_frame_layout (&frame);
5484 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
5485 return frame.hard_frame_pointer_offset;
5486 else if (from == FRAME_POINTER_REGNUM
5487 && to == HARD_FRAME_POINTER_REGNUM)
5488 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
5491 gcc_assert (to == STACK_POINTER_REGNUM);
5493 if (from == ARG_POINTER_REGNUM)
5494 return frame.stack_pointer_offset;
5496 gcc_assert (from == FRAME_POINTER_REGNUM);
5497 return frame.stack_pointer_offset - frame.frame_pointer_offset;
5501 /* Fill the ix86_frame structure with information about the frame of the function currently being compiled. */
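/* A sketch of the layout computed below, from high to low addresses
   (bracketed parts are optional):

     return address
     [saved frame pointer]          <- hard_frame_pointer_offset
     register save area
     [va_arg register save area]
     [padding1]
     local frame                    <- frame_pointer_offset
     [outgoing arguments]
     [padding2]                     <- stack_pointer_offset
*/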
5504 ix86_compute_frame_layout (struct ix86_frame *frame)
5506 HOST_WIDE_INT total_size;
5507 unsigned int stack_alignment_needed;
5508 HOST_WIDE_INT offset;
5509 unsigned int preferred_alignment;
5510 HOST_WIDE_INT size = get_frame_size ();
5512 frame->nregs = ix86_nsaved_regs ();
5515 stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
5516 preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
5518 /* During the reload iteration the number of registers saved can change.
5519 Recompute the value as needed. Do not recompute when the number of
5520 registers didn't change, as reload does multiple calls to the function
5521 and does not expect the decision to change within a single iteration. */
5523 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
5525 int count = frame->nregs;
5527 cfun->machine->use_fast_prologue_epilogue_nregs = count;
5528 /* The fast prologue uses move instead of push to save registers. This
5529 is significantly longer, but also executes faster, as modern hardware
5530 can execute the moves in parallel but can't do that for push/pop.

5532 Be careful about choosing which prologue to emit: when the function
5533 takes many instructions to execute, we may use the slow version, as
5534 well as when the function is known to be outside a hot spot (this is
5535 known with feedback only). Weight the size of the function by the
5536 number of registers to save, as it is cheap to use one or two push
5537 instructions but very slow to use many of them. */
5539 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
5540 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
5541 || (flag_branch_probabilities
5542 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
5543 cfun->machine->use_fast_prologue_epilogue = false;
5545 cfun->machine->use_fast_prologue_epilogue
5546 = !expensive_function_p (count);
5548 if (TARGET_PROLOGUE_USING_MOVE
5549 && cfun->machine->use_fast_prologue_epilogue)
5550 frame->save_regs_using_mov = true;
5552 frame->save_regs_using_mov = false;
5555 /* Skip return address and saved base pointer. */
5556 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
5558 frame->hard_frame_pointer_offset = offset;
5560 /* Do some sanity checking of stack_alignment_needed and
5561 preferred_alignment, since the i386 port is the only one using these
5562 features and they may break easily. */
5564 gcc_assert (!size || stack_alignment_needed);
5565 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
5566 gcc_assert (preferred_alignment <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
5567 gcc_assert (stack_alignment_needed
5568 <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
5570 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
5571 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
5573 /* Register save area */
5574 offset += frame->nregs * UNITS_PER_WORD;
5577 if (ix86_save_varrargs_registers)
5579 offset += X86_64_VARARGS_SIZE;
5580 frame->va_arg_size = X86_64_VARARGS_SIZE;
5583 frame->va_arg_size = 0;
5585 /* Align start of frame for local function. */
5586 frame->padding1 = ((offset + stack_alignment_needed - 1)
5587 & -stack_alignment_needed) - offset;
5589 offset += frame->padding1;
5591 /* Frame pointer points here. */
5592 frame->frame_pointer_offset = offset;
5596 /* Add the outgoing arguments area. It can be skipped if we eliminated
5597 all the function calls as dead code.
5598 Skipping is however impossible when the function calls alloca. The
5599 alloca expander assumes that the last current_function_outgoing_args_size
5600 bytes of the stack frame are unused. */
5601 if (ACCUMULATE_OUTGOING_ARGS
5602 && (!current_function_is_leaf || current_function_calls_alloca
5603 || ix86_current_function_calls_tls_descriptor))
5605 offset += current_function_outgoing_args_size;
5606 frame->outgoing_arguments_size = current_function_outgoing_args_size;
5609 frame->outgoing_arguments_size = 0;
5611 /* Align the stack boundary. Only needed if we're calling another function or using alloca. */
5613 if (!current_function_is_leaf || current_function_calls_alloca
5614 || ix86_current_function_calls_tls_descriptor)
5615 frame->padding2 = ((offset + preferred_alignment - 1)
5616 & -preferred_alignment) - offset;
5618 frame->padding2 = 0;
5620 offset += frame->padding2;
5622 /* We've reached the end of the stack frame. */
5623 frame->stack_pointer_offset = offset;
5625 /* Size the prologue needs to allocate. */
5626 frame->to_allocate =
5627 (size + frame->padding1 + frame->padding2
5628 + frame->outgoing_arguments_size + frame->va_arg_size);
5630 if ((!frame->to_allocate && frame->nregs <= 1)
5631 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
5632 frame->save_regs_using_mov = false;
5634 if (TARGET_RED_ZONE && current_function_sp_is_unchanging
5635 && current_function_is_leaf
5636 && !ix86_current_function_calls_tls_descriptor)
5638 frame->red_zone_size = frame->to_allocate;
5639 if (frame->save_regs_using_mov)
5640 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
5641 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
5642 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
5645 frame->red_zone_size = 0;
5646 frame->to_allocate -= frame->red_zone_size;
5647 frame->stack_pointer_offset -= frame->red_zone_size;
5649 fprintf (stderr, "\n");
5650 fprintf (stderr, "nregs: %ld\n", (long)frame->nregs);
5651 fprintf (stderr, "size: %ld\n", (long)size);
5652 fprintf (stderr, "alignment1: %ld\n", (long)stack_alignment_needed);
5653 fprintf (stderr, "padding1: %ld\n", (long)frame->padding1);
5654 fprintf (stderr, "va_arg: %ld\n", (long)frame->va_arg_size);
5655 fprintf (stderr, "padding2: %ld\n", (long)frame->padding2);
5656 fprintf (stderr, "to_allocate: %ld\n", (long)frame->to_allocate);
5657 fprintf (stderr, "red_zone_size: %ld\n", (long)frame->red_zone_size);
5658 fprintf (stderr, "frame_pointer_offset: %ld\n", (long)frame->frame_pointer_offset);
5659 fprintf (stderr, "hard_frame_pointer_offset: %ld\n",
5660 (long)frame->hard_frame_pointer_offset);
5661 fprintf (stderr, "stack_pointer_offset: %ld\n", (long)frame->stack_pointer_offset);
5662 fprintf (stderr, "current_function_is_leaf: %ld\n", (long)current_function_is_leaf);
5663 fprintf (stderr, "current_function_calls_alloca: %ld\n", (long)current_function_calls_alloca);
5664 fprintf (stderr, "x86_current_function_calls_tls_descriptor: %ld\n", (long)ix86_current_function_calls_tls_descriptor);
5668 /* Emit code to save registers in the prologue. */
5671 ix86_emit_save_regs (void)
5676 for (regno = FIRST_PSEUDO_REGISTER; regno-- > 0; )
5677 if (ix86_save_reg (regno, true))
5679 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
5680 RTX_FRAME_RELATED_P (insn) = 1;
5684 /* Emit code to save registers using MOV insns. The first register
5685 is stored at POINTER + OFFSET. */
5687 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
5692 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5693 if (ix86_save_reg (regno, true))
5695 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
5697 gen_rtx_REG (Pmode, regno));
5698 RTX_FRAME_RELATED_P (insn) = 1;
5699 offset += UNITS_PER_WORD;
5703 /* Expand prologue or epilogue stack adjustment.
5704 The pattern exists to put a dependency on all ebp-based memory accesses.
5705 STYLE should be negative if instructions should be marked as frame
5706 related, zero if %r11 is live and cannot be freely used, and positive otherwise. */
5710 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
5715 insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
5716 else if (x86_64_immediate_operand (offset, DImode))
5717 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
5721 /* r11 is used by indirect sibcall return as well, set before the
5722 epilogue and used after the epilogue. ATM indirect sibcall
5723 shouldn't be used together with huge frame sizes in one
5724 function because of the frame_size check in sibcall.c. */
5726 r11 = gen_rtx_REG (DImode, R11_REG);
5727 insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
5729 RTX_FRAME_RELATED_P (insn) = 1;
5730 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
5734 RTX_FRAME_RELATED_P (insn) = 1;
5737 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
5740 ix86_internal_arg_pointer (void)
5742 bool has_force_align_arg_pointer =
5743 (0 != lookup_attribute (ix86_force_align_arg_pointer_string,
5744 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))));
5745 if ((FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN
5746 && DECL_NAME (current_function_decl)
5747 && MAIN_NAME_P (DECL_NAME (current_function_decl))
5748 && DECL_FILE_SCOPE_P (current_function_decl))
5749 || ix86_force_align_arg_pointer
5750 || has_force_align_arg_pointer)
5752 /* Nested functions can't realign the stack due to a register conflict. */
5754 if (DECL_CONTEXT (current_function_decl)
5755 && TREE_CODE (DECL_CONTEXT (current_function_decl)) == FUNCTION_DECL)
5757 if (ix86_force_align_arg_pointer)
5758 warning (0, "-mstackrealign ignored for nested functions");
5759 if (has_force_align_arg_pointer)
5760 error ("%s not supported for nested functions",
5761 ix86_force_align_arg_pointer_string);
5762 return virtual_incoming_args_rtx;
5764 cfun->machine->force_align_arg_pointer = gen_rtx_REG (Pmode, 2);
5765 return copy_to_reg (cfun->machine->force_align_arg_pointer);
5768 return virtual_incoming_args_rtx;
5771 /* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook.
5772 This is called from dwarf2out.c to emit call frame instructions
5773 for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs. */
5775 ix86_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
5777 rtx unspec = SET_SRC (pattern);
5778 gcc_assert (GET_CODE (unspec) == UNSPEC);
5782 case UNSPEC_REG_SAVE:
5783 dwarf2out_reg_save_reg (label, XVECEXP (unspec, 0, 0),
5784 SET_DEST (pattern));
5786 case UNSPEC_DEF_CFA:
5787 dwarf2out_def_cfa (label, REGNO (SET_DEST (pattern)),
5788 INTVAL (XVECEXP (unspec, 0, 0)));
5795 /* Expand the prologue into a bunch of separate insns. */
5798 ix86_expand_prologue (void)
5802 struct ix86_frame frame;
5803 HOST_WIDE_INT allocate;
5805 ix86_compute_frame_layout (&frame);
5807 if (cfun->machine->force_align_arg_pointer)
5811 /* Grab the argument pointer. */
5812 x = plus_constant (stack_pointer_rtx, 4);
5813 y = cfun->machine->force_align_arg_pointer;
5814 insn = emit_insn (gen_rtx_SET (VOIDmode, y, x));
5815 RTX_FRAME_RELATED_P (insn) = 1;
5817 /* The unwind info consists of two parts: install the fafp as the cfa,
5818 and record the fafp as the "save register" of the stack pointer.
5819 The latter is there in order that the unwinder can see where it
5820 should restore the stack pointer across the `and' insn. */
5821 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx), UNSPEC_DEF_CFA);
5822 x = gen_rtx_SET (VOIDmode, y, x);
5823 RTX_FRAME_RELATED_P (x) = 1;
5824 y = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, stack_pointer_rtx),
5826 y = gen_rtx_SET (VOIDmode, cfun->machine->force_align_arg_pointer, y);
5827 RTX_FRAME_RELATED_P (y) = 1;
5828 x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y));
5829 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
5830 REG_NOTES (insn) = x;
5832 /* Align the stack. */
5833 emit_insn (gen_andsi3 (stack_pointer_rtx, stack_pointer_rtx,
5836 /* And here we cheat like madmen with the unwind info. We force the
5837 cfa register back to sp+4, which is exactly what it was at the
5838 start of the function. Re-pushing the return address results in
5839 the return at the same spot relative to the cfa, and thus is
5840 correct wrt the unwind info. */
5841 x = cfun->machine->force_align_arg_pointer;
5842 x = gen_frame_mem (Pmode, plus_constant (x, -4));
5843 insn = emit_insn (gen_push (x));
5844 RTX_FRAME_RELATED_P (insn) = 1;
5847 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, x), UNSPEC_DEF_CFA);
5848 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
5849 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
5850 REG_NOTES (insn) = x;
5853 /* Note: AT&T enter does NOT have reversed args. Enter is probably
5854 slower on all targets. Also sdb doesn't like it. */
5856 if (frame_pointer_needed)
5858 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
5859 RTX_FRAME_RELATED_P (insn) = 1;
5861 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
5862 RTX_FRAME_RELATED_P (insn) = 1;
5865 allocate = frame.to_allocate;
5867 if (!frame.save_regs_using_mov)
5868 ix86_emit_save_regs ();
5870 allocate += frame.nregs * UNITS_PER_WORD;
5872 /* When using the red zone we may start register saving before allocating
5873 the stack frame, saving one cycle of the prologue. */
5874 if (TARGET_RED_ZONE && frame.save_regs_using_mov)
5875 ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
5876 : stack_pointer_rtx,
5877 -frame.nregs * UNITS_PER_WORD);
5881 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
5882 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5883 GEN_INT (-allocate), -1);
/* Only valid for Win32, and for Win64 via the MS ABI (see the assert below). */
5887 rtx eax = gen_rtx_REG (Pmode, 0);
5891 gcc_assert (!TARGET_64BIT || TARGET_64BIT_MS_ABI);
5893 if (TARGET_64BIT_MS_ABI)
5896 eax_live = ix86_eax_live_at_start_p ();
5900 emit_insn (gen_push (eax));
5901 allocate -= UNITS_PER_WORD;
5904 emit_move_insn (eax, GEN_INT (allocate));
5907 insn = gen_allocate_stack_worker_64 (eax);
5909 insn = gen_allocate_stack_worker_32 (eax);
5910 insn = emit_insn (insn);
5911 RTX_FRAME_RELATED_P (insn) = 1;
5912 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
5913 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
5914 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
5915 t, REG_NOTES (insn));
5919 if (frame_pointer_needed)
5920 t = plus_constant (hard_frame_pointer_rtx,
5923 - frame.nregs * UNITS_PER_WORD);
5925 t = plus_constant (stack_pointer_rtx, allocate);
5926 emit_move_insn (eax, gen_rtx_MEM (Pmode, t));
5930 if (frame.save_regs_using_mov && !TARGET_RED_ZONE)
5932 if (!frame_pointer_needed || !frame.to_allocate)
5933 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
5935 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
5936 -frame.nregs * UNITS_PER_WORD);
5939 pic_reg_used = false;
5940 if (pic_offset_table_rtx
5941 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
5942 || current_function_profile))
5944 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
5946 if (alt_pic_reg_used != INVALID_REGNUM)
5947 REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
5949 pic_reg_used = true;
5956 if (ix86_cmodel == CM_LARGE_PIC)
5958 rtx tmp_reg = gen_rtx_REG (DImode,
5959 FIRST_REX_INT_REG + 3 /* R11 */);
5960 rtx label = gen_label_rtx ();
5962 LABEL_PRESERVE_P (label) = 1;
5963 gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
5964 insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx, label));
5965 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
5966 insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
5967 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
5968 insn = emit_insn (gen_adddi3 (pic_offset_table_rtx,
5969 pic_offset_table_rtx, tmp_reg));
5972 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
5975 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
/* Even with accurate pre-reload life analysis, we can wind up
   deleting all references to the pic register after reload.
   Consider the case where cross-jumping unifies two sides of a
   branch controlled by a comparison vs the only read from a global.
   In that case, allow the set_got to be deleted, though we're
   too late to do anything about the ebx save in the prologue. */
5983 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
/* Prevent function calls from being scheduled before the call to mcount.
   In the pic_reg_used case, make sure that the got load isn't deleted. */
5988 if (current_function_profile)
5989 emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
5992 /* Emit code to restore saved registers using MOV insns. First register
5993 is restored from POINTER + OFFSET. */
5995 ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
5996 int maybe_eh_return)
5999 rtx base_address = gen_rtx_MEM (Pmode, pointer);
6001 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
6002 if (ix86_save_reg (regno, maybe_eh_return))
/* Ensure that adjust_address won't be forced to produce a pointer
   outside the range allowed by the x86-64 instruction set. */
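/* (An x86-64 addressing-mode displacement is a signed 32-bit
   immediate, so an offset outside that range must be materialized
   in a register first -- hence the %r11 sequence below.)  */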
6006 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
6010 r11 = gen_rtx_REG (DImode, R11_REG);
6011 emit_move_insn (r11, GEN_INT (offset));
6012 emit_insn (gen_adddi3 (r11, r11, pointer));
6013 base_address = gen_rtx_MEM (Pmode, r11);
6016 emit_move_insn (gen_rtx_REG (Pmode, regno),
6017 adjust_address (base_address, Pmode, offset));
6018 offset += UNITS_PER_WORD;
6022 /* Restore function stack, frame, and registers. */
6025 ix86_expand_epilogue (int style)
6028 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
6029 struct ix86_frame frame;
6030 HOST_WIDE_INT offset;
6032 ix86_compute_frame_layout (&frame);
6034 /* Calculate start of saved registers relative to ebp. Special care
6035 must be taken for the normal return case of a function using
6036 eh_return: the eax and edx registers are marked as saved, but not
6037 restored along this path. */
6038 offset = frame.nregs;
6039 if (current_function_calls_eh_return && style != 2)
6041 offset *= -UNITS_PER_WORD;
/* If we're only restoring one register and sp is not valid, then
   use a move instruction to restore the register, since it's
   less work than reloading sp and popping the register.

   The default code results in a stack adjustment using an add/lea
   instruction, while this code results in a LEAVE instruction (or
   discrete equivalent), so it is profitable in some other cases as
   well.  Especially when there are no registers to restore.  We also
   use this code when TARGET_USE_LEAVE and there is exactly one
   register to pop.  This heuristic may need some tuning in the
   future. */
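/* For illustration (an assumption about typical output, not part of
   the original comment): with a frame pointer and TARGET_USE_LEAVE
   this path reduces to

	leave
	ret

   while the pop-based path below yields something like

	addl	$N, %esp
	popl	%ebx
	popl	%ebp
	ret

   The condition below merely picks whichever form is expected to be
   cheaper.  */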
6053 if ((!sp_valid && frame.nregs <= 1)
6054 || (TARGET_EPILOGUE_USING_MOVE
6055 && cfun->machine->use_fast_prologue_epilogue
6056 && (frame.nregs > 1 || frame.to_allocate))
6057 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
6058 || (frame_pointer_needed && TARGET_USE_LEAVE
6059 && cfun->machine->use_fast_prologue_epilogue
6060 && frame.nregs == 1)
6061 || current_function_calls_eh_return)
6063 /* Restore registers. We can use ebp or esp to address the memory
6064 locations. If both are available, default to ebp, since offsets
are known to be small.  The only exception is esp pointing directly
   to the end of the block of saved registers, where we may simplify
   the addressing mode. */
6069 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
6070 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
6071 frame.to_allocate, style == 2);
6073 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
6074 offset, style == 2);
6076 /* eh_return epilogues need %ecx added to the stack pointer. */
6079 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
6081 if (frame_pointer_needed)
6083 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
6084 tmp = plus_constant (tmp, UNITS_PER_WORD);
6085 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
6087 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
6088 emit_move_insn (hard_frame_pointer_rtx, tmp);
6090 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
6095 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
6096 tmp = plus_constant (tmp, (frame.to_allocate
6097 + frame.nregs * UNITS_PER_WORD));
6098 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
6101 else if (!frame_pointer_needed)
6102 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
6103 GEN_INT (frame.to_allocate
6104 + frame.nregs * UNITS_PER_WORD),
6106 /* If not an i386, mov & pop is faster than "leave". */
6107 else if (TARGET_USE_LEAVE || optimize_size
6108 || !cfun->machine->use_fast_prologue_epilogue)
6109 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
6112 pro_epilogue_adjust_stack (stack_pointer_rtx,
6113 hard_frame_pointer_rtx,
6116 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
6118 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
6123 /* First step is to deallocate the stack frame so that we can
6124 pop the registers. */
6127 gcc_assert (frame_pointer_needed);
6128 pro_epilogue_adjust_stack (stack_pointer_rtx,
6129 hard_frame_pointer_rtx,
6130 GEN_INT (offset), style);
6132 else if (frame.to_allocate)
6133 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
6134 GEN_INT (frame.to_allocate), style);
6136 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
6137 if (ix86_save_reg (regno, false))
6140 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
6142 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
6144 if (frame_pointer_needed)
6146 /* Leave results in shorter dependency chains on CPUs that are
6147 able to grok it fast. */
6148 if (TARGET_USE_LEAVE)
6149 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
6150 else if (TARGET_64BIT)
6151 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
6153 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
6157 if (cfun->machine->force_align_arg_pointer)
6159 emit_insn (gen_addsi3 (stack_pointer_rtx,
6160 cfun->machine->force_align_arg_pointer,
6164 /* Sibcall epilogues don't want a return instruction. */
6168 if (current_function_pops_args && current_function_args_size)
6170 rtx popc = GEN_INT (current_function_pops_args);
6172 /* i386 can only pop 64K bytes. If asked to pop more, pop
return address, do an explicit add, and jump indirectly to the
   caller. */
6176 if (current_function_pops_args >= 65536)
6178 rtx ecx = gen_rtx_REG (SImode, 2);
6180 /* There is no "pascal" calling convention in any 64bit ABI. */
6181 gcc_assert (!TARGET_64BIT);
6183 emit_insn (gen_popsi1 (ecx));
6184 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
6185 emit_jump_insn (gen_return_indirect_internal (ecx));
6188 emit_jump_insn (gen_return_pop_internal (popc));
6191 emit_jump_insn (gen_return_internal ());
6194 /* Reset from the function's potential modifications. */
6197 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
6198 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
6200 if (pic_offset_table_rtx)
6201 REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
6203 /* Mach-O doesn't support labels at the end of objects, so if
6204 it looks like we might want one, insert a NOP. */
6206 rtx insn = get_last_insn ();
6209 && NOTE_LINE_NUMBER (insn) != NOTE_INSN_DELETED_LABEL)
6210 insn = PREV_INSN (insn);
6214 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_DELETED_LABEL)))
6215 fputs ("\tnop\n", file);
6221 /* Extract the parts of an RTL expression that is a valid memory address
6222 for an instruction. Return 0 if the structure of the address is
grossly off.  Return -1 if the address contains ASHIFT, so it is not
   strictly valid, but still used for computing the length of the lea
   instruction. */
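/* A worked example (illustrative, not part of the original comment):
   the address 12(%ebx,%ecx,4), i.e.

	(plus (plus (reg %ebx)
		    (mult (reg %ecx) (const_int 4)))
	      (const_int 12))

   decomposes into base = %ebx, index = %ecx, scale = 4, disp = 12
   and seg = SEG_DEFAULT.  */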
6227 ix86_decompose_address (rtx addr, struct ix86_address *out)
6229 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
6230 rtx base_reg, index_reg;
6231 HOST_WIDE_INT scale = 1;
6232 rtx scale_rtx = NULL_RTX;
6234 enum ix86_address_seg seg = SEG_DEFAULT;
6236 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
6238 else if (GET_CODE (addr) == PLUS)
6248 addends[n++] = XEXP (op, 1);
6251 while (GET_CODE (op) == PLUS);
6256 for (i = n; i >= 0; --i)
6259 switch (GET_CODE (op))
6264 index = XEXP (op, 0);
6265 scale_rtx = XEXP (op, 1);
6269 if (XINT (op, 1) == UNSPEC_TP
6270 && TARGET_TLS_DIRECT_SEG_REFS
6271 && seg == SEG_DEFAULT)
6272 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
6301 else if (GET_CODE (addr) == MULT)
6303 index = XEXP (addr, 0); /* index*scale */
6304 scale_rtx = XEXP (addr, 1);
6306 else if (GET_CODE (addr) == ASHIFT)
6310 /* We're called for lea too, which implements ashift on occasion. */
6311 index = XEXP (addr, 0);
6312 tmp = XEXP (addr, 1);
6313 if (!CONST_INT_P (tmp))
6315 scale = INTVAL (tmp);
6316 if ((unsigned HOST_WIDE_INT) scale > 3)
6322 disp = addr; /* displacement */
6324 /* Extract the integral value of scale. */
6327 if (!CONST_INT_P (scale_rtx))
6329 scale = INTVAL (scale_rtx);
6332 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
6333 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
/* Allow the arg pointer and stack pointer as index if there is no scaling. */
6336 if (base_reg && index_reg && scale == 1
6337 && (index_reg == arg_pointer_rtx
6338 || index_reg == frame_pointer_rtx
6339 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
6342 tmp = base, base = index, index = tmp;
6343 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
6346 /* Special case: %ebp cannot be encoded as a base without a displacement. */
6347 if ((base_reg == hard_frame_pointer_rtx
6348 || base_reg == frame_pointer_rtx
6349 || base_reg == arg_pointer_rtx) && !disp)
/* Special case: on K6, [%esi] causes the instruction to be vector
   decoded.  Avoid this by transforming to [%esi+0]. */
6354 if (ix86_tune == PROCESSOR_K6 && !optimize_size
6355 && base_reg && !index_reg && !disp
6357 && REGNO_REG_CLASS (REGNO (base_reg)) == SIREG)
6360 /* Special case: encode reg+reg instead of reg*2. */
6361 if (!base && index && scale && scale == 2)
6362 base = index, base_reg = index_reg, scale = 1;
6364 /* Special case: scaling cannot be encoded without base or displacement. */
6365 if (!base && !disp && index && scale != 1)
/* Return the cost of the memory address x.
   For i386, it is better to use a complex address than let gcc copy
   the address into a reg and make a new pseudo.  But not if the address
   requires two regs - that would mean more pseudos with longer
   lifetimes. */
6383 ix86_address_cost (rtx x)
6385 struct ix86_address parts;
6387 int ok = ix86_decompose_address (x, &parts);
6391 if (parts.base && GET_CODE (parts.base) == SUBREG)
6392 parts.base = SUBREG_REG (parts.base);
6393 if (parts.index && GET_CODE (parts.index) == SUBREG)
6394 parts.index = SUBREG_REG (parts.index);
6396 /* More complex memory references are better. */
6397 if (parts.disp && parts.disp != const0_rtx)
6399 if (parts.seg != SEG_DEFAULT)
6402 /* Attempt to minimize number of registers in the address. */
6404 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
6406 && (!REG_P (parts.index)
6407 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
6411 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
6413 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
6414 && parts.base != parts.index)
/* AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
   since its predecode logic can't detect the length of instructions
   and they degenerate to vector decoded.  Increase the cost of such
   addresses here.  The penalty is minimally 2 cycles.  It may be
   worthwhile to split such addresses or even refuse such addresses
   at all.

   The following addressing modes are affected:
    [base+scale*index]
    [scale*index+disp]
    [base+index]

   The first and last case may be avoidable by explicitly coding the
   zero in the memory address, but I don't have an AMD-K6 machine
   handy to check this theory. */
6433 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
6434 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
6435 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
/* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O, as
   this is used to form addresses to local data when -fPIC is in
   effect. */
6446 darwin_local_data_pic (rtx disp)
6448 if (GET_CODE (disp) == MINUS)
6450 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
6451 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
6452 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
6454 const char *sym_name = XSTR (XEXP (disp, 1), 0);
6455 if (! strcmp (sym_name, "<pic base>"))
6463 /* Determine if a given RTX is a valid constant. We already know this
6464 satisfies CONSTANT_P. */
6467 legitimate_constant_p (rtx x)
6469 switch (GET_CODE (x))
6474 if (GET_CODE (x) == PLUS)
6476 if (!CONST_INT_P (XEXP (x, 1)))
6481 if (TARGET_MACHO && darwin_local_data_pic (x))
6484 /* Only some unspecs are valid as "constants". */
6485 if (GET_CODE (x) == UNSPEC)
6486 switch (XINT (x, 1))
6491 return TARGET_64BIT;
6494 x = XVECEXP (x, 0, 0);
6495 return (GET_CODE (x) == SYMBOL_REF
6496 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
6498 x = XVECEXP (x, 0, 0);
6499 return (GET_CODE (x) == SYMBOL_REF
6500 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
6505 /* We must have drilled down to a symbol. */
6506 if (GET_CODE (x) == LABEL_REF)
6508 if (GET_CODE (x) != SYMBOL_REF)
6513 /* TLS symbols are never valid. */
6514 if (SYMBOL_REF_TLS_MODEL (x))
6517 /* DLLIMPORT symbols are never valid. */
6518 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
6519 && SYMBOL_REF_DLLIMPORT_P (x))
6524 if (GET_MODE (x) == TImode
6525 && x != CONST0_RTX (TImode)
6531 if (x == CONST0_RTX (GET_MODE (x)))
6539 /* Otherwise we handle everything else in the move patterns. */
6543 /* Determine if it's legal to put X into the constant pool. This
6544 is not possible for the address of thread-local symbols, which
6545 is checked above. */
6548 ix86_cannot_force_const_mem (rtx x)
6550 /* We can always put integral constants and vectors in memory. */
6551 switch (GET_CODE (x))
6561 return !legitimate_constant_p (x);
6564 /* Determine if a given RTX is a valid constant address. */
6567 constant_address_p (rtx x)
6569 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
6572 /* Nonzero if the constant value X is a legitimate general operand
6573 when generating PIC code. It is given that flag_pic is on and
6574 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
6577 legitimate_pic_operand_p (rtx x)
6581 switch (GET_CODE (x))
6584 inner = XEXP (x, 0);
6585 if (GET_CODE (inner) == PLUS
6586 && CONST_INT_P (XEXP (inner, 1)))
6587 inner = XEXP (inner, 0);
6589 /* Only some unspecs are valid as "constants". */
6590 if (GET_CODE (inner) == UNSPEC)
6591 switch (XINT (inner, 1))
6596 return TARGET_64BIT;
6598 x = XVECEXP (inner, 0, 0);
6599 return (GET_CODE (x) == SYMBOL_REF
6600 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
6608 return legitimate_pic_address_disp_p (x);
/* Determine if a given CONST RTX is a valid memory displacement
   in PIC mode. */
6619 legitimate_pic_address_disp_p (rtx disp)
6623 /* In 64bit mode we can allow direct addresses of symbols and labels
6624 when they are not dynamic symbols. */
6627 rtx op0 = disp, op1;
6629 switch (GET_CODE (disp))
6635 if (GET_CODE (XEXP (disp, 0)) != PLUS)
6637 op0 = XEXP (XEXP (disp, 0), 0);
6638 op1 = XEXP (XEXP (disp, 0), 1);
6639 if (!CONST_INT_P (op1)
6640 || INTVAL (op1) >= 16*1024*1024
6641 || INTVAL (op1) < -16*1024*1024)
6643 if (GET_CODE (op0) == LABEL_REF)
6645 if (GET_CODE (op0) != SYMBOL_REF)
6650 /* TLS references should always be enclosed in UNSPEC. */
6651 if (SYMBOL_REF_TLS_MODEL (op0))
6653 if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0)
6654 && ix86_cmodel != CM_LARGE_PIC)
6662 if (GET_CODE (disp) != CONST)
6664 disp = XEXP (disp, 0);
/* It is unsafe to allow PLUS expressions here; that would permit a
   limited displacement into the GOT table.  We should not need these
   anyway. */
6670 if (GET_CODE (disp) != UNSPEC
6671 || (XINT (disp, 1) != UNSPEC_GOTPCREL
6672 && XINT (disp, 1) != UNSPEC_GOTOFF
6673 && XINT (disp, 1) != UNSPEC_PLTOFF))
6676 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
6677 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
6683 if (GET_CODE (disp) == PLUS)
6685 if (!CONST_INT_P (XEXP (disp, 1)))
6687 disp = XEXP (disp, 0);
6691 if (TARGET_MACHO && darwin_local_data_pic (disp))
6694 if (GET_CODE (disp) != UNSPEC)
6697 switch (XINT (disp, 1))
6702 /* We need to check for both symbols and labels because VxWorks loads
text labels with @GOT rather than @GOTOFF.  See gotoff_operand for
   details. */
6705 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
6706 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
/* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
   While the ABI also specifies a 32bit relocation, we don't produce
   it in the small PIC model at all. */
6711 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
6712 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
6714 return gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
6716 case UNSPEC_GOTTPOFF:
6717 case UNSPEC_GOTNTPOFF:
6718 case UNSPEC_INDNTPOFF:
6721 disp = XVECEXP (disp, 0, 0);
6722 return (GET_CODE (disp) == SYMBOL_REF
6723 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
6725 disp = XVECEXP (disp, 0, 0);
6726 return (GET_CODE (disp) == SYMBOL_REF
6727 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
6729 disp = XVECEXP (disp, 0, 0);
6730 return (GET_CODE (disp) == SYMBOL_REF
6731 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
6737 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
6738 memory address for an instruction. The MODE argument is the machine mode
6739 for the MEM expression that wants to use this address.
It only recognizes addresses in canonical form.  LEGITIMIZE_ADDRESS
   should convert common non-canonical forms to canonical form so that
   they will be recognized. */
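/* For example (illustrative): (plus (reg) (mult (reg) (const_int 4)))
   is the canonical form accepted here, while the equivalent
   (plus (reg) (ashift (reg) (const_int 2))) is expected to have been
   rewritten by LEGITIMIZE_ADDRESS first.  */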
6746 legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
6747 rtx addr, int strict)
6749 struct ix86_address parts;
6750 rtx base, index, disp;
6751 HOST_WIDE_INT scale;
6752 const char *reason = NULL;
6753 rtx reason_rtx = NULL_RTX;
6755 if (ix86_decompose_address (addr, &parts) <= 0)
6757 reason = "decomposition failed";
6762 index = parts.index;
6764 scale = parts.scale;
6766 /* Validate base register.
6768 Don't allow SUBREG's that span more than a word here. It can lead to spill
6769 failures when the base is one word out of a two word structure, which is
6770 represented internally as a DImode int. */
6779 else if (GET_CODE (base) == SUBREG
6780 && REG_P (SUBREG_REG (base))
6781 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
6783 reg = SUBREG_REG (base);
6786 reason = "base is not a register";
6790 if (GET_MODE (base) != Pmode)
6792 reason = "base is not in Pmode";
6796 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
6797 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
6799 reason = "base is not valid";
6804 /* Validate index register.
6806 Don't allow SUBREG's that span more than a word here -- same as above. */
6815 else if (GET_CODE (index) == SUBREG
6816 && REG_P (SUBREG_REG (index))
6817 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
6819 reg = SUBREG_REG (index);
6822 reason = "index is not a register";
6826 if (GET_MODE (index) != Pmode)
6828 reason = "index is not in Pmode";
6832 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
6833 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
6835 reason = "index is not valid";
6840 /* Validate scale factor. */
6843 reason_rtx = GEN_INT (scale);
6846 reason = "scale without index";
6850 if (scale != 2 && scale != 4 && scale != 8)
6852 reason = "scale is not a valid multiplier";
6857 /* Validate displacement. */
6862 if (GET_CODE (disp) == CONST
6863 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
6864 switch (XINT (XEXP (disp, 0), 1))
/* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
   used.  While the ABI also specifies 32bit relocations, we don't
   produce them at all and use IP-relative addressing instead. */
6871 gcc_assert (flag_pic);
6873 goto is_legitimate_pic;
6874 reason = "64bit address unspec";
6877 case UNSPEC_GOTPCREL:
6878 gcc_assert (flag_pic);
6879 goto is_legitimate_pic;
6881 case UNSPEC_GOTTPOFF:
6882 case UNSPEC_GOTNTPOFF:
6883 case UNSPEC_INDNTPOFF:
6889 reason = "invalid address unspec";
6893 else if (SYMBOLIC_CONST (disp)
6897 && MACHOPIC_INDIRECT
6898 && !machopic_operand_p (disp)
6904 if (TARGET_64BIT && (index || base))
6906 /* foo@dtpoff(%rX) is ok. */
6907 if (GET_CODE (disp) != CONST
6908 || GET_CODE (XEXP (disp, 0)) != PLUS
6909 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
6910 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
6911 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
6912 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
6914 reason = "non-constant pic memory reference";
6918 else if (! legitimate_pic_address_disp_p (disp))
6920 reason = "displacement is an invalid pic construct";
/* This code used to verify that a symbolic pic displacement
   includes the pic_offset_table_rtx register.

   While this is a good idea, unfortunately these constructs may
   be created by the "adds using lea" optimization for incorrect
   code like:

   int a;
   int foo(int i)
     {
       return *(&a+i);
     }

   This code is nonsensical, but results in addressing the
   GOT table with a pic_offset_table_rtx base.  We can't
   just refuse it easily, since it gets matched by the
   "addsi3" pattern, which later gets split to lea in the
   case the output register differs from the input.  While this
   could be handled by a separate addsi pattern for this case
   that never results in lea, disabling this test seems to be
   the easier and correct fix for the crash. */
6946 else if (GET_CODE (disp) != LABEL_REF
6947 && !CONST_INT_P (disp)
6948 && (GET_CODE (disp) != CONST
6949 || !legitimate_constant_p (disp))
6950 && (GET_CODE (disp) != SYMBOL_REF
6951 || !legitimate_constant_p (disp)))
6953 reason = "displacement is not constant";
6956 else if (TARGET_64BIT
6957 && !x86_64_immediate_operand (disp, VOIDmode))
6959 reason = "displacement is out of range";
6964 /* Everything looks valid. */
6971 /* Return a unique alias set for the GOT. */
6973 static HOST_WIDE_INT
6974 ix86_GOT_alias_set (void)
6976 static HOST_WIDE_INT set = -1;
6978 set = new_alias_set ();
6982 /* Return a legitimate reference for ORIG (an address) using the
6983 register REG. If REG is 0, a new pseudo is generated.
6985 There are two types of references that must be handled:
6987 1. Global data references must load the address from the GOT, via
the PIC reg.  An insn is emitted to do this load, and the reg is
   returned.
6991 2. Static data references, constant pool addresses, and code labels
6992 compute the address as an offset from the GOT, whose base is in
6993 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
6994 differentiate them from global data objects. The returned
6995 address is the PIC reg + an unspec constant.
6997 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
6998 reg also appears in the address. */
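/* An illustrative sketch of the two cases for 32-bit ELF PIC with
   the PIC register in %ebx (an example, not from the original
   source):

	extern int g;  ->  movl  g@GOT(%ebx), %reg	# load address from the GOT
	static int s;  ->  leal  s@GOTOFF(%ebx), %reg	# offset from the GOT base  */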
7001 legitimize_pic_address (rtx orig, rtx reg)
7008 if (TARGET_MACHO && !TARGET_64BIT)
7011 reg = gen_reg_rtx (Pmode);
7012 /* Use the generic Mach-O PIC machinery. */
7013 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
7017 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
7019 else if (TARGET_64BIT
7020 && ix86_cmodel != CM_SMALL_PIC
7021 && gotoff_operand (addr, Pmode))
7024 /* This symbol may be referenced via a displacement from the PIC
7025 base address (@GOTOFF). */
7027 if (reload_in_progress)
7028 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
7029 if (GET_CODE (addr) == CONST)
7030 addr = XEXP (addr, 0);
7031 if (GET_CODE (addr) == PLUS)
7033 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
7035 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
7038 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
7039 new = gen_rtx_CONST (Pmode, new);
7041 tmpreg = gen_reg_rtx (Pmode);
7044 emit_move_insn (tmpreg, new);
7048 new = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
7049 tmpreg, 1, OPTAB_DIRECT);
7052 else new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
7054 else if (!TARGET_64BIT && gotoff_operand (addr, Pmode))
7056 /* This symbol may be referenced via a displacement from the PIC
7057 base address (@GOTOFF). */
7059 if (reload_in_progress)
7060 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
7061 if (GET_CODE (addr) == CONST)
7062 addr = XEXP (addr, 0);
7063 if (GET_CODE (addr) == PLUS)
7065 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
7067 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
7070 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
7071 new = gen_rtx_CONST (Pmode, new);
7072 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
7076 emit_move_insn (reg, new);
7080 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
7081 /* We can't use @GOTOFF for text labels on VxWorks;
7082 see gotoff_operand. */
7083 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
7085 /* Given that we've already handled dllimport variables separately
7086 in legitimize_address, and all other variables should satisfy
7087 legitimate_pic_address_disp_p, we should never arrive here. */
7088 gcc_assert (!TARGET_64BIT_MS_ABI);
7090 if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
7092 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
7093 new = gen_rtx_CONST (Pmode, new);
7094 new = gen_const_mem (Pmode, new);
7095 set_mem_alias_set (new, ix86_GOT_alias_set ());
7098 reg = gen_reg_rtx (Pmode);
/* Use gen_movsi directly; otherwise the address is loaded
   into a register for CSE.  We don't want to CSE these addresses;
   instead we CSE addresses from the GOT table, so skip this. */
7102 emit_insn (gen_movsi (reg, new));
7107 /* This symbol must be referenced via a load from the
7108 Global Offset Table (@GOT). */
7110 if (reload_in_progress)
7111 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
7112 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
7113 new = gen_rtx_CONST (Pmode, new);
7115 new = force_reg (Pmode, new);
7116 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
7117 new = gen_const_mem (Pmode, new);
7118 set_mem_alias_set (new, ix86_GOT_alias_set ());
7121 reg = gen_reg_rtx (Pmode);
7122 emit_move_insn (reg, new);
7128 if (CONST_INT_P (addr)
7129 && !x86_64_immediate_operand (addr, VOIDmode))
7133 emit_move_insn (reg, addr);
7137 new = force_reg (Pmode, addr);
7139 else if (GET_CODE (addr) == CONST)
7141 addr = XEXP (addr, 0);
7143 /* We must match stuff we generate before. Assume the only
7144 unspecs that can get here are ours. Not that we could do
7145 anything with them anyway.... */
7146 if (GET_CODE (addr) == UNSPEC
7147 || (GET_CODE (addr) == PLUS
7148 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
7150 gcc_assert (GET_CODE (addr) == PLUS);
7152 if (GET_CODE (addr) == PLUS)
7154 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
7156 /* Check first to see if this is a constant offset from a @GOTOFF
7157 symbol reference. */
7158 if (gotoff_operand (op0, Pmode)
7159 && CONST_INT_P (op1))
7163 if (reload_in_progress)
7164 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
7165 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
7167 new = gen_rtx_PLUS (Pmode, new, op1);
7168 new = gen_rtx_CONST (Pmode, new);
7169 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
7173 emit_move_insn (reg, new);
7179 if (INTVAL (op1) < -16*1024*1024
7180 || INTVAL (op1) >= 16*1024*1024)
7182 if (!x86_64_immediate_operand (op1, Pmode))
7183 op1 = force_reg (Pmode, op1);
7184 new = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
7190 base = legitimize_pic_address (XEXP (addr, 0), reg);
7191 new = legitimize_pic_address (XEXP (addr, 1),
7192 base == reg ? NULL_RTX : reg);
7194 if (CONST_INT_P (new))
7195 new = plus_constant (base, INTVAL (new));
7198 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
7200 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
7201 new = XEXP (new, 1);
7203 new = gen_rtx_PLUS (Pmode, base, new);
7211 /* Load the thread pointer. If TO_REG is true, force it into a register. */
7214 get_thread_pointer (int to_reg)
7218 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
7222 reg = gen_reg_rtx (Pmode);
7223 insn = gen_rtx_SET (VOIDmode, reg, tp);
7224 insn = emit_insn (insn);
7229 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
7230 false if we expect this to be used for a memory address and true if
7231 we expect to load the address into a register. */
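/* Rough examples of the 32-bit GNU TLS sequences chosen below (an
   illustration, not from the original source):

	local exec:	movl	%gs:0, %eax
			leal	x@NTPOFF(%eax), %eax
	initial exec:	movl	%gs:0, %eax
			addl	x@GOTNTPOFF(%ebx), %eax

   The global- and local-dynamic models instead obtain the address
   through a call to ___tls_get_addr.  */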
7234 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
7236 rtx dest, base, off, pic, tp;
7241 case TLS_MODEL_GLOBAL_DYNAMIC:
7242 dest = gen_reg_rtx (Pmode);
7243 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
7245 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
7247 rtx rax = gen_rtx_REG (Pmode, 0), insns;
7250 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
7251 insns = get_insns ();
7254 emit_libcall_block (insns, dest, rax, x);
7256 else if (TARGET_64BIT && TARGET_GNU2_TLS)
7257 emit_insn (gen_tls_global_dynamic_64 (dest, x));
7259 emit_insn (gen_tls_global_dynamic_32 (dest, x));
7261 if (TARGET_GNU2_TLS)
7263 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
7265 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
7269 case TLS_MODEL_LOCAL_DYNAMIC:
7270 base = gen_reg_rtx (Pmode);
7271 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
7273 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
7275 rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
7278 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
7279 insns = get_insns ();
7282 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
7283 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
7284 emit_libcall_block (insns, base, rax, note);
7286 else if (TARGET_64BIT && TARGET_GNU2_TLS)
7287 emit_insn (gen_tls_local_dynamic_base_64 (base));
7289 emit_insn (gen_tls_local_dynamic_base_32 (base));
7291 if (TARGET_GNU2_TLS)
7293 rtx x = ix86_tls_module_base ();
7295 set_unique_reg_note (get_last_insn (), REG_EQUIV,
7296 gen_rtx_MINUS (Pmode, x, tp));
7299 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
7300 off = gen_rtx_CONST (Pmode, off);
7302 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
7304 if (TARGET_GNU2_TLS)
7306 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
7308 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
7313 case TLS_MODEL_INITIAL_EXEC:
7317 type = UNSPEC_GOTNTPOFF;
7321 if (reload_in_progress)
7322 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
7323 pic = pic_offset_table_rtx;
7324 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
7326 else if (!TARGET_ANY_GNU_TLS)
7328 pic = gen_reg_rtx (Pmode);
7329 emit_insn (gen_set_got (pic));
7330 type = UNSPEC_GOTTPOFF;
7335 type = UNSPEC_INDNTPOFF;
7338 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
7339 off = gen_rtx_CONST (Pmode, off);
7341 off = gen_rtx_PLUS (Pmode, pic, off);
7342 off = gen_const_mem (Pmode, off);
7343 set_mem_alias_set (off, ix86_GOT_alias_set ());
7345 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7347 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
7348 off = force_reg (Pmode, off);
7349 return gen_rtx_PLUS (Pmode, base, off);
7353 base = get_thread_pointer (true);
7354 dest = gen_reg_rtx (Pmode);
7355 emit_insn (gen_subsi3 (dest, base, off));
7359 case TLS_MODEL_LOCAL_EXEC:
7360 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
7361 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7362 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
7363 off = gen_rtx_CONST (Pmode, off);
7365 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7367 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
7368 return gen_rtx_PLUS (Pmode, base, off);
7372 base = get_thread_pointer (true);
7373 dest = gen_reg_rtx (Pmode);
7374 emit_insn (gen_subsi3 (dest, base, off));
/* Create or return the unique __imp_DECL dllimport symbol corresponding
   to DECL. */
7388 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
7389 htab_t dllimport_map;
7392 get_dllimport_decl (tree decl)
7394 struct tree_map *h, in;
7398 size_t namelen, prefixlen;
7404 dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);
7406 in.hash = htab_hash_pointer (decl);
7407 in.base.from = decl;
7408 loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
7413 *loc = h = ggc_alloc (sizeof (struct tree_map));
7415 h->base.from = decl;
7416 h->to = to = build_decl (VAR_DECL, NULL, ptr_type_node);
7417 DECL_ARTIFICIAL (to) = 1;
7418 DECL_IGNORED_P (to) = 1;
7419 DECL_EXTERNAL (to) = 1;
7420 TREE_READONLY (to) = 1;
7422 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
7423 name = targetm.strip_name_encoding (name);
7424 if (name[0] == FASTCALL_PREFIX)
7430 prefix = "*__imp__";
7432 namelen = strlen (name);
7433 prefixlen = strlen (prefix);
7434 imp_name = alloca (namelen + prefixlen + 1);
7435 memcpy (imp_name, prefix, prefixlen);
7436 memcpy (imp_name + prefixlen, name, namelen + 1);
7438 name = ggc_alloc_string (imp_name, namelen + prefixlen);
7439 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
7440 SET_SYMBOL_REF_DECL (rtl, to);
7441 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL;
7443 rtl = gen_const_mem (Pmode, rtl);
7444 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
7446 SET_DECL_RTL (to, rtl);
/* Expand SYMBOL into its corresponding dllimport symbol.  WANT_REG is
   true if we require the result to be a register. */
7455 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
7460 gcc_assert (SYMBOL_REF_DECL (symbol));
7461 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol));
7463 x = DECL_RTL (imp_decl);
7465 x = force_reg (Pmode, x);
7469 /* Try machine-dependent ways of modifying an illegitimate address
7470 to be legitimate. If we find one, return the new, valid address.
7471 This macro is used in only one place: `memory_address' in explow.c.
7473 OLDX is the address as it was before break_out_memory_refs was called.
7474 In some cases it is useful to look at this to decide what needs to be done.
7476 MODE and WIN are passed so that this macro can use
7477 GO_IF_LEGITIMATE_ADDRESS.
7479 It is always safe for this macro to do nothing. It exists to recognize
7480 opportunities to optimize the output.
7482 For the 80386, we handle X+REG by loading X into a register R and
7483 using R+REG. R will go in a general reg and indexing will be used.
7484 However, if REG is a broken-out memory address or multiplication,
7485 nothing needs to be done because REG can certainly go in a general reg.
7487 When -fpic is used, special handling is needed for symbolic references.
7488 See comments by legitimize_pic_address in i386.c for details. */
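/* For instance (illustrative): given (plus (symbol_ref "buf") (reg)),
   non-PIC code can leave the symbol in place so the operand matches
   as buf(%reg), while with -fpic the same address is routed through
   legitimize_pic_address to obtain a @GOT/@GOTOFF form.  */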
7491 legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
7496 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
7498 return legitimize_tls_address (x, log, false);
7499 if (GET_CODE (x) == CONST
7500 && GET_CODE (XEXP (x, 0)) == PLUS
7501 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
7502 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
7504 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0), log, false);
7505 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
7508 if (flag_pic && SYMBOLIC_CONST (x))
7509 return legitimize_pic_address (x, 0);
7511 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
7513 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (x))
7514 return legitimize_dllimport_symbol (x, true);
7515 if (GET_CODE (x) == CONST
7516 && GET_CODE (XEXP (x, 0)) == PLUS
7517 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
7518 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (x, 0), 0)))
7520 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (x, 0), 0), true);
7521 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
7525 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
7526 if (GET_CODE (x) == ASHIFT
7527 && CONST_INT_P (XEXP (x, 1))
7528 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
7531 log = INTVAL (XEXP (x, 1));
7532 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
7533 GEN_INT (1 << log));
7536 if (GET_CODE (x) == PLUS)
7538 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
7540 if (GET_CODE (XEXP (x, 0)) == ASHIFT
7541 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
7542 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
7545 log = INTVAL (XEXP (XEXP (x, 0), 1));
7546 XEXP (x, 0) = gen_rtx_MULT (Pmode,
7547 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
7548 GEN_INT (1 << log));
7551 if (GET_CODE (XEXP (x, 1)) == ASHIFT
7552 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
7553 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
7556 log = INTVAL (XEXP (XEXP (x, 1), 1));
7557 XEXP (x, 1) = gen_rtx_MULT (Pmode,
7558 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
7559 GEN_INT (1 << log));
7562 /* Put multiply first if it isn't already. */
7563 if (GET_CODE (XEXP (x, 1)) == MULT)
7565 rtx tmp = XEXP (x, 0);
7566 XEXP (x, 0) = XEXP (x, 1);
7571 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
7572 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
7573 created by virtual register instantiation, register elimination, and
7574 similar optimizations. */
7575 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
7578 x = gen_rtx_PLUS (Pmode,
7579 gen_rtx_PLUS (Pmode, XEXP (x, 0),
7580 XEXP (XEXP (x, 1), 0)),
7581 XEXP (XEXP (x, 1), 1));
/* Canonicalize
   (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
7586 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
7587 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
7588 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
7589 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
7590 && CONSTANT_P (XEXP (x, 1)))
7593 rtx other = NULL_RTX;
7595 if (CONST_INT_P (XEXP (x, 1)))
7597 constant = XEXP (x, 1);
7598 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
7600 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
7602 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
7603 other = XEXP (x, 1);
7611 x = gen_rtx_PLUS (Pmode,
7612 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
7613 XEXP (XEXP (XEXP (x, 0), 1), 0)),
7614 plus_constant (other, INTVAL (constant)));
7618 if (changed && legitimate_address_p (mode, x, FALSE))
7621 if (GET_CODE (XEXP (x, 0)) == MULT)
7624 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
7627 if (GET_CODE (XEXP (x, 1)) == MULT)
7630 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
7634 && REG_P (XEXP (x, 1))
7635 && REG_P (XEXP (x, 0)))
7638 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
7641 x = legitimize_pic_address (x, 0);
7644 if (changed && legitimate_address_p (mode, x, FALSE))
7647 if (REG_P (XEXP (x, 0)))
7649 rtx temp = gen_reg_rtx (Pmode);
7650 rtx val = force_operand (XEXP (x, 1), temp);
7652 emit_move_insn (temp, val);
7658 else if (REG_P (XEXP (x, 1)))
7660 rtx temp = gen_reg_rtx (Pmode);
7661 rtx val = force_operand (XEXP (x, 0), temp);
7663 emit_move_insn (temp, val);
7673 /* Print an integer constant expression in assembler syntax. Addition
7674 and subtraction are the only arithmetic that may appear in these
7675 expressions. FILE is the stdio stream to write to, X is the rtx, and
7676 CODE is the operand print code from the output string. */
7679 output_pic_addr_const (FILE *file, rtx x, int code)
7683 switch (GET_CODE (x))
7686 gcc_assert (flag_pic);
7691 if (! TARGET_MACHO || TARGET_64BIT)
7692 output_addr_const (file, x);
7695 const char *name = XSTR (x, 0);
7697 /* Mark the decl as referenced so that cgraph will
7698 output the function. */
7699 if (SYMBOL_REF_DECL (x))
7700 mark_decl_referenced (SYMBOL_REF_DECL (x));
7703 if (MACHOPIC_INDIRECT
7704 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
7705 name = machopic_indirection_name (x, /*stub_p=*/true);
7707 assemble_name (file, name);
7709 if (!TARGET_MACHO && !TARGET_64BIT_MS_ABI
7710 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
7711 fputs ("@PLT", file);
7718 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
7719 assemble_name (asm_out_file, buf);
7723 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
7727 /* This used to output parentheses around the expression,
7728 but that does not work on the 386 (either ATT or BSD assembler). */
7729 output_pic_addr_const (file, XEXP (x, 0), code);
7733 if (GET_MODE (x) == VOIDmode)
7735 /* We can use %d if the number is <32 bits and positive. */
7736 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
7737 fprintf (file, "0x%lx%08lx",
7738 (unsigned long) CONST_DOUBLE_HIGH (x),
7739 (unsigned long) CONST_DOUBLE_LOW (x));
7741 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
7744 /* We can't handle floating point constants;
7745 PRINT_OPERAND must handle them. */
7746 output_operand_lossage ("floating constant misused");
7750 /* Some assemblers need integer constants to appear first. */
7751 if (CONST_INT_P (XEXP (x, 0)))
7753 output_pic_addr_const (file, XEXP (x, 0), code);
7755 output_pic_addr_const (file, XEXP (x, 1), code);
7759 gcc_assert (CONST_INT_P (XEXP (x, 1)));
7760 output_pic_addr_const (file, XEXP (x, 1), code);
7762 output_pic_addr_const (file, XEXP (x, 0), code);
7768 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
7769 output_pic_addr_const (file, XEXP (x, 0), code);
7771 output_pic_addr_const (file, XEXP (x, 1), code);
7773 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
7777 gcc_assert (XVECLEN (x, 0) == 1);
7778 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
7779 switch (XINT (x, 1))
7782 fputs ("@GOT", file);
7785 fputs ("@GOTOFF", file);
7788 fputs ("@PLTOFF", file);
7790 case UNSPEC_GOTPCREL:
7791 fputs ("@GOTPCREL(%rip)", file);
7793 case UNSPEC_GOTTPOFF:
7794 /* FIXME: This might be @TPOFF in Sun ld too. */
7795 fputs ("@GOTTPOFF", file);
7798 fputs ("@TPOFF", file);
7802 fputs ("@TPOFF", file);
7804 fputs ("@NTPOFF", file);
7807 fputs ("@DTPOFF", file);
7809 case UNSPEC_GOTNTPOFF:
7811 fputs ("@GOTTPOFF(%rip)", file);
7813 fputs ("@GOTNTPOFF", file);
7815 case UNSPEC_INDNTPOFF:
7816 fputs ("@INDNTPOFF", file);
7819 output_operand_lossage ("invalid UNSPEC as operand");
7825 output_operand_lossage ("invalid expression as operand");
7829 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
7830 We need to emit DTP-relative relocations. */
7832 static void ATTRIBUTE_UNUSED
7833 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
7835 fputs (ASM_LONG, file);
7836 output_addr_const (file, x);
7837 fputs ("@DTPOFF", file);
7843 fputs (", 0", file);
7850 /* In the name of slightly smaller debug output, and to cater to
7851 general assembler lossage, recognize PIC+GOTOFF and turn it back
7852 into a direct symbol reference.
7854 On Darwin, this is necessary to avoid a crash, because Darwin
7855 has a different PIC label for each routine but the DWARF debugging
7856 information is not associated with any particular routine, so it's
7857 necessary to remove references to the PIC label from RTL stored by
7858 the DWARF output code. */
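/* Example (illustrative):

	(plus (reg %ebx)
	      (const (plus (unspec [(symbol_ref "x")] UNSPEC_GOTOFF)
			   (const_int 4))))

   is turned back into (plus (symbol_ref "x") (const_int 4)).  */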
7861 ix86_delegitimize_address (rtx orig_x)
7864 /* reg_addend is NULL or a multiple of some register. */
7865 rtx reg_addend = NULL_RTX;
7866 /* const_addend is NULL or a const_int. */
7867 rtx const_addend = NULL_RTX;
7868 /* This is the result, or NULL. */
7869 rtx result = NULL_RTX;
7876 if (GET_CODE (x) != CONST
7877 || GET_CODE (XEXP (x, 0)) != UNSPEC
7878 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
7881 return XVECEXP (XEXP (x, 0), 0, 0);
7884 if (GET_CODE (x) != PLUS
7885 || GET_CODE (XEXP (x, 1)) != CONST)
7888 if (REG_P (XEXP (x, 0))
7889 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
7890 /* %ebx + GOT/GOTOFF */
7892 else if (GET_CODE (XEXP (x, 0)) == PLUS)
7894 /* %ebx + %reg * scale + GOT/GOTOFF */
7895 reg_addend = XEXP (x, 0);
7896 if (REG_P (XEXP (reg_addend, 0))
7897 && REGNO (XEXP (reg_addend, 0)) == PIC_OFFSET_TABLE_REGNUM)
7898 reg_addend = XEXP (reg_addend, 1);
7899 else if (REG_P (XEXP (reg_addend, 1))
7900 && REGNO (XEXP (reg_addend, 1)) == PIC_OFFSET_TABLE_REGNUM)
7901 reg_addend = XEXP (reg_addend, 0);
7904 if (!REG_P (reg_addend)
7905 && GET_CODE (reg_addend) != MULT
7906 && GET_CODE (reg_addend) != ASHIFT)
7912 x = XEXP (XEXP (x, 1), 0);
7913 if (GET_CODE (x) == PLUS
7914 && CONST_INT_P (XEXP (x, 1)))
7916 const_addend = XEXP (x, 1);
7920 if (GET_CODE (x) == UNSPEC
7921 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x))
7922 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))))
7923 result = XVECEXP (x, 0, 0);
7925 if (TARGET_MACHO && darwin_local_data_pic (x)
7927 result = XEXP (x, 0);
7933 result = gen_rtx_PLUS (Pmode, result, const_addend);
7935 result = gen_rtx_PLUS (Pmode, reg_addend, result);
7939 /* If X is a machine specific address (i.e. a symbol or label being
7940 referenced as a displacement from the GOT implemented using an
7941 UNSPEC), then return the base term. Otherwise return X. */
7944 ix86_find_base_term (rtx x)
7950 if (GET_CODE (x) != CONST)
7953 if (GET_CODE (term) == PLUS
7954 && (CONST_INT_P (XEXP (term, 1))
7955 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
7956 term = XEXP (term, 0);
7957 if (GET_CODE (term) != UNSPEC
7958 || XINT (term, 1) != UNSPEC_GOTPCREL)
7961 term = XVECEXP (term, 0, 0);
7963 if (GET_CODE (term) != SYMBOL_REF
7964 && GET_CODE (term) != LABEL_REF)
7970 term = ix86_delegitimize_address (x);
7972 if (GET_CODE (term) != SYMBOL_REF
7973 && GET_CODE (term) != LABEL_REF)
7980 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
7985 if (mode == CCFPmode || mode == CCFPUmode)
7987 enum rtx_code second_code, bypass_code;
7988 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
7989 gcc_assert (bypass_code == UNKNOWN && second_code == UNKNOWN);
7990 code = ix86_fp_compare_code_to_integer (code);
7994 code = reverse_condition (code);
8005 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
8009 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
8010 Those same assemblers have the same but opposite lossage on cmov. */
8011 gcc_assert (mode == CCmode);
8012 suffix = fp ? "nbe" : "a";
8032 gcc_assert (mode == CCmode);
8054 gcc_assert (mode == CCmode);
8055 suffix = fp ? "nb" : "ae";
8058 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
8062 gcc_assert (mode == CCmode);
8066 suffix = fp ? "u" : "p";
8069 suffix = fp ? "nu" : "np";
8074 fputs (suffix, file);
8077 /* Print the name of register X to FILE based on its machine mode and number.
8078 If CODE is 'w', pretend the mode is HImode.
8079 If CODE is 'b', pretend the mode is QImode.
8080 If CODE is 'k', pretend the mode is SImode.
8081 If CODE is 'q', pretend the mode is DImode.
8082 If CODE is 'h', pretend the reg is the 'high' byte register.
8083 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op. */
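/* Example (illustrative): for register number 0, code 'b' prints
   %al, 'w' prints %ax, 'k' prints %eax and 'q' prints %rax; with no
   code the name is chosen from the operand's own mode.  */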
8086 print_reg (rtx x, int code, FILE *file)
8088 gcc_assert (REGNO (x) != ARG_POINTER_REGNUM
8089 && REGNO (x) != FRAME_POINTER_REGNUM
8090 && REGNO (x) != FLAGS_REG
8091 && REGNO (x) != FPSR_REG
8092 && REGNO (x) != FPCR_REG);
8094 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
8097 if (code == 'w' || MMX_REG_P (x))
8099 else if (code == 'b')
8101 else if (code == 'k')
8103 else if (code == 'q')
8105 else if (code == 'y')
8107 else if (code == 'h')
8110 code = GET_MODE_SIZE (GET_MODE (x));
/* Irritatingly, AMD extended registers use a different naming
   convention from the normal registers. */
8114 if (REX_INT_REG_P (x))
8116 gcc_assert (TARGET_64BIT);
8120 error ("extended registers have no high halves");
8123 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
8126 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
8129 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
8132 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
8135 error ("unsupported operand size for extended register");
8143 if (STACK_TOP_P (x))
8145 fputs ("st(0)", file);
8152 if (! ANY_FP_REG_P (x))
8153 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
8158 fputs (hi_reg_name[REGNO (x)], file);
8161 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
8163 fputs (qi_reg_name[REGNO (x)], file);
8166 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
8168 fputs (qi_high_reg_name[REGNO (x)], file);
8175 /* Locate some local-dynamic symbol still in use by this function
so that we can print its name in some tls_local_dynamic_base
   pattern. */
8180 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
8184 if (GET_CODE (x) == SYMBOL_REF
8185 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
8187 cfun->machine->some_ld_name = XSTR (x, 0);
8195 get_some_local_dynamic_name (void)
8199 if (cfun->machine->some_ld_name)
8200 return cfun->machine->some_ld_name;
8202 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
8204 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
8205 return cfun->machine->some_ld_name;
8211 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
8212 C -- print opcode suffix for set/cmov insn.
8213 c -- like C, but print reversed condition
8214 F,f -- likewise, but for floating-point.
8215 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
8217 R -- print the prefix for register names.
8218 z -- print the opcode suffix for the size of the current operand.
8219 * -- print a star (in certain assembler syntax)
8220 A -- print an absolute memory reference.
8221 w -- print the operand as if it's a "word" (HImode) even if it isn't.
s -- print a shift double count, followed by the assembler's argument
   delimiter.
8224 b -- print the QImode name of the register for the indicated operand.
8225 %b0 would print %al if operands[0] is reg 0.
8226 w -- likewise, print the HImode name of the register.
8227 k -- likewise, print the SImode name of the register.
8228 q -- likewise, print the DImode name of the register.
8229 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
8230 y -- print "st(0)" instead of "st" as a register.
8231 D -- print condition for SSE cmp instruction.
8232 P -- if PIC, print an @PLT suffix.
8233 X -- don't print any sort of PIC '@' suffix for a symbol.
8234 & -- print some in-use local-dynamic symbol name.
8235 H -- print a memory address offset by 8; used for sse high-parts
8239 print_operand (FILE *file, rtx x, int code)
8246 if (ASSEMBLER_DIALECT == ASM_ATT)
8251 assemble_name (file, get_some_local_dynamic_name ());
8255 switch (ASSEMBLER_DIALECT)
8262 /* Intel syntax. For absolute addresses, registers should not
8263 be surrounded by braces. */
8267 PRINT_OPERAND (file, x, 0);
8277 PRINT_OPERAND (file, x, 0);
8282 if (ASSEMBLER_DIALECT == ASM_ATT)
8287 if (ASSEMBLER_DIALECT == ASM_ATT)
8292 if (ASSEMBLER_DIALECT == ASM_ATT)
8297 if (ASSEMBLER_DIALECT == ASM_ATT)
8302 if (ASSEMBLER_DIALECT == ASM_ATT)
8307 if (ASSEMBLER_DIALECT == ASM_ATT)
/* 387 opcodes don't get size suffixes if the operands are
   registers. */
8314 if (STACK_REG_P (x))
8317 /* Likewise if using Intel opcodes. */
8318 if (ASSEMBLER_DIALECT == ASM_INTEL)
/* This is the size of the op, derived from the size of the operand. */
8322 switch (GET_MODE_SIZE (GET_MODE (x)))
8329 #ifdef HAVE_GAS_FILDS_FISTS
8335 if (GET_MODE (x) == SFmode)
8350 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
8352 #ifdef GAS_MNEMONICS
8378 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
8380 PRINT_OPERAND (file, x, 0);
/* A little bit of braindamage here: the SSE compare instructions
   use completely different names for the comparisons than the
8388 fp conditional moves. */
8389 switch (GET_CODE (x))
8404 fputs ("unord", file);
8408 fputs ("neq", file);
8412 fputs ("nlt", file);
8416 fputs ("nle", file);
8419 fputs ("ord", file);
8426 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8427 if (ASSEMBLER_DIALECT == ASM_ATT)
8429 switch (GET_MODE (x))
8431 case HImode: putc ('w', file); break;
8433 case SFmode: putc ('l', file); break;
8435 case DFmode: putc ('q', file); break;
8436 default: gcc_unreachable ();
8443 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
8446 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8447 if (ASSEMBLER_DIALECT == ASM_ATT)
8450 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
8453 /* Like above, but reverse condition */
8455 /* Check to see if argument to %c is really a constant
8456 and not a condition code which needs to be reversed. */
8457 if (!COMPARISON_P (x))
8459 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
8462 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
8465 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8466 if (ASSEMBLER_DIALECT == ASM_ATT)
8469 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
8473 /* It doesn't actually matter what mode we use here, as we're
8474 only going to use this for printing. */
8475 x = adjust_address_nv (x, DImode, 8);
8482 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
8485 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
8488 int pred_val = INTVAL (XEXP (x, 0));
8490 if (pred_val < REG_BR_PROB_BASE * 45 / 100
8491 || pred_val > REG_BR_PROB_BASE * 55 / 100)
8493 int taken = pred_val > REG_BR_PROB_BASE / 2;
8494 int cputaken = final_forward_branch_p (current_output_insn) == 0;
/* Emit hints only in the case the default branch prediction
   heuristics would fail. */
8498 if (taken != cputaken)
8500 /* We use 3e (DS) prefix for taken branches and
8501 2e (CS) prefix for not taken branches. */
8503 fputs ("ds ; ", file);
8505 fputs ("cs ; ", file);
8512 output_operand_lossage ("invalid operand code '%c'", code);
8517 print_reg (x, code, file);
8521 /* No `byte ptr' prefix for call instructions. */
8522 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
8525 switch (GET_MODE_SIZE (GET_MODE (x)))
8527 case 1: size = "BYTE"; break;
8528 case 2: size = "WORD"; break;
8529 case 4: size = "DWORD"; break;
8530 case 8: size = "QWORD"; break;
8531 case 12: size = "XWORD"; break;
8532 case 16: size = "XMMWORD"; break;
8537 /* Check for explicit size override (codes 'b', 'w' and 'k') */
8540 else if (code == 'w')
8542 else if (code == 'k')
8546 fputs (" PTR ", file);
8550 /* Avoid (%rip) for call operands. */
8551 if (CONSTANT_ADDRESS_P (x) && code == 'P'
8552 && !CONST_INT_P (x))
8553 output_addr_const (file, x);
8554 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
8555 output_operand_lossage ("invalid constraints for operand");
8560 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
8565 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8566 REAL_VALUE_TO_TARGET_SINGLE (r, l);
8568 if (ASSEMBLER_DIALECT == ASM_ATT)
8570 fprintf (file, "0x%08lx", l);
8573 /* These float cases don't actually occur as immediate operands. */
8574 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
8578 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
8579 fprintf (file, "%s", dstr);
8582 else if (GET_CODE (x) == CONST_DOUBLE
8583 && GET_MODE (x) == XFmode)
8587 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
8588 fprintf (file, "%s", dstr);
8593 /* We have patterns that allow zero sets of memory, for instance.
8594 In 64-bit mode, we should probably support all 8-byte vectors,
8595 since we can in fact encode that into an immediate. */
8596 if (GET_CODE (x) == CONST_VECTOR)
8598 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
8604 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
8606 if (ASSEMBLER_DIALECT == ASM_ATT)
8609 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
8610 || GET_CODE (x) == LABEL_REF)
8612 if (ASSEMBLER_DIALECT == ASM_ATT)
8615 fputs ("OFFSET FLAT:", file);
8618 if (CONST_INT_P (x))
8619 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
8621 output_pic_addr_const (file, x, code);
8623 output_addr_const (file, x);
8627 /* Print a memory operand whose address is ADDR. */
8630 print_operand_address (FILE *file, rtx addr)
8632 struct ix86_address parts;
8633 rtx base, index, disp;
8635 int ok = ix86_decompose_address (addr, &parts);
8640 index = parts.index;
8642 scale = parts.scale;
8650 if (USER_LABEL_PREFIX[0] == 0)
8652 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
8658 if (!base && !index)
8660 /* A displacement-only address requires special attention.  */
8662 if (CONST_INT_P (disp))
8664 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
8666 if (USER_LABEL_PREFIX[0] == 0)
8668 fputs ("ds:", file);
8670 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
8673 output_pic_addr_const (file, disp, 0);
8675 output_addr_const (file, disp);
8677 /* Use the one-byte-shorter RIP-relative addressing in 64-bit mode.  */
8680 if (GET_CODE (disp) == CONST
8681 && GET_CODE (XEXP (disp, 0)) == PLUS
8682 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
8683 disp = XEXP (XEXP (disp, 0), 0);
8684 if (GET_CODE (disp) == LABEL_REF
8685 || (GET_CODE (disp) == SYMBOL_REF
8686 && SYMBOL_REF_TLS_MODEL (disp) == 0))
8687 fputs ("(%rip)", file);
8692 if (ASSEMBLER_DIALECT == ASM_ATT)
8697 output_pic_addr_const (file, disp, 0);
8698 else if (GET_CODE (disp) == LABEL_REF)
8699 output_asm_label (disp);
8701 output_addr_const (file, disp);
8706 print_reg (base, 0, file);
8710 print_reg (index, 0, file);
8712 fprintf (file, ",%d", scale);
8718 rtx offset = NULL_RTX;
8722 /* Pull out the offset of a symbol; print any symbol itself. */
8723 if (GET_CODE (disp) == CONST
8724 && GET_CODE (XEXP (disp, 0)) == PLUS
8725 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
8727 offset = XEXP (XEXP (disp, 0), 1);
8728 disp = gen_rtx_CONST (VOIDmode,
8729 XEXP (XEXP (disp, 0), 0));
8733 output_pic_addr_const (file, disp, 0);
8734 else if (GET_CODE (disp) == LABEL_REF)
8735 output_asm_label (disp);
8736 else if (CONST_INT_P (disp))
8739 output_addr_const (file, disp);
8745 print_reg (base, 0, file);
8748 if (INTVAL (offset) >= 0)
8750 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
8754 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
8761 print_reg (index, 0, file);
8763 fprintf (file, "*%d", scale);
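/* Illustrative note (not from the original sources): the two dialects
   print the same decomposed address quite differently.  For example,
   base %ebx, index %ecx, scale 4 and displacement 8 come out as

	AT&T:   8(%ebx,%ecx,4)
	Intel:  [ebx+ecx*4+8]

   and a RIP-relative reference in 64-bit mode as "sym(%rip)".  */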
8771 output_addr_const_extra (FILE *file, rtx x)
8775 if (GET_CODE (x) != UNSPEC)
8778 op = XVECEXP (x, 0, 0);
8779 switch (XINT (x, 1))
8781 case UNSPEC_GOTTPOFF:
8782 output_addr_const (file, op);
8783 /* FIXME: This might be @TPOFF in Sun ld. */
8784 fputs ("@GOTTPOFF", file);
8787 output_addr_const (file, op);
8788 fputs ("@TPOFF", file);
8791 output_addr_const (file, op);
8793 fputs ("@TPOFF", file);
8795 fputs ("@NTPOFF", file);
8798 output_addr_const (file, op);
8799 fputs ("@DTPOFF", file);
8801 case UNSPEC_GOTNTPOFF:
8802 output_addr_const (file, op);
8804 fputs ("@GOTTPOFF(%rip)", file);
8806 fputs ("@GOTNTPOFF", file);
8808 case UNSPEC_INDNTPOFF:
8809 output_addr_const (file, op);
8810 fputs ("@INDNTPOFF", file);
8820 /* Split one or more DImode RTL references into pairs of SImode
8821 references. The RTL can be REG, offsettable MEM, integer constant, or
8822 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
8823 split and "num" is its length. lo_half and hi_half are output arrays
8824 that parallel "operands". */
8827 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
8831 rtx op = operands[num];
8833 /* simplify_subreg refuses to split volatile memory addresses,
8834 but we still have to handle them.  */
8837 lo_half[num] = adjust_address (op, SImode, 0);
8838 hi_half[num] = adjust_address (op, SImode, 4);
8842 lo_half[num] = simplify_gen_subreg (SImode, op,
8843 GET_MODE (op) == VOIDmode
8844 ? DImode : GET_MODE (op), 0);
8845 hi_half[num] = simplify_gen_subreg (SImode, op,
8846 GET_MODE (op) == VOIDmode
8847 ? DImode : GET_MODE (op), 4);
8851 /* Split one or more TImode RTL references into pairs of DImode
8852 references. The RTL can be REG, offsettable MEM, integer constant, or
8853 CONST_DOUBLE.  "operands" is a pointer to an array of TImode RTL to
8854 split and "num" is its length. lo_half and hi_half are output arrays
8855 that parallel "operands". */
8858 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
8862 rtx op = operands[num];
8864 /* simplify_subreg refuses to split volatile memory addresses, but we
8865 still have to handle them.  */
8868 lo_half[num] = adjust_address (op, DImode, 0);
8869 hi_half[num] = adjust_address (op, DImode, 8);
8873 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
8874 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
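/* Illustrative sketch, not part of this file: on little-endian x86 the
   lo/hi decomposition done by split_di and split_ti corresponds to the
   plain C below (shown for the DImode case, assuming <stdint.h>).  */
#if 0
#include <stdint.h>

static void
split_u64 (uint64_t op, uint32_t *lo, uint32_t *hi)
{
  *lo = (uint32_t) (op & 0xffffffffu);	/* the SImode word at offset 0 */
  *hi = (uint32_t) (op >> 32);		/* the SImode word at offset 4 */
}
#endif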
8879 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
8880 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
8881 is the expression of the binary operation. The output may either be
8882 emitted here, or returned to the caller, like all output_* functions.
8884 There is no guarantee that the operands are the same mode, as they
8885 might be within FLOAT or FLOAT_EXTEND expressions. */
8887 #ifndef SYSV386_COMPAT
8888 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
8889 wants to fix the assemblers because that causes incompatibility
8890 with gcc. No-one wants to fix gcc because that causes
8891 incompatibility with assemblers...  You can use
8892 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
8893 #define SYSV386_COMPAT 1
8897 output_387_binary_op (rtx insn, rtx *operands)
8899 static char buf[30];
8902 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
8904 #ifdef ENABLE_CHECKING
8905 /* Even if we do not want to check the inputs, this documents the input
8906 constraints, which helps in understanding the following code.  */
8907 if (STACK_REG_P (operands[0])
8908 && ((REG_P (operands[1])
8909 && REGNO (operands[0]) == REGNO (operands[1])
8910 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
8911 || (REG_P (operands[2])
8912 && REGNO (operands[0]) == REGNO (operands[2])
8913 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
8914 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
8917 gcc_assert (is_sse);
8920 switch (GET_CODE (operands[3]))
8923 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8924 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8932 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8933 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8941 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8942 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8950 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8951 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8965 if (GET_MODE (operands[0]) == SFmode)
8966 strcat (buf, "ss\t{%2, %0|%0, %2}");
8968 strcat (buf, "sd\t{%2, %0|%0, %2}");
8973 switch (GET_CODE (operands[3]))
8977 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
8979 rtx temp = operands[2];
8980 operands[2] = operands[1];
8984 /* We know operands[0] == operands[1].  */
8986 if (MEM_P (operands[2]))
8992 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
8994 if (STACK_TOP_P (operands[0]))
8995 /* How is it that we are storing to a dead operand[2]?
8996 Well, presumably operands[1] is dead too. We can't
8997 store the result to st(0) as st(0) gets popped on this
8998 instruction. Instead store to operands[2] (which I
8999 think has to be st(1)). st(1) will be popped later.
9000 gcc <= 2.8.1 didn't have this check and generated
9001 assembly code that the Unixware assembler rejected. */
9002 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
9004 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
9008 if (STACK_TOP_P (operands[0]))
9009 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
9011 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
9016 if (MEM_P (operands[1]))
9022 if (MEM_P (operands[2]))
9028 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
9031 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
9032 derived assemblers, confusingly reverse the direction of
9033 the operation for fsub{r} and fdiv{r} when the
9034 destination register is not st(0). The Intel assembler
9035 doesn't have this brain damage. Read !SYSV386_COMPAT to
9036 figure out what the hardware really does. */
9037 if (STACK_TOP_P (operands[0]))
9038 p = "{p\t%0, %2|rp\t%2, %0}";
9040 p = "{rp\t%2, %0|p\t%0, %2}";
9042 if (STACK_TOP_P (operands[0]))
9043 /* As above for fmul/fadd, we can't store to st(0). */
9044 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
9046 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
9051 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
9054 if (STACK_TOP_P (operands[0]))
9055 p = "{rp\t%0, %1|p\t%1, %0}";
9057 p = "{p\t%1, %0|rp\t%0, %1}";
9059 if (STACK_TOP_P (operands[0]))
9060 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
9062 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
9067 if (STACK_TOP_P (operands[0]))
9069 if (STACK_TOP_P (operands[1]))
9070 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
9072 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
9075 else if (STACK_TOP_P (operands[1]))
9078 p = "{\t%1, %0|r\t%0, %1}";
9080 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
9086 p = "{r\t%2, %0|\t%0, %2}";
9088 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
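/* Illustrative note (not from the original sources): as a concrete
   example of the templates above, a reg-reg fadd whose st(0) source
   dies would typically come out of the "p\t{%2, %0|%0, %2}" arm as
   something like "faddp %st, %st(1)" in AT&T syntax (the left half of
   each {att|intel} pair), i.e. st(1) = st(1) + st(0) followed by a
   pop.  */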
9101 /* Return needed mode for entity in optimize_mode_switching pass. */
9104 ix86_mode_needed (int entity, rtx insn)
9106 enum attr_i387_cw mode;
9108 /* The mode UNINITIALIZED is used to store the control word after a
9109 function call or ASM pattern.  The mode ANY specifies that the function
9110 has no requirements on the control word and makes no changes to the
9111 bits we are interested in.  */
9114 || (NONJUMP_INSN_P (insn)
9115 && (asm_noperands (PATTERN (insn)) >= 0
9116 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
9117 return I387_CW_UNINITIALIZED;
9119 if (recog_memoized (insn) < 0)
9122 mode = get_attr_i387_cw (insn);
9127 if (mode == I387_CW_TRUNC)
9132 if (mode == I387_CW_FLOOR)
9137 if (mode == I387_CW_CEIL)
9142 if (mode == I387_CW_MASK_PM)
9153 /* Output code to initialize the control word copies used by trunc?f?i and
9154 rounding patterns.  CURRENT_MODE is set to the current control word,
9155 while NEW_MODE is set to the new control word.  */
9158 emit_i387_cw_initialization (int mode)
9160 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
9165 rtx reg = gen_reg_rtx (HImode);
9167 emit_insn (gen_x86_fnstcw_1 (stored_mode));
9168 emit_move_insn (reg, copy_rtx (stored_mode));
9170 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL || optimize_size)
9175 /* round toward zero (truncate) */
9176 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
9177 slot = SLOT_CW_TRUNC;
9181 /* round down toward -oo */
9182 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
9183 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
9184 slot = SLOT_CW_FLOOR;
9188 /* round up toward +oo */
9189 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
9190 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
9191 slot = SLOT_CW_CEIL;
9194 case I387_CW_MASK_PM:
9195 /* mask precision exception for nearbyint() */
9196 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
9197 slot = SLOT_CW_MASK_PM;
9209 /* round toward zero (truncate) */
9210 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
9211 slot = SLOT_CW_TRUNC;
9215 /* round down toward -oo */
9216 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
9217 slot = SLOT_CW_FLOOR;
9221 /* round up toward +oo */
9222 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
9223 slot = SLOT_CW_CEIL;
9226 case I387_CW_MASK_PM:
9227 /* mask precision exception for nearbyint() */
9228 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
9229 slot = SLOT_CW_MASK_PM;
9237 gcc_assert (slot < MAX_386_STACK_LOCALS);
9239 new_mode = assign_386_stack_local (HImode, slot);
9240 emit_move_insn (new_mode, reg);
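/* Illustrative sketch, not part of this file: the magic numbers above
   manipulate the x87 control word's rounding-control field (bits 10-11)
   and precision-mask bit (bit 5).  In plain C, assuming <stdint.h>:  */
#if 0
#include <stdint.h>

static uint16_t
set_x87_rounding (uint16_t cw, unsigned rc)
{
  /* rc: 0 = nearest, 1 = down, 2 = up, 3 = truncate.  */
  cw &= (uint16_t) ~0x0c00;		/* clear the RC field (bits 10-11) */
  cw |= (uint16_t) (rc << 10);		/* 0x0400 down, 0x0800 up,
					   0x0c00 truncate */
  return cw;
}
#endif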
9243 /* Output code for INSN to convert a float to a signed int. OPERANDS
9244 are the insn operands. The output may be [HSD]Imode and the input
9245 operand may be [SDX]Fmode. */
9248 output_fix_trunc (rtx insn, rtx *operands, int fisttp)
9250 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
9251 int dimode_p = GET_MODE (operands[0]) == DImode;
9252 int round_mode = get_attr_i387_cw (insn);
9254 /* Jump through a hoop or two for DImode, since the hardware has no
9255 non-popping instruction. We used to do this a different way, but
9256 that was somewhat fragile and broke with post-reload splitters. */
9257 if ((dimode_p || fisttp) && !stack_top_dies)
9258 output_asm_insn ("fld\t%y1", operands);
9260 gcc_assert (STACK_TOP_P (operands[1]));
9261 gcc_assert (MEM_P (operands[0]));
9262 gcc_assert (GET_MODE (operands[1]) != TFmode);
9265 output_asm_insn ("fisttp%z0\t%0", operands);
9268 if (round_mode != I387_CW_ANY)
9269 output_asm_insn ("fldcw\t%3", operands);
9270 if (stack_top_dies || dimode_p)
9271 output_asm_insn ("fistp%z0\t%0", operands);
9273 output_asm_insn ("fist%z0\t%0", operands);
9274 if (round_mode != I387_CW_ANY)
9275 output_asm_insn ("fldcw\t%2", operands);
9281 /* Output code for x87 ffreep insn. The OPNO argument, which may only
9282 have the values zero or one, indicates the ffreep insn's operand
9283 from the OPERANDS array. */
9286 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
9288 if (TARGET_USE_FFREEP)
9289 #if HAVE_AS_IX86_FFREEP
9290 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
9293 static char retval[] = ".word\t0xc_df";
9294 int regno = REGNO (operands[opno]);
9296 gcc_assert (FP_REGNO_P (regno));
9298 retval[9] = '0' + (regno - FIRST_STACK_REG);
9303 return opno ? "fstp\t%y1" : "fstp\t%y0";
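/* Illustrative note (not from the original sources): "ffreep %st(N)"
   encodes as the two bytes DF C0+N, so the ".word 0xc_df" fallback
   above emits exactly those bytes on this little-endian target once
   the '_' placeholder is patched -- e.g. ".word 0xc1df" assembles to
   DF C1, i.e. "ffreep %st(1)".  */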
9307 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
9308 should be used. UNORDERED_P is true when fucom should be used. */
9311 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
9314 rtx cmp_op0, cmp_op1;
9315 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
9319 cmp_op0 = operands[0];
9320 cmp_op1 = operands[1];
9324 cmp_op0 = operands[1];
9325 cmp_op1 = operands[2];
9330 if (GET_MODE (operands[0]) == SFmode)
9332 return "ucomiss\t{%1, %0|%0, %1}";
9334 return "comiss\t{%1, %0|%0, %1}";
9337 return "ucomisd\t{%1, %0|%0, %1}";
9339 return "comisd\t{%1, %0|%0, %1}";
9342 gcc_assert (STACK_TOP_P (cmp_op0));
9344 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
9346 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
9350 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
9351 return output_387_ffreep (operands, 1);
9354 return "ftst\n\tfnstsw\t%0";
9357 if (STACK_REG_P (cmp_op1)
9359 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
9360 && REGNO (cmp_op1) != FIRST_STACK_REG)
9362 /* If the top of the 387 stack dies, and the other operand is
9363 also a stack register that dies, then this must be an
9364 `fcompp' float compare.  */
9368 /* There is no double popping fcomi variant. Fortunately,
9369 eflags is immune from the fstp's cc clobbering. */
9371 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
9373 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
9374 return output_387_ffreep (operands, 0);
9379 return "fucompp\n\tfnstsw\t%0";
9381 return "fcompp\n\tfnstsw\t%0";
9386 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
9388 static const char * const alt[16] =
9390 "fcom%z2\t%y2\n\tfnstsw\t%0",
9391 "fcomp%z2\t%y2\n\tfnstsw\t%0",
9392 "fucom%z2\t%y2\n\tfnstsw\t%0",
9393 "fucomp%z2\t%y2\n\tfnstsw\t%0",
9395 "ficom%z2\t%y2\n\tfnstsw\t%0",
9396 "ficomp%z2\t%y2\n\tfnstsw\t%0",
9400 "fcomi\t{%y1, %0|%0, %y1}",
9401 "fcomip\t{%y1, %0|%0, %y1}",
9402 "fucomi\t{%y1, %0|%0, %y1}",
9403 "fucomip\t{%y1, %0|%0, %y1}",
9414 mask = eflags_p << 3;
9415 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
9416 mask |= unordered_p << 1;
9417 mask |= stack_top_dies;
9419 gcc_assert (mask < 16);
9428 ix86_output_addr_vec_elt (FILE *file, int value)
9430 const char *directive = ASM_LONG;
9434 directive = ASM_QUAD;
9436 gcc_assert (!TARGET_64BIT);
9439 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
9443 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
9445 const char *directive = ASM_LONG;
9448 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
9449 directive = ASM_QUAD;
9451 gcc_assert (!TARGET_64BIT);
9453 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
9454 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
9455 fprintf (file, "%s%s%d-%s%d\n",
9456 directive, LPREFIX, value, LPREFIX, rel);
9457 else if (HAVE_AS_GOTOFF_IN_DATA)
9458 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
9460 else if (TARGET_MACHO)
9462 fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
9463 machopic_output_function_base_name (file);
9464 fprintf(file, "\n");
9468 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
9469 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
9472 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
9476 ix86_expand_clear (rtx dest)
9480 /* We play register width games, which are only valid after reload. */
9481 gcc_assert (reload_completed);
9483 /* Avoid HImode and its attendant prefix byte. */
9484 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
9485 dest = gen_rtx_REG (SImode, REGNO (dest));
9486 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
9488 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
9489 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
9491 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
9492 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
9498 /* X is an unchanging MEM. If it is a constant pool reference, return
9499 the constant pool rtx, else NULL. */
9502 maybe_get_pool_constant (rtx x)
9504 x = ix86_delegitimize_address (XEXP (x, 0));
9506 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
9507 return get_pool_constant (x);
9513 ix86_expand_move (enum machine_mode mode, rtx operands[])
9515 int strict = (reload_in_progress || reload_completed);
9517 enum tls_model model;
9522 if (GET_CODE (op1) == SYMBOL_REF)
9524 model = SYMBOL_REF_TLS_MODEL (op1);
9527 op1 = legitimize_tls_address (op1, model, true);
9528 op1 = force_operand (op1, op0);
9532 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
9533 && SYMBOL_REF_DLLIMPORT_P (op1))
9534 op1 = legitimize_dllimport_symbol (op1, false);
9536 else if (GET_CODE (op1) == CONST
9537 && GET_CODE (XEXP (op1, 0)) == PLUS
9538 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
9540 rtx addend = XEXP (XEXP (op1, 0), 1);
9541 rtx symbol = XEXP (XEXP (op1, 0), 0);
9544 model = SYMBOL_REF_TLS_MODEL (symbol);
9546 tmp = legitimize_tls_address (symbol, model, true);
9547 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
9548 && SYMBOL_REF_DLLIMPORT_P (symbol))
9549 tmp = legitimize_dllimport_symbol (symbol, true);
9553 tmp = force_operand (tmp, NULL);
9554 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
9555 op0, 1, OPTAB_DIRECT);
9561 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
9563 if (TARGET_MACHO && !TARGET_64BIT)
9568 rtx temp = ((reload_in_progress
9569 || ((op0 && REG_P (op0))
9571 ? op0 : gen_reg_rtx (Pmode));
9572 op1 = machopic_indirect_data_reference (op1, temp);
9573 op1 = machopic_legitimize_pic_address (op1, mode,
9574 temp == op1 ? 0 : temp);
9576 else if (MACHOPIC_INDIRECT)
9577 op1 = machopic_indirect_data_reference (op1, 0);
9585 op1 = force_reg (Pmode, op1);
9586 else if (!TARGET_64BIT || !x86_64_movabs_operand (op1, Pmode))
9588 rtx reg = no_new_pseudos ? op0 : NULL_RTX;
9589 op1 = legitimize_pic_address (op1, reg);
9598 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
9599 || !push_operand (op0, mode))
9601 op1 = force_reg (mode, op1);
9603 if (push_operand (op0, mode)
9604 && ! general_no_elim_operand (op1, mode))
9605 op1 = copy_to_mode_reg (mode, op1);
9607 /* Force large constants in 64-bit compilation into a register
9608 to get them CSEed.  */
9609 if (TARGET_64BIT && mode == DImode
9610 && immediate_operand (op1, mode)
9611 && !x86_64_zext_immediate_operand (op1, VOIDmode)
9612 && !register_operand (op0, mode)
9613 && optimize && !reload_completed && !reload_in_progress)
9614 op1 = copy_to_mode_reg (mode, op1);
9616 if (FLOAT_MODE_P (mode))
9618 /* If we are loading a floating point constant to a register,
9619 force the value to memory now, since we'll get better code
9620 out of the back end.  */
9624 else if (GET_CODE (op1) == CONST_DOUBLE)
9626 op1 = validize_mem (force_const_mem (mode, op1));
9627 if (!register_operand (op0, mode))
9629 rtx temp = gen_reg_rtx (mode);
9630 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
9631 emit_move_insn (op0, temp);
9638 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
9642 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
9644 rtx op0 = operands[0], op1 = operands[1];
9646 /* Force constants other than zero into memory.  We do not know how
9647 the instructions used to build constants modify the upper 64 bits
9648 of the register; once we have that information, we may be able
9649 to handle some of them more efficiently.  */
9650 if ((reload_in_progress | reload_completed) == 0
9651 && register_operand (op0, mode)
9653 && standard_sse_constant_p (op1) <= 0)
9654 op1 = validize_mem (force_const_mem (mode, op1));
9656 /* Make operand1 a register if it isn't already. */
9658 && !register_operand (op0, mode)
9659 && !register_operand (op1, mode))
9661 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
9665 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
9668 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
9669 straight to ix86_expand_vector_move. */
9670 /* Code generation for scalar reg-reg moves of single and double precision data:
9671 if (x86_sse_partial_reg_dependency == true || x86_sse_split_regs == true)
9675 if (x86_sse_partial_reg_dependency == true)
9680 Code generation for scalar loads of double precision data:
9681 if (x86_sse_split_regs == true)
9682 movlpd mem, reg (gas syntax)
9686 Code generation for unaligned packed loads of single precision data
9687 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
9688 if (x86_sse_unaligned_move_optimal)
9691 if (x86_sse_partial_reg_dependency == true)
9703 Code generation for unaligned packed loads of double precision data
9704 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
9705 if (x86_sse_unaligned_move_optimal)
9708 if (x86_sse_split_regs == true)
9721 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
9730 /* If we're optimizing for size, movups is the smallest. */
9733 op0 = gen_lowpart (V4SFmode, op0);
9734 op1 = gen_lowpart (V4SFmode, op1);
9735 emit_insn (gen_sse_movups (op0, op1));
9739 /* ??? If we have typed data, then it would appear that using
9740 movdqu is the only way to get unaligned data loaded with
9742 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
9744 op0 = gen_lowpart (V16QImode, op0);
9745 op1 = gen_lowpart (V16QImode, op1);
9746 emit_insn (gen_sse2_movdqu (op0, op1));
9750 if (TARGET_SSE2 && mode == V2DFmode)
9754 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
9756 op0 = gen_lowpart (V2DFmode, op0);
9757 op1 = gen_lowpart (V2DFmode, op1);
9758 emit_insn (gen_sse2_movupd (op0, op1));
9762 /* When SSE registers are split into halves, we can avoid
9763 writing to the top half twice. */
9764 if (TARGET_SSE_SPLIT_REGS)
9766 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
9771 /* ??? Not sure about the best option for the Intel chips.
9772 The following would seem to satisfy; the register is
9773 entirely cleared, breaking the dependency chain. We
9774 then store to the upper half, with a dependency depth
9775 of one. A rumor has it that Intel recommends two movsd
9776 followed by an unpacklpd, but this is unconfirmed. And
9777 given that the dependency depth of the unpacklpd would
9778 still be one, I'm not sure why this would be better. */
9779 zero = CONST0_RTX (V2DFmode);
9782 m = adjust_address (op1, DFmode, 0);
9783 emit_insn (gen_sse2_loadlpd (op0, zero, m));
9784 m = adjust_address (op1, DFmode, 8);
9785 emit_insn (gen_sse2_loadhpd (op0, op0, m));
9789 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
9791 op0 = gen_lowpart (V4SFmode, op0);
9792 op1 = gen_lowpart (V4SFmode, op1);
9793 emit_insn (gen_sse_movups (op0, op1));
9797 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
9798 emit_move_insn (op0, CONST0_RTX (mode));
9800 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
9802 if (mode != V4SFmode)
9803 op0 = gen_lowpart (V4SFmode, op0);
9804 m = adjust_address (op1, V2SFmode, 0);
9805 emit_insn (gen_sse_loadlps (op0, op0, m));
9806 m = adjust_address (op1, V2SFmode, 8);
9807 emit_insn (gen_sse_loadhps (op0, op0, m));
9810 else if (MEM_P (op0))
9812 /* If we're optimizing for size, movups is the smallest. */
9815 op0 = gen_lowpart (V4SFmode, op0);
9816 op1 = gen_lowpart (V4SFmode, op1);
9817 emit_insn (gen_sse_movups (op0, op1));
9821 /* ??? Similar to above, only less clear because of quote
9822 typeless stores unquote. */
9823 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
9824 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
9826 op0 = gen_lowpart (V16QImode, op0);
9827 op1 = gen_lowpart (V16QImode, op1);
9828 emit_insn (gen_sse2_movdqu (op0, op1));
9832 if (TARGET_SSE2 && mode == V2DFmode)
9834 m = adjust_address (op0, DFmode, 0);
9835 emit_insn (gen_sse2_storelpd (m, op1));
9836 m = adjust_address (op0, DFmode, 8);
9837 emit_insn (gen_sse2_storehpd (m, op1));
9841 if (mode != V4SFmode)
9842 op1 = gen_lowpart (V4SFmode, op1);
9843 m = adjust_address (op0, V2SFmode, 0);
9844 emit_insn (gen_sse_storelps (m, op1));
9845 m = adjust_address (op0, V2SFmode, 8);
9846 emit_insn (gen_sse_storehps (m, op1));
9853 /* Expand a push in MODE. This is some mode for which we do not support
9854 proper push instructions, at least from the registers that we expect
9855 the value to live in. */
9858 ix86_expand_push (enum machine_mode mode, rtx x)
9862 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
9863 GEN_INT (-GET_MODE_SIZE (mode)),
9864 stack_pointer_rtx, 1, OPTAB_DIRECT);
9865 if (tmp != stack_pointer_rtx)
9866 emit_move_insn (stack_pointer_rtx, tmp);
9868 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
9869 emit_move_insn (tmp, x);
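/* Illustrative note (not from the original sources): the expansion
   above is just the open-coded equivalent of a push, roughly

	sub	$SIZE, %sp	; explicit stack-pointer adjustment
	mov	X, (%sp)	; store the value at the new stack top

   for a mode whose size the hardware push does not handle directly
   from the registers the value lives in.  */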
9872 /* Helper function of ix86_fixup_binary_operands to canonicalize
9873 operand order. Returns true if the operands should be swapped. */
9876 ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
9879 rtx dst = operands[0];
9880 rtx src1 = operands[1];
9881 rtx src2 = operands[2];
9883 /* If the operation is not commutative, we can't do anything. */
9884 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
9887 /* Highest priority is that src1 should match dst. */
9888 if (rtx_equal_p (dst, src1))
9890 if (rtx_equal_p (dst, src2))
9893 /* Next highest priority is that immediate constants come second. */
9894 if (immediate_operand (src2, mode))
9896 if (immediate_operand (src1, mode))
9899 /* Lowest priority is that memory references should come second. */
9909 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
9910 destination to use for the operation. If different from the true
9911 destination in operands[0], a copy operation will be required. */
9914 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
9917 rtx dst = operands[0];
9918 rtx src1 = operands[1];
9919 rtx src2 = operands[2];
9921 /* Canonicalize operand order. */
9922 if (ix86_swap_binary_operands_p (code, mode, operands))
9929 /* Both source operands cannot be in memory. */
9930 if (MEM_P (src1) && MEM_P (src2))
9932 /* Optimization: Only read from memory once. */
9933 if (rtx_equal_p (src1, src2))
9935 src2 = force_reg (mode, src2);
9939 src2 = force_reg (mode, src2);
9942 /* If the destination is memory, and we do not have matching source
9943 operands, do things in registers. */
9944 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
9945 dst = gen_reg_rtx (mode);
9947 /* Source 1 cannot be a constant. */
9948 if (CONSTANT_P (src1))
9949 src1 = force_reg (mode, src1);
9951 /* Source 1 cannot be a non-matching memory. */
9952 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
9953 src1 = force_reg (mode, src1);
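/* Illustrative note (not from the original sources): given a
   commutative operation with dst and src2 the same MEM and src1 a
   constant, ix86_swap_binary_operands_p first swaps src1/src2 so the
   matching memory reference comes first and the constant second; the
   fixups above then leave the "dst matches src1, at most one memory
   reference" shape that the insn patterns accept.  */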
9960 /* Similarly, but assume that the destination has already been
9964 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
9965 enum machine_mode mode, rtx operands[])
9967 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
9968 gcc_assert (dst == operands[0]);
9971 /* Attempt to expand a binary operator.  Make the expansion closer to the
9972 actual machine than just general_operand, which will allow 3 separate
9973 memory references (one output, two input) in a single insn.  */
9976 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
9979 rtx src1, src2, dst, op, clob;
9981 dst = ix86_fixup_binary_operands (code, mode, operands);
9985 /* Emit the instruction. */
9987 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
9988 if (reload_in_progress)
9990 /* Reload doesn't know about the flags register, and doesn't know that
9991 it doesn't want to clobber it. We can only do this with PLUS. */
9992 gcc_assert (code == PLUS);
9997 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
9998 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
10001 /* Fix up the destination if needed. */
10002 if (dst != operands[0])
10003 emit_move_insn (operands[0], dst);
10006 /* Return TRUE or FALSE depending on whether the binary operator meets the
10007 appropriate constraints. */
10010 ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
10013 rtx dst = operands[0];
10014 rtx src1 = operands[1];
10015 rtx src2 = operands[2];
10017 /* Both source operands cannot be in memory. */
10018 if (MEM_P (src1) && MEM_P (src2))
10021 /* Canonicalize operand order for commutative operators. */
10022 if (ix86_swap_binary_operands_p (code, mode, operands))
10029 /* If the destination is memory, we must have a matching source operand. */
10030 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
10033 /* Source 1 cannot be a constant. */
10034 if (CONSTANT_P (src1))
10037 /* Source 1 cannot be a non-matching memory. */
10038 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
10044 /* Attempt to expand a unary operator.  Make the expansion closer to the
10045 actual machine than just general_operand, which will allow 2 separate
10046 memory references (one output, one input) in a single insn.  */
10049 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
10052 int matching_memory;
10053 rtx src, dst, op, clob;
10058 /* If the destination is memory, and we do not have matching source
10059 operands, do things in registers. */
10060 matching_memory = 0;
10063 if (rtx_equal_p (dst, src))
10064 matching_memory = 1;
10066 dst = gen_reg_rtx (mode);
10069 /* When source operand is memory, destination must match. */
10070 if (MEM_P (src) && !matching_memory)
10071 src = force_reg (mode, src);
10073 /* Emit the instruction. */
10075 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
10076 if (reload_in_progress || code == NOT)
10078 /* Reload doesn't know about the flags register, and doesn't know that
10079 it doesn't want to clobber it. */
10080 gcc_assert (code == NOT);
10085 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
10086 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
10089 /* Fix up the destination if needed. */
10090 if (dst != operands[0])
10091 emit_move_insn (operands[0], dst);
10094 /* Return TRUE or FALSE depending on whether the unary operator meets the
10095 appropriate constraints. */
10098 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
10099 enum machine_mode mode ATTRIBUTE_UNUSED,
10100 rtx operands[2] ATTRIBUTE_UNUSED)
10102 /* If one of the operands is memory, source and destination must match.  */
10103 if ((MEM_P (operands[0])
10104 || MEM_P (operands[1]))
10105 && ! rtx_equal_p (operands[0], operands[1]))
10110 /* Post-reload splitter for converting an SF or DFmode value in an
10111 SSE register into an unsigned SImode. */
10114 ix86_split_convert_uns_si_sse (rtx operands[])
10116 enum machine_mode vecmode;
10117 rtx value, large, zero_or_two31, input, two31, x;
10119 large = operands[1];
10120 zero_or_two31 = operands[2];
10121 input = operands[3];
10122 two31 = operands[4];
10123 vecmode = GET_MODE (large);
10124 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
10126 /* Load up the value into the low element. We must ensure that the other
10127 elements are valid floats -- zero is the easiest such value. */
10130 if (vecmode == V4SFmode)
10131 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
10133 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
10137 input = gen_rtx_REG (vecmode, REGNO (input));
10138 emit_move_insn (value, CONST0_RTX (vecmode));
10139 if (vecmode == V4SFmode)
10140 emit_insn (gen_sse_movss (value, value, input));
10142 emit_insn (gen_sse2_movsd (value, value, input));
10145 emit_move_insn (large, two31);
10146 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
10148 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
10149 emit_insn (gen_rtx_SET (VOIDmode, large, x));
10151 x = gen_rtx_AND (vecmode, zero_or_two31, large);
10152 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
10154 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
10155 emit_insn (gen_rtx_SET (VOIDmode, value, x));
10157 large = gen_rtx_REG (V4SImode, REGNO (large));
10158 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
10160 x = gen_rtx_REG (V4SImode, REGNO (value));
10161 if (vecmode == V4SFmode)
10162 emit_insn (gen_sse2_cvttps2dq (x, value));
10164 emit_insn (gen_sse2_cvttpd2dq (x, value));
10167 emit_insn (gen_xorv4si3 (value, value, large));
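/* Illustrative sketch, not part of this file: per element, the
   compare/mask/subtract/xor sequence above computes the scalar
   equivalent below (shown for float, assuming <stdint.h>).  */
#if 0
#include <stdint.h>

static uint32_t
float_to_u32 (float x)
{
  if (x >= 2147483648.0f)	/* the "large" compare mask */
    /* Subtract 2^31 so the signed cvtt conversion is in range,
       then restore the stolen top bit with the final XOR.  */
    return (uint32_t) (int32_t) (x - 2147483648.0f) ^ 0x80000000u;
  return (uint32_t) (int32_t) x;
}
#endif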
10170 /* Convert an unsigned DImode value into a DFmode, using only SSE.
10171 Expects the 64-bit DImode to be supplied in a pair of integral
10172 registers. Requires SSE2; will use SSE3 if available. For x86_32,
10173 -mfpmath=sse, !optimize_size only. */
10176 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
10178 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
10179 rtx int_xmm, fp_xmm;
10180 rtx biases, exponents;
10183 int_xmm = gen_reg_rtx (V4SImode);
10184 if (TARGET_INTER_UNIT_MOVES)
10185 emit_insn (gen_movdi_to_sse (int_xmm, input));
10186 else if (TARGET_SSE_SPLIT_REGS)
10188 emit_insn (gen_rtx_CLOBBER (VOIDmode, int_xmm));
10189 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
10193 x = gen_reg_rtx (V2DImode);
10194 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
10195 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
10198 x = gen_rtx_CONST_VECTOR (V4SImode,
10199 gen_rtvec (4, GEN_INT (0x43300000UL),
10200 GEN_INT (0x45300000UL),
10201 const0_rtx, const0_rtx));
10202 exponents = validize_mem (force_const_mem (V4SImode, x));
10204 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
10205 emit_insn (gen_sse2_punpckldq (int_xmm, int_xmm, exponents));
10207 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_lo_xmm)
10208 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
10209 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
10210 (0x1.0p84 + double(fp_value_hi_xmm)).
10211 Note these exponents differ by 32. */
10213 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
10215 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
10216 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
10217 real_ldexp (&bias_lo_rvt, &dconst1, 52);
10218 real_ldexp (&bias_hi_rvt, &dconst1, 84);
10219 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
10220 x = const_double_from_real_value (bias_hi_rvt, DFmode);
10221 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
10222 biases = validize_mem (force_const_mem (V2DFmode, biases));
10223 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
10225 /* Add the upper and lower DFmode values together. */
10227 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
10230 x = copy_to_mode_reg (V2DFmode, fp_xmm);
10231 emit_insn (gen_sse2_unpckhpd (fp_xmm, fp_xmm, fp_xmm));
10232 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
10235 ix86_expand_vector_extract (false, target, fp_xmm, 0);
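/* Illustrative sketch, not part of this file: the punpckldq/subpd
   sequence above is the classic exponent-bias trick.  A scalar C
   rendering, assuming <stdint.h> and <string.h>:  */
#if 0
#include <stdint.h>
#include <string.h>

static double
u64_to_double (uint64_t x)
{
  uint64_t lo_bits = 0x4330000000000000ull | (x & 0xffffffffull);
  uint64_t hi_bits = 0x4530000000000000ull | (x >> 32);
  double lo, hi;

  memcpy (&lo, &lo_bits, sizeof lo);	/* 0x1.0p52 + low word */
  memcpy (&hi, &hi_bits, sizeof hi);	/* 0x1.0p84 + high word * 2^32 */

  /* Both subtractions are exact; only the final add rounds.  */
  return (hi - 0x1.0p84) + (lo - 0x1.0p52);
}
#endif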
10238 /* Convert an unsigned SImode value into a DFmode.  Currently only used
10239 for SSE, but applicable anywhere. */
10242 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
10244 REAL_VALUE_TYPE TWO31r;
10247 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
10248 NULL, 1, OPTAB_DIRECT);
10250 fp = gen_reg_rtx (DFmode);
10251 emit_insn (gen_floatsidf2 (fp, x));
10253 real_ldexp (&TWO31r, &dconst1, 31);
10254 x = const_double_from_real_value (TWO31r, DFmode);
10256 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
10258 emit_move_insn (target, x);
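/* Illustrative sketch, not part of this file: the PLUS of -2^31
   followed by a signed convert and a +2^31.0 correction is, in scalar
   C terms (assuming <stdint.h> and x86 wrapping conversions):  */
#if 0
#include <stdint.h>

static double
u32_to_double (uint32_t u)
{
  /* u ^ 0x80000000 is u - 2^31 modulo 2^32, so the biased value
     always fits a signed 32-bit integer.  */
  int32_t biased = (int32_t) (u ^ 0x80000000u);
  return (double) biased + 2147483648.0;
}
#endif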
10261 /* Convert a signed DImode value into a DFmode. Only used for SSE in
10262 32-bit mode; otherwise we have a direct convert instruction. */
10265 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
10267 REAL_VALUE_TYPE TWO32r;
10268 rtx fp_lo, fp_hi, x;
10270 fp_lo = gen_reg_rtx (DFmode);
10271 fp_hi = gen_reg_rtx (DFmode);
10273 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
10275 real_ldexp (&TWO32r, &dconst1, 32);
10276 x = const_double_from_real_value (TWO32r, DFmode);
10277 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
10279 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
10281 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
10284 emit_move_insn (target, x);
10287 /* Convert an unsigned SImode value into an SFmode, using only SSE.
10288 For x86_32, -mfpmath=sse, !optimize_size only. */
10290 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
10292 REAL_VALUE_TYPE ONE16r;
10293 rtx fp_hi, fp_lo, int_hi, int_lo, x;
10295 real_ldexp (&ONE16r, &dconst1, 16);
10296 x = const_double_from_real_value (ONE16r, SFmode);
10297 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
10298 NULL, 0, OPTAB_DIRECT);
10299 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
10300 NULL, 0, OPTAB_DIRECT);
10301 fp_hi = gen_reg_rtx (SFmode);
10302 fp_lo = gen_reg_rtx (SFmode);
10303 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
10304 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
10305 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
10307 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
10309 if (!rtx_equal_p (target, fp_hi))
10310 emit_move_insn (target, fp_hi);
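/* Illustrative sketch, not part of this file: splitting at bit 16
   keeps both halves exactly representable in SFmode, so only the
   final addition rounds.  In scalar C (assuming <stdint.h>):  */
#if 0
#include <stdint.h>

static float
u32_to_float (uint32_t u)
{
  float hi = (float) (int32_t) (u >> 16);	/* upper 16 bits */
  float lo = (float) (int32_t) (u & 0xffff);	/* lower 16 bits */
  return hi * 65536.0f + lo;	/* hi * 2^16 is exact in SFmode */
}
#endif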
10313 /* A subroutine of ix86_build_signbit_mask.  If VECT is true,
10314 then replicate the value for all elements of the vector register.  */
10318 ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
10325 v = gen_rtvec (4, value, value, value, value);
10327 v = gen_rtvec (4, value, CONST0_RTX (SFmode),
10328 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
10329 return gen_rtx_CONST_VECTOR (V4SFmode, v);
10333 v = gen_rtvec (2, value, value);
10335 v = gen_rtvec (2, value, CONST0_RTX (DFmode));
10336 return gen_rtx_CONST_VECTOR (V2DFmode, v);
10339 gcc_unreachable ();
10343 /* A subroutine of ix86_expand_fp_absneg_operator and copysign expanders.
10344 Create a mask for the sign bit in MODE for an SSE register. If VECT is
10345 true, then replicate the mask for all elements of the vector register.
10346 If INVERT is true, then create a mask excluding the sign bit. */
10349 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
10351 enum machine_mode vec_mode;
10352 HOST_WIDE_INT hi, lo;
10357 /* Find the sign bit, sign extended to 2*HWI. */
10358 if (mode == SFmode)
10359 lo = 0x80000000, hi = lo < 0;
10360 else if (HOST_BITS_PER_WIDE_INT >= 64)
10361 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
10363 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
10366 lo = ~lo, hi = ~hi;
10368 /* Force this value into the low part of a fp vector constant. */
10369 mask = immed_double_const (lo, hi, mode == SFmode ? SImode : DImode);
10370 mask = gen_lowpart (mode, mask);
10372 v = ix86_build_const_vector (mode, vect, mask);
10373 vec_mode = (mode == SFmode) ? V4SFmode : V2DFmode;
10374 return force_reg (vec_mode, v);
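/* Illustrative note (not from the original sources): with the mask in
   a register, SSE negation and absolute value are one bitwise op per
   element.  Viewing a 32-bit float as bits:

	neg:  x ^  0x80000000	(xorps with the sign-bit mask)
	abs:  x & ~0x80000000	(andps with the inverted mask)  */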
10377 /* Generate code for floating point ABS or NEG. */
10380 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
10383 rtx mask, set, use, clob, dst, src;
10384 bool matching_memory;
10385 bool use_sse = false;
10386 bool vector_mode = VECTOR_MODE_P (mode);
10387 enum machine_mode elt_mode = mode;
10391 elt_mode = GET_MODE_INNER (mode);
10394 else if (TARGET_SSE_MATH)
10395 use_sse = SSE_FLOAT_MODE_P (mode);
10397 /* NEG and ABS performed with SSE use bitwise mask operations.
10398 Create the appropriate mask now. */
10400 mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS);
10407 /* If the destination is memory, and we don't have matching source
10408 operands or we're using the x87, do things in registers. */
10409 matching_memory = false;
10412 if (use_sse && rtx_equal_p (dst, src))
10413 matching_memory = true;
10415 dst = gen_reg_rtx (mode);
10417 if (MEM_P (src) && !matching_memory)
10418 src = force_reg (mode, src);
10422 set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
10423 set = gen_rtx_SET (VOIDmode, dst, set);
10428 set = gen_rtx_fmt_e (code, mode, src);
10429 set = gen_rtx_SET (VOIDmode, dst, set);
10432 use = gen_rtx_USE (VOIDmode, mask);
10433 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
10434 emit_insn (gen_rtx_PARALLEL (VOIDmode,
10435 gen_rtvec (3, set, use, clob)));
10441 if (dst != operands[0])
10442 emit_move_insn (operands[0], dst);
10445 /* Expand a copysign operation. Special case operand 0 being a constant. */
10448 ix86_expand_copysign (rtx operands[])
10450 enum machine_mode mode, vmode;
10451 rtx dest, op0, op1, mask, nmask;
10453 dest = operands[0];
10457 mode = GET_MODE (dest);
10458 vmode = mode == SFmode ? V4SFmode : V2DFmode;
10460 if (GET_CODE (op0) == CONST_DOUBLE)
10464 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
10465 op0 = simplify_unary_operation (ABS, mode, op0, mode);
10467 if (op0 == CONST0_RTX (mode))
10468 op0 = CONST0_RTX (vmode);
10471 if (mode == SFmode)
10472 v = gen_rtvec (4, op0, CONST0_RTX (SFmode),
10473 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
10475 v = gen_rtvec (2, op0, CONST0_RTX (DFmode));
10476 op0 = force_reg (vmode, gen_rtx_CONST_VECTOR (vmode, v));
10479 mask = ix86_build_signbit_mask (mode, 0, 0);
10481 if (mode == SFmode)
10482 emit_insn (gen_copysignsf3_const (dest, op0, op1, mask));
10484 emit_insn (gen_copysigndf3_const (dest, op0, op1, mask));
10488 nmask = ix86_build_signbit_mask (mode, 0, 1);
10489 mask = ix86_build_signbit_mask (mode, 0, 0);
10491 if (mode == SFmode)
10492 emit_insn (gen_copysignsf3_var (dest, NULL, op0, op1, nmask, mask));
10494 emit_insn (gen_copysigndf3_var (dest, NULL, op0, op1, nmask, mask));
10498 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
10499 be a constant, and so has already been expanded into a vector constant. */
10502 ix86_split_copysign_const (rtx operands[])
10504 enum machine_mode mode, vmode;
10505 rtx dest, op0, op1, mask, x;
10507 dest = operands[0];
10510 mask = operands[3];
10512 mode = GET_MODE (dest);
10513 vmode = GET_MODE (mask);
10515 dest = simplify_gen_subreg (vmode, dest, mode, 0);
10516 x = gen_rtx_AND (vmode, dest, mask);
10517 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10519 if (op0 != CONST0_RTX (vmode))
10521 x = gen_rtx_IOR (vmode, dest, op0);
10522 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10526 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
10527 so we have to do two masks. */
10530 ix86_split_copysign_var (rtx operands[])
10532 enum machine_mode mode, vmode;
10533 rtx dest, scratch, op0, op1, mask, nmask, x;
10535 dest = operands[0];
10536 scratch = operands[1];
10539 nmask = operands[4];
10540 mask = operands[5];
10542 mode = GET_MODE (dest);
10543 vmode = GET_MODE (mask);
10545 if (rtx_equal_p (op0, op1))
10547 /* Shouldn't happen often (it's useless, obviously), but when it does
10548 we'd generate incorrect code if we continue below. */
10549 emit_move_insn (dest, op0);
10553 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
10555 gcc_assert (REGNO (op1) == REGNO (scratch));
10557 x = gen_rtx_AND (vmode, scratch, mask);
10558 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
10561 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
10562 x = gen_rtx_NOT (vmode, dest);
10563 x = gen_rtx_AND (vmode, x, op0);
10564 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10568 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
10570 x = gen_rtx_AND (vmode, scratch, mask);
10572 else /* alternative 2,4 */
10574 gcc_assert (REGNO (mask) == REGNO (scratch));
10575 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
10576 x = gen_rtx_AND (vmode, scratch, op1);
10578 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
10580 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
10582 dest = simplify_gen_subreg (vmode, op0, mode, 0);
10583 x = gen_rtx_AND (vmode, dest, nmask);
10585 else /* alternative 3,4 */
10587 gcc_assert (REGNO (nmask) == REGNO (dest));
10589 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
10590 x = gen_rtx_AND (vmode, dest, op0);
10592 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10595 x = gen_rtx_IOR (vmode, dest, scratch);
10596 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
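/* Illustrative sketch, not part of this file: stripped of the
   register-allocation shuffling above, the two-mask copysign is, per
   element on the bit patterns (assuming <stdint.h>):  */
#if 0
#include <stdint.h>

static uint32_t
copysign_bits (uint32_t x, uint32_t y)
{
  const uint32_t sign = 0x80000000u;	/* the "mask" operand */
  return (x & ~sign) | (y & sign);	/* magnitude of x, sign of y */
}
#endif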
10599 /* Return TRUE or FALSE depending on whether the first SET in INSN
10600 has source and destination with matching CC modes and whether the
10601 CC mode is at least as constrained as REQ_MODE.  */
10604 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
10607 enum machine_mode set_mode;
10609 set = PATTERN (insn);
10610 if (GET_CODE (set) == PARALLEL)
10611 set = XVECEXP (set, 0, 0);
10612 gcc_assert (GET_CODE (set) == SET);
10613 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
10615 set_mode = GET_MODE (SET_DEST (set));
10619 if (req_mode != CCNOmode
10620 && (req_mode != CCmode
10621 || XEXP (SET_SRC (set), 1) != const0_rtx))
10625 if (req_mode == CCGCmode)
10629 if (req_mode == CCGOCmode || req_mode == CCNOmode)
10633 if (req_mode == CCZmode)
10640 gcc_unreachable ();
10643 return (GET_MODE (SET_SRC (set)) == set_mode);
10646 /* Generate insn patterns to do an integer compare of OPERANDS. */
10649 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
10651 enum machine_mode cmpmode;
10654 cmpmode = SELECT_CC_MODE (code, op0, op1);
10655 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
10657 /* This is very simple, but making the interface the same as in the
10658 FP case makes the rest of the code easier. */
10659 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
10660 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
10662 /* Return the test that should be put into the flags user, i.e.
10663 the bcc, scc, or cmov instruction. */
10664 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
10667 /* Figure out whether to use ordered or unordered fp comparisons.
10668 Return the appropriate mode to use. */
10671 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
10673 /* ??? In order to make all comparisons reversible, we do all comparisons
10674 non-trapping when compiling for IEEE.  Once gcc is able to distinguish
10675 all forms of trapping and nontrapping comparisons, we can make inequality
10676 comparisons trapping again, since it results in better code when using
10677 FCOM based compares.  */
10678 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
10682 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
10684 enum machine_mode mode = GET_MODE (op0);
10686 if (SCALAR_FLOAT_MODE_P (mode))
10688 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
10689 return ix86_fp_compare_mode (code);
10694 /* Only zero flag is needed. */
10695 case EQ: /* ZF=0 */
10696 case NE: /* ZF!=0 */
10698 /* Codes needing carry flag. */
10699 case GEU: /* CF=0 */
10700 case GTU: /* CF=0 & ZF=0 */
10701 case LTU: /* CF=1 */
10702 case LEU: /* CF=1 | ZF=1 */
10704 /* Codes possibly doable only with sign flag when
10705 comparing against zero. */
10706 case GE: /* SF=OF or SF=0 */
10707 case LT: /* SF<>OF or SF=1 */
10708 if (op1 == const0_rtx)
10711 /* For other cases the carry flag is not required.  */
10713 /* Codes doable only with the sign flag when comparing
10714 against zero, but for which we lack a jump instruction,
10715 so we need to use relational tests against the overflow
10716 flag, which thus needs to be zero.  */
10717 case GT: /* ZF=0 & SF=OF */
10718 case LE: /* ZF=1 | SF<>OF */
10719 if (op1 == const0_rtx)
10723 /* The strcmp pattern does a (use flags), and combine may ask us for a proper mode.  */
10728 gcc_unreachable ();
10732 /* Return the fixed registers used for condition codes. */
10735 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
10742 /* If two condition code modes are compatible, return a condition code
10743 mode which is compatible with both. Otherwise, return
10746 static enum machine_mode
10747 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
10752 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
10755 if ((m1 == CCGCmode && m2 == CCGOCmode)
10756 || (m1 == CCGOCmode && m2 == CCGCmode))
10762 gcc_unreachable ();
10784 /* These are only compatible with themselves, which we already checked above.  */
10790 /* Split comparison code CODE into comparisons we can do using branch
10791 instructions.  BYPASS_CODE is the comparison code for a branch that will
10792 branch around FIRST_CODE and SECOND_CODE.  If one of the branches
10793 is not required, its value is set to UNKNOWN.
10794 We never require more than two branches.  */
10797 ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
10798 enum rtx_code *first_code,
10799 enum rtx_code *second_code)
10801 *first_code = code;
10802 *bypass_code = UNKNOWN;
10803 *second_code = UNKNOWN;
10805 /* The fcomi comparison sets flags as follows:
10815 case GT: /* GTU - CF=0 & ZF=0 */
10816 case GE: /* GEU - CF=0 */
10817 case ORDERED: /* PF=0 */
10818 case UNORDERED: /* PF=1 */
10819 case UNEQ: /* EQ - ZF=1 */
10820 case UNLT: /* LTU - CF=1 */
10821 case UNLE: /* LEU - CF=1 | ZF=1 */
10822 case LTGT: /* EQ - ZF=0 */
10824 case LT: /* LTU - CF=1 - fails on unordered */
10825 *first_code = UNLT;
10826 *bypass_code = UNORDERED;
10828 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
10829 *first_code = UNLE;
10830 *bypass_code = UNORDERED;
10832 case EQ: /* EQ - ZF=1 - fails on unordered */
10833 *first_code = UNEQ;
10834 *bypass_code = UNORDERED;
10836 case NE: /* NE - ZF=0 - fails on unordered */
10837 *first_code = LTGT;
10838 *second_code = UNORDERED;
10840 case UNGE: /* GEU - CF=0 - fails on unordered */
10842 *second_code = UNORDERED;
10844 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
10846 *second_code = UNORDERED;
10849 gcc_unreachable ();
10851 if (!TARGET_IEEE_FP)
10853 *second_code = UNKNOWN;
10854 *bypass_code = UNKNOWN;
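/* Illustrative note (not from the original sources): for example, an
   IEEE "a < b" cannot use a bare "jb" after fcomi, because unordered
   operands set CF as well.  The switch above therefore splits LT into
   a bypass branch on UNORDERED (jp) followed by the UNLT test (jb),
   which is then safe.  */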
10858 /* Return the cost of a comparison done with fcom + arithmetic operations on AX.
10859 All the following functions use the number of instructions as a cost metric.
10860 In the future this should be tweaked to compute bytes for optimize_size and
10861 take into account the performance of various instructions on various CPUs.  */
10863 ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
10865 if (!TARGET_IEEE_FP)
10867 /* The cost of code output by ix86_expand_fp_compare. */
10891 gcc_unreachable ();
10895 /* Return cost of comparison done using fcomi operation.
10896 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10898 ix86_fp_comparison_fcomi_cost (enum rtx_code code)
10900 enum rtx_code bypass_code, first_code, second_code;
10901 /* Return an arbitrarily high cost when the instruction is not supported;
10902 this prevents gcc from using it.  */
10905 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10906 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 2;
10909 /* Return cost of comparison done using sahf operation.
10910 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10912 ix86_fp_comparison_sahf_cost (enum rtx_code code)
10914 enum rtx_code bypass_code, first_code, second_code;
10915 /* Return an arbitrarily high cost when the instruction is not preferred;
10916 this prevents gcc from using it.  */
10917 if (!(TARGET_SAHF && (TARGET_USE_SAHF || optimize_size)))
10919 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10920 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 3;
10923 /* Compute cost of the comparison done using any method.
10924 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10926 ix86_fp_comparison_cost (enum rtx_code code)
10928 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
10931 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
10932 sahf_cost = ix86_fp_comparison_sahf_cost (code);
10934 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
10935 if (min > sahf_cost)
10937 if (min > fcomi_cost)
10942 /* Return true if we should use an FCOMI instruction for this
10946 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
10948 enum rtx_code swapped_code = swap_condition (code);
10950 return ((ix86_fp_comparison_cost (code)
10951 == ix86_fp_comparison_fcomi_cost (code))
10952 || (ix86_fp_comparison_cost (swapped_code)
10953 == ix86_fp_comparison_fcomi_cost (swapped_code)));
10956 /* Swap, force into registers, or otherwise massage the two operands
10957 to a fp comparison. The operands are updated in place; the new
10958 comparison code is returned. */
10960 static enum rtx_code
10961 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
10963 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
10964 rtx op0 = *pop0, op1 = *pop1;
10965 enum machine_mode op_mode = GET_MODE (op0);
10966 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
10968 /* All of the unordered compare instructions only work on registers.
10969 The same is true of the fcomi compare instructions. The XFmode
10970 compare instructions require registers except when comparing
10971 against zero or when converting operand 1 from fixed point to floating point.  */
10975 && (fpcmp_mode == CCFPUmode
10976 || (op_mode == XFmode
10977 && ! (standard_80387_constant_p (op0) == 1
10978 || standard_80387_constant_p (op1) == 1)
10979 && GET_CODE (op1) != FLOAT)
10980 || ix86_use_fcomi_compare (code)))
10982 op0 = force_reg (op_mode, op0);
10983 op1 = force_reg (op_mode, op1);
10987 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
10988 things around if they appear profitable, otherwise force op0
10989 into a register. */
10991 if (standard_80387_constant_p (op0) == 0
10993 && ! (standard_80387_constant_p (op1) == 0
10997 tmp = op0, op0 = op1, op1 = tmp;
10998 code = swap_condition (code);
11002 op0 = force_reg (op_mode, op0);
11004 if (CONSTANT_P (op1))
11006 int tmp = standard_80387_constant_p (op1);
11008 op1 = validize_mem (force_const_mem (op_mode, op1));
11012 op1 = force_reg (op_mode, op1);
11015 op1 = force_reg (op_mode, op1);
11019 /* Try to rearrange the comparison to make it cheaper. */
11020 if (ix86_fp_comparison_cost (code)
11021 > ix86_fp_comparison_cost (swap_condition (code))
11022 && (REG_P (op1) || !no_new_pseudos))
11025 tmp = op0, op0 = op1, op1 = tmp;
11026 code = swap_condition (code);
11028 op0 = force_reg (op_mode, op0);
11036 /* Convert comparison codes we use to represent FP comparison to integer
11037 code that will result in proper branch. Return UNKNOWN if no such code
11041 ix86_fp_compare_code_to_integer (enum rtx_code code)
11070 /* Generate insn patterns to do a floating point compare of OPERANDS. */
11073 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
11074 rtx *second_test, rtx *bypass_test)
11076 enum machine_mode fpcmp_mode, intcmp_mode;
11078 int cost = ix86_fp_comparison_cost (code);
11079 enum rtx_code bypass_code, first_code, second_code;
11081 fpcmp_mode = ix86_fp_compare_mode (code);
11082 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
11085 *second_test = NULL_RTX;
11087 *bypass_test = NULL_RTX;
11089 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11091 /* Do fcomi/sahf based test when profitable. */
11092 if ((TARGET_CMOVE || TARGET_SAHF)
11093 && (bypass_code == UNKNOWN || bypass_test)
11094 && (second_code == UNKNOWN || second_test)
11095 && ix86_fp_comparison_arithmetics_cost (code) > cost)
11099 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
11100 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
11106 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
11107 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
11109 scratch = gen_reg_rtx (HImode);
11110 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
11111 emit_insn (gen_x86_sahf_1 (scratch));
11114 /* The FP codes work out to act like unsigned. */
11115 intcmp_mode = fpcmp_mode;
11117 if (bypass_code != UNKNOWN)
11118 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
11119 gen_rtx_REG (intcmp_mode, FLAGS_REG),
11121 if (second_code != UNKNOWN)
11122 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
11123 gen_rtx_REG (intcmp_mode, FLAGS_REG),
11128 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
11129 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
11130 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
11132 scratch = gen_reg_rtx (HImode);
11133 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
11135 /* In the unordered case, we have to check C2 for NaN's, which
11136 doesn't happen to work out to anything nice combination-wise.
11137 So do some bit twiddling on the value we've got in AH to come
11138 up with an appropriate set of condition codes. */
11140 intcmp_mode = CCNOmode;
11145 if (code == GT || !TARGET_IEEE_FP)
11147 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
11152 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11153 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
11154 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
11155 intcmp_mode = CCmode;
11161 if (code == LT && TARGET_IEEE_FP)
11163 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11164 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
11165 intcmp_mode = CCmode;
11170 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
11176 if (code == GE || !TARGET_IEEE_FP)
11178 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
11183 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11184 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
11191 if (code == LE && TARGET_IEEE_FP)
11193 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11194 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
11195 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
11196 intcmp_mode = CCmode;
11201 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
11207 if (code == EQ && TARGET_IEEE_FP)
11209 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11210 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
11211 intcmp_mode = CCmode;
11216 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
11223 if (code == NE && TARGET_IEEE_FP)
11225 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11226 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
11232 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
11238 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
11242 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
11247 gcc_unreachable ();
11251 /* Return the test that should be put into the flags user, i.e.
11252 the bcc, scc, or cmov instruction. */
11253 return gen_rtx_fmt_ee (code, VOIDmode,
11254 gen_rtx_REG (intcmp_mode, FLAGS_REG),
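/* Illustrative note, not part of the original file: after fnstsw/sahf the
   x87 condition bits land in AH as C0 = 0x01, C2 = 0x04 and C3 = 0x40, so
   the mask 0x45 used above selects all three.  fcom leaves them as
   a > b : 0x00, a < b : 0x01, a == b : 0x40, unordered : 0x45, which is
   what the constants above are keyed to.  A hypothetical decode of the
   GT case:  */
#if 0
static int
fpsw_gt (unsigned char ah)
{
  return (ah & 0x45) == 0;	/* GT: none of C0, C2, C3 set.  */
}
#endif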
11259 ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
11262 op0 = ix86_compare_op0;
11263 op1 = ix86_compare_op1;
11266 *second_test = NULL_RTX;
11268 *bypass_test = NULL_RTX;
11270 if (ix86_compare_emitted)
11272 ret = gen_rtx_fmt_ee (code, VOIDmode, ix86_compare_emitted, const0_rtx);
11273 ix86_compare_emitted = NULL_RTX;
11275 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
11277 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
11278 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
11279 second_test, bypass_test);
11282 ret = ix86_expand_int_compare (code, op0, op1);
11287 /* Return true if the CODE will result in nontrivial jump sequence. */
11289 ix86_fp_jump_nontrivial_p (enum rtx_code code)
11291 enum rtx_code bypass_code, first_code, second_code;
11294 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11295 return bypass_code != UNKNOWN || second_code != UNKNOWN;
11299 ix86_expand_branch (enum rtx_code code, rtx label)
11303 /* If we have emitted a compare insn, go straight to simple.
11304 ix86_expand_compare won't emit anything if ix86_compare_emitted
11306 if (ix86_compare_emitted)
11309 switch (GET_MODE (ix86_compare_op0))
11315 tmp = ix86_expand_compare (code, NULL, NULL);
11316 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
11317 gen_rtx_LABEL_REF (VOIDmode, label),
11319 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
11328 enum rtx_code bypass_code, first_code, second_code;
11330 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
11331 &ix86_compare_op1);
11333 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11335   /* Check whether we will use the natural sequence with one jump.  If
11336      so, we can expand the jump early.  Otherwise delay expansion by
11337      creating a compound insn so as not to confuse the optimizers.  */
11338 if (bypass_code == UNKNOWN && second_code == UNKNOWN
11341 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
11342 gen_rtx_LABEL_REF (VOIDmode, label),
11343 pc_rtx, NULL_RTX, NULL_RTX);
11347 tmp = gen_rtx_fmt_ee (code, VOIDmode,
11348 ix86_compare_op0, ix86_compare_op1);
11349 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
11350 gen_rtx_LABEL_REF (VOIDmode, label),
11352 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
11354 use_fcomi = ix86_use_fcomi_compare (code);
11355 vec = rtvec_alloc (3 + !use_fcomi);
11356 RTVEC_ELT (vec, 0) = tmp;
11358 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
11360 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
11363 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
11365 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
11374 /* Expand DImode branch into multiple compare+branch. */
11376 rtx lo[2], hi[2], label2;
11377 enum rtx_code code1, code2, code3;
11378 enum machine_mode submode;
11380 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
11382 tmp = ix86_compare_op0;
11383 ix86_compare_op0 = ix86_compare_op1;
11384 ix86_compare_op1 = tmp;
11385 code = swap_condition (code);
11387 if (GET_MODE (ix86_compare_op0) == DImode)
11389 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
11390 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
11395 split_ti (&ix86_compare_op0, 1, lo+0, hi+0);
11396 split_ti (&ix86_compare_op1, 1, lo+1, hi+1);
11400 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
11401 avoid two branches. This costs one extra insn, so disable when
11402 optimizing for size. */
11404 if ((code == EQ || code == NE)
11406 || hi[1] == const0_rtx || lo[1] == const0_rtx))
11411 if (hi[1] != const0_rtx)
11412 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
11413 NULL_RTX, 0, OPTAB_WIDEN);
11416 if (lo[1] != const0_rtx)
11417 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
11418 NULL_RTX, 0, OPTAB_WIDEN);
11420 tmp = expand_binop (submode, ior_optab, xor1, xor0,
11421 NULL_RTX, 0, OPTAB_WIDEN);
11423 ix86_compare_op0 = tmp;
11424 ix86_compare_op1 = const0_rtx;
11425 ix86_expand_branch (code, label);
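/* Illustrative sketch, not part of the original file: the xor/ior sequence
   just emitted reduces the double-word equality test to a single compare
   against zero, as in this hypothetical C equivalent for DImode on a
   32-bit target:  */
#if 0
static int
di_equal (unsigned int lo0, unsigned int hi0,
	  unsigned int lo1, unsigned int hi1)
{
  /* Zero iff both halves match - one compare instead of two branches.  */
  return ((hi0 ^ hi1) | (lo0 ^ lo1)) == 0;
}
#endif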
11429 /* Otherwise, if we are doing less-than or greater-or-equal-than,
11430 op1 is a constant and the low word is zero, then we can just
11431 examine the high word. */
11433 if (CONST_INT_P (hi[1]) && lo[1] == const0_rtx)
11436 case LT: case LTU: case GE: case GEU:
11437 ix86_compare_op0 = hi[0];
11438 ix86_compare_op1 = hi[1];
11439 ix86_expand_branch (code, label);
11445 /* Otherwise, we need two or three jumps. */
11447 label2 = gen_label_rtx ();
11450 code2 = swap_condition (code);
11451 code3 = unsigned_condition (code);
11455 case LT: case GT: case LTU: case GTU:
11458 case LE: code1 = LT; code2 = GT; break;
11459 case GE: code1 = GT; code2 = LT; break;
11460 case LEU: code1 = LTU; code2 = GTU; break;
11461 case GEU: code1 = GTU; code2 = LTU; break;
11463 case EQ: code1 = UNKNOWN; code2 = NE; break;
11464 case NE: code2 = UNKNOWN; break;
11467 gcc_unreachable ();
11472 * if (hi(a) < hi(b)) goto true;
11473 * if (hi(a) > hi(b)) goto false;
11474 * if (lo(a) < lo(b)) goto true;
11478 ix86_compare_op0 = hi[0];
11479 ix86_compare_op1 = hi[1];
11481 if (code1 != UNKNOWN)
11482 ix86_expand_branch (code1, label);
11483 if (code2 != UNKNOWN)
11484 ix86_expand_branch (code2, label2);
11486 ix86_compare_op0 = lo[0];
11487 ix86_compare_op1 = lo[1];
11488 ix86_expand_branch (code3, label);
11490 if (code2 != UNKNOWN)
11491 emit_label (label2);
11496 gcc_unreachable ();
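/* Illustrative sketch, not part of the original file: the two/three jump
   sequence built above corresponds to this hypothetical C skeleton for a
   signed double-word a < b on a 32-bit target:  */
#if 0
static int
di_less (int hi_a, unsigned int lo_a, int hi_b, unsigned int lo_b)
{
  if (hi_a < hi_b)		/* code1 tests the high words.  */
    return 1;
  if (hi_a > hi_b)		/* code2 skips past the low-word test.  */
    return 0;
  return lo_a < lo_b;		/* code3 is the unsigned low-word test.  */
}
#endif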
11500 /* Split branch based on floating point condition. */
11502 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
11503 rtx target1, rtx target2, rtx tmp, rtx pushed)
11505 rtx second, bypass;
11506 rtx label = NULL_RTX;
11508 int bypass_probability = -1, second_probability = -1, probability = -1;
11511 if (target2 != pc_rtx)
11514 code = reverse_condition_maybe_unordered (code);
11519 condition = ix86_expand_fp_compare (code, op1, op2,
11520 tmp, &second, &bypass);
11522 /* Remove pushed operand from stack. */
11524 ix86_free_from_memory (GET_MODE (pushed));
11526 if (split_branch_probability >= 0)
11528 /* Distribute the probabilities across the jumps.
11529 Assume the BYPASS and SECOND to be always test
11531 probability = split_branch_probability;
11533      /* A value of 1 is low enough that the probability does not need
11534         to be updated.  Later we may run some experiments and see
11535         whether unordered values are more frequent in practice.  */
11537 bypass_probability = 1;
11539 second_probability = 1;
11541 if (bypass != NULL_RTX)
11543 label = gen_label_rtx ();
11544 i = emit_jump_insn (gen_rtx_SET
11546 gen_rtx_IF_THEN_ELSE (VOIDmode,
11548 gen_rtx_LABEL_REF (VOIDmode,
11551 if (bypass_probability >= 0)
11553 = gen_rtx_EXPR_LIST (REG_BR_PROB,
11554 GEN_INT (bypass_probability),
11557 i = emit_jump_insn (gen_rtx_SET
11559 gen_rtx_IF_THEN_ELSE (VOIDmode,
11560 condition, target1, target2)));
11561 if (probability >= 0)
11563 = gen_rtx_EXPR_LIST (REG_BR_PROB,
11564 GEN_INT (probability),
11566 if (second != NULL_RTX)
11568 i = emit_jump_insn (gen_rtx_SET
11570 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
11572 if (second_probability >= 0)
11574 = gen_rtx_EXPR_LIST (REG_BR_PROB,
11575 GEN_INT (second_probability),
11578 if (label != NULL_RTX)
11579 emit_label (label);
11583 ix86_expand_setcc (enum rtx_code code, rtx dest)
11585 rtx ret, tmp, tmpreg, equiv;
11586 rtx second_test, bypass_test;
11588 if (GET_MODE (ix86_compare_op0) == (TARGET_64BIT ? TImode : DImode))
11589 return 0; /* FAIL */
11591 gcc_assert (GET_MODE (dest) == QImode);
11593 ret = ix86_expand_compare (code, &second_test, &bypass_test);
11594 PUT_MODE (ret, QImode);
11599 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
11600 if (bypass_test || second_test)
11602 rtx test = second_test;
11604 rtx tmp2 = gen_reg_rtx (QImode);
11607 gcc_assert (!second_test);
11608 test = bypass_test;
11610 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
11612 PUT_MODE (test, QImode);
11613 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
11616 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
11618 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
11621 /* Attach a REG_EQUAL note describing the comparison result. */
11622 if (ix86_compare_op0 && ix86_compare_op1)
11624 equiv = simplify_gen_relational (code, QImode,
11625 GET_MODE (ix86_compare_op0),
11626 ix86_compare_op0, ix86_compare_op1);
11627 set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
11630 return 1; /* DONE */
11633 /* Expand comparison setting or clearing carry flag. Return true when
11634 successful and set pop for the operation. */
11636 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
11638 enum machine_mode mode =
11639 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
11641   /* Do not handle DImode compares that go through a special path.
11642      Also we can't deal with all FP compares yet; this is possible to add.  */
11643 if (mode == (TARGET_64BIT ? TImode : DImode))
11646 if (SCALAR_FLOAT_MODE_P (mode))
11648 rtx second_test = NULL, bypass_test = NULL;
11649 rtx compare_op, compare_seq;
11651 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
11653 /* Shortcut: following common codes never translate
11654 into carry flag compares. */
11655 if (code == EQ || code == NE || code == UNEQ || code == LTGT
11656 || code == ORDERED || code == UNORDERED)
11659 /* These comparisons require zero flag; swap operands so they won't. */
11660 if ((code == GT || code == UNLE || code == LE || code == UNGT)
11661 && !TARGET_IEEE_FP)
11666 code = swap_condition (code);
11669   /* Try to expand the comparison and verify that we end up with a carry
11670      flag based comparison.  This fails only when we decide to expand the
11671      comparison using arithmetic, which is not a common scenario.  */
11673 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
11674 &second_test, &bypass_test);
11675 compare_seq = get_insns ();
11678 if (second_test || bypass_test)
11680 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
11681 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
11682 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
11684 code = GET_CODE (compare_op);
11685 if (code != LTU && code != GEU)
11687 emit_insn (compare_seq);
11691 if (!INTEGRAL_MODE_P (mode))
11699 /* Convert a==0 into (unsigned)a<1. */
11702 if (op1 != const0_rtx)
11705 code = (code == EQ ? LTU : GEU);
11708 /* Convert a>b into b<a or a>=b-1. */
11711 if (CONST_INT_P (op1))
11713 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
11714 /* Bail out on overflow. We still can swap operands but that
11715 would force loading of the constant into register. */
11716 if (op1 == const0_rtx
11717 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
11719 code = (code == GTU ? GEU : LTU);
11726 code = (code == GTU ? LTU : GEU);
11730 /* Convert a>=0 into (unsigned)a<0x80000000. */
11733 if (mode == DImode || op1 != const0_rtx)
11735 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
11736 code = (code == LT ? GEU : LTU);
11740 if (mode == DImode || op1 != constm1_rtx)
11742 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
11743 code = (code == LE ? GEU : LTU);
11749   /* Swapping operands may cause a constant to appear as the first operand.  */
11750 if (!nonimmediate_operand (op0, VOIDmode))
11752 if (no_new_pseudos)
11754 op0 = force_reg (mode, op0);
11756 ix86_compare_op0 = op0;
11757 ix86_compare_op1 = op1;
11758 *pop = ix86_expand_compare (code, NULL, NULL);
11759 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
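/* Illustrative sketch, not part of the original file: the rewrites above
   all turn a test into an unsigned compare whose result lives entirely in
   the carry flag.  Hypothetical C statements of the identities used:  */
#if 0
static int
carry_flag_identities (unsigned int a, unsigned int b, int s)
{
  int ok = 1;
  ok &= (a == 0) == (a < 1);			/* a == 0  ->  a <u 1.  */
  ok &= (a > b) == (b < a);			/* a >u b  ->  b <u a.  */
  ok &= (s >= 0) == ((unsigned int) s < 0x80000000u);	/* sign test.  */
  return ok;
}
#endif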
11764 ix86_expand_int_movcc (rtx operands[])
11766 enum rtx_code code = GET_CODE (operands[1]), compare_code;
11767 rtx compare_seq, compare_op;
11768 rtx second_test, bypass_test;
11769 enum machine_mode mode = GET_MODE (operands[0]);
11770   bool sign_bit_compare_p = false;
11773 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
11774 compare_seq = get_insns ();
11777 compare_code = GET_CODE (compare_op);
11779 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
11780 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
11781 sign_bit_compare_p = true;
11783 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
11784 HImode insns, we'd be swallowed in word prefix ops. */
11786 if ((mode != HImode || TARGET_FAST_PREFIX)
11787 && (mode != (TARGET_64BIT ? TImode : DImode))
11788 && CONST_INT_P (operands[2])
11789 && CONST_INT_P (operands[3]))
11791 rtx out = operands[0];
11792 HOST_WIDE_INT ct = INTVAL (operands[2]);
11793 HOST_WIDE_INT cf = INTVAL (operands[3]);
11794 HOST_WIDE_INT diff;
11797 /* Sign bit compares are better done using shifts than we do by using
11799 if (sign_bit_compare_p
11800 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
11801 ix86_compare_op1, &compare_op))
11803 /* Detect overlap between destination and compare sources. */
11806 if (!sign_bit_compare_p)
11808 bool fpcmp = false;
11810 compare_code = GET_CODE (compare_op);
11812 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
11813 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
11816 compare_code = ix86_fp_compare_code_to_integer (compare_code);
11819	  /* To simplify the rest of the code, restrict to the GEU case.  */
11820 if (compare_code == LTU)
11822 HOST_WIDE_INT tmp = ct;
11825 compare_code = reverse_condition (compare_code);
11826 code = reverse_condition (code);
11831 PUT_CODE (compare_op,
11832 reverse_condition_maybe_unordered
11833 (GET_CODE (compare_op)));
11835 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
11839 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
11840 || reg_overlap_mentioned_p (out, ix86_compare_op1))
11841 tmp = gen_reg_rtx (mode);
11843 if (mode == DImode)
11844 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
11846 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
11850 if (code == GT || code == GE)
11851 code = reverse_condition (code);
11854 HOST_WIDE_INT tmp = ct;
11859 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
11860 ix86_compare_op1, VOIDmode, 0, -1);
11873 tmp = expand_simple_binop (mode, PLUS,
11875 copy_rtx (tmp), 1, OPTAB_DIRECT);
11886 tmp = expand_simple_binop (mode, IOR,
11888 copy_rtx (tmp), 1, OPTAB_DIRECT);
11890 else if (diff == -1 && ct)
11900 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
11902 tmp = expand_simple_binop (mode, PLUS,
11903 copy_rtx (tmp), GEN_INT (cf),
11904 copy_rtx (tmp), 1, OPTAB_DIRECT);
11912 * andl cf - ct, dest
11922 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
11925 tmp = expand_simple_binop (mode, AND,
11927 gen_int_mode (cf - ct, mode),
11928 copy_rtx (tmp), 1, OPTAB_DIRECT);
11930 tmp = expand_simple_binop (mode, PLUS,
11931 copy_rtx (tmp), GEN_INT (ct),
11932 copy_rtx (tmp), 1, OPTAB_DIRECT);
11935 if (!rtx_equal_p (tmp, out))
11936 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
11938 return 1; /* DONE */
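/* Illustrative sketch, not part of the original file: one common variant
   of the sbb sequence above - materialize the comparison as an
   all-ones/all-zeros mask and select between CT and CF without a branch
   (assumes ct - cf does not overflow):  */
#if 0
static int
mask_select (int cond, int ct, int cf)
{
  int mask = -(cond != 0);		/* -1 when true, 0 when false (sbb).  */
  return cf + ((ct - cf) & mask);	/* cond ? ct : cf.  */
}
#endif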
11943 enum machine_mode cmp_mode = GET_MODE (ix86_compare_op0);
11946 tmp = ct, ct = cf, cf = tmp;
11949 if (SCALAR_FLOAT_MODE_P (cmp_mode))
11951 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
11953	  /* We may be reversing an unordered compare to a normal compare, which
11954	     is not valid in general (we may convert a non-trapping condition
11955	     to a trapping one); however, on i386 we currently emit all
11956	     comparisons unordered.  */
11957 compare_code = reverse_condition_maybe_unordered (compare_code);
11958 code = reverse_condition_maybe_unordered (code);
11962 compare_code = reverse_condition (compare_code);
11963 code = reverse_condition (code);
11967 compare_code = UNKNOWN;
11968 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
11969 && CONST_INT_P (ix86_compare_op1))
11971 if (ix86_compare_op1 == const0_rtx
11972 && (code == LT || code == GE))
11973 compare_code = code;
11974 else if (ix86_compare_op1 == constm1_rtx)
11978 else if (code == GT)
11983 /* Optimize dest = (op0 < 0) ? -1 : cf. */
11984 if (compare_code != UNKNOWN
11985 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
11986 && (cf == -1 || ct == -1))
11988	      /* If the lea code below could be used, only optimize
11989		 if it results in a 2-insn sequence.  */
11991 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
11992 || diff == 3 || diff == 5 || diff == 9)
11993 || (compare_code == LT && ct == -1)
11994 || (compare_code == GE && cf == -1))
11997 * notl op1 (if necessary)
12005 code = reverse_condition (code);
12008 out = emit_store_flag (out, code, ix86_compare_op0,
12009 ix86_compare_op1, VOIDmode, 0, -1);
12011 out = expand_simple_binop (mode, IOR,
12013 out, 1, OPTAB_DIRECT);
12014 if (out != operands[0])
12015 emit_move_insn (operands[0], out);
12017 return 1; /* DONE */
12022 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
12023 || diff == 3 || diff == 5 || diff == 9)
12024 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
12026 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
12032 * lea cf(dest*(ct-cf)),dest
12036 * This also catches the degenerate setcc-only case.
12042 out = emit_store_flag (out, code, ix86_compare_op0,
12043 ix86_compare_op1, VOIDmode, 0, 1);
12046	  /* On x86_64 the lea instruction operates on Pmode, so we need
12047	     to get the arithmetic done in the proper mode to match.  */
12049 tmp = copy_rtx (out);
12053 out1 = copy_rtx (out);
12054 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
12058 tmp = gen_rtx_PLUS (mode, tmp, out1);
12064 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
12067 if (!rtx_equal_p (tmp, out))
12070 out = force_operand (tmp, copy_rtx (out));
12072 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
12074 if (!rtx_equal_p (out, operands[0]))
12075 emit_move_insn (operands[0], copy_rtx (out));
12077 return 1; /* DONE */
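/* Illustrative sketch, not part of the original file: when ct - cf is a
   scale lea can encode, the whole select collapses to a flag
   materialization plus one address computation, roughly:  */
#if 0
static int
lea_select (int cond, int cf, int diff)	/* diff = ct - cf: 1,2,4,8,...  */
{
  int bit = cond != 0;			/* setcc gives 0 or 1.  */
  return bit * diff + cf;		/* one lea: cf(,bit,diff).  */
}
#endif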
12081 * General case: Jumpful:
12082 * xorl dest,dest cmpl op1, op2
12083 * cmpl op1, op2 movl ct, dest
12084 * setcc dest jcc 1f
12085 * decl dest movl cf, dest
12086 * andl (cf-ct),dest 1:
12089 * Size 20. Size 14.
12091 * This is reasonably steep, but branch mispredict costs are
12092 * high on modern cpus, so consider failing only if optimizing
12096 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
12097 && BRANCH_COST >= 2)
12101 enum machine_mode cmp_mode = GET_MODE (ix86_compare_op0);
12106 if (SCALAR_FLOAT_MODE_P (cmp_mode))
12108 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
12110	      /* We may be reversing an unordered compare to a normal compare,
12111		 which is not valid in general (we may convert a non-trapping
12112		 condition to a trapping one); however, on i386 we currently
12113		 emit all comparisons unordered.  */
12114 code = reverse_condition_maybe_unordered (code);
12118 code = reverse_condition (code);
12119 if (compare_code != UNKNOWN)
12120 compare_code = reverse_condition (compare_code);
12124 if (compare_code != UNKNOWN)
12126 /* notl op1 (if needed)
12131 For x < 0 (resp. x <= -1) there will be no notl,
12132 so if possible swap the constants to get rid of the
12134 True/false will be -1/0 while code below (store flag
12135 followed by decrement) is 0/-1, so the constants need
12136 to be exchanged once more. */
12138 if (compare_code == GE || !cf)
12140 code = reverse_condition (code);
12145 HOST_WIDE_INT tmp = cf;
12150 out = emit_store_flag (out, code, ix86_compare_op0,
12151 ix86_compare_op1, VOIDmode, 0, -1);
12155 out = emit_store_flag (out, code, ix86_compare_op0,
12156 ix86_compare_op1, VOIDmode, 0, 1);
12158 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
12159 copy_rtx (out), 1, OPTAB_DIRECT);
12162 out = expand_simple_binop (mode, AND, copy_rtx (out),
12163 gen_int_mode (cf - ct, mode),
12164 copy_rtx (out), 1, OPTAB_DIRECT);
12166 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
12167 copy_rtx (out), 1, OPTAB_DIRECT);
12168 if (!rtx_equal_p (out, operands[0]))
12169 emit_move_insn (operands[0], copy_rtx (out));
12171 return 1; /* DONE */
12175 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
12177 /* Try a few things more with specific constants and a variable. */
12180 rtx var, orig_out, out, tmp;
12182 if (BRANCH_COST <= 2)
12183 return 0; /* FAIL */
12185 /* If one of the two operands is an interesting constant, load a
12186 constant with the above and mask it in with a logical operation. */
12188 if (CONST_INT_P (operands[2]))
12191 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
12192 operands[3] = constm1_rtx, op = and_optab;
12193 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
12194 operands[3] = const0_rtx, op = ior_optab;
12196 return 0; /* FAIL */
12198 else if (CONST_INT_P (operands[3]))
12201 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
12202 operands[2] = constm1_rtx, op = and_optab;
12203	  else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
12204 operands[2] = const0_rtx, op = ior_optab;
12206 return 0; /* FAIL */
12209 return 0; /* FAIL */
12211 orig_out = operands[0];
12212 tmp = gen_reg_rtx (mode);
12215 /* Recurse to get the constant loaded. */
12216 if (ix86_expand_int_movcc (operands) == 0)
12217 return 0; /* FAIL */
12219 /* Mask in the interesting variable. */
12220 out = expand_binop (mode, op, var, tmp, orig_out, 0,
12222 if (!rtx_equal_p (out, orig_out))
12223 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
12225 return 1; /* DONE */
12229 * For comparison with above,
12239 if (! nonimmediate_operand (operands[2], mode))
12240 operands[2] = force_reg (mode, operands[2]);
12241 if (! nonimmediate_operand (operands[3], mode))
12242 operands[3] = force_reg (mode, operands[3]);
12244 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
12246 rtx tmp = gen_reg_rtx (mode);
12247 emit_move_insn (tmp, operands[3]);
12250 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
12252 rtx tmp = gen_reg_rtx (mode);
12253 emit_move_insn (tmp, operands[2]);
12257 if (! register_operand (operands[2], VOIDmode)
12259 || ! register_operand (operands[3], VOIDmode)))
12260 operands[2] = force_reg (mode, operands[2]);
12263 && ! register_operand (operands[3], VOIDmode))
12264 operands[3] = force_reg (mode, operands[3]);
12266 emit_insn (compare_seq);
12267 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
12268 gen_rtx_IF_THEN_ELSE (mode,
12269 compare_op, operands[2],
12272 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
12273 gen_rtx_IF_THEN_ELSE (mode,
12275 copy_rtx (operands[3]),
12276 copy_rtx (operands[0]))));
12278 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
12279 gen_rtx_IF_THEN_ELSE (mode,
12281 copy_rtx (operands[2]),
12282 copy_rtx (operands[0]))));
12284 return 1; /* DONE */
12287 /* Swap, force into registers, or otherwise massage the two operands
12288 to an sse comparison with a mask result. Thus we differ a bit from
12289 ix86_prepare_fp_compare_args which expects to produce a flags result.
12291 The DEST operand exists to help determine whether to commute commutative
12292 operators. The POP0/POP1 operands are updated in place. The new
12293 comparison code is returned, or UNKNOWN if not implementable. */
12295 static enum rtx_code
12296 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
12297 rtx *pop0, rtx *pop1)
12305 /* We have no LTGT as an operator. We could implement it with
12306 NE & ORDERED, but this requires an extra temporary. It's
12307 not clear that it's worth it. */
12314 /* These are supported directly. */
12321 /* For commutative operators, try to canonicalize the destination
12322 operand to be first in the comparison - this helps reload to
12323 avoid extra moves. */
12324 if (!dest || !rtx_equal_p (dest, *pop1))
12332 /* These are not supported directly. Swap the comparison operands
12333 to transform into something that is supported. */
12337 code = swap_condition (code);
12341 gcc_unreachable ();
12347 /* Detect conditional moves that exactly match min/max operational
12348 semantics. Note that this is IEEE safe, as long as we don't
12349 interchange the operands.
12351 Returns FALSE if this conditional move doesn't match a MIN/MAX,
12352 and TRUE if the operation is successful and instructions are emitted. */
12355 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
12356 rtx cmp_op1, rtx if_true, rtx if_false)
12358 enum machine_mode mode;
12364 else if (code == UNGE)
12367 if_true = if_false;
12373 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
12375 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
12380 mode = GET_MODE (dest);
12382 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
12383 but MODE may be a vector mode and thus not appropriate. */
12384 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
12386 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
12389 if_true = force_reg (mode, if_true);
12390 v = gen_rtvec (2, if_true, if_false);
12391 tmp = gen_rtx_UNSPEC (mode, v, u);
12395 code = is_min ? SMIN : SMAX;
12396 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
12399 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
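/* Illustrative note, not part of the original file: x86 minps/maxps return
   the second operand when the inputs are unordered, so the operation is
   not symmetric in the presence of NaNs and signed zeros - hence the
   UNSPEC_IEEE_* path above.  Hypothetical scalar model of the hardware
   min:  */
#if 0
static float
x86_min_model (float a, float b)
{
  return a < b ? a : b;		/* Returns b whenever a < b is false,
				   including when either input is NaN.  */
}
#endif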
12403 /* Expand an sse vector comparison. Return the register with the result. */
12406 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
12407 rtx op_true, rtx op_false)
12409 enum machine_mode mode = GET_MODE (dest);
12412 cmp_op0 = force_reg (mode, cmp_op0);
12413 if (!nonimmediate_operand (cmp_op1, mode))
12414 cmp_op1 = force_reg (mode, cmp_op1);
12417 || reg_overlap_mentioned_p (dest, op_true)
12418 || reg_overlap_mentioned_p (dest, op_false))
12419 dest = gen_reg_rtx (mode);
12421 x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
12422 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
12427 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
12428 operations. This is used for both scalar and vector conditional moves. */
12431 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
12433 enum machine_mode mode = GET_MODE (dest);
12436 if (op_false == CONST0_RTX (mode))
12438 op_true = force_reg (mode, op_true);
12439 x = gen_rtx_AND (mode, cmp, op_true);
12440 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
12442 else if (op_true == CONST0_RTX (mode))
12444 op_false = force_reg (mode, op_false);
12445 x = gen_rtx_NOT (mode, cmp);
12446 x = gen_rtx_AND (mode, x, op_false);
12447 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
12451 op_true = force_reg (mode, op_true);
12452 op_false = force_reg (mode, op_false);
12454 t2 = gen_reg_rtx (mode);
12456 t3 = gen_reg_rtx (mode);
12460 x = gen_rtx_AND (mode, op_true, cmp);
12461 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
12463 x = gen_rtx_NOT (mode, cmp);
12464 x = gen_rtx_AND (mode, x, op_false);
12465 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
12467 x = gen_rtx_IOR (mode, t3, t2);
12468 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
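/* Illustrative sketch, not part of the original file: the general case
   above is the classic and/andnot/or blend.  With SSE intrinsics
   (assuming CMP holds all-ones or all-zeros per lane) it would read
   roughly:  */
#if 0
#include <xmmintrin.h>
static __m128
sse_blend (__m128 cmp, __m128 t, __m128 f)
{
  return _mm_or_ps (_mm_and_ps (cmp, t),	/* Lanes where cmp is true.  */
		    _mm_andnot_ps (cmp, f));	/* ~cmp & f for the rest.  */
}
#endif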
12472 /* Expand a floating-point conditional move. Return true if successful. */
12475 ix86_expand_fp_movcc (rtx operands[])
12477 enum machine_mode mode = GET_MODE (operands[0]);
12478 enum rtx_code code = GET_CODE (operands[1]);
12479 rtx tmp, compare_op, second_test, bypass_test;
12481 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
12483 enum machine_mode cmode;
12485 /* Since we've no cmove for sse registers, don't force bad register
12486 allocation just to gain access to it. Deny movcc when the
12487 comparison mode doesn't match the move mode. */
12488 cmode = GET_MODE (ix86_compare_op0);
12489 if (cmode == VOIDmode)
12490 cmode = GET_MODE (ix86_compare_op1);
12494 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
12496 &ix86_compare_op1);
12497 if (code == UNKNOWN)
12500 if (ix86_expand_sse_fp_minmax (operands[0], code, ix86_compare_op0,
12501 ix86_compare_op1, operands[2],
12505 tmp = ix86_expand_sse_cmp (operands[0], code, ix86_compare_op0,
12506 ix86_compare_op1, operands[2], operands[3]);
12507 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
12511 /* The floating point conditional move instructions don't directly
12512 support conditions resulting from a signed integer comparison. */
12514 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
12519 if (!fcmov_comparison_operator (compare_op, VOIDmode))
12521 gcc_assert (!second_test && !bypass_test);
12522 tmp = gen_reg_rtx (QImode);
12523 ix86_expand_setcc (code, tmp);
12525 ix86_compare_op0 = tmp;
12526 ix86_compare_op1 = const0_rtx;
12527 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
12529 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
12531 tmp = gen_reg_rtx (mode);
12532 emit_move_insn (tmp, operands[3]);
12535 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
12537 tmp = gen_reg_rtx (mode);
12538 emit_move_insn (tmp, operands[2]);
12542 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
12543 gen_rtx_IF_THEN_ELSE (mode, compare_op,
12544 operands[2], operands[3])));
12546 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
12547 gen_rtx_IF_THEN_ELSE (mode, bypass_test,
12548 operands[3], operands[0])));
12550 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
12551 gen_rtx_IF_THEN_ELSE (mode, second_test,
12552 operands[2], operands[0])));
12557 /* Expand a floating-point vector conditional move; a vcond operation
12558 rather than a movcc operation. */
12561 ix86_expand_fp_vcond (rtx operands[])
12563 enum rtx_code code = GET_CODE (operands[3]);
12566 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
12567 &operands[4], &operands[5]);
12568 if (code == UNKNOWN)
12571 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
12572 operands[5], operands[1], operands[2]))
12575 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
12576 operands[1], operands[2]);
12577 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
12581 /* Expand a signed integral vector conditional move. */
12584 ix86_expand_int_vcond (rtx operands[])
12586 enum machine_mode mode = GET_MODE (operands[0]);
12587 enum rtx_code code = GET_CODE (operands[3]);
12588 bool negate = false;
12591 cop0 = operands[4];
12592 cop1 = operands[5];
12594 /* Canonicalize the comparison to EQ, GT, GTU. */
12605 code = reverse_condition (code);
12611 code = reverse_condition (code);
12617 code = swap_condition (code);
12618 x = cop0, cop0 = cop1, cop1 = x;
12622 gcc_unreachable ();
12625 /* Unsigned parallel compare is not supported by the hardware. Play some
12626 tricks to turn this into a signed comparison against 0. */
12629 cop0 = force_reg (mode, cop0);
12637 /* Perform a parallel modulo subtraction. */
12638 t1 = gen_reg_rtx (mode);
12639 emit_insn (gen_subv4si3 (t1, cop0, cop1));
12641 /* Extract the original sign bit of op0. */
12642 mask = GEN_INT (-0x80000000);
12643 mask = gen_rtx_CONST_VECTOR (mode,
12644 gen_rtvec (4, mask, mask, mask, mask));
12645 mask = force_reg (mode, mask);
12646 t2 = gen_reg_rtx (mode);
12647 emit_insn (gen_andv4si3 (t2, cop0, mask));
12649 /* XOR it back into the result of the subtraction. This results
12650 in the sign bit set iff we saw unsigned underflow. */
12651 x = gen_reg_rtx (mode);
12652 emit_insn (gen_xorv4si3 (x, t1, t2));
12660 /* Perform a parallel unsigned saturating subtraction. */
12661 x = gen_reg_rtx (mode);
12662 emit_insn (gen_rtx_SET (VOIDmode, x,
12663 gen_rtx_US_MINUS (mode, cop0, cop1)));
12670 gcc_unreachable ();
12674 cop1 = CONST0_RTX (mode);
12677 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
12678 operands[1+negate], operands[2-negate]);
12680 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
12681 operands[2-negate]);
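/* Illustrative sketch, not part of the original file: a standard way to
   express an unsigned compare with signed-only compare hardware is to
   bias both operands by the sign bit; shown scalar here:  */
#if 0
static int
gtu_via_signed (unsigned int a, unsigned int b)
{
  /* a >u b  <->  (a ^ 0x80000000) >s (b ^ 0x80000000).  */
  return (int) (a ^ 0x80000000u) > (int) (b ^ 0x80000000u);
}
#endif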
12685 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
12686 true if we should do zero extension, else sign extension. HIGH_P is
12687 true if we want the N/2 high elements, else the low elements. */
12690 ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
12692 enum machine_mode imode = GET_MODE (operands[1]);
12693 rtx (*unpack)(rtx, rtx, rtx);
12700 unpack = gen_vec_interleave_highv16qi;
12702 unpack = gen_vec_interleave_lowv16qi;
12706 unpack = gen_vec_interleave_highv8hi;
12708 unpack = gen_vec_interleave_lowv8hi;
12712 unpack = gen_vec_interleave_highv4si;
12714 unpack = gen_vec_interleave_lowv4si;
12717 gcc_unreachable ();
12720 dest = gen_lowpart (imode, operands[0]);
12723 se = force_reg (imode, CONST0_RTX (imode));
12725 se = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
12726 operands[1], pc_rtx, pc_rtx);
12728 emit_insn (unpack (dest, operands[1], se));
12731 /* Expand conditional increment or decrement using adc/sbb instructions.
12732 The default case using setcc followed by the conditional move can be
12733 done by generic code. */
12735 ix86_expand_int_addcc (rtx operands[])
12737 enum rtx_code code = GET_CODE (operands[1]);
12739 rtx val = const0_rtx;
12740 bool fpcmp = false;
12741 enum machine_mode mode = GET_MODE (operands[0]);
12743 if (operands[3] != const1_rtx
12744 && operands[3] != constm1_rtx)
12746 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
12747 ix86_compare_op1, &compare_op))
12749 code = GET_CODE (compare_op);
12751 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
12752 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
12755 code = ix86_fp_compare_code_to_integer (code);
12762 PUT_CODE (compare_op,
12763 reverse_condition_maybe_unordered
12764 (GET_CODE (compare_op)));
12766 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
12768 PUT_MODE (compare_op, mode);
12770 /* Construct either adc or sbb insn. */
12771 if ((code == LTU) == (operands[3] == constm1_rtx))
12773 switch (GET_MODE (operands[0]))
12776 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
12779 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
12782 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
12785 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
12788 gcc_unreachable ();
12793 switch (GET_MODE (operands[0]))
12796 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
12799 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
12802 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
12805 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
12808 gcc_unreachable ();
12811 return 1; /* DONE */
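/* Illustrative sketch, not part of the original file: the adc/sbb forms
   above implement a branchless conditional increment, as in this
   hypothetical C equivalent (cmp a, b ; adc x, 0):  */
#if 0
static unsigned int
cond_incr (unsigned int x, unsigned int a, unsigned int b)
{
  return x + (a < b);	/* Carry from the compare feeds the add.  */
}
#endif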
12815 /* Split operands 0 and 1 into SImode parts.  Similar to split_di, but
12816    works for floating point parameters and non-offsettable memories.
12817    For pushes, it returns just stack offsets; the values will be saved
12818    in the right order.  At most three parts are generated.  */
12821 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
12826     size = mode == XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
12828 size = (GET_MODE_SIZE (mode) + 4) / 8;
12830 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
12831 gcc_assert (size >= 2 && size <= 3);
12833 /* Optimize constant pool reference to immediates. This is used by fp
12834 moves, that force all constants to memory to allow combining. */
12835 if (MEM_P (operand) && MEM_READONLY_P (operand))
12837 rtx tmp = maybe_get_pool_constant (operand);
12842 if (MEM_P (operand) && !offsettable_memref_p (operand))
12844       /* The only non-offsettable memories we handle are pushes.  */
12845 int ok = push_operand (operand, VOIDmode);
12849 operand = copy_rtx (operand);
12850 PUT_MODE (operand, Pmode);
12851 parts[0] = parts[1] = parts[2] = operand;
12855 if (GET_CODE (operand) == CONST_VECTOR)
12857 enum machine_mode imode = int_mode_for_mode (mode);
12858 /* Caution: if we looked through a constant pool memory above,
12859 the operand may actually have a different mode now. That's
12860 ok, since we want to pun this all the way back to an integer. */
12861 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
12862 gcc_assert (operand != NULL);
12868 if (mode == DImode)
12869 split_di (&operand, 1, &parts[0], &parts[1]);
12872 if (REG_P (operand))
12874 gcc_assert (reload_completed);
12875 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
12876 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
12878 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
12880 else if (offsettable_memref_p (operand))
12882 operand = adjust_address (operand, SImode, 0);
12883 parts[0] = operand;
12884 parts[1] = adjust_address (operand, SImode, 4);
12886 parts[2] = adjust_address (operand, SImode, 8);
12888 else if (GET_CODE (operand) == CONST_DOUBLE)
12893 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
12897 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
12898 parts[2] = gen_int_mode (l[2], SImode);
12901 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
12904 gcc_unreachable ();
12906 parts[1] = gen_int_mode (l[1], SImode);
12907 parts[0] = gen_int_mode (l[0], SImode);
12910 gcc_unreachable ();
12915 if (mode == TImode)
12916 split_ti (&operand, 1, &parts[0], &parts[1]);
12917 if (mode == XFmode || mode == TFmode)
12919       enum machine_mode upper_mode = mode == XFmode ? SImode : DImode;
12920 if (REG_P (operand))
12922 gcc_assert (reload_completed);
12923 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
12924 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
12926 else if (offsettable_memref_p (operand))
12928 operand = adjust_address (operand, DImode, 0);
12929 parts[0] = operand;
12930 parts[1] = adjust_address (operand, upper_mode, 8);
12932 else if (GET_CODE (operand) == CONST_DOUBLE)
12937 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
12938 real_to_target (l, &r, mode);
12940 /* Do not use shift by 32 to avoid warning on 32bit systems. */
12941 if (HOST_BITS_PER_WIDE_INT >= 64)
12944 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
12945 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
12948 parts[0] = immed_double_const (l[0], l[1], DImode);
12950 if (upper_mode == SImode)
12951 parts[1] = gen_int_mode (l[2], SImode);
12952 else if (HOST_BITS_PER_WIDE_INT >= 64)
12955 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
12956 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
12959 parts[1] = immed_double_const (l[2], l[3], DImode);
12962 gcc_unreachable ();
12969 /* Emit insns to perform a move or push of DI, DF, and XF values.
12970 Return false when normal moves are needed; true when all required
12971 insns have been emitted. Operands 2-4 contain the input values
12972    in the correct order; operands 5-7 contain the output values.  */
12975 ix86_split_long_move (rtx operands[])
12980 int collisions = 0;
12981 enum machine_mode mode = GET_MODE (operands[0]);
12983   /* The DFmode expanders may ask us to move a double.
12984      For a 64-bit target this is a single move.  By hiding the fact
12985      here we simplify the i386.md splitters.  */
12986 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
12988 /* Optimize constant pool reference to immediates. This is used by
12989 fp moves, that force all constants to memory to allow combining. */
12991 if (MEM_P (operands[1])
12992 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
12993 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
12994 operands[1] = get_pool_constant (XEXP (operands[1], 0));
12995 if (push_operand (operands[0], VOIDmode))
12997 operands[0] = copy_rtx (operands[0]);
12998 PUT_MODE (operands[0], Pmode);
13001 operands[0] = gen_lowpart (DImode, operands[0]);
13002 operands[1] = gen_lowpart (DImode, operands[1]);
13003 emit_move_insn (operands[0], operands[1]);
13007 /* The only non-offsettable memory we handle is push. */
13008 if (push_operand (operands[0], VOIDmode))
13011 gcc_assert (!MEM_P (operands[0])
13012 || offsettable_memref_p (operands[0]));
13014 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
13015 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
13017 /* When emitting push, take care for source operands on the stack. */
13018 if (push && MEM_P (operands[1])
13019 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
13022 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
13023 XEXP (part[1][2], 0));
13024 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
13025 XEXP (part[1][1], 0));
13028   /* We need to do the copy in the right order in case an address register
13029      of the source overlaps the destination.  */
13030 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
13032 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
13034 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
13037 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
13040 /* Collision in the middle part can be handled by reordering. */
13041 if (collisions == 1 && nparts == 3
13042 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
13045 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
13046 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
13049   /* If there are more collisions, we can't handle them by reordering.
13050      Do an lea to the last part and use only one colliding move.  */
13051 else if (collisions > 1)
13057 base = part[0][nparts - 1];
13059 /* Handle the case when the last part isn't valid for lea.
13060 Happens in 64-bit mode storing the 12-byte XFmode. */
13061 if (GET_MODE (base) != Pmode)
13062 base = gen_rtx_REG (Pmode, REGNO (base));
13064 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
13065 part[1][0] = replace_equiv_address (part[1][0], base);
13066 part[1][1] = replace_equiv_address (part[1][1],
13067 plus_constant (base, UNITS_PER_WORD));
13069 part[1][2] = replace_equiv_address (part[1][2],
13070 plus_constant (base, 8));
13080 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
13081 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
13082 emit_move_insn (part[0][2], part[1][2]);
13087       /* In 64-bit mode we don't have a 32-bit push available.  If this is a
13088	  register, that is OK - we will just use the larger counterpart.  We also
13089	  retype memories - these come from an attempt to avoid a REX prefix on
13090	  moving the second half of a TFmode value.  */
13091 if (GET_MODE (part[1][1]) == SImode)
13093 switch (GET_CODE (part[1][1]))
13096 part[1][1] = adjust_address (part[1][1], DImode, 0);
13100 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
13104 gcc_unreachable ();
13107 if (GET_MODE (part[1][0]) == SImode)
13108 part[1][0] = part[1][1];
13111 emit_move_insn (part[0][1], part[1][1]);
13112 emit_move_insn (part[0][0], part[1][0]);
13116   /* Choose the correct order so as not to overwrite the source before it is copied.  */
13117 if ((REG_P (part[0][0])
13118 && REG_P (part[1][1])
13119 && (REGNO (part[0][0]) == REGNO (part[1][1])
13121 && REGNO (part[0][0]) == REGNO (part[1][2]))))
13123 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
13127 operands[2] = part[0][2];
13128 operands[3] = part[0][1];
13129 operands[4] = part[0][0];
13130 operands[5] = part[1][2];
13131 operands[6] = part[1][1];
13132 operands[7] = part[1][0];
13136 operands[2] = part[0][1];
13137 operands[3] = part[0][0];
13138 operands[5] = part[1][1];
13139 operands[6] = part[1][0];
13146 operands[2] = part[0][0];
13147 operands[3] = part[0][1];
13148 operands[4] = part[0][2];
13149 operands[5] = part[1][0];
13150 operands[6] = part[1][1];
13151 operands[7] = part[1][2];
13155 operands[2] = part[0][0];
13156 operands[3] = part[0][1];
13157 operands[5] = part[1][0];
13158 operands[6] = part[1][1];
13162 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
13165 if (CONST_INT_P (operands[5])
13166 && operands[5] != const0_rtx
13167 && REG_P (operands[2]))
13169 if (CONST_INT_P (operands[6])
13170 && INTVAL (operands[6]) == INTVAL (operands[5]))
13171 operands[6] = operands[2];
13174 && CONST_INT_P (operands[7])
13175 && INTVAL (operands[7]) == INTVAL (operands[5]))
13176 operands[7] = operands[2];
13180 && CONST_INT_P (operands[6])
13181 && operands[6] != const0_rtx
13182 && REG_P (operands[3])
13183 && CONST_INT_P (operands[7])
13184 && INTVAL (operands[7]) == INTVAL (operands[6]))
13185 operands[7] = operands[3];
13188 emit_move_insn (operands[2], operands[5]);
13189 emit_move_insn (operands[3], operands[6]);
13191 emit_move_insn (operands[4], operands[7]);
13196 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
13197 left shift by a constant, either using a single shift or
13198 a sequence of add instructions. */
13201 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
13205 emit_insn ((mode == DImode
13207 : gen_adddi3) (operand, operand, operand));
13209 else if (!optimize_size
13210 && count * ix86_cost->add <= ix86_cost->shift_const)
13213       for (i = 0; i < count; i++)
13215 emit_insn ((mode == DImode
13217 : gen_adddi3) (operand, operand, operand));
13221 emit_insn ((mode == DImode
13223 : gen_ashldi3) (operand, operand, GEN_INT (count)));
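/* Illustrative note, not part of the original file: each add doubles the
   operand, so a small constant shift can be emitted as repeated additions
   when adds are cheaper than a shift, e.g.:  */
#if 0
static unsigned int
shl2_by_adds (unsigned int x)
{
  x += x;		/* x << 1 */
  x += x;		/* x << 2 */
  return x;
}
#endif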
13227 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
13229 rtx low[2], high[2];
13231 const int single_width = mode == DImode ? 32 : 64;
13233 if (CONST_INT_P (operands[2]))
13235 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
13236 count = INTVAL (operands[2]) & (single_width * 2 - 1);
13238 if (count >= single_width)
13240 emit_move_insn (high[0], low[1]);
13241 emit_move_insn (low[0], const0_rtx);
13243 if (count > single_width)
13244 ix86_expand_ashl_const (high[0], count - single_width, mode);
13248 if (!rtx_equal_p (operands[0], operands[1]))
13249 emit_move_insn (operands[0], operands[1]);
13250 emit_insn ((mode == DImode
13252 : gen_x86_64_shld) (high[0], low[0], GEN_INT (count)));
13253 ix86_expand_ashl_const (low[0], count, mode);
13258 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
13260 if (operands[1] == const1_rtx)
13262	  /* Assuming we've chosen QImode-capable registers, 1 << N
13263	     can be done with two 32/64-bit shifts, no branches, no cmoves.  */
13264 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
13266 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
13268 ix86_expand_clear (low[0]);
13269 ix86_expand_clear (high[0]);
13270 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (single_width)));
13272 d = gen_lowpart (QImode, low[0]);
13273 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
13274 s = gen_rtx_EQ (QImode, flags, const0_rtx);
13275 emit_insn (gen_rtx_SET (VOIDmode, d, s));
13277 d = gen_lowpart (QImode, high[0]);
13278 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
13279 s = gen_rtx_NE (QImode, flags, const0_rtx);
13280 emit_insn (gen_rtx_SET (VOIDmode, d, s));
13283 /* Otherwise, we can get the same results by manually performing
13284 a bit extract operation on bit 5/6, and then performing the two
13285 shifts. The two methods of getting 0/1 into low/high are exactly
13286 the same size. Avoiding the shift in the bit extract case helps
13287 pentium4 a bit; no one else seems to care much either way. */
13292 if (TARGET_PARTIAL_REG_STALL && !optimize_size)
13293 x = gen_rtx_ZERO_EXTEND (mode == DImode ? SImode : DImode, operands[2]);
13295 x = gen_lowpart (mode == DImode ? SImode : DImode, operands[2]);
13296 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
13298 emit_insn ((mode == DImode
13300 : gen_lshrdi3) (high[0], high[0], GEN_INT (mode == DImode ? 5 : 6)));
13301 emit_insn ((mode == DImode
13303 : gen_anddi3) (high[0], high[0], GEN_INT (1)));
13304 emit_move_insn (low[0], high[0]);
13305 emit_insn ((mode == DImode
13307 : gen_xordi3) (low[0], low[0], GEN_INT (1)));
13310 emit_insn ((mode == DImode
13312 : gen_ashldi3) (low[0], low[0], operands[2]));
13313 emit_insn ((mode == DImode
13315 : gen_ashldi3) (high[0], high[0], operands[2]));
13319 if (operands[1] == constm1_rtx)
13321 /* For -1 << N, we can avoid the shld instruction, because we
13322 know that we're shifting 0...31/63 ones into a -1. */
13323 emit_move_insn (low[0], constm1_rtx);
13325 emit_move_insn (high[0], low[0]);
13327 emit_move_insn (high[0], constm1_rtx);
13331 if (!rtx_equal_p (operands[0], operands[1]))
13332 emit_move_insn (operands[0], operands[1]);
13334 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
13335 emit_insn ((mode == DImode
13337 : gen_x86_64_shld) (high[0], low[0], operands[2]));
13340 emit_insn ((mode == DImode ? gen_ashlsi3 : gen_ashldi3) (low[0], low[0], operands[2]));
13342 if (TARGET_CMOVE && scratch)
13344 ix86_expand_clear (scratch);
13345 emit_insn ((mode == DImode
13346 ? gen_x86_shift_adj_1
13347 : gen_x86_64_shift_adj) (high[0], low[0], operands[2], scratch));
13350 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
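/* Illustrative sketch, not part of the original file: modulo the
   count >= 32 adjustment, the variable-count path above computes the
   classic double-word left shift.  Hypothetical C for DImode on a 32-bit
   target, valid for counts 1..31:  */
#if 0
static void
dw_shl (unsigned int *lo, unsigned int *hi, int count)
{
  *hi = (*hi << count) | (*lo >> (32 - count));	/* shld */
  *lo <<= count;	/* Counts 32..63 instead move lo into hi and
			   clear lo, which is what the adj pattern does.  */
}
#endif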
13354 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
13356 rtx low[2], high[2];
13358 const int single_width = mode == DImode ? 32 : 64;
13360 if (CONST_INT_P (operands[2]))
13362 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
13363 count = INTVAL (operands[2]) & (single_width * 2 - 1);
13365 if (count == single_width * 2 - 1)
13367 emit_move_insn (high[0], high[1]);
13368 emit_insn ((mode == DImode
13370 : gen_ashrdi3) (high[0], high[0],
13371 GEN_INT (single_width - 1)));
13372 emit_move_insn (low[0], high[0]);
13375 else if (count >= single_width)
13377 emit_move_insn (low[0], high[1]);
13378 emit_move_insn (high[0], low[0]);
13379 emit_insn ((mode == DImode
13381 : gen_ashrdi3) (high[0], high[0],
13382 GEN_INT (single_width - 1)));
13383 if (count > single_width)
13384 emit_insn ((mode == DImode
13386 : gen_ashrdi3) (low[0], low[0],
13387 GEN_INT (count - single_width)));
13391 if (!rtx_equal_p (operands[0], operands[1]))
13392 emit_move_insn (operands[0], operands[1]);
13393 emit_insn ((mode == DImode
13395 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
13396 emit_insn ((mode == DImode
13398 : gen_ashrdi3) (high[0], high[0], GEN_INT (count)));
13403 if (!rtx_equal_p (operands[0], operands[1]))
13404 emit_move_insn (operands[0], operands[1]);
13406 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
13408 emit_insn ((mode == DImode
13410 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
13411 emit_insn ((mode == DImode
13413 : gen_ashrdi3) (high[0], high[0], operands[2]));
13415 if (TARGET_CMOVE && scratch)
13417 emit_move_insn (scratch, high[0]);
13418 emit_insn ((mode == DImode
13420 : gen_ashrdi3) (scratch, scratch,
13421 GEN_INT (single_width - 1)));
13422 emit_insn ((mode == DImode
13423 ? gen_x86_shift_adj_1
13424 : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
13428 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
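/* Illustrative sketch, not part of the original file: the arithmetic
   variant fills with copies of the sign bit, which is why counts of
   single_width and up shift the high word by single_width - 1.
   Hypothetical C for counts 32..63 on a 32-bit target:  */
#if 0
static void
dw_sar_big (int *lo, int *hi, int count)
{
  *lo = *hi >> (count - 32);	/* Low word gets the shifted high word.  */
  *hi = *hi >> 31;		/* High word becomes pure sign: 0 or -1.  */
}
#endif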
13433 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
13435 rtx low[2], high[2];
13437 const int single_width = mode == DImode ? 32 : 64;
13439 if (CONST_INT_P (operands[2]))
13441 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
13442 count = INTVAL (operands[2]) & (single_width * 2 - 1);
13444 if (count >= single_width)
13446 emit_move_insn (low[0], high[1]);
13447 ix86_expand_clear (high[0]);
13449 if (count > single_width)
13450 emit_insn ((mode == DImode
13452 : gen_lshrdi3) (low[0], low[0],
13453 GEN_INT (count - single_width)));
13457 if (!rtx_equal_p (operands[0], operands[1]))
13458 emit_move_insn (operands[0], operands[1]);
13459 emit_insn ((mode == DImode
13461 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
13462 emit_insn ((mode == DImode
13464 : gen_lshrdi3) (high[0], high[0], GEN_INT (count)));
13469 if (!rtx_equal_p (operands[0], operands[1]))
13470 emit_move_insn (operands[0], operands[1]);
13472 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
13474 emit_insn ((mode == DImode
13476 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
13477 emit_insn ((mode == DImode
13479 : gen_lshrdi3) (high[0], high[0], operands[2]));
13481 /* Heh. By reversing the arguments, we can reuse this pattern. */
13482 if (TARGET_CMOVE && scratch)
13484 ix86_expand_clear (scratch);
13485 emit_insn ((mode == DImode
13486 ? gen_x86_shift_adj_1
13487 : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
13491 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
13495 /* Predict the just-emitted jump instruction to be taken with probability PROB.  */
13497 predict_jump (int prob)
13499 rtx insn = get_last_insn ();
13500 gcc_assert (JUMP_P (insn));
13502 = gen_rtx_EXPR_LIST (REG_BR_PROB,
13507 /* Helper function for the string operations below. Test VARIABLE whether
13508 it is aligned to VALUE bytes. If so, jump to the label. */
13510 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
13512 rtx label = gen_label_rtx ();
13513 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
13514 if (GET_MODE (variable) == DImode)
13515 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
13517 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
13518 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
13521 predict_jump (REG_BR_PROB_BASE * 50 / 100);
13523 predict_jump (REG_BR_PROB_BASE * 90 / 100);
13527 /* Adjust COUNTER by the VALUE. */
13529 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
13531 if (GET_MODE (countreg) == DImode)
13532 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
13534 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
13537 /* Zero extend possibly SImode EXP to Pmode register. */
13539 ix86_zero_extend_to_Pmode (rtx exp)
13542 if (GET_MODE (exp) == VOIDmode)
13543 return force_reg (Pmode, exp);
13544 if (GET_MODE (exp) == Pmode)
13545 return copy_to_mode_reg (Pmode, exp);
13546 r = gen_reg_rtx (Pmode);
13547 emit_insn (gen_zero_extendsidi2 (r, exp));
13551 /* Divide COUNTREG by SCALE. */
13553 scale_counter (rtx countreg, int scale)
13556 rtx piece_size_mask;
13560 if (CONST_INT_P (countreg))
13561 return GEN_INT (INTVAL (countreg) / scale);
13562 gcc_assert (REG_P (countreg));
13564 piece_size_mask = GEN_INT (scale - 1);
13565 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
13566 GEN_INT (exact_log2 (scale)),
13567 NULL, 1, OPTAB_DIRECT);
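/* Example of the scaling above (added commentary): for SCALE == 4 a
   constant count of 25 simply becomes 6, while a register count is
   shifted right by exact_log2 (4) == 2; the remainder bits covered
   by PIECE_SIZE_MASK are presumably left for the epilogue code. */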
13571 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
13572 DImode for constant loop counts. */
13574 static enum machine_mode
13575 counter_mode (rtx count_exp)
13577 if (GET_MODE (count_exp) != VOIDmode)
13578 return GET_MODE (count_exp);
13579 if (GET_CODE (count_exp) != CONST_INT)
13581 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
13586 /* When SRCPTR is non-NULL, output a simple loop to move memory
13587 pointed to by SRCPTR to DESTPTR via chunks of MODE unrolled UNROLL times,
13588 overall size is COUNT specified in bytes. When SRCPTR is NULL, output the
13589 equivalent loop to set memory by VALUE (supposed to be in MODE).
13591 The size is rounded down to a whole number of chunks moved at once.
13592 SRCMEM and DESTMEM provide MEMrtx to feed proper aliasing info. */
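/* A rough sketch of the loop emitted below (an illustration, not the
   literal RTL), for a copy with MODE == SImode and UNROLL == 4:

     size = count & ~15;
     iter = 0;
     do
       {
         load four SImode temporaries from src + iter;
         store them to dest + iter;
         iter += 16;
       }
     while (iter < size);
     dest += iter;  src += iter;

   The loads are all issued before the stores (see the tmpreg array
   below) so chips that reorder memory accesses are not penalized. */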
13596 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
13597 rtx destptr, rtx srcptr, rtx value,
13598 rtx count, enum machine_mode mode, int unroll,
13601 rtx out_label, top_label, iter, tmp;
13602 enum machine_mode iter_mode = counter_mode (count);
13603 rtx piece_size = GEN_INT (GET_MODE_SIZE (mode) * unroll);
13604 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
13610 top_label = gen_label_rtx ();
13611 out_label = gen_label_rtx ();
13612 iter = gen_reg_rtx (iter_mode);
13614 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
13615 NULL, 1, OPTAB_DIRECT);
13616 /* Those two should combine. */
13617 if (piece_size == const1_rtx)
13619 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
13621 predict_jump (REG_BR_PROB_BASE * 10 / 100);
13623 emit_move_insn (iter, const0_rtx);
13625 emit_label (top_label);
13627 tmp = convert_modes (Pmode, iter_mode, iter, true);
13628 x_addr = gen_rtx_PLUS (Pmode, destptr, tmp);
13629 destmem = change_address (destmem, mode, x_addr);
13633 y_addr = gen_rtx_PLUS (Pmode, srcptr, copy_rtx (tmp));
13634 srcmem = change_address (srcmem, mode, y_addr);
13636 /* When unrolling for chips that reorder memory reads and writes,
13637 we can save registers by using a single temporary.
13638 Also, using 4 temporaries is overkill in 32-bit mode. */
13639 if (!TARGET_64BIT && 0)
13641 for (i = 0; i < unroll; i++)
13646 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
13648 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
13650 emit_move_insn (destmem, srcmem);
13656 gcc_assert (unroll <= 4);
13657 for (i = 0; i < unroll; i++)
13659 tmpreg[i] = gen_reg_rtx (mode);
13663 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
13665 emit_move_insn (tmpreg[i], srcmem);
13667 for (i = 0; i < unroll; i++)
13672 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
13674 emit_move_insn (destmem, tmpreg[i]);
13679 for (i = 0; i < unroll; i++)
13683 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
13684 emit_move_insn (destmem, value);
13687 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
13688 true, OPTAB_LIB_WIDEN);
13690 emit_move_insn (iter, tmp);
13692 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
13694 if (expected_size != -1)
13696 expected_size /= GET_MODE_SIZE (mode) * unroll;
13697 if (expected_size == 0)
13699 else if (expected_size > REG_BR_PROB_BASE)
13700 predict_jump (REG_BR_PROB_BASE - 1);
13702 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
13705 predict_jump (REG_BR_PROB_BASE * 80 / 100);
13706 iter = ix86_zero_extend_to_Pmode (iter);
13707 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
13708 true, OPTAB_LIB_WIDEN);
13709 if (tmp != destptr)
13710 emit_move_insn (destptr, tmp);
13713 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
13714 true, OPTAB_LIB_WIDEN);
13716 emit_move_insn (srcptr, tmp);
13718 emit_label (out_label);
13721 /* Output "rep; mov" instruction.
13722 Arguments have the same meaning as for the previous function. */
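/* Example (added commentary): copying 64 bytes with MODE == SImode
   emits "rep movsd" with the count register set to 16; DESTEXP and
   SRCEXP express the resulting pointer values,
   destptr + (countreg << 2) and srcptr + (countreg << 2),
   for the rep_mov pattern. */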
13724 expand_movmem_via_rep_mov (rtx destmem, rtx srcmem,
13725 rtx destptr, rtx srcptr,
13727 enum machine_mode mode)
13733 /* If the size is known, it is shorter to use rep movs. */
13734 if (mode == QImode && CONST_INT_P (count)
13735 && !(INTVAL (count) & 3))
13738 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
13739 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
13740 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
13741 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
13742 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
13743 if (mode != QImode)
13745 destexp = gen_rtx_ASHIFT (Pmode, countreg,
13746 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
13747 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
13748 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
13749 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
13750 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
13754 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
13755 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
13757 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
13761 /* Output "rep; stos" instruction.
13762 Arguments have the same meaning as for the previous function. */
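/* Example (added commentary): a memset of the byte 0xAB using SImode
   chunks expects VALUE promoted to 0xABABABAB (see
   promote_duplicated_reg below) and emits "rep stosd"; for QImode no
   shift of the count is needed. */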
13764 expand_setmem_via_rep_stos (rtx destmem, rtx destptr, rtx value,
13766 enum machine_mode mode)
13771 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
13772 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
13773 value = force_reg (mode, gen_lowpart (mode, value));
13774 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
13775 if (mode != QImode)
13777 destexp = gen_rtx_ASHIFT (Pmode, countreg,
13778 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
13779 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
13782 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
13783 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
13787 emit_strmov (rtx destmem, rtx srcmem,
13788 rtx destptr, rtx srcptr, enum machine_mode mode, int offset)
13790 rtx src = adjust_automodify_address_nv (srcmem, mode, srcptr, offset);
13791 rtx dest = adjust_automodify_address_nv (destmem, mode, destptr, offset);
13792 emit_insn (gen_strmov (destptr, dest, srcptr, src));
13795 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
13797 expand_movmem_epilogue (rtx destmem, rtx srcmem,
13798 rtx destptr, rtx srcptr, rtx count, int max_size)
13801 if (CONST_INT_P (count))
13803 HOST_WIDE_INT countval = INTVAL (count);
13806 if ((countval & 0x10) && max_size > 16)
13810 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
13811 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset + 8);
13814 gcc_unreachable ();
13817 if ((countval & 0x08) && max_size > 8)
13820 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
13823 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
13824 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset + 4);
13828 if ((countval & 0x04) && max_size > 4)
13830 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
13833 if ((countval & 0x02) && max_size > 2)
13835 emit_strmov (destmem, srcmem, destptr, srcptr, HImode, offset);
13838 if ((countval & 0x01) && max_size > 1)
13840 emit_strmov (destmem, srcmem, destptr, srcptr, QImode, offset);
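/* Worked example for the constant chain above (added commentary):
   countval == 13 (binary 1101) with max_size > 8 on a 64-bit target
   emits one DImode, one SImode and one QImode move: 8 + 4 + 1 bytes. */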
13847 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
13848 count, 1, OPTAB_DIRECT);
13849 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
13850 count, QImode, 1, 4);
13854 /* When single stringop instructions are available, we can cheaply advance
13855 dest and src pointers. Otherwise we save code size by maintaining an offset
13856 (zero is readily available from the preceding rep operation) and using x86 addressing modes.
13858 if (TARGET_SINGLE_STRINGOP)
13862 rtx label = ix86_expand_aligntest (count, 4, true);
13863 src = change_address (srcmem, SImode, srcptr);
13864 dest = change_address (destmem, SImode, destptr);
13865 emit_insn (gen_strmov (destptr, dest, srcptr, src));
13866 emit_label (label);
13867 LABEL_NUSES (label) = 1;
13871 rtx label = ix86_expand_aligntest (count, 2, true);
13872 src = change_address (srcmem, HImode, srcptr);
13873 dest = change_address (destmem, HImode, destptr);
13874 emit_insn (gen_strmov (destptr, dest, srcptr, src));
13875 emit_label (label);
13876 LABEL_NUSES (label) = 1;
13880 rtx label = ix86_expand_aligntest (count, 1, true);
13881 src = change_address (srcmem, QImode, srcptr);
13882 dest = change_address (destmem, QImode, destptr);
13883 emit_insn (gen_strmov (destptr, dest, srcptr, src));
13884 emit_label (label);
13885 LABEL_NUSES (label) = 1;
13890 rtx offset = force_reg (Pmode, const0_rtx);
13895 rtx label = ix86_expand_aligntest (count, 4, true);
13896 src = change_address (srcmem, SImode, srcptr);
13897 dest = change_address (destmem, SImode, destptr);
13898 emit_move_insn (dest, src);
13899 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
13900 true, OPTAB_LIB_WIDEN);
13902 emit_move_insn (offset, tmp);
13903 emit_label (label);
13904 LABEL_NUSES (label) = 1;
13908 rtx label = ix86_expand_aligntest (count, 2, true);
13909 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
13910 src = change_address (srcmem, HImode, tmp);
13911 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
13912 dest = change_address (destmem, HImode, tmp);
13913 emit_move_insn (dest, src);
13914 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
13915 true, OPTAB_LIB_WIDEN);
13917 emit_move_insn (offset, tmp);
13918 emit_label (label);
13919 LABEL_NUSES (label) = 1;
13923 rtx label = ix86_expand_aligntest (count, 1, true);
13924 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
13925 src = change_address (srcmem, QImode, tmp);
13926 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
13927 dest = change_address (destmem, QImode, tmp);
13928 emit_move_insn (dest, src);
13929 emit_label (label);
13930 LABEL_NUSES (label) = 1;
13935 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
13937 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
13938 rtx count, int max_size)
13941 expand_simple_binop (counter_mode (count), AND, count,
13942 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
13943 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
13944 gen_lowpart (QImode, value), count, QImode,
13948 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
13950 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx count, int max_size)
13954 if (CONST_INT_P (count))
13956 HOST_WIDE_INT countval = INTVAL (count);
13959 if ((countval & 0x10) && max_size > 16)
13963 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
13964 emit_insn (gen_strset (destptr, dest, value));
13965 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset + 8);
13966 emit_insn (gen_strset (destptr, dest, value));
13969 gcc_unreachable ();
13972 if ((countval & 0x08) && max_size > 8)
13976 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
13977 emit_insn (gen_strset (destptr, dest, value));
13981 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
13982 emit_insn (gen_strset (destptr, dest, value));
13983 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset + 4);
13984 emit_insn (gen_strset (destptr, dest, value));
13988 if ((countval & 0x04) && max_size > 4)
13990 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
13991 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
13994 if ((countval & 0x02) && max_size > 2)
13996 dest = adjust_automodify_address_nv (destmem, HImode, destptr, offset);
13997 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
14000 if ((countval & 0x01) && max_size > 1)
14002 dest = adjust_automodify_address_nv (destmem, QImode, destptr, offset);
14003 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
14010 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
14015 rtx label = ix86_expand_aligntest (count, 16, true);
14018 dest = change_address (destmem, DImode, destptr);
14019 emit_insn (gen_strset (destptr, dest, value));
14020 emit_insn (gen_strset (destptr, dest, value));
14024 dest = change_address (destmem, SImode, destptr);
14025 emit_insn (gen_strset (destptr, dest, value));
14026 emit_insn (gen_strset (destptr, dest, value));
14027 emit_insn (gen_strset (destptr, dest, value));
14028 emit_insn (gen_strset (destptr, dest, value));
14030 emit_label (label);
14031 LABEL_NUSES (label) = 1;
14035 rtx label = ix86_expand_aligntest (count, 8, true);
14038 dest = change_address (destmem, DImode, destptr);
14039 emit_insn (gen_strset (destptr, dest, value));
14043 dest = change_address (destmem, SImode, destptr);
14044 emit_insn (gen_strset (destptr, dest, value));
14045 emit_insn (gen_strset (destptr, dest, value));
14047 emit_label (label);
14048 LABEL_NUSES (label) = 1;
14052 rtx label = ix86_expand_aligntest (count, 4, true);
14053 dest = change_address (destmem, SImode, destptr);
14054 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
14055 emit_label (label);
14056 LABEL_NUSES (label) = 1;
14060 rtx label = ix86_expand_aligntest (count, 2, true);
14061 dest = change_address (destmem, HImode, destptr);
14062 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
14063 emit_label (label);
14064 LABEL_NUSES (label) = 1;
14068 rtx label = ix86_expand_aligntest (count, 1, true);
14069 dest = change_address (destmem, QImode, destptr);
14070 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
14071 emit_label (label);
14072 LABEL_NUSES (label) = 1;
14076 /* Copy enough from SRC to DEST to align DEST, known to be aligned by ALIGN,
14077 to DESIRED_ALIGNMENT. */
14079 expand_movmem_prologue (rtx destmem, rtx srcmem,
14080 rtx destptr, rtx srcptr, rtx count,
14081 int align, int desired_alignment)
14083 if (align <= 1 && desired_alignment > 1)
14085 rtx label = ix86_expand_aligntest (destptr, 1, false);
14086 srcmem = change_address (srcmem, QImode, srcptr);
14087 destmem = change_address (destmem, QImode, destptr);
14088 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
14089 ix86_adjust_counter (count, 1);
14090 emit_label (label);
14091 LABEL_NUSES (label) = 1;
14093 if (align <= 2 && desired_alignment > 2)
14095 rtx label = ix86_expand_aligntest (destptr, 2, false);
14096 srcmem = change_address (srcmem, HImode, srcptr);
14097 destmem = change_address (destmem, HImode, destptr);
14098 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
14099 ix86_adjust_counter (count, 2);
14100 emit_label (label);
14101 LABEL_NUSES (label) = 1;
14103 if (align <= 4 && desired_alignment > 4)
14105 rtx label = ix86_expand_aligntest (destptr, 4, false);
14106 srcmem = change_address (srcmem, SImode, srcptr);
14107 destmem = change_address (destmem, SImode, destptr);
14108 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
14109 ix86_adjust_counter (count, 4);
14110 emit_label (label);
14111 LABEL_NUSES (label) = 1;
14113 gcc_assert (desired_alignment <= 8);
14116 /* Store enough at DEST to align DEST, known to be aligned by ALIGN,
14117 to DESIRED_ALIGNMENT. */
14119 expand_setmem_prologue (rtx destmem, rtx destptr, rtx value, rtx count,
14120 int align, int desired_alignment)
14122 if (align <= 1 && desired_alignment > 1)
14124 rtx label = ix86_expand_aligntest (destptr, 1, false);
14125 destmem = change_address (destmem, QImode, destptr);
14126 emit_insn (gen_strset (destptr, destmem, gen_lowpart (QImode, value)));
14127 ix86_adjust_counter (count, 1);
14128 emit_label (label);
14129 LABEL_NUSES (label) = 1;
14131 if (align <= 2 && desired_alignment > 2)
14133 rtx label = ix86_expand_aligntest (destptr, 2, false);
14134 destmem = change_address (destmem, HImode, destptr);
14135 emit_insn (gen_strset (destptr, destmem, gen_lowpart (HImode, value)));
14136 ix86_adjust_counter (count, 2);
14137 emit_label (label);
14138 LABEL_NUSES (label) = 1;
14140 if (align <= 4 && desired_alignment > 4)
14142 rtx label = ix86_expand_aligntest (destptr, 4, false);
14143 destmem = change_address (destmem, SImode, destptr);
14144 emit_insn (gen_strset (destptr, destmem, gen_lowpart (SImode, value)));
14145 ix86_adjust_counter (count, 4);
14146 emit_label (label);
14147 LABEL_NUSES (label) = 1;
14149 gcc_assert (desired_alignment <= 8);
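/* Worked example (added commentary): with DESTPTR == 3 (mod 8) and
   DESIRED_ALIGNMENT == 8, the chain above emits a QImode store (the
   pointer becomes 0 mod 4), skips the HImode store, then emits an
   SImode store -- 5 bytes total, with COUNT decremented accordingly. */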
14152 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
14153 static enum stringop_alg
14154 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
14155 int *dynamic_check)
14157 const struct stringop_algs * algs;
14159 *dynamic_check = -1;
14161 algs = &ix86_cost->memset[TARGET_64BIT != 0];
14163 algs = &ix86_cost->memcpy[TARGET_64BIT != 0];
14164 if (stringop_alg != no_stringop)
14165 return stringop_alg;
14166 /* rep; movq or rep; movl is the smallest variant. */
14167 else if (optimize_size)
14169 if (!count || (count & 3))
14170 return rep_prefix_1_byte;
14172 return rep_prefix_4_byte;
14174 /* Very tiny blocks are best handled via the loop; REP is expensive to set up.
14176 else if (expected_size != -1 && expected_size < 4)
14177 return loop_1_byte;
14178 else if (expected_size != -1)
14181 enum stringop_alg alg = libcall;
14182 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
14184 gcc_assert (algs->size[i].max);
14185 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
14187 if (algs->size[i].alg != libcall)
14188 alg = algs->size[i].alg;
14189 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
14190 last non-libcall inline algorithm. */
14191 if (TARGET_INLINE_ALL_STRINGOPS)
14193 /* When the current size is best copied by a libcall, but we
14194 are still forced to inline, run the heuristic below that
14195 picks code for medium-sized blocks. */
14196 if (alg != libcall)
14201 return algs->size[i].alg;
14204 gcc_assert (TARGET_INLINE_ALL_STRINGOPS);
14206 /* When asked to inline the call anyway, try to pick a meaningful choice.
14207 We look for the maximal size of a block that is faster to copy by hand
14208 and take blocks of at most that size, guessing that the average size
14209 will be roughly half of the block.
14211 If this turns out to be bad, we might simply specify the preferred
14212 choice in ix86_costs. */
14213 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
14214 && algs->unknown_size == libcall)
14217 enum stringop_alg alg;
14220 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
14221 if (algs->size[i].alg != libcall && algs->size[i].alg)
14222 max = algs->size[i].max;
14225 alg = decide_alg (count, max / 2, memset, dynamic_check);
14226 gcc_assert (*dynamic_check == -1);
14227 gcc_assert (alg != libcall);
14228 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
14229 *dynamic_check = max;
14232 return algs->unknown_size;
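/* For instance (hypothetical cost table, added commentary): if the
   per-size entries were {{256, rep_prefix_4_byte}, {-1, libcall}},
   the recursion above would retry with expected_size == 128, and with
   -minline-stringops-dynamically *DYNAMIC_CHECK would become 256, so
   larger blocks would still go through the library call at runtime. */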
14235 /* Decide on alignment. We know that the operand is already aligned to ALIGN
14236 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
14238 decide_alignment (int align,
14239 enum stringop_alg alg,
14242 int desired_align = 0;
14246 gcc_unreachable ();
14248 case unrolled_loop:
14249 desired_align = GET_MODE_SIZE (Pmode);
14251 case rep_prefix_8_byte:
14254 case rep_prefix_4_byte:
14255 /* PentiumPro has special logic triggering for 8-byte-aligned blocks,
14256 copying a whole cache line at once. */
14257 if (TARGET_PENTIUMPRO)
14262 case rep_prefix_1_byte:
14263 /* PentiumPro has special logic triggering for 8-byte-aligned blocks,
14264 copying a whole cache line at once. */
14265 if (TARGET_PENTIUMPRO)
14279 if (desired_align < align)
14280 desired_align = align;
14281 if (expected_size != -1 && expected_size < 4)
14282 desired_align = align;
14283 return desired_align;
14286 /* Return the smallest power of 2 greater than VAL. */
14288 smallest_pow2_greater_than (int val)
14296 /* Expand string move (memcpy) operation. Use i386 string operations when
14297 profitable. ix86_expand_setmem contains similar code. The code depends upon
14298 architecture, block size and alignment, but always has the same
14301 1) Prologue guard: Conditional that jumps up to epilogues for small
14302 blocks that can be handled by the epilogue alone. This is faster, but
14303 also needed for correctness, since the prologue assumes the block is
14304 larger than the desired alignment.
14306 Optional dynamic check for size and libcall for large
14307 blocks is emitted here too, with -minline-stringops-dynamically.
14309 2) Prologue: copy first few bytes in order to get destination aligned
14310 to DESIRED_ALIGN. It is emitted only when ALIGN is less than
14311 DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be copied.
14312 We emit either a jump tree on power of two sized blocks, or a byte loop.
14314 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
14315 with specified algorithm.
14317 4) Epilogue: code copying tail of the block that is too small to be
14318 handled by main body (or up to size guarded by prologue guard). */
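/* Schematic of the emitted code when all four parts are present
   (a sketch, not the literal output):

     if (count < epilogue_size_needed)
       goto epilogue;                                // 1) prologue guard
     copy bytes until dest reaches desired alignment;      // 2) prologue
     main copying loop or rep-prefixed instruction;        // 3) body
   epilogue:
     copy remaining count & (epilogue_size_needed - 1) bytes;  // 4) */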
14321 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
14322 rtx expected_align_exp, rtx expected_size_exp)
14328 rtx jump_around_label = NULL;
14329 HOST_WIDE_INT align = 1;
14330 unsigned HOST_WIDE_INT count = 0;
14331 HOST_WIDE_INT expected_size = -1;
14332 int size_needed = 0, epilogue_size_needed;
14333 int desired_align = 0;
14334 enum stringop_alg alg;
14337 if (CONST_INT_P (align_exp))
14338 align = INTVAL (align_exp);
14339 /* i386 can do misaligned access at reasonably increased cost. */
14340 if (CONST_INT_P (expected_align_exp)
14341 && INTVAL (expected_align_exp) > align)
14342 align = INTVAL (expected_align_exp);
14343 if (CONST_INT_P (count_exp))
14344 count = expected_size = INTVAL (count_exp);
14345 if (CONST_INT_P (expected_size_exp) && count == 0)
14346 expected_size = INTVAL (expected_size_exp);
14348 /* Step 0: Decide on preferred algorithm, desired alignment and
14349 size of chunks to be copied by main loop. */
14351 alg = decide_alg (count, expected_size, false, &dynamic_check);
14352 desired_align = decide_alignment (align, alg, expected_size);
14354 if (!TARGET_ALIGN_STRINGOPS)
14355 align = desired_align;
14357 if (alg == libcall)
14359 gcc_assert (alg != no_stringop);
14361 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
14362 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
14363 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
14368 gcc_unreachable ();
14370 size_needed = GET_MODE_SIZE (Pmode);
14372 case unrolled_loop:
14373 size_needed = GET_MODE_SIZE (Pmode) * (TARGET_64BIT ? 4 : 2);
14375 case rep_prefix_8_byte:
14378 case rep_prefix_4_byte:
14381 case rep_prefix_1_byte:
14387 epilogue_size_needed = size_needed;
14389 /* Step 1: Prologue guard. */
14391 /* Alignment code needs count to be in register. */
14392 if (CONST_INT_P (count_exp) && desired_align > align)
14394 enum machine_mode mode = SImode;
14395 if (TARGET_64BIT && (count & ~0xffffffff))
14397 count_exp = force_reg (mode, count_exp);
14399 gcc_assert (desired_align >= 1 && align >= 1);
14401 /* Ensure that alignment prologue won't copy past end of block. */
14402 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
14404 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
14405 /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
14406 Make sure it is a power of 2. */
14407 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
14409 label = gen_label_rtx ();
14410 emit_cmp_and_jump_insns (count_exp,
14411 GEN_INT (epilogue_size_needed),
14412 LTU, 0, counter_mode (count_exp), 1, label);
14413 if (GET_CODE (count_exp) == CONST_INT)
14415 else if (expected_size == -1 || expected_size < epilogue_size_needed)
14416 predict_jump (REG_BR_PROB_BASE * 60 / 100);
14418 predict_jump (REG_BR_PROB_BASE * 20 / 100);
14420 /* Emit code to decide at runtime whether a library call or inline code should be
14422 if (dynamic_check != -1)
14424 rtx hot_label = gen_label_rtx ();
14425 jump_around_label = gen_label_rtx ();
14426 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
14427 LEU, 0, GET_MODE (count_exp), 1, hot_label);
14428 predict_jump (REG_BR_PROB_BASE * 90 / 100);
14429 emit_block_move_via_libcall (dst, src, count_exp, false);
14430 emit_jump (jump_around_label);
14431 emit_label (hot_label);
14434 /* Step 2: Alignment prologue. */
14436 if (desired_align > align)
14438 /* Except for the first move in the epilogue, we no longer know
14439 constant offset in aliasing info. It doesn't seem worth
14440 the pain to maintain it for the first move, so throw away
14442 src = change_address (src, BLKmode, srcreg);
14443 dst = change_address (dst, BLKmode, destreg);
14444 expand_movmem_prologue (dst, src, destreg, srcreg, count_exp, align,
14447 if (label && size_needed == 1)
14449 emit_label (label);
14450 LABEL_NUSES (label) = 1;
14454 /* Step 3: Main loop. */
14460 gcc_unreachable ();
14462 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
14463 count_exp, QImode, 1, expected_size);
14466 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
14467 count_exp, Pmode, 1, expected_size);
14469 case unrolled_loop:
14470 /* Unroll only by a factor of 2 in 32-bit mode, since we don't have enough
14471 registers for 4 temporaries anyway. */
14472 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
14473 count_exp, Pmode, TARGET_64BIT ? 4 : 2,
14476 case rep_prefix_8_byte:
14477 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
14480 case rep_prefix_4_byte:
14481 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
14484 case rep_prefix_1_byte:
14485 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
14489 /* Properly adjust the offset of src and dest memory for aliasing. */
14490 if (CONST_INT_P (count_exp))
14492 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
14493 (count / size_needed) * size_needed);
14494 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
14495 (count / size_needed) * size_needed);
14499 src = change_address (src, BLKmode, srcreg);
14500 dst = change_address (dst, BLKmode, destreg);
14503 /* Step 4: Epilogue to copy the remaining bytes. */
14507 /* When the main loop is done, COUNT_EXP might hold the original count,
14508 while we want to copy only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
14509 Epilogue code will actually copy COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
14510 bytes. Compensate if needed. */
14512 if (size_needed < epilogue_size_needed)
14515 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
14516 GEN_INT (size_needed - 1), count_exp, 1,
14518 if (tmp != count_exp)
14519 emit_move_insn (count_exp, tmp);
14521 emit_label (label);
14522 LABEL_NUSES (label) = 1;
14525 if (count_exp != const0_rtx && epilogue_size_needed > 1)
14526 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
14527 epilogue_size_needed);
14528 if (jump_around_label)
14529 emit_label (jump_around_label);
14533 /* Helper function for memset. For QImode value 0xXY produce
14534 0xXYXYXYXY of the width specified by MODE. This is essentially
14535 a * 0x10101010, but we can do slightly better than
14536 synth_mult by unwinding the sequence by hand on CPUs with
14539 promote_duplicated_reg (enum machine_mode mode, rtx val)
14541 enum machine_mode valmode = GET_MODE (val);
14543 int nops = mode == DImode ? 3 : 2;
14545 gcc_assert (mode == SImode || mode == DImode);
14546 if (val == const0_rtx)
14547 return copy_to_mode_reg (mode, const0_rtx);
14548 if (CONST_INT_P (val))
14550 HOST_WIDE_INT v = INTVAL (val) & 255;
14554 if (mode == DImode)
14555 v |= (v << 16) << 16;
14556 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
14559 if (valmode == VOIDmode)
14561 if (valmode != QImode)
14562 val = gen_lowpart (QImode, val);
14563 if (mode == QImode)
14565 if (!TARGET_PARTIAL_REG_STALL)
14567 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
14568 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
14569 <= (ix86_cost->shift_const + ix86_cost->add) * nops
14570 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
14572 rtx reg = convert_modes (mode, QImode, val, true);
14573 tmp = promote_duplicated_reg (mode, const1_rtx);
14574 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
14579 rtx reg = convert_modes (mode, QImode, val, true);
14581 if (!TARGET_PARTIAL_REG_STALL)
14582 if (mode == SImode)
14583 emit_insn (gen_movsi_insv_1 (reg, reg));
14585 emit_insn (gen_movdi_insv_1_rex64 (reg, reg));
14588 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
14589 NULL, 1, OPTAB_DIRECT);
14591 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
14593 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
14594 NULL, 1, OPTAB_DIRECT);
14595 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
14596 if (mode == SImode)
14598 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
14599 NULL, 1, OPTAB_DIRECT);
14600 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
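/* Example (added commentary): promoting the QImode value 0x5A to
   DImode by the shift-and-IOR sequence above goes
     0x5A -> 0x5A5A -> 0x5A5A5A5A -> 0x5A5A5A5A5A5A5A5A
   in NOPS == 3 steps, which is what the earlier cost comparison
   against the multiply sequence counts. */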
14605 /* Duplicate value VAL using promote_duplicated_reg into the maximal size that
14606 will be needed by the main loop copying SIZE_NEEDED chunks and by the
14607 prologue getting alignment from ALIGN to DESIRED_ALIGN. */
14609 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align, int align)
14614 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
14615 promoted_val = promote_duplicated_reg (DImode, val);
14616 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
14617 promoted_val = promote_duplicated_reg (SImode, val);
14618 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
14619 promoted_val = promote_duplicated_reg (HImode, val);
14621 promoted_val = val;
14623 return promoted_val;
14626 /* Expand string set operation (memset). Use i386 string operations when
14627 profitable. See the ix86_expand_movmem comment for an explanation of the
14628 individual steps performed. */
14630 ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
14631 rtx expected_align_exp, rtx expected_size_exp)
14636 rtx jump_around_label = NULL;
14637 HOST_WIDE_INT align = 1;
14638 unsigned HOST_WIDE_INT count = 0;
14639 HOST_WIDE_INT expected_size = -1;
14640 int size_needed = 0, epilogue_size_needed;
14641 int desired_align = 0;
14642 enum stringop_alg alg;
14643 rtx promoted_val = NULL;
14644 bool force_loopy_epilogue = false;
14647 if (CONST_INT_P (align_exp))
14648 align = INTVAL (align_exp);
14649 /* i386 can do misaligned access at reasonably increased cost. */
14650 if (CONST_INT_P (expected_align_exp)
14651 && INTVAL (expected_align_exp) > align)
14652 align = INTVAL (expected_align_exp);
14653 if (CONST_INT_P (count_exp))
14654 count = expected_size = INTVAL (count_exp);
14655 if (CONST_INT_P (expected_size_exp) && count == 0)
14656 expected_size = INTVAL (expected_size_exp);
14658 /* Step 0: Decide on preferred algorithm, desired alignment and
14659 size of chunks to be copied by main loop. */
14661 alg = decide_alg (count, expected_size, true, &dynamic_check);
14662 desired_align = decide_alignment (align, alg, expected_size);
14664 if (!TARGET_ALIGN_STRINGOPS)
14665 align = desired_align;
14667 if (alg == libcall)
14669 gcc_assert (alg != no_stringop);
14671 count_exp = copy_to_mode_reg (counter_mode (count_exp), count_exp);
14672 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
14677 gcc_unreachable ();
14679 size_needed = GET_MODE_SIZE (Pmode);
14681 case unrolled_loop:
14682 size_needed = GET_MODE_SIZE (Pmode) * 4;
14684 case rep_prefix_8_byte:
14687 case rep_prefix_4_byte:
14690 case rep_prefix_1_byte:
14695 epilogue_size_needed = size_needed;
14697 /* Step 1: Prologue guard. */
14699 /* Alignment code needs count to be in register. */
14700 if (CONST_INT_P (count_exp) && desired_align > align)
14702 enum machine_mode mode = SImode;
14703 if (TARGET_64BIT && (count & ~0xffffffff))
14705 count_exp = force_reg (mode, count_exp);
14707 /* Do the cheap promotion to allow better CSE across the
14708 main loop and epilogue (i.e. one load of the big constant in
14709 front of all code). */
14710 if (CONST_INT_P (val_exp))
14711 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
14712 desired_align, align);
14713 /* Ensure that alignment prologue won't copy past end of block. */
14714 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
14716 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
14717 /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
14718 Make sure it is a power of 2. */
14719 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
14721 /* To improve performance of small blocks, we jump around the VAL
14722 promoting code. This means that if the promoted VAL is not a constant,
14723 we might not use it in the epilogue and have to use a byte
14725 if (epilogue_size_needed > 2 && !promoted_val)
14726 force_loopy_epilogue = true;
14727 label = gen_label_rtx ();
14728 emit_cmp_and_jump_insns (count_exp,
14729 GEN_INT (epilogue_size_needed),
14730 LTU, 0, counter_mode (count_exp), 1, label);
14731 if (GET_CODE (count_exp) == CONST_INT)
14733 else if (expected_size == -1 || expected_size <= epilogue_size_needed)
14734 predict_jump (REG_BR_PROB_BASE * 60 / 100);
14736 predict_jump (REG_BR_PROB_BASE * 20 / 100);
14738 if (dynamic_check != -1)
14740 rtx hot_label = gen_label_rtx ();
14741 jump_around_label = gen_label_rtx ();
14742 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
14743 LEU, 0, counter_mode (count_exp), 1, hot_label);
14744 predict_jump (REG_BR_PROB_BASE * 90 / 100);
14745 set_storage_via_libcall (dst, count_exp, val_exp, false);
14746 emit_jump (jump_around_label);
14747 emit_label (hot_label);
14750 /* Step 2: Alignment prologue. */
14752 /* Do the expensive promotion once we have branched off the small blocks. */
14754 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
14755 desired_align, align);
14756 gcc_assert (desired_align >= 1 && align >= 1);
14758 if (desired_align > align)
14760 /* Except for the first move in the epilogue, we no longer know
14761 constant offset in aliasing info. It doesn't seem worth
14762 the pain to maintain it for the first move, so throw away
14764 dst = change_address (dst, BLKmode, destreg);
14765 expand_setmem_prologue (dst, destreg, promoted_val, count_exp, align,
14768 if (label && size_needed == 1)
14770 emit_label (label);
14771 LABEL_NUSES (label) = 1;
14775 /* Step 3: Main loop. */
14781 gcc_unreachable ();
14783 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
14784 count_exp, QImode, 1, expected_size);
14787 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
14788 count_exp, Pmode, 1, expected_size);
14790 case unrolled_loop:
14791 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
14792 count_exp, Pmode, 4, expected_size);
14794 case rep_prefix_8_byte:
14795 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
14798 case rep_prefix_4_byte:
14799 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
14802 case rep_prefix_1_byte:
14803 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
14807 /* Properly adjust the offset of src and dest memory for aliasing. */
14808 if (CONST_INT_P (count_exp))
14809 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
14810 (count / size_needed) * size_needed);
14812 dst = change_address (dst, BLKmode, destreg);
14814 /* Step 4: Epilogue to copy the remaining bytes. */
14818 /* When the main loop is done, COUNT_EXP might hold the original count,
14819 while we want to copy only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
14820 Epilogue code will actually copy COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
14821 bytes. Compensate if needed. */
14823 if (size_needed < desired_align - align)
14826 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
14827 GEN_INT (size_needed - 1), count_exp, 1,
14829 size_needed = desired_align - align + 1;
14830 if (tmp != count_exp)
14831 emit_move_insn (count_exp, tmp);
14833 emit_label (label);
14834 LABEL_NUSES (label) = 1;
14836 if (count_exp != const0_rtx && epilogue_size_needed > 1)
14838 if (force_loopy_epilogue)
14839 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
14842 expand_setmem_epilogue (dst, destreg, promoted_val, count_exp,
14845 if (jump_around_label)
14846 emit_label (jump_around_label);
14850 /* Expand the appropriate insns for doing strlen if not just doing
14853 out = result, initialized with the start address
14854 align_rtx = alignment of the address.
14855 scratch = scratch register, initialized with the start address when
14856 not aligned, otherwise undefined
14858 This is just the body. It needs the initializations mentioned above and
14859 some address computing at the end. These things are done in i386.md. */
14862 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
14866 rtx align_2_label = NULL_RTX;
14867 rtx align_3_label = NULL_RTX;
14868 rtx align_4_label = gen_label_rtx ();
14869 rtx end_0_label = gen_label_rtx ();
14871 rtx tmpreg = gen_reg_rtx (SImode);
14872 rtx scratch = gen_reg_rtx (SImode);
14876 if (CONST_INT_P (align_rtx))
14877 align = INTVAL (align_rtx);
14879 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
14881 /* Is there a known alignment and is it less than 4? */
14884 rtx scratch1 = gen_reg_rtx (Pmode);
14885 emit_move_insn (scratch1, out);
14886 /* Is there a known alignment and is it not 2? */
14889 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
14890 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
14892 /* Leave just the two lower bits. */
14893 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
14894 NULL_RTX, 0, OPTAB_WIDEN);
14896 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
14897 Pmode, 1, align_4_label);
14898 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
14899 Pmode, 1, align_2_label);
14900 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
14901 Pmode, 1, align_3_label);
14905 /* Since the alignment is 2, we have to check 2 or 0 bytes;
14906 check whether it is aligned to 4 bytes. */
14908 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
14909 NULL_RTX, 0, OPTAB_WIDEN);
14911 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
14912 Pmode, 1, align_4_label);
14915 mem = change_address (src, QImode, out);
14917 /* Now compare the bytes. */
14919 /* Compare the first n unaligned bytes on a byte-by-byte basis. */
14920 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
14921 QImode, 1, end_0_label);
14923 /* Increment the address. */
14925 emit_insn (gen_adddi3 (out, out, const1_rtx));
14927 emit_insn (gen_addsi3 (out, out, const1_rtx));
14929 /* Not needed with an alignment of 2 */
14932 emit_label (align_2_label);
14934 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
14938 emit_insn (gen_adddi3 (out, out, const1_rtx));
14940 emit_insn (gen_addsi3 (out, out, const1_rtx));
14942 emit_label (align_3_label);
14945 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
14949 emit_insn (gen_adddi3 (out, out, const1_rtx));
14951 emit_insn (gen_addsi3 (out, out, const1_rtx));
14954 /* Generate a loop to check 4 bytes at a time. It is not a good idea to
14955 align this loop. It only makes programs huge, but does not help to
14957 emit_label (align_4_label);
14959 mem = change_address (src, SImode, out);
14960 emit_move_insn (scratch, mem);
14962 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
14964 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
14966 /* This formula yields a nonzero result iff one of the bytes is zero.
14967 This saves three branches inside the loop and many cycles. */
14969 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
14970 emit_insn (gen_one_cmplsi2 (scratch, scratch));
14971 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
14972 emit_insn (gen_andsi3 (tmpreg, tmpreg,
14973 gen_int_mode (0x80808080, SImode)));
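/* Worked example (added commentary): for scratch == 0x12340078,
   (0x12340078 - 0x01010101) & ~0x12340078 & 0x80808080 == 0x00008000;
   the 0x80 marker lands in the position of the zero byte, and a word
   with no zero byte always yields 0. */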
14974 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
14979 rtx reg = gen_reg_rtx (SImode);
14980 rtx reg2 = gen_reg_rtx (Pmode);
14981 emit_move_insn (reg, tmpreg);
14982 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
14984 /* If zero is not in the first two bytes, move two bytes forward. */
14985 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
14986 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
14987 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
14988 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
14989 gen_rtx_IF_THEN_ELSE (SImode, tmp,
14992 /* Emit lea manually to avoid clobbering of flags. */
14993 emit_insn (gen_rtx_SET (SImode, reg2,
14994 gen_rtx_PLUS (Pmode, out, const2_rtx)));
14996 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
14997 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
14998 emit_insn (gen_rtx_SET (VOIDmode, out,
14999 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
15006 rtx end_2_label = gen_label_rtx ();
15007 /* Is zero in the first two bytes? */
15009 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
15010 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
15011 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
15012 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
15013 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
15015 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
15016 JUMP_LABEL (tmp) = end_2_label;
15018 /* Not in the first two. Move two bytes forward. */
15019 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
15021 emit_insn (gen_adddi3 (out, out, const2_rtx));
15023 emit_insn (gen_addsi3 (out, out, const2_rtx));
15025 emit_label (end_2_label);
15029 /* Avoid branch in fixing the byte. */
15030 tmpreg = gen_lowpart (QImode, tmpreg);
15031 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
15032 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, 17), const0_rtx);
15034 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
15036 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));
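/* The fixup above is branch-free (added commentary): addqi3_cc doubles
   the low byte of TMPREG, setting the carry flag iff its bit 7 was set
   (i.e. the zero is in the lower of the two remaining bytes), and the
   subtract-with-borrow computes out -= 3 + CF, leaving OUT pointing at
   the terminating zero byte. */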
15038 emit_label (end_0_label);
15041 /* Expand strlen. */
15044 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
15046 rtx addr, scratch1, scratch2, scratch3, scratch4;
15048 /* The generic case of the strlen expander is long. Avoid expanding
15049 it unless TARGET_INLINE_ALL_STRINGOPS. */
15051 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
15052 && !TARGET_INLINE_ALL_STRINGOPS
15054 && (!CONST_INT_P (align) || INTVAL (align) < 4))
15057 addr = force_reg (Pmode, XEXP (src, 0));
15058 scratch1 = gen_reg_rtx (Pmode);
15060 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
15063 /* Well, it seems that some optimizers do not combine a call like
15064 foo(strlen(bar), strlen(bar));
15065 when the move and the subtraction are done here. The length is
15066 calculated just once when these instructions are done inside
15067 output_strlen_unroll(). But since &bar[strlen(bar)] is
15068 often used and I use one fewer register for the lifetime of
15069 output_strlen_unroll(), this is better. */
15071 emit_move_insn (out, addr);
15073 ix86_expand_strlensi_unroll_1 (out, src, align);
15075 /* strlensi_unroll_1 returns the address of the zero at the end of
15076 the string, like memchr(), so compute the length by subtracting
15077 the start address. */
15079 emit_insn (gen_subdi3 (out, out, addr));
15081 emit_insn (gen_subsi3 (out, out, addr));
15086 scratch2 = gen_reg_rtx (Pmode);
15087 scratch3 = gen_reg_rtx (Pmode);
15088 scratch4 = force_reg (Pmode, constm1_rtx);
15090 emit_move_insn (scratch3, addr);
15091 eoschar = force_reg (QImode, eoschar);
15093 src = replace_equiv_address_nv (src, scratch3);
15095 /* If .md starts supporting :P, this can be done in .md. */
15096 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
15097 scratch4), UNSPEC_SCAS);
15098 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
15101 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
15102 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
15106 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
15107 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
15113 /* For a given symbol (function), construct code to compute the address of its
15114 PLT entry in the large x86-64 PIC model. */
15116 construct_plt_address (rtx symbol)
15118 rtx tmp = gen_reg_rtx (Pmode);
15119 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
15121 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
15122 gcc_assert (ix86_cmodel == CM_LARGE_PIC);
15124 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
15125 emit_insn (gen_adddi3 (tmp, tmp, pic_offset_table_rtx));
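/* Sketch of the resulting computation in the large PIC model (an
   illustration, assuming pic_offset_table_rtx holds the GOT base):

     movabs $symbol@PLTOFF, tmp
     add    gotbase, tmp        ; tmp = address of symbol's PLT entry

   The exact instructions depend on later expansion. */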
15130 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
15131 rtx callarg2 ATTRIBUTE_UNUSED,
15132 rtx pop, int sibcall)
15134 rtx use = NULL, call;
15136 if (pop == const0_rtx)
15138 gcc_assert (!TARGET_64BIT || !pop);
15140 if (TARGET_MACHO && !TARGET_64BIT)
15143 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
15144 fnaddr = machopic_indirect_call_target (fnaddr);
15149 /* Static functions and indirect calls don't need the pic register. */
15150 if (flag_pic && (!TARGET_64BIT || ix86_cmodel == CM_LARGE_PIC)
15151 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
15152 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
15153 use_reg (&use, pic_offset_table_rtx);
15156 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
15158 rtx al = gen_rtx_REG (QImode, 0);
15159 emit_move_insn (al, callarg2);
15160 use_reg (&use, al);
15163 if (ix86_cmodel == CM_LARGE_PIC
15164 && GET_CODE (fnaddr) == MEM
15165 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
15166 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
15167 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
15168 else if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
15170 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
15171 fnaddr = gen_rtx_MEM (QImode, fnaddr);
15173 if (sibcall && TARGET_64BIT
15174 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
15177 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
15178 fnaddr = gen_rtx_REG (Pmode, R11_REG);
15179 emit_move_insn (fnaddr, addr);
15180 fnaddr = gen_rtx_MEM (QImode, fnaddr);
15183 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
15185 call = gen_rtx_SET (VOIDmode, retval, call);
15188 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
15189 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
15190 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
15193 call = emit_call_insn (call);
15195 CALL_INSN_FUNCTION_USAGE (call) = use;
15199 /* Clear stack slot assignments remembered from previous functions.
15200 This is called from INIT_EXPANDERS once before RTL is emitted for each
15203 static struct machine_function *
15204 ix86_init_machine_status (void)
15206 struct machine_function *f;
15208 f = ggc_alloc_cleared (sizeof (struct machine_function));
15209 f->use_fast_prologue_epilogue_nregs = -1;
15210 f->tls_descriptor_call_expanded_p = 0;
15215 /* Return a MEM corresponding to a stack slot with mode MODE.
15216 Allocate a new slot if necessary.
15218 The RTL for a function can have several slots available: N is
15219 which slot to use. */
15222 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
15224 struct stack_local_entry *s;
15226 gcc_assert (n < MAX_386_STACK_LOCALS);
15228 for (s = ix86_stack_locals; s; s = s->next)
15229 if (s->mode == mode && s->n == n)
15230 return copy_rtx (s->rtl);
15232 s = (struct stack_local_entry *)
15233 ggc_alloc (sizeof (struct stack_local_entry));
15236 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
15238 s->next = ix86_stack_locals;
15239 ix86_stack_locals = s;
15243 /* Construct the SYMBOL_REF for the tls_get_addr function. */
15245 static GTY(()) rtx ix86_tls_symbol;
15247 ix86_tls_get_addr (void)
15250 if (!ix86_tls_symbol)
15252 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
15253 (TARGET_ANY_GNU_TLS
15255 ? "___tls_get_addr"
15256 : "__tls_get_addr");
15259 return ix86_tls_symbol;
15262 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
15264 static GTY(()) rtx ix86_tls_module_base_symbol;
15266 ix86_tls_module_base (void)
15269 if (!ix86_tls_module_base_symbol)
15271 ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode,
15272 "_TLS_MODULE_BASE_");
15273 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
15274 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
15277 return ix86_tls_module_base_symbol;
15280 /* Calculate the length of the memory address in the instruction
15281 encoding. Does not include the one-byte modrm, opcode, or prefix. */
15284 memory_address_length (rtx addr)
15286 struct ix86_address parts;
15287 rtx base, index, disp;
15291 if (GET_CODE (addr) == PRE_DEC
15292 || GET_CODE (addr) == POST_INC
15293 || GET_CODE (addr) == PRE_MODIFY
15294 || GET_CODE (addr) == POST_MODIFY)
15297 ok = ix86_decompose_address (addr, &parts);
15300 if (parts.base && GET_CODE (parts.base) == SUBREG)
15301 parts.base = SUBREG_REG (parts.base);
15302 if (parts.index && GET_CODE (parts.index) == SUBREG)
15303 parts.index = SUBREG_REG (parts.index);
15306 index = parts.index;
15311 - esp as the base always wants an index,
15312 - ebp as the base always wants a displacement. */
15314 /* Register Indirect. */
15315 if (base && !index && !disp)
15317 /* esp (for its index) and ebp (for its displacement) need
15318 the two-byte modrm form. */
15319 if (addr == stack_pointer_rtx
15320 || addr == arg_pointer_rtx
15321 || addr == frame_pointer_rtx
15322 || addr == hard_frame_pointer_rtx)
15326 /* Direct Addressing. */
15327 else if (disp && !base && !index)
15332 /* Find the length of the displacement constant. */
15335 if (base && satisfies_constraint_K (disp))
15340 /* ebp always wants a displacement. */
15341 else if (base == hard_frame_pointer_rtx)
15344 /* An index requires the two-byte modrm form.... */
15346 /* ...like esp, which always wants an index. */
15347 || base == stack_pointer_rtx
15348 || base == arg_pointer_rtx
15349 || base == frame_pointer_rtx)
15356 /* Compute default value for "length_immediate" attribute. When SHORTFORM
15357 is set, expect that the insn has an 8-bit immediate alternative. */
15359 ix86_attr_length_immediate_default (rtx insn, int shortform)
15363 extract_insn_cached (insn);
15364 for (i = recog_data.n_operands - 1; i >= 0; --i)
15365 if (CONSTANT_P (recog_data.operand[i]))
15368 if (shortform && satisfies_constraint_K (recog_data.operand[i]))
15372 switch (get_attr_mode (insn))
15383 /* Immediates for DImode instructions are encoded as 32-bit sign-extended values. */
15388 fatal_insn ("unknown insn mode", insn);
15394 /* Compute default value for "length_address" attribute. */
15396 ix86_attr_length_address_default (rtx insn)
15400 if (get_attr_type (insn) == TYPE_LEA)
15402 rtx set = PATTERN (insn);
15404 if (GET_CODE (set) == PARALLEL)
15405 set = XVECEXP (set, 0, 0);
15407 gcc_assert (GET_CODE (set) == SET);
15409 return memory_address_length (SET_SRC (set));
15412 extract_insn_cached (insn);
15413 for (i = recog_data.n_operands - 1; i >= 0; --i)
15414 if (MEM_P (recog_data.operand[i]))
15416 return memory_address_length (XEXP (recog_data.operand[i], 0));
15422 /* Return the maximum number of instructions a cpu can issue. */
15425 ix86_issue_rate (void)
15429 case PROCESSOR_PENTIUM:
15433 case PROCESSOR_PENTIUMPRO:
15434 case PROCESSOR_PENTIUM4:
15435 case PROCESSOR_ATHLON:
15437 case PROCESSOR_AMDFAM10:
15438 case PROCESSOR_NOCONA:
15439 case PROCESSOR_GENERIC32:
15440 case PROCESSOR_GENERIC64:
15443 case PROCESSOR_CORE2:
15451 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
15452 by DEP_INSN and nothing else set by DEP_INSN. */
15455 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
15459 /* Simplify the test for uninteresting insns. */
15460 if (insn_type != TYPE_SETCC
15461 && insn_type != TYPE_ICMOV
15462 && insn_type != TYPE_FCMOV
15463 && insn_type != TYPE_IBR)
15466 if ((set = single_set (dep_insn)) != 0)
15468 set = SET_DEST (set);
15471 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
15472 && XVECLEN (PATTERN (dep_insn), 0) == 2
15473 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
15474 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
15476 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
15477 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
15482 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
15485 /* This test is true if the dependent insn reads the flags but
15486 not any other potentially set register. */
15487 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
15490 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
15496 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
15497 address with operands set by DEP_INSN. */
15500 ix86_agi_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
15504 if (insn_type == TYPE_LEA
15507 addr = PATTERN (insn);
15509 if (GET_CODE (addr) == PARALLEL)
15510 addr = XVECEXP (addr, 0, 0);
15512 gcc_assert (GET_CODE (addr) == SET);
15514 addr = SET_SRC (addr);
15519 extract_insn_cached (insn);
15520 for (i = recog_data.n_operands - 1; i >= 0; --i)
15521 if (MEM_P (recog_data.operand[i]))
15523 addr = XEXP (recog_data.operand[i], 0);
15530 return modified_in_p (addr, dep_insn);
15534 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
15536 enum attr_type insn_type, dep_insn_type;
15537 enum attr_memory memory;
15539 int dep_insn_code_number;
15541 /* Anti and output dependencies have zero cost on all CPUs. */
15542 if (REG_NOTE_KIND (link) != 0)
15545 dep_insn_code_number = recog_memoized (dep_insn);
15547 /* If we can't recognize the insns, we can't really do anything. */
15548 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
15551 insn_type = get_attr_type (insn);
15552 dep_insn_type = get_attr_type (dep_insn);
15556 case PROCESSOR_PENTIUM:
15557 /* Address Generation Interlock adds a cycle of latency. */
15558 if (ix86_agi_dependent (insn, dep_insn, insn_type))
15561 /* ??? Compares pair with jump/setcc. */
15562 if (ix86_flags_dependent (insn, dep_insn, insn_type))
15565 /* Floating point stores require value to be ready one cycle earlier. */
15566 if (insn_type == TYPE_FMOV
15567 && get_attr_memory (insn) == MEMORY_STORE
15568 && !ix86_agi_dependent (insn, dep_insn, insn_type))
15572 case PROCESSOR_PENTIUMPRO:
15573 memory = get_attr_memory (insn);
15575 /* INT->FP conversion is expensive. */
15576 if (get_attr_fp_int_src (dep_insn))
15579 /* There is one cycle extra latency between an FP op and a store. */
15580 if (insn_type == TYPE_FMOV
15581 && (set = single_set (dep_insn)) != NULL_RTX
15582 && (set2 = single_set (insn)) != NULL_RTX
15583 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
15584 && MEM_P (SET_DEST (set2)))
/* Show the ability of the reorder buffer to hide the latency of a load
by executing it in parallel with the previous instruction, when the
previous instruction is not needed to compute the address.  */
15590 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
15591 && !ix86_agi_dependent (insn, dep_insn, insn_type))
/* Claim moves to take one cycle, as the core can issue one load
at a time and the next load can start a cycle later.  */
15595 if (dep_insn_type == TYPE_IMOV
15596 || dep_insn_type == TYPE_FMOV)
15604 memory = get_attr_memory (insn);
/* The esp dependency is resolved before the instruction is really
finished.  */
15608 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
15609 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
15612 /* INT->FP conversion is expensive. */
15613 if (get_attr_fp_int_src (dep_insn))
/* Show the ability of the reorder buffer to hide the latency of a load
by executing it in parallel with the previous instruction, when the
previous instruction is not needed to compute the address.  */
15619 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
15620 && !ix86_agi_dependent (insn, dep_insn, insn_type))
/* Claim moves to take one cycle, as the core can issue one load
at a time and the next load can start a cycle later.  */
15624 if (dep_insn_type == TYPE_IMOV
15625 || dep_insn_type == TYPE_FMOV)
15634 case PROCESSOR_ATHLON:
15636 case PROCESSOR_AMDFAM10:
15637 case PROCESSOR_GENERIC32:
15638 case PROCESSOR_GENERIC64:
15639 memory = get_attr_memory (insn);
/* Show the ability of the reorder buffer to hide the latency of a load
by executing it in parallel with the previous instruction, when the
previous instruction is not needed to compute the address.  */
15644 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
15645 && !ix86_agi_dependent (insn, dep_insn, insn_type))
15647 enum attr_unit unit = get_attr_unit (insn);
/* Because of the difference between the lengths of the integer and
floating point unit pipeline preparation stages, memory operands
are cheaper for floating point.

??? For Athlon the difference is most probably 2.  */
15655 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
15658 loadcost = TARGET_ATHLON ? 2 : 0;
15660 if (cost >= loadcost)
15673 /* How many alternative schedules to try. This should be as wide as the
15674 scheduling freedom in the DFA, but no wider. Making this value too
large results in extra work for the scheduler.  */
15678 ia32_multipass_dfa_lookahead (void)
15680 if (ix86_tune == PROCESSOR_PENTIUM)
15683 if (ix86_tune == PROCESSOR_PENTIUMPRO
15684 || ix86_tune == PROCESSOR_K6)
15692 /* Compute the alignment given to a constant that is being placed in memory.
EXP is the constant and ALIGN is the alignment that the object would
ordinarily have.  The value of this function is used instead of that
alignment to align the object.  */
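/* Illustration: the DFmode constant behind

       double d = 3.141592653589793;

   is emitted with 64-bit alignment even where a double would default to
   less, so an fldl of the constant never straddles an alignment
   boundary; long string constants are word-aligned (when not optimizing
   for size) to speed up block operations on their contents.  */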
15699 ix86_constant_alignment (tree exp, int align)
15701 if (TREE_CODE (exp) == REAL_CST)
15703 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
15705 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
15708 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
15709 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
15710 return BITS_PER_WORD;
15715 /* Compute the alignment for a static variable.
15716 TYPE is the data type, and ALIGN is the alignment that
15717 the object would ordinarily have. The value of this function is used
15718 instead of that alignment to align the object. */
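/* Illustration (hypothetical declarations):

       static double a[4];   256-bit aggregate; raised to 128-bit
                             alignment on x86-64 by the >= 128-bit rule
                             below.
       static double d;      scalar; gets at most the 64-bit REAL_TYPE
                             alignment.  */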
15721 ix86_data_alignment (tree type, int align)
15723 int max_align = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
15725 if (AGGREGATE_TYPE_P (type)
15726 && TYPE_SIZE (type)
15727 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
15728 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
15729 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
15730 && align < max_align)
/* x86-64 ABI requires arrays greater than 16 bytes to be aligned
to a 16-byte boundary.  */
15737 if (AGGREGATE_TYPE_P (type)
15738 && TYPE_SIZE (type)
15739 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
15740 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
15741 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
15745 if (TREE_CODE (type) == ARRAY_TYPE)
15747 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
15749 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
15752 else if (TREE_CODE (type) == COMPLEX_TYPE)
15755 if (TYPE_MODE (type) == DCmode && align < 64)
15757 if (TYPE_MODE (type) == XCmode && align < 128)
15760 else if ((TREE_CODE (type) == RECORD_TYPE
15761 || TREE_CODE (type) == UNION_TYPE
15762 || TREE_CODE (type) == QUAL_UNION_TYPE)
15763 && TYPE_FIELDS (type))
15765 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
15767 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
15770 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
15771 || TREE_CODE (type) == INTEGER_TYPE)
15773 if (TYPE_MODE (type) == DFmode && align < 64)
15775 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
15782 /* Compute the alignment for a local variable.
15783 TYPE is the data type, and ALIGN is the alignment that
the object would ordinarily have.  The value of this function is used
15785 instead of that alignment to align the object. */
15788 ix86_local_alignment (tree type, int align)
/* x86-64 ABI requires arrays greater than 16 bytes to be aligned
to a 16-byte boundary.  */
15794 if (AGGREGATE_TYPE_P (type)
15795 && TYPE_SIZE (type)
15796 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
15797 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
15798 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
15801 if (TREE_CODE (type) == ARRAY_TYPE)
15803 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
15805 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
15808 else if (TREE_CODE (type) == COMPLEX_TYPE)
15810 if (TYPE_MODE (type) == DCmode && align < 64)
15812 if (TYPE_MODE (type) == XCmode && align < 128)
15815 else if ((TREE_CODE (type) == RECORD_TYPE
15816 || TREE_CODE (type) == UNION_TYPE
15817 || TREE_CODE (type) == QUAL_UNION_TYPE)
15818 && TYPE_FIELDS (type))
15820 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
15822 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
15825 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
15826 || TREE_CODE (type) == INTEGER_TYPE)
15829 if (TYPE_MODE (type) == DFmode && align < 64)
15831 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
15837 /* Emit RTL insns to initialize the variable parts of a trampoline.
15838 FNADDR is an RTX for the address of the function's pure code.
15839 CXT is an RTX for the static chain value for the function. */
15841 x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
/* Compute the offset from the end of the jmp to the target function.  */
15846 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
15847 plus_constant (tramp, 10),
15848 NULL_RTX, 1, OPTAB_DIRECT);
15849 emit_move_insn (gen_rtx_MEM (QImode, tramp),
15850 gen_int_mode (0xb9, QImode));
15851 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
15852 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
15853 gen_int_mode (0xe9, QImode));
15854 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
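/* The 10-byte trampoline emitted above, byte by byte:

       offset 0:  b9 <cxt:4>     movl $CXT, %ecx
       offset 5:  e9 <disp:4>    jmp  FNADDR

   DISP is pc-relative, hence computed against the address of the byte
   following the jmp, i.e. TRAMP + 10, matching the plus_constant above.  */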
/* Try to load the address using the shorter movl instead of movabs.
We may want to support movq for kernel mode, but the kernel does not
use trampolines at the moment.  */
15862 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
15864 fnaddr = copy_to_mode_reg (DImode, fnaddr);
15865 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
15866 gen_int_mode (0xbb41, HImode));
15867 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
15868 gen_lowpart (SImode, fnaddr));
15873 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
15874 gen_int_mode (0xbb49, HImode));
15875 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
15879 /* Load static chain using movabs to r10. */
15880 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
15881 gen_int_mode (0xba49, HImode));
15882 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
/* Jump to r11.  */
15886 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
15887 gen_int_mode (0xff49, HImode));
15888 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
15889 gen_int_mode (0xe3, QImode));
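/* The resulting 64-bit trampoline (only one of the two %r11 loads is
   emitted, depending on whether FNADDR fits in an unsigned 32-bit
   immediate):

       41 bb <imm32>   movl   $FNADDR, %r11d   (zero-extending form)
       49 bb <imm64>   movabs $FNADDR, %r11    (full 64-bit form)
       49 ba <imm64>   movabs $CXT, %r10
       49 ff e3        jmp    *%r11

   The HImode stores above write these opcode pairs little-endian,
   which is why e.g. 0xbb49 lays down the bytes 49 bb.  */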
15891 gcc_assert (offset <= TRAMPOLINE_SIZE);
15894 #ifdef ENABLE_EXECUTE_STACK
15895 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
15896 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
15900 /* Codes for all the SSE/MMX builtins. */
15903 IX86_BUILTIN_ADDPS,
15904 IX86_BUILTIN_ADDSS,
15905 IX86_BUILTIN_DIVPS,
15906 IX86_BUILTIN_DIVSS,
15907 IX86_BUILTIN_MULPS,
15908 IX86_BUILTIN_MULSS,
15909 IX86_BUILTIN_SUBPS,
15910 IX86_BUILTIN_SUBSS,
15912 IX86_BUILTIN_CMPEQPS,
15913 IX86_BUILTIN_CMPLTPS,
15914 IX86_BUILTIN_CMPLEPS,
15915 IX86_BUILTIN_CMPGTPS,
15916 IX86_BUILTIN_CMPGEPS,
15917 IX86_BUILTIN_CMPNEQPS,
15918 IX86_BUILTIN_CMPNLTPS,
15919 IX86_BUILTIN_CMPNLEPS,
15920 IX86_BUILTIN_CMPNGTPS,
15921 IX86_BUILTIN_CMPNGEPS,
15922 IX86_BUILTIN_CMPORDPS,
15923 IX86_BUILTIN_CMPUNORDPS,
15924 IX86_BUILTIN_CMPEQSS,
15925 IX86_BUILTIN_CMPLTSS,
15926 IX86_BUILTIN_CMPLESS,
15927 IX86_BUILTIN_CMPNEQSS,
15928 IX86_BUILTIN_CMPNLTSS,
15929 IX86_BUILTIN_CMPNLESS,
15930 IX86_BUILTIN_CMPNGTSS,
15931 IX86_BUILTIN_CMPNGESS,
15932 IX86_BUILTIN_CMPORDSS,
15933 IX86_BUILTIN_CMPUNORDSS,
15935 IX86_BUILTIN_COMIEQSS,
15936 IX86_BUILTIN_COMILTSS,
15937 IX86_BUILTIN_COMILESS,
15938 IX86_BUILTIN_COMIGTSS,
15939 IX86_BUILTIN_COMIGESS,
15940 IX86_BUILTIN_COMINEQSS,
15941 IX86_BUILTIN_UCOMIEQSS,
15942 IX86_BUILTIN_UCOMILTSS,
15943 IX86_BUILTIN_UCOMILESS,
15944 IX86_BUILTIN_UCOMIGTSS,
15945 IX86_BUILTIN_UCOMIGESS,
15946 IX86_BUILTIN_UCOMINEQSS,
15948 IX86_BUILTIN_CVTPI2PS,
15949 IX86_BUILTIN_CVTPS2PI,
15950 IX86_BUILTIN_CVTSI2SS,
15951 IX86_BUILTIN_CVTSI642SS,
15952 IX86_BUILTIN_CVTSS2SI,
15953 IX86_BUILTIN_CVTSS2SI64,
15954 IX86_BUILTIN_CVTTPS2PI,
15955 IX86_BUILTIN_CVTTSS2SI,
15956 IX86_BUILTIN_CVTTSS2SI64,
15958 IX86_BUILTIN_MAXPS,
15959 IX86_BUILTIN_MAXSS,
15960 IX86_BUILTIN_MINPS,
15961 IX86_BUILTIN_MINSS,
15963 IX86_BUILTIN_LOADUPS,
15964 IX86_BUILTIN_STOREUPS,
15965 IX86_BUILTIN_MOVSS,
15967 IX86_BUILTIN_MOVHLPS,
15968 IX86_BUILTIN_MOVLHPS,
15969 IX86_BUILTIN_LOADHPS,
15970 IX86_BUILTIN_LOADLPS,
15971 IX86_BUILTIN_STOREHPS,
15972 IX86_BUILTIN_STORELPS,
15974 IX86_BUILTIN_MASKMOVQ,
15975 IX86_BUILTIN_MOVMSKPS,
15976 IX86_BUILTIN_PMOVMSKB,
15978 IX86_BUILTIN_MOVNTPS,
15979 IX86_BUILTIN_MOVNTQ,
15981 IX86_BUILTIN_LOADDQU,
15982 IX86_BUILTIN_STOREDQU,
15984 IX86_BUILTIN_PACKSSWB,
15985 IX86_BUILTIN_PACKSSDW,
15986 IX86_BUILTIN_PACKUSWB,
15988 IX86_BUILTIN_PADDB,
15989 IX86_BUILTIN_PADDW,
15990 IX86_BUILTIN_PADDD,
15991 IX86_BUILTIN_PADDQ,
15992 IX86_BUILTIN_PADDSB,
15993 IX86_BUILTIN_PADDSW,
15994 IX86_BUILTIN_PADDUSB,
15995 IX86_BUILTIN_PADDUSW,
15996 IX86_BUILTIN_PSUBB,
15997 IX86_BUILTIN_PSUBW,
15998 IX86_BUILTIN_PSUBD,
15999 IX86_BUILTIN_PSUBQ,
16000 IX86_BUILTIN_PSUBSB,
16001 IX86_BUILTIN_PSUBSW,
16002 IX86_BUILTIN_PSUBUSB,
16003 IX86_BUILTIN_PSUBUSW,
16006 IX86_BUILTIN_PANDN,
16010 IX86_BUILTIN_PAVGB,
16011 IX86_BUILTIN_PAVGW,
16013 IX86_BUILTIN_PCMPEQB,
16014 IX86_BUILTIN_PCMPEQW,
16015 IX86_BUILTIN_PCMPEQD,
16016 IX86_BUILTIN_PCMPGTB,
16017 IX86_BUILTIN_PCMPGTW,
16018 IX86_BUILTIN_PCMPGTD,
16020 IX86_BUILTIN_PMADDWD,
16022 IX86_BUILTIN_PMAXSW,
16023 IX86_BUILTIN_PMAXUB,
16024 IX86_BUILTIN_PMINSW,
16025 IX86_BUILTIN_PMINUB,
16027 IX86_BUILTIN_PMULHUW,
16028 IX86_BUILTIN_PMULHW,
16029 IX86_BUILTIN_PMULLW,
16031 IX86_BUILTIN_PSADBW,
16032 IX86_BUILTIN_PSHUFW,
16034 IX86_BUILTIN_PSLLW,
16035 IX86_BUILTIN_PSLLD,
16036 IX86_BUILTIN_PSLLQ,
16037 IX86_BUILTIN_PSRAW,
16038 IX86_BUILTIN_PSRAD,
16039 IX86_BUILTIN_PSRLW,
16040 IX86_BUILTIN_PSRLD,
16041 IX86_BUILTIN_PSRLQ,
16042 IX86_BUILTIN_PSLLWI,
16043 IX86_BUILTIN_PSLLDI,
16044 IX86_BUILTIN_PSLLQI,
16045 IX86_BUILTIN_PSRAWI,
16046 IX86_BUILTIN_PSRADI,
16047 IX86_BUILTIN_PSRLWI,
16048 IX86_BUILTIN_PSRLDI,
16049 IX86_BUILTIN_PSRLQI,
16051 IX86_BUILTIN_PUNPCKHBW,
16052 IX86_BUILTIN_PUNPCKHWD,
16053 IX86_BUILTIN_PUNPCKHDQ,
16054 IX86_BUILTIN_PUNPCKLBW,
16055 IX86_BUILTIN_PUNPCKLWD,
16056 IX86_BUILTIN_PUNPCKLDQ,
16058 IX86_BUILTIN_SHUFPS,
16060 IX86_BUILTIN_RCPPS,
16061 IX86_BUILTIN_RCPSS,
16062 IX86_BUILTIN_RSQRTPS,
16063 IX86_BUILTIN_RSQRTSS,
16064 IX86_BUILTIN_SQRTPS,
16065 IX86_BUILTIN_SQRTSS,
16067 IX86_BUILTIN_UNPCKHPS,
16068 IX86_BUILTIN_UNPCKLPS,
16070 IX86_BUILTIN_ANDPS,
16071 IX86_BUILTIN_ANDNPS,
16073 IX86_BUILTIN_XORPS,
16076 IX86_BUILTIN_LDMXCSR,
16077 IX86_BUILTIN_STMXCSR,
16078 IX86_BUILTIN_SFENCE,
16080 /* 3DNow! Original */
16081 IX86_BUILTIN_FEMMS,
16082 IX86_BUILTIN_PAVGUSB,
16083 IX86_BUILTIN_PF2ID,
16084 IX86_BUILTIN_PFACC,
16085 IX86_BUILTIN_PFADD,
16086 IX86_BUILTIN_PFCMPEQ,
16087 IX86_BUILTIN_PFCMPGE,
16088 IX86_BUILTIN_PFCMPGT,
16089 IX86_BUILTIN_PFMAX,
16090 IX86_BUILTIN_PFMIN,
16091 IX86_BUILTIN_PFMUL,
16092 IX86_BUILTIN_PFRCP,
16093 IX86_BUILTIN_PFRCPIT1,
16094 IX86_BUILTIN_PFRCPIT2,
16095 IX86_BUILTIN_PFRSQIT1,
16096 IX86_BUILTIN_PFRSQRT,
16097 IX86_BUILTIN_PFSUB,
16098 IX86_BUILTIN_PFSUBR,
16099 IX86_BUILTIN_PI2FD,
16100 IX86_BUILTIN_PMULHRW,
16102 /* 3DNow! Athlon Extensions */
16103 IX86_BUILTIN_PF2IW,
16104 IX86_BUILTIN_PFNACC,
16105 IX86_BUILTIN_PFPNACC,
16106 IX86_BUILTIN_PI2FW,
16107 IX86_BUILTIN_PSWAPDSI,
16108 IX86_BUILTIN_PSWAPDSF,
16111 IX86_BUILTIN_ADDPD,
16112 IX86_BUILTIN_ADDSD,
16113 IX86_BUILTIN_DIVPD,
16114 IX86_BUILTIN_DIVSD,
16115 IX86_BUILTIN_MULPD,
16116 IX86_BUILTIN_MULSD,
16117 IX86_BUILTIN_SUBPD,
16118 IX86_BUILTIN_SUBSD,
16120 IX86_BUILTIN_CMPEQPD,
16121 IX86_BUILTIN_CMPLTPD,
16122 IX86_BUILTIN_CMPLEPD,
16123 IX86_BUILTIN_CMPGTPD,
16124 IX86_BUILTIN_CMPGEPD,
16125 IX86_BUILTIN_CMPNEQPD,
16126 IX86_BUILTIN_CMPNLTPD,
16127 IX86_BUILTIN_CMPNLEPD,
16128 IX86_BUILTIN_CMPNGTPD,
16129 IX86_BUILTIN_CMPNGEPD,
16130 IX86_BUILTIN_CMPORDPD,
16131 IX86_BUILTIN_CMPUNORDPD,
16132 IX86_BUILTIN_CMPEQSD,
16133 IX86_BUILTIN_CMPLTSD,
16134 IX86_BUILTIN_CMPLESD,
16135 IX86_BUILTIN_CMPNEQSD,
16136 IX86_BUILTIN_CMPNLTSD,
16137 IX86_BUILTIN_CMPNLESD,
16138 IX86_BUILTIN_CMPORDSD,
16139 IX86_BUILTIN_CMPUNORDSD,
16141 IX86_BUILTIN_COMIEQSD,
16142 IX86_BUILTIN_COMILTSD,
16143 IX86_BUILTIN_COMILESD,
16144 IX86_BUILTIN_COMIGTSD,
16145 IX86_BUILTIN_COMIGESD,
16146 IX86_BUILTIN_COMINEQSD,
16147 IX86_BUILTIN_UCOMIEQSD,
16148 IX86_BUILTIN_UCOMILTSD,
16149 IX86_BUILTIN_UCOMILESD,
16150 IX86_BUILTIN_UCOMIGTSD,
16151 IX86_BUILTIN_UCOMIGESD,
16152 IX86_BUILTIN_UCOMINEQSD,
16154 IX86_BUILTIN_MAXPD,
16155 IX86_BUILTIN_MAXSD,
16156 IX86_BUILTIN_MINPD,
16157 IX86_BUILTIN_MINSD,
16159 IX86_BUILTIN_ANDPD,
16160 IX86_BUILTIN_ANDNPD,
16162 IX86_BUILTIN_XORPD,
16164 IX86_BUILTIN_SQRTPD,
16165 IX86_BUILTIN_SQRTSD,
16167 IX86_BUILTIN_UNPCKHPD,
16168 IX86_BUILTIN_UNPCKLPD,
16170 IX86_BUILTIN_SHUFPD,
16172 IX86_BUILTIN_LOADUPD,
16173 IX86_BUILTIN_STOREUPD,
16174 IX86_BUILTIN_MOVSD,
16176 IX86_BUILTIN_LOADHPD,
16177 IX86_BUILTIN_LOADLPD,
16179 IX86_BUILTIN_CVTDQ2PD,
16180 IX86_BUILTIN_CVTDQ2PS,
16182 IX86_BUILTIN_CVTPD2DQ,
16183 IX86_BUILTIN_CVTPD2PI,
16184 IX86_BUILTIN_CVTPD2PS,
16185 IX86_BUILTIN_CVTTPD2DQ,
16186 IX86_BUILTIN_CVTTPD2PI,
16188 IX86_BUILTIN_CVTPI2PD,
16189 IX86_BUILTIN_CVTSI2SD,
16190 IX86_BUILTIN_CVTSI642SD,
16192 IX86_BUILTIN_CVTSD2SI,
16193 IX86_BUILTIN_CVTSD2SI64,
16194 IX86_BUILTIN_CVTSD2SS,
16195 IX86_BUILTIN_CVTSS2SD,
16196 IX86_BUILTIN_CVTTSD2SI,
16197 IX86_BUILTIN_CVTTSD2SI64,
16199 IX86_BUILTIN_CVTPS2DQ,
16200 IX86_BUILTIN_CVTPS2PD,
16201 IX86_BUILTIN_CVTTPS2DQ,
16203 IX86_BUILTIN_MOVNTI,
16204 IX86_BUILTIN_MOVNTPD,
16205 IX86_BUILTIN_MOVNTDQ,
16208 IX86_BUILTIN_MASKMOVDQU,
16209 IX86_BUILTIN_MOVMSKPD,
16210 IX86_BUILTIN_PMOVMSKB128,
16212 IX86_BUILTIN_PACKSSWB128,
16213 IX86_BUILTIN_PACKSSDW128,
16214 IX86_BUILTIN_PACKUSWB128,
16216 IX86_BUILTIN_PADDB128,
16217 IX86_BUILTIN_PADDW128,
16218 IX86_BUILTIN_PADDD128,
16219 IX86_BUILTIN_PADDQ128,
16220 IX86_BUILTIN_PADDSB128,
16221 IX86_BUILTIN_PADDSW128,
16222 IX86_BUILTIN_PADDUSB128,
16223 IX86_BUILTIN_PADDUSW128,
16224 IX86_BUILTIN_PSUBB128,
16225 IX86_BUILTIN_PSUBW128,
16226 IX86_BUILTIN_PSUBD128,
16227 IX86_BUILTIN_PSUBQ128,
16228 IX86_BUILTIN_PSUBSB128,
16229 IX86_BUILTIN_PSUBSW128,
16230 IX86_BUILTIN_PSUBUSB128,
16231 IX86_BUILTIN_PSUBUSW128,
16233 IX86_BUILTIN_PAND128,
16234 IX86_BUILTIN_PANDN128,
16235 IX86_BUILTIN_POR128,
16236 IX86_BUILTIN_PXOR128,
16238 IX86_BUILTIN_PAVGB128,
16239 IX86_BUILTIN_PAVGW128,
16241 IX86_BUILTIN_PCMPEQB128,
16242 IX86_BUILTIN_PCMPEQW128,
16243 IX86_BUILTIN_PCMPEQD128,
16244 IX86_BUILTIN_PCMPGTB128,
16245 IX86_BUILTIN_PCMPGTW128,
16246 IX86_BUILTIN_PCMPGTD128,
16248 IX86_BUILTIN_PMADDWD128,
16250 IX86_BUILTIN_PMAXSW128,
16251 IX86_BUILTIN_PMAXUB128,
16252 IX86_BUILTIN_PMINSW128,
16253 IX86_BUILTIN_PMINUB128,
16255 IX86_BUILTIN_PMULUDQ,
16256 IX86_BUILTIN_PMULUDQ128,
16257 IX86_BUILTIN_PMULHUW128,
16258 IX86_BUILTIN_PMULHW128,
16259 IX86_BUILTIN_PMULLW128,
16261 IX86_BUILTIN_PSADBW128,
16262 IX86_BUILTIN_PSHUFHW,
16263 IX86_BUILTIN_PSHUFLW,
16264 IX86_BUILTIN_PSHUFD,
16266 IX86_BUILTIN_PSLLDQI128,
16267 IX86_BUILTIN_PSLLWI128,
16268 IX86_BUILTIN_PSLLDI128,
16269 IX86_BUILTIN_PSLLQI128,
16270 IX86_BUILTIN_PSRAWI128,
16271 IX86_BUILTIN_PSRADI128,
16272 IX86_BUILTIN_PSRLDQI128,
16273 IX86_BUILTIN_PSRLWI128,
16274 IX86_BUILTIN_PSRLDI128,
16275 IX86_BUILTIN_PSRLQI128,
16277 IX86_BUILTIN_PSLLDQ128,
16278 IX86_BUILTIN_PSLLW128,
16279 IX86_BUILTIN_PSLLD128,
16280 IX86_BUILTIN_PSLLQ128,
16281 IX86_BUILTIN_PSRAW128,
16282 IX86_BUILTIN_PSRAD128,
16283 IX86_BUILTIN_PSRLW128,
16284 IX86_BUILTIN_PSRLD128,
16285 IX86_BUILTIN_PSRLQ128,
16287 IX86_BUILTIN_PUNPCKHBW128,
16288 IX86_BUILTIN_PUNPCKHWD128,
16289 IX86_BUILTIN_PUNPCKHDQ128,
16290 IX86_BUILTIN_PUNPCKHQDQ128,
16291 IX86_BUILTIN_PUNPCKLBW128,
16292 IX86_BUILTIN_PUNPCKLWD128,
16293 IX86_BUILTIN_PUNPCKLDQ128,
16294 IX86_BUILTIN_PUNPCKLQDQ128,
16296 IX86_BUILTIN_CLFLUSH,
16297 IX86_BUILTIN_MFENCE,
16298 IX86_BUILTIN_LFENCE,
16300 /* Prescott New Instructions. */
16301 IX86_BUILTIN_ADDSUBPS,
16302 IX86_BUILTIN_HADDPS,
16303 IX86_BUILTIN_HSUBPS,
16304 IX86_BUILTIN_MOVSHDUP,
16305 IX86_BUILTIN_MOVSLDUP,
16306 IX86_BUILTIN_ADDSUBPD,
16307 IX86_BUILTIN_HADDPD,
16308 IX86_BUILTIN_HSUBPD,
16309 IX86_BUILTIN_LDDQU,
16311 IX86_BUILTIN_MONITOR,
16312 IX86_BUILTIN_MWAIT,
16315 IX86_BUILTIN_PHADDW,
16316 IX86_BUILTIN_PHADDD,
16317 IX86_BUILTIN_PHADDSW,
16318 IX86_BUILTIN_PHSUBW,
16319 IX86_BUILTIN_PHSUBD,
16320 IX86_BUILTIN_PHSUBSW,
16321 IX86_BUILTIN_PMADDUBSW,
16322 IX86_BUILTIN_PMULHRSW,
16323 IX86_BUILTIN_PSHUFB,
16324 IX86_BUILTIN_PSIGNB,
16325 IX86_BUILTIN_PSIGNW,
16326 IX86_BUILTIN_PSIGND,
16327 IX86_BUILTIN_PALIGNR,
16328 IX86_BUILTIN_PABSB,
16329 IX86_BUILTIN_PABSW,
16330 IX86_BUILTIN_PABSD,
16332 IX86_BUILTIN_PHADDW128,
16333 IX86_BUILTIN_PHADDD128,
16334 IX86_BUILTIN_PHADDSW128,
16335 IX86_BUILTIN_PHSUBW128,
16336 IX86_BUILTIN_PHSUBD128,
16337 IX86_BUILTIN_PHSUBSW128,
16338 IX86_BUILTIN_PMADDUBSW128,
16339 IX86_BUILTIN_PMULHRSW128,
16340 IX86_BUILTIN_PSHUFB128,
16341 IX86_BUILTIN_PSIGNB128,
16342 IX86_BUILTIN_PSIGNW128,
16343 IX86_BUILTIN_PSIGND128,
16344 IX86_BUILTIN_PALIGNR128,
16345 IX86_BUILTIN_PABSB128,
16346 IX86_BUILTIN_PABSW128,
16347 IX86_BUILTIN_PABSD128,
16349 /* AMDFAM10 - SSE4A New Instructions. */
16350 IX86_BUILTIN_MOVNTSD,
16351 IX86_BUILTIN_MOVNTSS,
16352 IX86_BUILTIN_EXTRQI,
16353 IX86_BUILTIN_EXTRQ,
16354 IX86_BUILTIN_INSERTQI,
16355 IX86_BUILTIN_INSERTQ,
16357 IX86_BUILTIN_VEC_INIT_V2SI,
16358 IX86_BUILTIN_VEC_INIT_V4HI,
16359 IX86_BUILTIN_VEC_INIT_V8QI,
16360 IX86_BUILTIN_VEC_EXT_V2DF,
16361 IX86_BUILTIN_VEC_EXT_V2DI,
16362 IX86_BUILTIN_VEC_EXT_V4SF,
16363 IX86_BUILTIN_VEC_EXT_V4SI,
16364 IX86_BUILTIN_VEC_EXT_V8HI,
16365 IX86_BUILTIN_VEC_EXT_V2SI,
16366 IX86_BUILTIN_VEC_EXT_V4HI,
16367 IX86_BUILTIN_VEC_SET_V8HI,
16368 IX86_BUILTIN_VEC_SET_V4HI,
16373 /* Table for the ix86 builtin decls. */
16374 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
/* Add an ix86 target builtin function with CODE, NAME and TYPE.  Do so
 * only if the target_flags include one of MASK.  Stores the function decl
 * in the ix86_builtins array.
 * Returns the function decl, or NULL_TREE if the builtin was not added.  */
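/* For instance, the init code further down in this file contains calls
   such as

     def_builtin (MASK_SSE, "__builtin_ia32_sfence",
                  void_ftype_void, IX86_BUILTIN_SFENCE);

   which registers the SFENCE builtin only when -msse (or a superset of
   it) is in effect.  */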
16382 def_builtin (int mask, const char *name, tree type, enum ix86_builtins code)
16384 tree decl = NULL_TREE;
16386 if (mask & target_flags
16387 && (!(mask & MASK_64BIT) || TARGET_64BIT))
16389 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
16391 ix86_builtins[(int) code] = decl;
16397 /* Like def_builtin, but also marks the function decl "const". */
16400 def_builtin_const (int mask, const char *name, tree type,
16401 enum ix86_builtins code)
16403 tree decl = def_builtin (mask, name, type, code);
if (decl)
  TREE_READONLY (decl) = 1;
16409 /* Bits for builtin_description.flag. */
/* Set when we don't support the comparison natively, and should
swap the comparison operands in order to support it.  */
16413 #define BUILTIN_DESC_SWAP_OPERANDS 1
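/* For example, SSE has no native "compare greater than" instruction:
   __builtin_ia32_cmpgtps is therefore described in bdesc_2arg below as
   LT plus BUILTIN_DESC_SWAP_OPERANDS, i.e. it is expanded as b < a
   with the two operands exchanged.  */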
16415 struct builtin_description
16417 const unsigned int mask;
16418 const enum insn_code icode;
16419 const char *const name;
16420 const enum ix86_builtins code;
16421 const enum rtx_code comparison;
16422 const unsigned int flag;
16425 static const struct builtin_description bdesc_comi[] =
16427 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
16428 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
16429 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
16430 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
16431 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
16432 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
16433 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
16434 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
16435 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
16436 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
16437 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
16438 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
16439 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
16440 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
16441 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
16442 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
16443 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
16444 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
16445 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
16446 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
16447 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
16448 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
16449 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
16450 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
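/* As an illustration of how these map to user code, __builtin_ia32_comieq
   above is what <xmmintrin.h> wraps as _mm_comieq_ss:

     #include <xmmintrin.h>

     int
     floats_equal (float a, float b)
     {
       return _mm_comieq_ss (_mm_set_ss (a), _mm_set_ss (b));
     }

   The UNEQ rtx code records that COMISS answers "equal or unordered"
   for this predicate.  */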
16453 static const struct builtin_description bdesc_2arg[] =
16456 { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
16457 { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
16458 { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
16459 { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
16460 { MASK_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
16461 { MASK_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
16462 { MASK_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
16463 { MASK_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
16465 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
16466 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
16467 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
16468 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT,
16469 BUILTIN_DESC_SWAP_OPERANDS },
16470 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE,
16471 BUILTIN_DESC_SWAP_OPERANDS },
16472 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
16473 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, 0 },
16474 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, 0 },
16475 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, 0 },
16476 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE,
16477 BUILTIN_DESC_SWAP_OPERANDS },
16478 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT,
16479 BUILTIN_DESC_SWAP_OPERANDS },
16480 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, 0 },
16481 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
16482 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
16483 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
16484 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
16485 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, 0 },
16486 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, 0 },
16487 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, 0 },
16488 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE,
16489 BUILTIN_DESC_SWAP_OPERANDS },
16490 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT,
16491 BUILTIN_DESC_SWAP_OPERANDS },
16492 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, 0 },
16494 { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
16495 { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
16496 { MASK_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
16497 { MASK_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
16499 { MASK_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
16500 { MASK_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
16501 { MASK_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
16502 { MASK_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
16504 { MASK_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
16505 { MASK_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
16506 { MASK_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
16507 { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
16508 { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
16511 { MASK_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
16512 { MASK_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
16513 { MASK_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
16514 { MASK_SSE2, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, 0, 0 },
16515 { MASK_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
16516 { MASK_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
16517 { MASK_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
16518 { MASK_SSE2, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, 0, 0 },
16520 { MASK_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
16521 { MASK_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
16522 { MASK_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
16523 { MASK_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
16524 { MASK_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
16525 { MASK_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
16526 { MASK_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
16527 { MASK_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
16529 { MASK_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
16530 { MASK_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
16531 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
16533 { MASK_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
16534 { MASK_MMX, CODE_FOR_mmx_nandv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
16535 { MASK_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
16536 { MASK_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
16538 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
16539 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
16541 { MASK_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
16542 { MASK_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
16543 { MASK_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
16544 { MASK_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
16545 { MASK_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
16546 { MASK_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
16548 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
16549 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
16550 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
16551 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
16553 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
16554 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
16555 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
16556 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
16557 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
16558 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
16561 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
16562 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
16563 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
16565 { MASK_SSE, CODE_FOR_sse_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
16566 { MASK_SSE, CODE_FOR_sse_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
16567 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 },
16569 { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
16570 { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
16571 { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
16572 { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
16573 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
16574 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
16576 { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
16577 { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
16578 { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
16579 { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
16580 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
16581 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
16583 { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
16584 { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
16585 { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
16586 { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
16588 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
16589 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
16592 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
16593 { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
16594 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
16595 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
16596 { MASK_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
16597 { MASK_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
16598 { MASK_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
16599 { MASK_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
16601 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
16602 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
16603 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
16604 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT,
16605 BUILTIN_DESC_SWAP_OPERANDS },
16606 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE,
16607 BUILTIN_DESC_SWAP_OPERANDS },
16608 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
16609 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, 0 },
16610 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, 0 },
16611 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, 0 },
16612 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE,
16613 BUILTIN_DESC_SWAP_OPERANDS },
16614 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT,
16615 BUILTIN_DESC_SWAP_OPERANDS },
16616 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, 0 },
16617 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
16618 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
16619 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
16620 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
16621 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, 0 },
16622 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, 0 },
16623 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, 0 },
16624 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, 0 },
16626 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
16627 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
16628 { MASK_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
16629 { MASK_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
16631 { MASK_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
16632 { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
16633 { MASK_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
16634 { MASK_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
16636 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
16637 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
16638 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
16641 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
16642 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
16643 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
16644 { MASK_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
16645 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
16646 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
16647 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
16648 { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
16650 { MASK_MMX, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
16651 { MASK_MMX, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
16652 { MASK_MMX, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
16653 { MASK_MMX, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
16654 { MASK_MMX, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
16655 { MASK_MMX, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
16656 { MASK_MMX, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
16657 { MASK_MMX, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
16659 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
16660 { MASK_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
16662 { MASK_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
16663 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
16664 { MASK_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
16665 { MASK_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
16667 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
16668 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
16670 { MASK_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
16671 { MASK_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
16672 { MASK_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
16673 { MASK_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
16674 { MASK_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
16675 { MASK_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
16677 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
16678 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
16679 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
16680 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
16682 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
16683 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
16684 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
16685 { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
16686 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
16687 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
16688 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
16689 { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },
16691 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
16692 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
16693 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
16695 { MASK_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
16696 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
16698 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, 0, IX86_BUILTIN_PMULUDQ, 0, 0 },
16699 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, 0, IX86_BUILTIN_PMULUDQ128, 0, 0 },
16701 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
16702 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
16703 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
16705 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
16706 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
16707 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
16709 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
16710 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
16712 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
16714 { MASK_SSE2, CODE_FOR_sse2_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
16715 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 },
16716 { MASK_SSE2, CODE_FOR_sse2_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
16717 { MASK_SSE2, CODE_FOR_sse2_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 },
16720 { MASK_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, 0, 0 },
16721 { MASK_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, 0, 0 },
16722 { MASK_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, 0, 0 },
16723 { MASK_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, 0, 0 },
16724 { MASK_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, 0, 0 },
16725 { MASK_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, 0, 0 },
16728 { MASK_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, 0, 0 },
16729 { MASK_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, 0, 0 },
16730 { MASK_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, 0, 0 },
16731 { MASK_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, 0, 0 },
16732 { MASK_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, 0, 0 },
16733 { MASK_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, 0, 0 },
16734 { MASK_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, 0, 0 },
16735 { MASK_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, 0, 0 },
16736 { MASK_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, 0, 0 },
16737 { MASK_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, 0, 0 },
16738 { MASK_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, 0, 0 },
16739 { MASK_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, 0, 0 },
16740 { MASK_SSSE3, CODE_FOR_ssse3_pmaddubswv8hi3, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, 0, 0 },
16741 { MASK_SSSE3, CODE_FOR_ssse3_pmaddubswv4hi3, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, 0, 0 },
16742 { MASK_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, 0, 0 },
16743 { MASK_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, 0, 0 },
16744 { MASK_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, 0, 0 },
16745 { MASK_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, 0, 0 },
16746 { MASK_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, 0, 0 },
16747 { MASK_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, 0, 0 },
16748 { MASK_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, 0, 0 },
16749 { MASK_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, 0, 0 },
16750 { MASK_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, 0, 0 },
16751 { MASK_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, 0, 0 }
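/* A minimal sketch of how this table is consumed; the real loop lives
   further down in ix86_init_mmx_sse_builtins and derives the function
   type from the icode's operand modes (pick_ftype below is a
   hypothetical stand-in for that selection):

     for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
       if (d->name)
         def_builtin (d->mask, d->name, pick_ftype (d->icode), d->code);

   Entries with a null name (e.g. the MMX shifts) are instead registered
   individually with hand-built types.  */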
16754 static const struct builtin_description bdesc_1arg[] =
16756 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
16757 { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
16759 { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
16760 { MASK_SSE, CODE_FOR_sse_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
16761 { MASK_SSE, CODE_FOR_sse_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
16763 { MASK_SSE, CODE_FOR_sse_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
16764 { MASK_SSE, CODE_FOR_sse_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
16765 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 },
16766 { MASK_SSE, CODE_FOR_sse_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
16767 { MASK_SSE, CODE_FOR_sse_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
16768 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 },
16770 { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
16771 { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
16773 { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
16775 { MASK_SSE2, CODE_FOR_sse2_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
16776 { MASK_SSE2, CODE_FOR_sse2_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
16778 { MASK_SSE2, CODE_FOR_sse2_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
16779 { MASK_SSE2, CODE_FOR_sse2_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
16780 { MASK_SSE2, CODE_FOR_sse2_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
16781 { MASK_SSE2, CODE_FOR_sse2_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
16782 { MASK_SSE2, CODE_FOR_sse2_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
16784 { MASK_SSE2, CODE_FOR_sse2_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
16786 { MASK_SSE2, CODE_FOR_sse2_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
16787 { MASK_SSE2, CODE_FOR_sse2_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
16788 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 },
16789 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 },
16791 { MASK_SSE2, CODE_FOR_sse2_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
16792 { MASK_SSE2, CODE_FOR_sse2_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
16793 { MASK_SSE2, CODE_FOR_sse2_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },
16796 { MASK_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, 0, 0 },
16797 { MASK_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, 0, 0 },
16800 { MASK_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, 0, 0 },
16801 { MASK_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, 0, 0 },
16802 { MASK_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, 0, 0 },
16803 { MASK_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, 0, 0 },
16804 { MASK_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, 0, 0 },
16805 { MASK_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, 0, 0 },
/* Set up all the MMX/SSE builtins.  This is not called if TARGET_MMX
is zero.  Otherwise, if TARGET_SSE is not set, only expand the MMX
builtins.  */
16812 ix86_init_mmx_sse_builtins (void)
16814 const struct builtin_description * d;
16817 tree V16QI_type_node = build_vector_type_for_mode (char_type_node, V16QImode);
16818 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
16819 tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
16820 tree V2DI_type_node
16821 = build_vector_type_for_mode (long_long_integer_type_node, V2DImode);
16822 tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
16823 tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
16824 tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
16825 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
16826 tree V8QI_type_node = build_vector_type_for_mode (char_type_node, V8QImode);
16827 tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);
16829 tree pchar_type_node = build_pointer_type (char_type_node);
16830 tree pcchar_type_node = build_pointer_type (
16831 build_type_variant (char_type_node, 1, 0));
16832 tree pfloat_type_node = build_pointer_type (float_type_node);
16833 tree pcfloat_type_node = build_pointer_type (
16834 build_type_variant (float_type_node, 1, 0));
16835 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
16836 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
16837 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
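/* Naming convention for the function type nodes that follow:
   <return>_ftype_<args>.  E.g. v4sf_ftype_v4sf_v4sf denotes a function
   taking two V4SF vectors (__m128 at user level) and returning one,
   the signature shared by two-operand SSE builtins such as
   __builtin_ia32_addps.  */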
16840 tree int_ftype_v4sf_v4sf
16841 = build_function_type_list (integer_type_node,
16842 V4SF_type_node, V4SF_type_node, NULL_TREE);
16843 tree v4si_ftype_v4sf_v4sf
16844 = build_function_type_list (V4SI_type_node,
16845 V4SF_type_node, V4SF_type_node, NULL_TREE);
16846 /* MMX/SSE/integer conversions. */
16847 tree int_ftype_v4sf
16848 = build_function_type_list (integer_type_node,
16849 V4SF_type_node, NULL_TREE);
16850 tree int64_ftype_v4sf
16851 = build_function_type_list (long_long_integer_type_node,
16852 V4SF_type_node, NULL_TREE);
16853 tree int_ftype_v8qi
16854 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
16855 tree v4sf_ftype_v4sf_int
16856 = build_function_type_list (V4SF_type_node,
16857 V4SF_type_node, integer_type_node, NULL_TREE);
16858 tree v4sf_ftype_v4sf_int64
16859 = build_function_type_list (V4SF_type_node,
16860 V4SF_type_node, long_long_integer_type_node,
16862 tree v4sf_ftype_v4sf_v2si
16863 = build_function_type_list (V4SF_type_node,
16864 V4SF_type_node, V2SI_type_node, NULL_TREE);
16866 /* Miscellaneous. */
16867 tree v8qi_ftype_v4hi_v4hi
16868 = build_function_type_list (V8QI_type_node,
16869 V4HI_type_node, V4HI_type_node, NULL_TREE);
16870 tree v4hi_ftype_v2si_v2si
16871 = build_function_type_list (V4HI_type_node,
16872 V2SI_type_node, V2SI_type_node, NULL_TREE);
16873 tree v4sf_ftype_v4sf_v4sf_int
16874 = build_function_type_list (V4SF_type_node,
16875 V4SF_type_node, V4SF_type_node,
16876 integer_type_node, NULL_TREE);
16877 tree v2si_ftype_v4hi_v4hi
16878 = build_function_type_list (V2SI_type_node,
16879 V4HI_type_node, V4HI_type_node, NULL_TREE);
16880 tree v4hi_ftype_v4hi_int
16881 = build_function_type_list (V4HI_type_node,
16882 V4HI_type_node, integer_type_node, NULL_TREE);
16883 tree v4hi_ftype_v4hi_di
16884 = build_function_type_list (V4HI_type_node,
16885 V4HI_type_node, long_long_unsigned_type_node,
16887 tree v2si_ftype_v2si_di
16888 = build_function_type_list (V2SI_type_node,
16889 V2SI_type_node, long_long_unsigned_type_node,
16891 tree void_ftype_void
16892 = build_function_type (void_type_node, void_list_node);
16893 tree void_ftype_unsigned
16894 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
16895 tree void_ftype_unsigned_unsigned
16896 = build_function_type_list (void_type_node, unsigned_type_node,
16897 unsigned_type_node, NULL_TREE);
16898 tree void_ftype_pcvoid_unsigned_unsigned
16899 = build_function_type_list (void_type_node, const_ptr_type_node,
16900 unsigned_type_node, unsigned_type_node,
16902 tree unsigned_ftype_void
16903 = build_function_type (unsigned_type_node, void_list_node);
16904 tree v2si_ftype_v4sf
16905 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
16906 /* Loads/stores. */
16907 tree void_ftype_v8qi_v8qi_pchar
16908 = build_function_type_list (void_type_node,
16909 V8QI_type_node, V8QI_type_node,
16910 pchar_type_node, NULL_TREE);
16911 tree v4sf_ftype_pcfloat
16912 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
16913 /* @@@ the type is bogus */
16914 tree v4sf_ftype_v4sf_pv2si
16915 = build_function_type_list (V4SF_type_node,
16916 V4SF_type_node, pv2si_type_node, NULL_TREE);
16917 tree void_ftype_pv2si_v4sf
16918 = build_function_type_list (void_type_node,
16919 pv2si_type_node, V4SF_type_node, NULL_TREE);
16920 tree void_ftype_pfloat_v4sf
16921 = build_function_type_list (void_type_node,
16922 pfloat_type_node, V4SF_type_node, NULL_TREE);
16923 tree void_ftype_pdi_di
16924 = build_function_type_list (void_type_node,
16925 pdi_type_node, long_long_unsigned_type_node,
16927 tree void_ftype_pv2di_v2di
16928 = build_function_type_list (void_type_node,
16929 pv2di_type_node, V2DI_type_node, NULL_TREE);
16930 /* Normal vector unops. */
16931 tree v4sf_ftype_v4sf
16932 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
16933 tree v16qi_ftype_v16qi
16934 = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE);
16935 tree v8hi_ftype_v8hi
16936 = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE);
16937 tree v4si_ftype_v4si
16938 = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
16939 tree v8qi_ftype_v8qi
16940 = build_function_type_list (V8QI_type_node, V8QI_type_node, NULL_TREE);
16941 tree v4hi_ftype_v4hi
16942 = build_function_type_list (V4HI_type_node, V4HI_type_node, NULL_TREE);
16944 /* Normal vector binops. */
16945 tree v4sf_ftype_v4sf_v4sf
16946 = build_function_type_list (V4SF_type_node,
16947 V4SF_type_node, V4SF_type_node, NULL_TREE);
16948 tree v8qi_ftype_v8qi_v8qi
16949 = build_function_type_list (V8QI_type_node,
16950 V8QI_type_node, V8QI_type_node, NULL_TREE);
16951 tree v4hi_ftype_v4hi_v4hi
16952 = build_function_type_list (V4HI_type_node,
16953 V4HI_type_node, V4HI_type_node, NULL_TREE);
16954 tree v2si_ftype_v2si_v2si
16955 = build_function_type_list (V2SI_type_node,
16956 V2SI_type_node, V2SI_type_node, NULL_TREE);
16957 tree di_ftype_di_di
16958 = build_function_type_list (long_long_unsigned_type_node,
16959 long_long_unsigned_type_node,
16960 long_long_unsigned_type_node, NULL_TREE);
16962 tree di_ftype_di_di_int
16963 = build_function_type_list (long_long_unsigned_type_node,
16964 long_long_unsigned_type_node,
16965 long_long_unsigned_type_node,
16966 integer_type_node, NULL_TREE);
16968 tree v2si_ftype_v2sf
16969 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
16970 tree v2sf_ftype_v2si
16971 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
16972 tree v2si_ftype_v2si
16973 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
16974 tree v2sf_ftype_v2sf
16975 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
16976 tree v2sf_ftype_v2sf_v2sf
16977 = build_function_type_list (V2SF_type_node,
16978 V2SF_type_node, V2SF_type_node, NULL_TREE);
16979 tree v2si_ftype_v2sf_v2sf
16980 = build_function_type_list (V2SI_type_node,
16981 V2SF_type_node, V2SF_type_node, NULL_TREE);
16982 tree pint_type_node = build_pointer_type (integer_type_node);
16983 tree pdouble_type_node = build_pointer_type (double_type_node);
16984 tree pcdouble_type_node = build_pointer_type (
16985 build_type_variant (double_type_node, 1, 0));
16986 tree int_ftype_v2df_v2df
16987 = build_function_type_list (integer_type_node,
16988 V2DF_type_node, V2DF_type_node, NULL_TREE);
16990 tree void_ftype_pcvoid
16991 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
16992 tree v4sf_ftype_v4si
16993 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
16994 tree v4si_ftype_v4sf
16995 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
16996 tree v2df_ftype_v4si
16997 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
16998 tree v4si_ftype_v2df
16999 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
17000 tree v2si_ftype_v2df
17001 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
17002 tree v4sf_ftype_v2df
17003 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
17004 tree v2df_ftype_v2si
17005 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
17006 tree v2df_ftype_v4sf
17007 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
17008 tree int_ftype_v2df
17009 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
17010 tree int64_ftype_v2df
17011 = build_function_type_list (long_long_integer_type_node,
17012 V2DF_type_node, NULL_TREE);
17013 tree v2df_ftype_v2df_int
17014 = build_function_type_list (V2DF_type_node,
17015 V2DF_type_node, integer_type_node, NULL_TREE);
17016 tree v2df_ftype_v2df_int64
17017 = build_function_type_list (V2DF_type_node,
17018 V2DF_type_node, long_long_integer_type_node,
17019 NULL_TREE);
17020 tree v4sf_ftype_v4sf_v2df
17021 = build_function_type_list (V4SF_type_node,
17022 V4SF_type_node, V2DF_type_node, NULL_TREE);
17023 tree v2df_ftype_v2df_v4sf
17024 = build_function_type_list (V2DF_type_node,
17025 V2DF_type_node, V4SF_type_node, NULL_TREE);
17026 tree v2df_ftype_v2df_v2df_int
17027 = build_function_type_list (V2DF_type_node,
17028 V2DF_type_node, V2DF_type_node,
17029 integer_type_node, NULL_TREE);
17031 tree v2df_ftype_v2df_pcdouble
17032 = build_function_type_list (V2DF_type_node,
17033 V2DF_type_node, pcdouble_type_node, NULL_TREE);
17034 tree void_ftype_pdouble_v2df
17035 = build_function_type_list (void_type_node,
17036 pdouble_type_node, V2DF_type_node, NULL_TREE);
17037 tree void_ftype_pint_int
17038 = build_function_type_list (void_type_node,
17039 pint_type_node, integer_type_node, NULL_TREE);
17040 tree void_ftype_v16qi_v16qi_pchar
17041 = build_function_type_list (void_type_node,
17042 V16QI_type_node, V16QI_type_node,
17043 pchar_type_node, NULL_TREE);
17044 tree v2df_ftype_pcdouble
17045 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
17046 tree v2df_ftype_v2df_v2df
17047 = build_function_type_list (V2DF_type_node,
17048 V2DF_type_node, V2DF_type_node, NULL_TREE);
17049 tree v16qi_ftype_v16qi_v16qi
17050 = build_function_type_list (V16QI_type_node,
17051 V16QI_type_node, V16QI_type_node, NULL_TREE);
17052 tree v8hi_ftype_v8hi_v8hi
17053 = build_function_type_list (V8HI_type_node,
17054 V8HI_type_node, V8HI_type_node, NULL_TREE);
17055 tree v4si_ftype_v4si_v4si
17056 = build_function_type_list (V4SI_type_node,
17057 V4SI_type_node, V4SI_type_node, NULL_TREE);
17058 tree v2di_ftype_v2di_v2di
17059 = build_function_type_list (V2DI_type_node,
17060 V2DI_type_node, V2DI_type_node, NULL_TREE);
17061 tree v2di_ftype_v2df_v2df
17062 = build_function_type_list (V2DI_type_node,
17063 V2DF_type_node, V2DF_type_node, NULL_TREE);
17064 tree v2df_ftype_v2df
17065 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
17066 tree v2di_ftype_v2di_int
17067 = build_function_type_list (V2DI_type_node,
17068 V2DI_type_node, integer_type_node, NULL_TREE);
17069 tree v2di_ftype_v2di_v2di_int
17070 = build_function_type_list (V2DI_type_node, V2DI_type_node,
17071 V2DI_type_node, integer_type_node, NULL_TREE);
17072 tree v4si_ftype_v4si_int
17073 = build_function_type_list (V4SI_type_node,
17074 V4SI_type_node, integer_type_node, NULL_TREE);
17075 tree v8hi_ftype_v8hi_int
17076 = build_function_type_list (V8HI_type_node,
17077 V8HI_type_node, integer_type_node, NULL_TREE);
17078 tree v4si_ftype_v8hi_v8hi
17079 = build_function_type_list (V4SI_type_node,
17080 V8HI_type_node, V8HI_type_node, NULL_TREE);
17081 tree di_ftype_v8qi_v8qi
17082 = build_function_type_list (long_long_unsigned_type_node,
17083 V8QI_type_node, V8QI_type_node, NULL_TREE);
17084 tree di_ftype_v2si_v2si
17085 = build_function_type_list (long_long_unsigned_type_node,
17086 V2SI_type_node, V2SI_type_node, NULL_TREE);
17087 tree v2di_ftype_v16qi_v16qi
17088 = build_function_type_list (V2DI_type_node,
17089 V16QI_type_node, V16QI_type_node, NULL_TREE);
17090 tree v2di_ftype_v4si_v4si
17091 = build_function_type_list (V2DI_type_node,
17092 V4SI_type_node, V4SI_type_node, NULL_TREE);
17093 tree int_ftype_v16qi
17094 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
17095 tree v16qi_ftype_pcchar
17096 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
17097 tree void_ftype_pchar_v16qi
17098 = build_function_type_list (void_type_node,
17099 pchar_type_node, V16QI_type_node, NULL_TREE);
17101 tree v2di_ftype_v2di_unsigned_unsigned
17102 = build_function_type_list (V2DI_type_node, V2DI_type_node,
17103 unsigned_type_node, unsigned_type_node,
17104 NULL_TREE);
17105 tree v2di_ftype_v2di_v2di_unsigned_unsigned
17106 = build_function_type_list (V2DI_type_node, V2DI_type_node, V2DI_type_node,
17107 unsigned_type_node, unsigned_type_node,
17108 NULL_TREE);
17109 tree v2di_ftype_v2di_v16qi
17110 = build_function_type_list (V2DI_type_node, V2DI_type_node, V16QI_type_node,
17111 NULL_TREE);
17113 tree float80_type;
17114 tree float128_type;
17117 /* The __float80 type. */
17118 if (TYPE_MODE (long_double_type_node) == XFmode)
17119 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
17120 "__float80");
17121 else
17122 {
17123 /* The __float80 type. */
17124 float80_type = make_node (REAL_TYPE);
17125 TYPE_PRECISION (float80_type) = 80;
17126 layout_type (float80_type);
17127 (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
17128 }
17130 if (TARGET_64BIT)
17131 {
17132 float128_type = make_node (REAL_TYPE);
17133 TYPE_PRECISION (float128_type) = 128;
17134 layout_type (float128_type);
17135 (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
17136 }
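/* For illustration (editor's sketch): once registered, these types are
   usable directly from C source compiled for a matching target, e.g.

       __float80 ext = 1.0;      80-bit extended precision
       __float128 quad;          only when TARGET_64BIT

   Availability depends on the target's long double mode as tested
   above; the declarations shown are a sketch, not compiler code.  */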
17138 /* Add all builtins that are more or less simple operations on two
17139 operands. */
17140 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
17141 {
17142 /* Use one of the operands; the target can have a different mode for
17143 mask-generating compares. */
17144 enum machine_mode mode;
17145 tree type;
17147 if (d->name == 0)
17148 continue;
17149 mode = insn_data[d->icode].operand[1].mode;
17151 switch (mode)
17152 {
17153 case V16QImode:
17154 type = v16qi_ftype_v16qi_v16qi;
17155 break;
17156 case V8HImode:
17157 type = v8hi_ftype_v8hi_v8hi;
17158 break;
17159 case V4SImode:
17160 type = v4si_ftype_v4si_v4si;
17161 break;
17162 case V2DImode:
17163 type = v2di_ftype_v2di_v2di;
17164 break;
17165 case V2DFmode:
17166 type = v2df_ftype_v2df_v2df;
17167 break;
17168 case V4SFmode:
17169 type = v4sf_ftype_v4sf_v4sf;
17170 break;
17171 case V8QImode:
17172 type = v8qi_ftype_v8qi_v8qi;
17173 break;
17174 case V4HImode:
17175 type = v4hi_ftype_v4hi_v4hi;
17176 break;
17177 case V2SImode:
17178 type = v2si_ftype_v2si_v2si;
17179 break;
17180 case DImode:
17181 type = di_ftype_di_di;
17182 break;
17184 default:
17185 gcc_unreachable ();
17186 }
17188 /* Override for comparisons. */
17189 if (d->icode == CODE_FOR_sse_maskcmpv4sf3
17190 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3)
17191 type = v4si_ftype_v4sf_v4sf;
17193 if (d->icode == CODE_FOR_sse2_maskcmpv2df3
17194 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
17195 type = v2di_ftype_v2df_v2df;
17197 def_builtin (d->mask, d->name, type, d->code);
17198 }
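/* For illustration (editor's sketch of the table format, not a
   quotation of a specific row): a representative bdesc_2arg entry has
   the shape

       { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps",
         IX86_BUILTIN_ADDPS, 0, 0 }

   so the loop above derives the prototype from the insn's operand
   modes and registers one builtin per table row.  */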
17200 /* Add all builtins that are more or less simple operations on 1 operand. */
17201 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
17202 {
17203 enum machine_mode mode;
17204 tree type;
17206 if (d->name == 0)
17207 continue;
17208 mode = insn_data[d->icode].operand[1].mode;
17210 switch (mode)
17211 {
17212 case V16QImode:
17213 type = v16qi_ftype_v16qi;
17214 break;
17215 case V8HImode:
17216 type = v8hi_ftype_v8hi;
17217 break;
17218 case V4SImode:
17219 type = v4si_ftype_v4si;
17220 break;
17221 case V2DFmode:
17222 type = v2df_ftype_v2df;
17223 break;
17224 case V4SFmode:
17225 type = v4sf_ftype_v4sf;
17226 break;
17227 case V8QImode:
17228 type = v8qi_ftype_v8qi;
17229 break;
17230 case V4HImode:
17231 type = v4hi_ftype_v4hi;
17232 break;
17233 case V2SImode:
17234 type = v2si_ftype_v2si;
17235 break;
17237 default:
17238 gcc_unreachable ();
17239 }
17241 def_builtin (d->mask, d->name, type, d->code);
17242 }
17244 /* Add the remaining MMX insns with somewhat more complicated types. */
17245 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
17246 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
17247 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
17248 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
17250 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
17251 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
17252 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
17254 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
17255 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
17257 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
17258 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
17260 /* comi/ucomi insns. */
17261 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
17262 if (d->mask == MASK_SSE2)
17263 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
17264 else
17265 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
17267 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
17268 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
17269 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
17271 def_builtin (MASK_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
17272 def_builtin (MASK_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
17273 def_builtin_const (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
17274 def_builtin_const (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
17275 def_builtin_const (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
17276 def_builtin_const (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
17277 def_builtin_const (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
17278 def_builtin_const (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
17279 def_builtin_const (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
17280 def_builtin_const (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
17281 def_builtin_const (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);
17283 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
17285 def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
17286 def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
17288 def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
17289 def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
17290 def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
17291 def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
17293 def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
17294 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
17295 def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
17296 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
17298 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
17300 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
17302 def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
17303 def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
17304 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
17305 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
17306 def_builtin_const (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
17307 def_builtin_const (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
17309 def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
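/* For illustration (editor's sketch, assuming the xmmintrin.h wrappers
   of this era): the trailing integer_type_node in
   v4sf_ftype_v4sf_v4sf_int carries the shuffle immediate.  User code
   reaches this builtin roughly as

       __m128 r = _mm_shuffle_ps (a, b, _MM_SHUFFLE (3, 2, 1, 0));

   where _MM_SHUFFLE packs four 2-bit lane selectors into the
   immediate byte.  */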
17311 /* Original 3DNow! */
17312 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
17313 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
17314 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
17315 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
17316 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
17317 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
17318 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
17319 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
17320 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
17321 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
17322 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
17323 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
17324 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
17325 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
17326 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
17327 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
17328 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
17329 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
17330 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
17331 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
17333 /* 3DNow! extension as used in the Athlon CPU. */
17334 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
17335 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
17336 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
17337 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
17338 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
17339 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
17341 /* SSE2 */
17342 def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
17344 def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
17345 def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
17347 def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADHPD);
17348 def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADLPD);
17350 def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
17351 def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
17352 def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
17353 def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
17354 def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
17356 def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
17357 def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
17358 def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
17359 def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
17361 def_builtin_const (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
17362 def_builtin_const (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
17364 def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
17366 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
17367 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
17369 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
17370 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
17371 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
17372 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
17373 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
17375 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
17377 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
17378 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
17379 def_builtin_const (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
17380 def_builtin_const (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);
17382 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
17383 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
17384 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
17386 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
17387 def_builtin_const (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
17388 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
17389 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
17391 def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
17392 def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
17393 def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
17395 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
17396 def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
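/* For illustration (editor's sketch, assuming the emmintrin.h wrappers
   of this era): loaddqu/storedqu are the unaligned counterparts of the
   aligned V16QI moves, wrapped roughly as

       __m128i v = _mm_loadu_si128 ((__m128i const *) p);
       _mm_storeu_si128 ((__m128i *) q, v);

   so p and q need not be 16-byte aligned.  */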
17398 def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq", di_ftype_v2si_v2si, IX86_BUILTIN_PMULUDQ);
17399 def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq128", v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULUDQ128);
17401 def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
17402 def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
17403 def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
17404 def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
17405 def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSLLW128);
17406 def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSLLD128);
17407 def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
17409 def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
17410 def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
17411 def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
17412 def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
17413 def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSRLW128);
17414 def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSRLD128);
17415 def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
17417 def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
17418 def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
17419 def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSRAW128);
17420 def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSRAD128);
17422 def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
17424 /* Prescott New Instructions. */
17425 def_builtin (MASK_SSE3, "__builtin_ia32_monitor",
17426 void_ftype_pcvoid_unsigned_unsigned,
17427 IX86_BUILTIN_MONITOR);
17428 def_builtin (MASK_SSE3, "__builtin_ia32_mwait",
17429 void_ftype_unsigned_unsigned,
17430 IX86_BUILTIN_MWAIT);
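/* For illustration (editor's sketch, extension and hint arguments shown
   as zero): a typical monitor/mwait pairing through the pmmintrin.h
   wrappers looks roughly like

       _mm_monitor (addr, 0, 0);    arm the monitor on ADDR
       _mm_mwait (0, 0);            wait for a store to that line

   matching the pcvoid/unsigned/unsigned and unsigned/unsigned
   signatures declared above.  */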
17431 def_builtin (MASK_SSE3, "__builtin_ia32_lddqu",
17432 v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);
17434 /* SSSE3. */
17435 def_builtin (MASK_SSSE3, "__builtin_ia32_palignr128",
17436 v2di_ftype_v2di_v2di_int, IX86_BUILTIN_PALIGNR128);
17437 def_builtin (MASK_SSSE3, "__builtin_ia32_palignr", di_ftype_di_di_int,
17438 IX86_BUILTIN_PALIGNR);
17440 /* AMDFAM10 SSE4A new built-ins. */
17441 def_builtin (MASK_SSE4A, "__builtin_ia32_movntsd",
17442 void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTSD);
17443 def_builtin (MASK_SSE4A, "__builtin_ia32_movntss",
17444 void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTSS);
17445 def_builtin (MASK_SSE4A, "__builtin_ia32_extrqi",
17446 v2di_ftype_v2di_unsigned_unsigned, IX86_BUILTIN_EXTRQI);
17447 def_builtin (MASK_SSE4A, "__builtin_ia32_extrq",
17448 v2di_ftype_v2di_v16qi, IX86_BUILTIN_EXTRQ);
17449 def_builtin (MASK_SSE4A, "__builtin_ia32_insertqi",
17450 v2di_ftype_v2di_v2di_unsigned_unsigned, IX86_BUILTIN_INSERTQI);
17451 def_builtin (MASK_SSE4A, "__builtin_ia32_insertq",
17452 v2di_ftype_v2di_v2di, IX86_BUILTIN_INSERTQ);
17454 /* Access to the vec_init patterns. */
17455 ftype = build_function_type_list (V2SI_type_node, integer_type_node,
17456 integer_type_node, NULL_TREE);
17457 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v2si",
17458 ftype, IX86_BUILTIN_VEC_INIT_V2SI);
17460 ftype = build_function_type_list (V4HI_type_node, short_integer_type_node,
17461 short_integer_type_node,
17462 short_integer_type_node,
17463 short_integer_type_node, NULL_TREE);
17464 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v4hi",
17465 ftype, IX86_BUILTIN_VEC_INIT_V4HI);
17467 ftype = build_function_type_list (V8QI_type_node, char_type_node,
17468 char_type_node, char_type_node,
17469 char_type_node, char_type_node,
17470 char_type_node, char_type_node,
17471 char_type_node, NULL_TREE);
17472 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v8qi",
17473 ftype, IX86_BUILTIN_VEC_INIT_V8QI);
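/* For illustration (editor's sketch, assuming the mmintrin.h wrappers
   of this era): the vec_init builtins let mmintrin.h build MMX values
   without vec_init patterns in mmx.md, e.g. roughly

       __m64 v = (__m64) __builtin_ia32_vec_init_v2si (hi, lo);

   as used by _mm_set_pi32.  */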
17475 /* Access to the vec_extract patterns. */
17476 ftype = build_function_type_list (double_type_node, V2DF_type_node,
17477 integer_type_node, NULL_TREE);
17478 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v2df",
17479 ftype, IX86_BUILTIN_VEC_EXT_V2DF);
17481 ftype = build_function_type_list (long_long_integer_type_node,
17482 V2DI_type_node, integer_type_node,
17483 NULL_TREE);
17484 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v2di",
17485 ftype, IX86_BUILTIN_VEC_EXT_V2DI);
17487 ftype = build_function_type_list (float_type_node, V4SF_type_node,
17488 integer_type_node, NULL_TREE);
17489 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v4sf",
17490 ftype, IX86_BUILTIN_VEC_EXT_V4SF);
17492 ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
17493 integer_type_node, NULL_TREE);
17494 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v4si",
17495 ftype, IX86_BUILTIN_VEC_EXT_V4SI);
17497 ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
17498 integer_type_node, NULL_TREE);
17499 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v8hi",
17500 ftype, IX86_BUILTIN_VEC_EXT_V8HI);
17502 ftype = build_function_type_list (intHI_type_node, V4HI_type_node,
17503 integer_type_node, NULL_TREE);
17504 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_ext_v4hi",
17505 ftype, IX86_BUILTIN_VEC_EXT_V4HI);
17507 ftype = build_function_type_list (intSI_type_node, V2SI_type_node,
17508 integer_type_node, NULL_TREE);
17509 def_builtin (MASK_MMX, "__builtin_ia32_vec_ext_v2si",
17510 ftype, IX86_BUILTIN_VEC_EXT_V2SI);
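/* For illustration (editor's sketch; the cast and wrapper spelling
   follow the intrinsic headers of this era): a vec_ext builtin takes
   the vector and a constant selector, e.g. roughly

       int lane = __builtin_ia32_vec_ext_v4si ((__v4si) v, 2);

   and the selector is range-checked by get_element_number below.  */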
17512 /* Access to the vec_set patterns. */
17513 ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
17514 intHI_type_node,
17515 integer_type_node, NULL_TREE);
17516 def_builtin (MASK_SSE, "__builtin_ia32_vec_set_v8hi",
17517 ftype, IX86_BUILTIN_VEC_SET_V8HI);
17519 ftype = build_function_type_list (V4HI_type_node, V4HI_type_node,
17520 intHI_type_node,
17521 integer_type_node, NULL_TREE);
17522 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_set_v4hi",
17523 ftype, IX86_BUILTIN_VEC_SET_V4HI);
17524 }
17526 static void
17527 ix86_init_builtins (void)
17528 {
17529 if (TARGET_MMX)
17530 ix86_init_mmx_sse_builtins ();
17531 }
17533 /* Errors in the source file can cause expand_expr to return const0_rtx
17534 where we expect a vector. To avoid crashing, use one of the vector
17535 clear instructions. */
17536 static rtx
17537 safe_vector_operand (rtx x, enum machine_mode mode)
17538 {
17539 if (x == const0_rtx)
17540 x = CONST0_RTX (mode);
17541 return x;
17542 }
17544 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
17546 static rtx
17547 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
17548 {
17549 rtx pat, xops[3];
17550 tree arg0 = CALL_EXPR_ARG (exp, 0);
17551 tree arg1 = CALL_EXPR_ARG (exp, 1);
17552 rtx op0 = expand_normal (arg0);
17553 rtx op1 = expand_normal (arg1);
17554 enum machine_mode tmode = insn_data[icode].operand[0].mode;
17555 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
17556 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
17558 if (VECTOR_MODE_P (mode0))
17559 op0 = safe_vector_operand (op0, mode0);
17560 if (VECTOR_MODE_P (mode1))
17561 op1 = safe_vector_operand (op1, mode1);
17563 if (optimize || !target
17564 || GET_MODE (target) != tmode
17565 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
17566 target = gen_reg_rtx (tmode);
17568 if (GET_MODE (op1) == SImode && mode1 == TImode)
17569 {
17570 rtx x = gen_reg_rtx (V4SImode);
17571 emit_insn (gen_sse2_loadd (x, op1));
17572 op1 = gen_lowpart (TImode, x);
17573 }
17575 /* The insn must want input operands in the same modes as the
17576 result. */
17577 gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
17578 && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode));
17580 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
17581 op0 = copy_to_mode_reg (mode0, op0);
17582 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
17583 op1 = copy_to_mode_reg (mode1, op1);
17585 /* ??? Using ix86_fixup_binary_operands is problematic when
17586 we've got mismatched modes. Fake it. */
17588 xops[0] = target;
17589 xops[1] = op0;
17590 xops[2] = op1;
17592 if (tmode == mode0 && tmode == mode1)
17593 {
17594 target = ix86_fixup_binary_operands (UNKNOWN, tmode, xops);
17595 op0 = xops[1];
17596 op1 = xops[2];
17597 }
17598 else if (optimize || !ix86_binary_operator_ok (UNKNOWN, tmode, xops))
17599 {
17600 op0 = force_reg (mode0, op0);
17601 op1 = force_reg (mode1, op1);
17602 target = gen_reg_rtx (tmode);
17603 }
17605 pat = GEN_FCN (icode) (target, op0, op1);
17606 if (! pat)
17607 return 0;
17608 emit_insn (pat);
17609 return target;
17610 }
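/* For illustration (editor's sketch of the generated RTL, assuming
   both operands land in registers): for a call such as
   __builtin_ia32_addps (a, b) the routine above ends up emitting the
   single insn

       (set (reg:V4SF target) (plus:V4SF (reg:V4SF a) (reg:V4SF b)))

   via GEN_FCN applied to the table's icode.  */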
17612 /* Subroutine of ix86_expand_builtin to take care of stores. */
17614 static rtx
17615 ix86_expand_store_builtin (enum insn_code icode, tree exp)
17616 {
17617 rtx pat;
17618 tree arg0 = CALL_EXPR_ARG (exp, 0);
17619 tree arg1 = CALL_EXPR_ARG (exp, 1);
17620 rtx op0 = expand_normal (arg0);
17621 rtx op1 = expand_normal (arg1);
17622 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
17623 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
17625 if (VECTOR_MODE_P (mode1))
17626 op1 = safe_vector_operand (op1, mode1);
17628 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
17629 op1 = copy_to_mode_reg (mode1, op1);
17631 pat = GEN_FCN (icode) (op0, op1);
17632 if (pat)
17633 emit_insn (pat);
17634 return 0;
17635 }
17637 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
17639 static rtx
17640 ix86_expand_unop_builtin (enum insn_code icode, tree exp,
17641 rtx target, int do_load)
17642 {
17643 rtx pat;
17644 tree arg0 = CALL_EXPR_ARG (exp, 0);
17645 rtx op0 = expand_normal (arg0);
17646 enum machine_mode tmode = insn_data[icode].operand[0].mode;
17647 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
17649 if (optimize || !target
17650 || GET_MODE (target) != tmode
17651 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
17652 target = gen_reg_rtx (tmode);
17653 if (do_load)
17654 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
17655 else
17656 {
17657 if (VECTOR_MODE_P (mode0))
17658 op0 = safe_vector_operand (op0, mode0);
17660 if ((optimize && !register_operand (op0, mode0))
17661 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
17662 op0 = copy_to_mode_reg (mode0, op0);
17663 }
17665 pat = GEN_FCN (icode) (target, op0);
17666 if (! pat)
17667 return 0;
17668 emit_insn (pat);
17669 return target;
17670 }
17672 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
17673 sqrtss, rsqrtss, rcpss. */
17675 static rtx
17676 ix86_expand_unop1_builtin (enum insn_code icode, tree exp, rtx target)
17677 {
17678 rtx pat;
17679 tree arg0 = CALL_EXPR_ARG (exp, 0);
17680 rtx op1, op0 = expand_normal (arg0);
17681 enum machine_mode tmode = insn_data[icode].operand[0].mode;
17682 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
17684 if (optimize || !target
17685 || GET_MODE (target) != tmode
17686 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
17687 target = gen_reg_rtx (tmode);
17689 if (VECTOR_MODE_P (mode0))
17690 op0 = safe_vector_operand (op0, mode0);
17692 if ((optimize && !register_operand (op0, mode0))
17693 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
17694 op0 = copy_to_mode_reg (mode0, op0);
17696 op1 = op0;
17697 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
17698 op1 = copy_to_mode_reg (mode0, op1);
17700 pat = GEN_FCN (icode) (target, op0, op1);
17701 if (! pat)
17702 return 0;
17703 emit_insn (pat);
17704 return target;
17705 }
17707 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
17709 static rtx
17710 ix86_expand_sse_compare (const struct builtin_description *d, tree exp,
17711 rtx target)
17712 {
17713 rtx pat;
17714 tree arg0 = CALL_EXPR_ARG (exp, 0);
17715 tree arg1 = CALL_EXPR_ARG (exp, 1);
17716 rtx op0 = expand_normal (arg0);
17717 rtx op1 = expand_normal (arg1);
17718 rtx op2;
17719 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
17720 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
17721 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
17722 enum rtx_code comparison = d->comparison;
17724 if (VECTOR_MODE_P (mode0))
17725 op0 = safe_vector_operand (op0, mode0);
17726 if (VECTOR_MODE_P (mode1))
17727 op1 = safe_vector_operand (op1, mode1);
17729 /* Swap operands if we have a comparison that isn't available in
17730 hardware. */
17731 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
17732 {
17733 rtx tmp = gen_reg_rtx (mode1);
17734 emit_move_insn (tmp, op1);
17735 op1 = op0;
17736 op0 = tmp;
17737 }
17739 if (optimize || !target
17740 || GET_MODE (target) != tmode
17741 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
17742 target = gen_reg_rtx (tmode);
17744 if ((optimize && !register_operand (op0, mode0))
17745 || ! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
17746 op0 = copy_to_mode_reg (mode0, op0);
17747 if ((optimize && !register_operand (op1, mode1))
17748 || ! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
17749 op1 = copy_to_mode_reg (mode1, op1);
17751 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
17752 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
17753 if (! pat)
17754 return 0;
17755 emit_insn (pat);
17756 return target;
17757 }
17759 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
17761 static rtx
17762 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
17763 rtx target)
17764 {
17765 rtx pat;
17766 tree arg0 = CALL_EXPR_ARG (exp, 0);
17767 tree arg1 = CALL_EXPR_ARG (exp, 1);
17768 rtx op0 = expand_normal (arg0);
17769 rtx op1 = expand_normal (arg1);
17770 rtx op2;
17771 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
17772 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
17773 enum rtx_code comparison = d->comparison;
17775 if (VECTOR_MODE_P (mode0))
17776 op0 = safe_vector_operand (op0, mode0);
17777 if (VECTOR_MODE_P (mode1))
17778 op1 = safe_vector_operand (op1, mode1);
17780 /* Swap operands if we have a comparison that isn't available in
17781 hardware. */
17782 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
17783 {
17784 rtx tmp = op1;
17785 op1 = op0;
17786 op0 = tmp;
17787 }
17789 target = gen_reg_rtx (SImode);
17790 emit_move_insn (target, const0_rtx);
17791 target = gen_rtx_SUBREG (QImode, target, 0);
17793 if ((optimize && !register_operand (op0, mode0))
17794 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
17795 op0 = copy_to_mode_reg (mode0, op0);
17796 if ((optimize && !register_operand (op1, mode1))
17797 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
17798 op1 = copy_to_mode_reg (mode1, op1);
17800 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
17801 pat = GEN_FCN (d->icode) (op0, op1);
17802 if (! pat)
17803 return 0;
17804 emit_insn (pat);
17805 emit_insn (gen_rtx_SET (VOIDmode,
17806 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
17807 gen_rtx_fmt_ee (comparison, QImode,
17808 SET_DEST (pat),
17809 const0_rtx)));
17811 return SUBREG_REG (target);
17812 }
17814 /* Return the integer constant in ARG. Constrain it to be in the range
17815 of the subparts of VEC_TYPE; issue an error if not. */
17817 static int
17818 get_element_number (tree vec_type, tree arg)
17819 {
17820 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
17822 if (!host_integerp (arg, 1)
17823 || (elt = tree_low_cst (arg, 1), elt > max))
17824 {
17825 error ("selector must be an integer constant in the range 0..%wi", max);
17826 return 0;
17827 }
17829 return elt;
17830 }
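/* For illustration (editor's sketch; V4SF has subparts 0..3): a
   selector outside the vector is rejected here, so

       __builtin_ia32_vec_ext_v4sf (x, 4)

   draws "selector must be an integer constant in the range 0..3" and
   the expander falls back to element 0.  */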
17832 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
17833 ix86_expand_vector_init. We DO have language-level syntax for this, in
17834 the form of (type){ init-list }. Except that since we can't place emms
17835 instructions from inside the compiler, we can't allow the use of MMX
17836 registers unless the user explicitly asks for it. So we do *not* define
17837 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
17838 we have builtins invoked by mmintrin.h that gives us license to emit
17839 these sorts of instructions. */
17841 static rtx
17842 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
17843 {
17844 enum machine_mode tmode = TYPE_MODE (type);
17845 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
17846 int i, n_elt = GET_MODE_NUNITS (tmode);
17847 rtvec v = rtvec_alloc (n_elt);
17849 gcc_assert (VECTOR_MODE_P (tmode));
17850 gcc_assert (call_expr_nargs (exp) == n_elt);
17852 for (i = 0; i < n_elt; ++i)
17853 {
17854 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
17855 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
17856 }
17858 if (!target || !register_operand (target, tmode))
17859 target = gen_reg_rtx (tmode);
17861 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
17862 return target;
17863 }
17865 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
17866 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
17867 had a language-level syntax for referencing vector elements. */
17869 static rtx
17870 ix86_expand_vec_ext_builtin (tree exp, rtx target)
17871 {
17872 enum machine_mode tmode, mode0;
17873 tree arg0, arg1;
17874 int elt;
17875 rtx op0;
17877 arg0 = CALL_EXPR_ARG (exp, 0);
17878 arg1 = CALL_EXPR_ARG (exp, 1);
17880 op0 = expand_normal (arg0);
17881 elt = get_element_number (TREE_TYPE (arg0), arg1);
17883 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
17884 mode0 = TYPE_MODE (TREE_TYPE (arg0));
17885 gcc_assert (VECTOR_MODE_P (mode0));
17887 op0 = force_reg (mode0, op0);
17889 if (optimize || !target || !register_operand (target, tmode))
17890 target = gen_reg_rtx (tmode);
17892 ix86_expand_vector_extract (true, target, op0, elt);
17894 return target;
17895 }
17897 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
17898 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
17899 a language-level syntax for referencing vector elements. */
17901 static rtx
17902 ix86_expand_vec_set_builtin (tree exp)
17903 {
17904 enum machine_mode tmode, mode1;
17905 tree arg0, arg1, arg2;
17906 int elt;
17907 rtx op0, op1, target;
17909 arg0 = CALL_EXPR_ARG (exp, 0);
17910 arg1 = CALL_EXPR_ARG (exp, 1);
17911 arg2 = CALL_EXPR_ARG (exp, 2);
17913 tmode = TYPE_MODE (TREE_TYPE (arg0));
17914 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
17915 gcc_assert (VECTOR_MODE_P (tmode));
17917 op0 = expand_expr (arg0, NULL_RTX, tmode, 0);
17918 op1 = expand_expr (arg1, NULL_RTX, mode1, 0);
17919 elt = get_element_number (TREE_TYPE (arg0), arg2);
17921 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
17922 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
17924 op0 = force_reg (tmode, op0);
17925 op1 = force_reg (mode1, op1);
17927 /* OP0 is the source of these builtin functions and shouldn't be
17928 modified. Create a copy, use it and return it as target. */
17929 target = gen_reg_rtx (tmode);
17930 emit_move_insn (target, op0);
17931 ix86_expand_vector_set (true, target, op1, elt);
17933 return target;
17934 }
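/* For illustration (editor's sketch, assuming the emmintrin.h wrapper
   for __builtin_ia32_vec_set_v8hi): because the source operand is
   copied first, a use such as roughly

       __m128i w = _mm_insert_epi16 (v, x, 2);

   yields a new vector w and leaves v unchanged, matching the comment
   above.  */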
17936 /* Expand an expression EXP that calls a built-in function,
17937 with result going to TARGET if that's convenient
17938 (and in mode MODE if that's convenient).
17939 SUBTARGET may be used as the target for computing one of EXP's operands.
17940 IGNORE is nonzero if the value is to be ignored. */
17942 static rtx
17943 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
17944 enum machine_mode mode ATTRIBUTE_UNUSED,
17945 int ignore ATTRIBUTE_UNUSED)
17946 {
17947 const struct builtin_description *d;
17948 size_t i;
17949 enum insn_code icode;
17950 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
17951 tree arg0, arg1, arg2, arg3;
17952 rtx op0, op1, op2, op3, pat;
17953 enum machine_mode tmode, mode0, mode1, mode2, mode3, mode4;
17954 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
17956 switch (fcode)
17957 {
17958 case IX86_BUILTIN_EMMS:
17959 emit_insn (gen_mmx_emms ());
17960 return 0;
17962 case IX86_BUILTIN_SFENCE:
17963 emit_insn (gen_sse_sfence ());
17964 return 0;
17966 case IX86_BUILTIN_MASKMOVQ:
17967 case IX86_BUILTIN_MASKMOVDQU:
17968 icode = (fcode == IX86_BUILTIN_MASKMOVQ
17969 ? CODE_FOR_mmx_maskmovq
17970 : CODE_FOR_sse2_maskmovdqu);
17971 /* Note the arg order is different from the operand order. */
17972 arg1 = CALL_EXPR_ARG (exp, 0);
17973 arg2 = CALL_EXPR_ARG (exp, 1);
17974 arg0 = CALL_EXPR_ARG (exp, 2);
17975 op0 = expand_normal (arg0);
17976 op1 = expand_normal (arg1);
17977 op2 = expand_normal (arg2);
17978 mode0 = insn_data[icode].operand[0].mode;
17979 mode1 = insn_data[icode].operand[1].mode;
17980 mode2 = insn_data[icode].operand[2].mode;
17982 op0 = force_reg (Pmode, op0);
17983 op0 = gen_rtx_MEM (mode1, op0);
17985 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
17986 op0 = copy_to_mode_reg (mode0, op0);
17987 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
17988 op1 = copy_to_mode_reg (mode1, op1);
17989 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
17990 op2 = copy_to_mode_reg (mode2, op2);
17991 pat = GEN_FCN (icode) (op0, op1, op2);
17992 if (! pat)
17993 return 0;
17994 emit_insn (pat);
17995 return 0;
17997 case IX86_BUILTIN_SQRTSS:
17998 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmsqrtv4sf2, exp, target);
17999 case IX86_BUILTIN_RSQRTSS:
18000 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrsqrtv4sf2, exp, target);
18001 case IX86_BUILTIN_RCPSS:
18002 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrcpv4sf2, exp, target);
18004 case IX86_BUILTIN_LOADUPS:
18005 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, exp, target, 1);
18007 case IX86_BUILTIN_STOREUPS:
18008 return ix86_expand_store_builtin (CODE_FOR_sse_movups, exp);
18010 case IX86_BUILTIN_LOADHPS:
18011 case IX86_BUILTIN_LOADLPS:
18012 case IX86_BUILTIN_LOADHPD:
18013 case IX86_BUILTIN_LOADLPD:
18014 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_loadhps
18015 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_loadlps
18016 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_loadhpd
18017 : CODE_FOR_sse2_loadlpd);
18018 arg0 = CALL_EXPR_ARG (exp, 0);
18019 arg1 = CALL_EXPR_ARG (exp, 1);
18020 op0 = expand_normal (arg0);
18021 op1 = expand_normal (arg1);
18022 tmode = insn_data[icode].operand[0].mode;
18023 mode0 = insn_data[icode].operand[1].mode;
18024 mode1 = insn_data[icode].operand[2].mode;
18026 op0 = force_reg (mode0, op0);
18027 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
18028 if (optimize || target == 0
18029 || GET_MODE (target) != tmode
18030 || !register_operand (target, tmode))
18031 target = gen_reg_rtx (tmode);
18032 pat = GEN_FCN (icode) (target, op0, op1);
18033 if (! pat)
18034 return 0;
18035 emit_insn (pat);
18036 return target;
18038 case IX86_BUILTIN_STOREHPS:
18039 case IX86_BUILTIN_STORELPS:
18040 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_storehps
18041 : CODE_FOR_sse_storelps);
18042 arg0 = CALL_EXPR_ARG (exp, 0);
18043 arg1 = CALL_EXPR_ARG (exp, 1);
18044 op0 = expand_normal (arg0);
18045 op1 = expand_normal (arg1);
18046 mode0 = insn_data[icode].operand[0].mode;
18047 mode1 = insn_data[icode].operand[1].mode;
18049 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
18050 op1 = force_reg (mode1, op1);
18052 pat = GEN_FCN (icode) (op0, op1);
18053 if (! pat)
18054 return 0;
18055 emit_insn (pat);
18056 return const0_rtx;
18058 case IX86_BUILTIN_MOVNTPS:
18059 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, exp);
18060 case IX86_BUILTIN_MOVNTQ:
18061 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, exp);
18063 case IX86_BUILTIN_LDMXCSR:
18064 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
18065 target = assign_386_stack_local (SImode, SLOT_TEMP);
18066 emit_move_insn (target, op0);
18067 emit_insn (gen_sse_ldmxcsr (target));
18068 return 0;
18070 case IX86_BUILTIN_STMXCSR:
18071 target = assign_386_stack_local (SImode, SLOT_TEMP);
18072 emit_insn (gen_sse_stmxcsr (target));
18073 return copy_to_mode_reg (SImode, target);
18075 case IX86_BUILTIN_SHUFPS:
18076 case IX86_BUILTIN_SHUFPD:
18077 icode = (fcode == IX86_BUILTIN_SHUFPS
18078 ? CODE_FOR_sse_shufps
18079 : CODE_FOR_sse2_shufpd);
18080 arg0 = CALL_EXPR_ARG (exp, 0);
18081 arg1 = CALL_EXPR_ARG (exp, 1);
18082 arg2 = CALL_EXPR_ARG (exp, 2);
18083 op0 = expand_normal (arg0);
18084 op1 = expand_normal (arg1);
18085 op2 = expand_normal (arg2);
18086 tmode = insn_data[icode].operand[0].mode;
18087 mode0 = insn_data[icode].operand[1].mode;
18088 mode1 = insn_data[icode].operand[2].mode;
18089 mode2 = insn_data[icode].operand[3].mode;
18091 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
18092 op0 = copy_to_mode_reg (mode0, op0);
18093 if ((optimize && !register_operand (op1, mode1))
18094 || !(*insn_data[icode].operand[2].predicate) (op1, mode1))
18095 op1 = copy_to_mode_reg (mode1, op1);
18096 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
18097 {
18098 /* @@@ better error message */
18099 error ("mask must be an immediate");
18100 return gen_reg_rtx (tmode);
18101 }
18102 if (optimize || target == 0
18103 || GET_MODE (target) != tmode
18104 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18105 target = gen_reg_rtx (tmode);
18106 pat = GEN_FCN (icode) (target, op0, op1, op2);
18107 if (! pat)
18108 return 0;
18109 emit_insn (pat);
18110 return target;
18112 case IX86_BUILTIN_PSHUFW:
18113 case IX86_BUILTIN_PSHUFD:
18114 case IX86_BUILTIN_PSHUFHW:
18115 case IX86_BUILTIN_PSHUFLW:
18116 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
18117 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
18118 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
18119 : CODE_FOR_mmx_pshufw);
18120 arg0 = CALL_EXPR_ARG (exp, 0);
18121 arg1 = CALL_EXPR_ARG (exp, 1);
18122 op0 = expand_normal (arg0);
18123 op1 = expand_normal (arg1);
18124 tmode = insn_data[icode].operand[0].mode;
18125 mode1 = insn_data[icode].operand[1].mode;
18126 mode2 = insn_data[icode].operand[2].mode;
18128 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
18129 op0 = copy_to_mode_reg (mode1, op0);
18130 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
18131 {
18132 /* @@@ better error message */
18133 error ("mask must be an immediate");
18134 return const0_rtx;
18135 }
18136 if (target == 0
18137 || GET_MODE (target) != tmode
18138 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18139 target = gen_reg_rtx (tmode);
18140 pat = GEN_FCN (icode) (target, op0, op1);
18141 if (! pat)
18142 return 0;
18143 emit_insn (pat);
18144 return target;
18146 case IX86_BUILTIN_PSLLWI128:
18147 icode = CODE_FOR_ashlv8hi3;
18148 goto do_pshifti;
18149 case IX86_BUILTIN_PSLLDI128:
18150 icode = CODE_FOR_ashlv4si3;
18151 goto do_pshifti;
18152 case IX86_BUILTIN_PSLLQI128:
18153 icode = CODE_FOR_ashlv2di3;
18154 goto do_pshifti;
18155 case IX86_BUILTIN_PSRAWI128:
18156 icode = CODE_FOR_ashrv8hi3;
18157 goto do_pshifti;
18158 case IX86_BUILTIN_PSRADI128:
18159 icode = CODE_FOR_ashrv4si3;
18160 goto do_pshifti;
18161 case IX86_BUILTIN_PSRLWI128:
18162 icode = CODE_FOR_lshrv8hi3;
18163 goto do_pshifti;
18164 case IX86_BUILTIN_PSRLDI128:
18165 icode = CODE_FOR_lshrv4si3;
18166 goto do_pshifti;
18167 case IX86_BUILTIN_PSRLQI128:
18168 icode = CODE_FOR_lshrv2di3;
18169 goto do_pshifti;
18170 do_pshifti:
18171 arg0 = CALL_EXPR_ARG (exp, 0);
18172 arg1 = CALL_EXPR_ARG (exp, 1);
18173 op0 = expand_normal (arg0);
18174 op1 = expand_normal (arg1);
18176 if (!CONST_INT_P (op1))
18177 {
18178 error ("shift must be an immediate");
18179 return const0_rtx;
18180 }
18181 if (INTVAL (op1) < 0 || INTVAL (op1) > 255)
18182 op1 = GEN_INT (255);
18184 tmode = insn_data[icode].operand[0].mode;
18185 mode1 = insn_data[icode].operand[1].mode;
18186 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
18187 op0 = copy_to_reg (op0);
18189 target = gen_reg_rtx (tmode);
18190 pat = GEN_FCN (icode) (target, op0, op1);
18191 if (!pat)
18192 return 0;
18193 emit_insn (pat);
18194 return target;
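/* For illustration (editor's sketch, assuming the emmintrin.h
   wrappers): clamping an oversized count to 255 above preserves the
   hardware semantics; e.g. roughly

       _mm_slli_epi16 (v, 200)

   yields the same all-zero result as a count of 255, while the
   arithmetic right shifts replicate the sign bit for any count at or
   beyond the element width.  */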
18196 case IX86_BUILTIN_PSLLW128:
18197 icode = CODE_FOR_ashlv8hi3;
18198 goto do_pshift;
18199 case IX86_BUILTIN_PSLLD128:
18200 icode = CODE_FOR_ashlv4si3;
18201 goto do_pshift;
18202 case IX86_BUILTIN_PSLLQ128:
18203 icode = CODE_FOR_ashlv2di3;
18204 goto do_pshift;
18205 case IX86_BUILTIN_PSRAW128:
18206 icode = CODE_FOR_ashrv8hi3;
18207 goto do_pshift;
18208 case IX86_BUILTIN_PSRAD128:
18209 icode = CODE_FOR_ashrv4si3;
18210 goto do_pshift;
18211 case IX86_BUILTIN_PSRLW128:
18212 icode = CODE_FOR_lshrv8hi3;
18213 goto do_pshift;
18214 case IX86_BUILTIN_PSRLD128:
18215 icode = CODE_FOR_lshrv4si3;
18216 goto do_pshift;
18217 case IX86_BUILTIN_PSRLQ128:
18218 icode = CODE_FOR_lshrv2di3;
18219 goto do_pshift;
18220 do_pshift:
18221 arg0 = CALL_EXPR_ARG (exp, 0);
18222 arg1 = CALL_EXPR_ARG (exp, 1);
18223 op0 = expand_normal (arg0);
18224 op1 = expand_normal (arg1);
18226 tmode = insn_data[icode].operand[0].mode;
18227 mode1 = insn_data[icode].operand[1].mode;
18229 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
18230 op0 = copy_to_reg (op0);
18232 op1 = simplify_gen_subreg (TImode, op1, GET_MODE (op1), 0);
18233 if (! (*insn_data[icode].operand[2].predicate) (op1, TImode))
18234 op1 = copy_to_reg (op1);
18236 target = gen_reg_rtx (tmode);
18237 pat = GEN_FCN (icode) (target, op0, op1);
18238 if (!pat)
18239 return 0;
18240 emit_insn (pat);
18241 return target;
18243 case IX86_BUILTIN_PSLLDQI128:
18244 case IX86_BUILTIN_PSRLDQI128:
18245 icode = (fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
18246 : CODE_FOR_sse2_lshrti3);
18247 arg0 = CALL_EXPR_ARG (exp, 0);
18248 arg1 = CALL_EXPR_ARG (exp, 1);
18249 op0 = expand_normal (arg0);
18250 op1 = expand_normal (arg1);
18251 tmode = insn_data[icode].operand[0].mode;
18252 mode1 = insn_data[icode].operand[1].mode;
18253 mode2 = insn_data[icode].operand[2].mode;
18255 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
18256 {
18257 op0 = copy_to_reg (op0);
18258 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
18259 }
18260 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
18261 {
18262 error ("shift must be an immediate");
18263 return const0_rtx;
18264 }
18265 target = gen_reg_rtx (V2DImode);
18266 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0),
18267 op0, op1);
18268 if (! pat)
18269 return 0;
18270 emit_insn (pat);
18271 return target;
18273 case IX86_BUILTIN_FEMMS:
18274 emit_insn (gen_mmx_femms ());
18275 return 0;
18277 case IX86_BUILTIN_PAVGUSB:
18278 return ix86_expand_binop_builtin (CODE_FOR_mmx_uavgv8qi3, exp, target);
18280 case IX86_BUILTIN_PF2ID:
18281 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2id, exp, target, 0);
18283 case IX86_BUILTIN_PFACC:
18284 return ix86_expand_binop_builtin (CODE_FOR_mmx_haddv2sf3, exp, target);
18286 case IX86_BUILTIN_PFADD:
18287 return ix86_expand_binop_builtin (CODE_FOR_mmx_addv2sf3, exp, target);
18289 case IX86_BUILTIN_PFCMPEQ:
18290 return ix86_expand_binop_builtin (CODE_FOR_mmx_eqv2sf3, exp, target);
18292 case IX86_BUILTIN_PFCMPGE:
18293 return ix86_expand_binop_builtin (CODE_FOR_mmx_gev2sf3, exp, target);
18295 case IX86_BUILTIN_PFCMPGT:
18296 return ix86_expand_binop_builtin (CODE_FOR_mmx_gtv2sf3, exp, target);
18298 case IX86_BUILTIN_PFMAX:
18299 return ix86_expand_binop_builtin (CODE_FOR_mmx_smaxv2sf3, exp, target);
18301 case IX86_BUILTIN_PFMIN:
18302 return ix86_expand_binop_builtin (CODE_FOR_mmx_sminv2sf3, exp, target);
18304 case IX86_BUILTIN_PFMUL:
18305 return ix86_expand_binop_builtin (CODE_FOR_mmx_mulv2sf3, exp, target);
18307 case IX86_BUILTIN_PFRCP:
18308 return ix86_expand_unop_builtin (CODE_FOR_mmx_rcpv2sf2, exp, target, 0);
18310 case IX86_BUILTIN_PFRCPIT1:
18311 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit1v2sf3, exp, target);
18313 case IX86_BUILTIN_PFRCPIT2:
18314 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit2v2sf3, exp, target);
18316 case IX86_BUILTIN_PFRSQIT1:
18317 return ix86_expand_binop_builtin (CODE_FOR_mmx_rsqit1v2sf3, exp, target);
18319 case IX86_BUILTIN_PFRSQRT:
18320 return ix86_expand_unop_builtin (CODE_FOR_mmx_rsqrtv2sf2, exp, target, 0);
18322 case IX86_BUILTIN_PFSUB:
18323 return ix86_expand_binop_builtin (CODE_FOR_mmx_subv2sf3, exp, target);
18325 case IX86_BUILTIN_PFSUBR:
18326 return ix86_expand_binop_builtin (CODE_FOR_mmx_subrv2sf3, exp, target);
18328 case IX86_BUILTIN_PI2FD:
18329 return ix86_expand_unop_builtin (CODE_FOR_mmx_floatv2si2, exp, target, 0);
18331 case IX86_BUILTIN_PMULHRW:
18332 return ix86_expand_binop_builtin (CODE_FOR_mmx_pmulhrwv4hi3, exp, target);
18334 case IX86_BUILTIN_PF2IW:
18335 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2iw, exp, target, 0);
18337 case IX86_BUILTIN_PFNACC:
18338 return ix86_expand_binop_builtin (CODE_FOR_mmx_hsubv2sf3, exp, target);
18340 case IX86_BUILTIN_PFPNACC:
18341 return ix86_expand_binop_builtin (CODE_FOR_mmx_addsubv2sf3, exp, target);
18343 case IX86_BUILTIN_PI2FW:
18344 return ix86_expand_unop_builtin (CODE_FOR_mmx_pi2fw, exp, target, 0);
18346 case IX86_BUILTIN_PSWAPDSI:
18347 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2si2, exp, target, 0);
18349 case IX86_BUILTIN_PSWAPDSF:
18350 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2sf2, exp, target, 0);
18352 case IX86_BUILTIN_SQRTSD:
18353 return ix86_expand_unop1_builtin (CODE_FOR_sse2_vmsqrtv2df2, exp, target);
18354 case IX86_BUILTIN_LOADUPD:
18355 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, exp, target, 1);
18356 case IX86_BUILTIN_STOREUPD:
18357 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, exp);
18359 case IX86_BUILTIN_MFENCE:
18360 emit_insn (gen_sse2_mfence ());
18361 return 0;
18362 case IX86_BUILTIN_LFENCE:
18363 emit_insn (gen_sse2_lfence ());
18364 return 0;
18366 case IX86_BUILTIN_CLFLUSH:
18367 arg0 = CALL_EXPR_ARG (exp, 0);
18368 op0 = expand_normal (arg0);
18369 icode = CODE_FOR_sse2_clflush;
18370 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
18371 op0 = copy_to_mode_reg (Pmode, op0);
18373 emit_insn (gen_sse2_clflush (op0));
18374 return 0;
18376 case IX86_BUILTIN_MOVNTPD:
18377 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, exp);
18378 case IX86_BUILTIN_MOVNTDQ:
18379 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, exp);
18380 case IX86_BUILTIN_MOVNTI:
18381 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, exp);
18383 case IX86_BUILTIN_LOADDQU:
18384 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, exp, target, 1);
18385 case IX86_BUILTIN_STOREDQU:
18386 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, exp);
18388 case IX86_BUILTIN_MONITOR:
18389 arg0 = CALL_EXPR_ARG (exp, 0);
18390 arg1 = CALL_EXPR_ARG (exp, 1);
18391 arg2 = CALL_EXPR_ARG (exp, 2);
18392 op0 = expand_normal (arg0);
18393 op1 = expand_normal (arg1);
18394 op2 = expand_normal (arg2);
18395 if (!REG_P (op0))
18396 op0 = copy_to_mode_reg (Pmode, op0);
18397 if (!REG_P (op1))
18398 op1 = copy_to_mode_reg (SImode, op1);
18399 if (!REG_P (op2))
18400 op2 = copy_to_mode_reg (SImode, op2);
18401 if (!TARGET_64BIT)
18402 emit_insn (gen_sse3_monitor (op0, op1, op2));
18403 else
18404 emit_insn (gen_sse3_monitor64 (op0, op1, op2));
18405 return 0;
18407 case IX86_BUILTIN_MWAIT:
18408 arg0 = CALL_EXPR_ARG (exp, 0);
18409 arg1 = CALL_EXPR_ARG (exp, 1);
18410 op0 = expand_normal (arg0);
18411 op1 = expand_normal (arg1);
18412 if (!REG_P (op0))
18413 op0 = copy_to_mode_reg (SImode, op0);
18414 if (!REG_P (op1))
18415 op1 = copy_to_mode_reg (SImode, op1);
18416 emit_insn (gen_sse3_mwait (op0, op1));
18417 return 0;
18419 case IX86_BUILTIN_LDDQU:
18420 return ix86_expand_unop_builtin (CODE_FOR_sse3_lddqu, exp,
18421 target, 1);
18423 case IX86_BUILTIN_PALIGNR:
18424 case IX86_BUILTIN_PALIGNR128:
18425 if (fcode == IX86_BUILTIN_PALIGNR)
18426 {
18427 icode = CODE_FOR_ssse3_palignrdi;
18428 mode = DImode;
18429 }
18430 else
18431 {
18432 icode = CODE_FOR_ssse3_palignrti;
18433 mode = V2DImode;
18434 }
18435 arg0 = CALL_EXPR_ARG (exp, 0);
18436 arg1 = CALL_EXPR_ARG (exp, 1);
18437 arg2 = CALL_EXPR_ARG (exp, 2);
18438 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
18439 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
18440 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
18441 tmode = insn_data[icode].operand[0].mode;
18442 mode1 = insn_data[icode].operand[1].mode;
18443 mode2 = insn_data[icode].operand[2].mode;
18444 mode3 = insn_data[icode].operand[3].mode;
18446 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
18447 {
18448 op0 = copy_to_reg (op0);
18449 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
18450 }
18451 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
18452 {
18453 op1 = copy_to_reg (op1);
18454 op1 = simplify_gen_subreg (mode2, op1, GET_MODE (op1), 0);
18455 }
18456 if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
18457 {
18458 error ("shift must be an immediate");
18459 return const0_rtx;
18460 }
18461 target = gen_reg_rtx (mode);
18462 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, mode, 0),
18463 op0, op1, op2);
18464 if (! pat)
18465 return 0;
18466 emit_insn (pat);
18467 return target;
18469 case IX86_BUILTIN_MOVNTSD:
18470 return ix86_expand_store_builtin (CODE_FOR_sse4a_vmmovntv2df, exp);
18472 case IX86_BUILTIN_MOVNTSS:
18473 return ix86_expand_store_builtin (CODE_FOR_sse4a_vmmovntv4sf, exp);
18475 case IX86_BUILTIN_INSERTQ:
18476 case IX86_BUILTIN_EXTRQ:
18477 icode = (fcode == IX86_BUILTIN_EXTRQ
18478 ? CODE_FOR_sse4a_extrq
18479 : CODE_FOR_sse4a_insertq);
18480 arg0 = CALL_EXPR_ARG (exp, 0);
18481 arg1 = CALL_EXPR_ARG (exp, 1);
18482 op0 = expand_normal (arg0);
18483 op1 = expand_normal (arg1);
18484 tmode = insn_data[icode].operand[0].mode;
18485 mode1 = insn_data[icode].operand[1].mode;
18486 mode2 = insn_data[icode].operand[2].mode;
18487 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
18488 op0 = copy_to_mode_reg (mode1, op0);
18489 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
18490 op1 = copy_to_mode_reg (mode2, op1);
18491 if (optimize || target == 0
18492 || GET_MODE (target) != tmode
18493 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18494 target = gen_reg_rtx (tmode);
18495 pat = GEN_FCN (icode) (target, op0, op1);
18496 if (! pat)
18497 return 0;
18498 emit_insn (pat);
18499 return target;
18501 case IX86_BUILTIN_EXTRQI:
18502 icode = CODE_FOR_sse4a_extrqi;
18503 arg0 = CALL_EXPR_ARG (exp, 0);
18504 arg1 = CALL_EXPR_ARG (exp, 1);
18505 arg2 = CALL_EXPR_ARG (exp, 2);
18506 op0 = expand_normal (arg0);
18507 op1 = expand_normal (arg1);
18508 op2 = expand_normal (arg2);
18509 tmode = insn_data[icode].operand[0].mode;
18510 mode1 = insn_data[icode].operand[1].mode;
18511 mode2 = insn_data[icode].operand[2].mode;
18512 mode3 = insn_data[icode].operand[3].mode;
18513 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
18514 op0 = copy_to_mode_reg (mode1, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
	{
	  error ("index mask must be an immediate");
	  return gen_reg_rtx (tmode);
	}
      if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
	{
	  error ("length mask must be an immediate");
	  return gen_reg_rtx (tmode);
	}
18525 if (optimize || target == 0
18526 || GET_MODE (target) != tmode
18527 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18528 target = gen_reg_rtx (tmode);
18529 pat = GEN_FCN (icode) (target, op0, op1, op2);
18535 case IX86_BUILTIN_INSERTQI:
18536 icode = CODE_FOR_sse4a_insertqi;
18537 arg0 = CALL_EXPR_ARG (exp, 0);
18538 arg1 = CALL_EXPR_ARG (exp, 1);
18539 arg2 = CALL_EXPR_ARG (exp, 2);
18540 arg3 = CALL_EXPR_ARG (exp, 3);
18541 op0 = expand_normal (arg0);
18542 op1 = expand_normal (arg1);
18543 op2 = expand_normal (arg2);
18544 op3 = expand_normal (arg3);
18545 tmode = insn_data[icode].operand[0].mode;
18546 mode1 = insn_data[icode].operand[1].mode;
18547 mode2 = insn_data[icode].operand[2].mode;
18548 mode3 = insn_data[icode].operand[3].mode;
18549 mode4 = insn_data[icode].operand[4].mode;
18551 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
18552 op0 = copy_to_mode_reg (mode1, op0);
18554 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
18555 op1 = copy_to_mode_reg (mode2, op1);
      if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
	{
	  error ("index mask must be an immediate");
	  return gen_reg_rtx (tmode);
	}
      if (! (*insn_data[icode].operand[4].predicate) (op3, mode4))
	{
	  error ("length mask must be an immediate");
	  return gen_reg_rtx (tmode);
	}
18567 if (optimize || target == 0
18568 || GET_MODE (target) != tmode
18569 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18570 target = gen_reg_rtx (tmode);
18571 pat = GEN_FCN (icode) (target, op0, op1, op2, op3);
18577 case IX86_BUILTIN_VEC_INIT_V2SI:
18578 case IX86_BUILTIN_VEC_INIT_V4HI:
18579 case IX86_BUILTIN_VEC_INIT_V8QI:
18580 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
18582 case IX86_BUILTIN_VEC_EXT_V2DF:
18583 case IX86_BUILTIN_VEC_EXT_V2DI:
18584 case IX86_BUILTIN_VEC_EXT_V4SF:
18585 case IX86_BUILTIN_VEC_EXT_V4SI:
18586 case IX86_BUILTIN_VEC_EXT_V8HI:
18587 case IX86_BUILTIN_VEC_EXT_V2SI:
18588 case IX86_BUILTIN_VEC_EXT_V4HI:
18589 return ix86_expand_vec_ext_builtin (exp, target);
18591 case IX86_BUILTIN_VEC_SET_V8HI:
18592 case IX86_BUILTIN_VEC_SET_V4HI:
18593 return ix86_expand_vec_set_builtin (exp);
18599 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
18600 if (d->code == fcode)
18602 /* Compares are treated specially. */
18603 if (d->icode == CODE_FOR_sse_maskcmpv4sf3
18604 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3
18605 || d->icode == CODE_FOR_sse2_maskcmpv2df3
18606 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
18607 return ix86_expand_sse_compare (d, exp, target);
18609 return ix86_expand_binop_builtin (d->icode, exp, target);
18612 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
18613 if (d->code == fcode)
18614 return ix86_expand_unop_builtin (d->icode, exp, target, 0);
18616 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
18617 if (d->code == fcode)
18618 return ix86_expand_sse_comi (d, exp, target);
18620 gcc_unreachable ();
/* Returns a function decl for a vectorized version of the builtin function
   with builtin function code FN, result vector type TYPE_OUT and input
   vector type TYPE_IN, or NULL_TREE if it is not available.  */
ix86_builtin_vectorized_function (enum built_in_function fn, tree type_out,
				  tree type_in)
18631 enum machine_mode in_mode, out_mode;
18634 if (TREE_CODE (type_out) != VECTOR_TYPE
18635 || TREE_CODE (type_in) != VECTOR_TYPE)
18638 out_mode = TYPE_MODE (TREE_TYPE (type_out));
18639 out_n = TYPE_VECTOR_SUBPARTS (type_out);
18640 in_mode = TYPE_MODE (TREE_TYPE (type_in));
18641 in_n = TYPE_VECTOR_SUBPARTS (type_in);
18645 case BUILT_IN_SQRT:
18646 if (out_mode == DFmode && out_n == 2
18647 && in_mode == DFmode && in_n == 2)
18648 return ix86_builtins[IX86_BUILTIN_SQRTPD];
18651 case BUILT_IN_SQRTF:
18652 if (out_mode == SFmode && out_n == 4
18653 && in_mode == SFmode && in_n == 4)
18654 return ix86_builtins[IX86_BUILTIN_SQRTPS];
18657 case BUILT_IN_LRINTF:
18658 if (out_mode == SImode && out_n == 4
18659 && in_mode == SFmode && in_n == 4)
18660 return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
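      /* Editorial example, assuming the usual vectorizer flow: for a loop

	   for (i = 0; i < n; i++)
	     b[i] = sqrt (a[i]);

	 over doubles, the vectorizer queries BUILT_IN_SQRT with V2DF vectors
	 in and out, and the BUILT_IN_SQRT case above hands back the decl for
	 IX86_BUILTIN_SQRTPD, so a single sqrtpd covers two iterations.  */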
18670 /* Returns a decl of a function that implements conversion of the
18671 input vector of type TYPE, or NULL_TREE if it is not available. */
18674 ix86_builtin_conversion (enum tree_code code, tree type)
18676 if (TREE_CODE (type) != VECTOR_TYPE)
18682 switch (TYPE_MODE (type))
18685 return ix86_builtins[IX86_BUILTIN_CVTDQ2PS];
18690 case FIX_TRUNC_EXPR:
18691 switch (TYPE_MODE (type))
18694 return ix86_builtins[IX86_BUILTIN_CVTTPS2DQ];
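      /* Editorial example: a V4SFmode FIX_TRUNC_EXPR, i.e. the vectorized
	 form of

	   for (i = 0; i < n; i++)
	     ivec[i] = (int) fvec[i];

	 maps to IX86_BUILTIN_CVTTPS2DQ (the truncating cvttps2dq insn),
	 while the FLOAT_EXPR case above covers the int -> float direction
	 via cvtdq2ps.  */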
/* Store OPERAND to memory after reload is completed.  This means
   that we cannot easily use assign_stack_local.  */
18707 ix86_force_to_memory (enum machine_mode mode, rtx operand)
18711 gcc_assert (reload_completed);
18712 if (TARGET_RED_ZONE)
18714 result = gen_rtx_MEM (mode,
			    gen_rtx_PLUS (Pmode,
					  stack_pointer_rtx,
					  GEN_INT (-RED_ZONE_SIZE)));
18718 emit_move_insn (result, operand);
18720 else if (!TARGET_RED_ZONE && TARGET_64BIT)
18726 operand = gen_lowpart (DImode, operand);
      emit_insn (gen_rtx_SET (VOIDmode,
			      gen_rtx_MEM (DImode,
					   gen_rtx_PRE_DEC (DImode,
							    stack_pointer_rtx)),
			      operand));
18737 gcc_unreachable ();
18739 result = gen_rtx_MEM (mode, stack_pointer_rtx);
18748 split_di (&operand, 1, operands, operands + 1);
      emit_insn (gen_rtx_SET (VOIDmode,
			      gen_rtx_MEM (SImode,
					   gen_rtx_PRE_DEC (Pmode,
							    stack_pointer_rtx)),
			      operands[1]));
      emit_insn (gen_rtx_SET (VOIDmode,
			      gen_rtx_MEM (SImode,
					   gen_rtx_PRE_DEC (Pmode,
							    stack_pointer_rtx)),
			      operands[0]));
18764 /* Store HImodes as SImodes. */
18765 operand = gen_lowpart (SImode, operand);
      emit_insn (gen_rtx_SET (VOIDmode,
			      gen_rtx_MEM (GET_MODE (operand),
					   gen_rtx_PRE_DEC (SImode,
							    stack_pointer_rtx)),
			      operand));
18776 gcc_unreachable ();
18778 result = gen_rtx_MEM (mode, stack_pointer_rtx);
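  /* Editorial sketch of the strategies above (illustrative, not
     authoritative): with a red zone the value is parked below the stack
     pointer without adjusting it, roughly

       movq %reg, -128(%rsp)

     whereas the non-red-zone 64-bit and the 32-bit paths emit explicit
     pre-decrement pushes and hand back a MEM at the new stack top.  */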
/* Free the operand from memory.  */
18785 ix86_free_from_memory (enum machine_mode mode)
18787 if (!TARGET_RED_ZONE)
18791 if (mode == DImode || TARGET_64BIT)
      /* Use LEA to deallocate stack space.  In peephole2 it will be converted
	 to a pop or an add instruction if registers are available.  */
18797 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
18798 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
18803 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
18804 QImode must go into class Q_REGS.
18805 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
18806 movdf to do mem-to-mem moves through integer regs. */
18808 ix86_preferred_reload_class (rtx x, enum reg_class class)
18810 enum machine_mode mode = GET_MODE (x);
18812 /* We're only allowed to return a subclass of CLASS. Many of the
18813 following checks fail for NO_REGS, so eliminate that early. */
18814 if (class == NO_REGS)
18817 /* All classes can load zeros. */
18818 if (x == CONST0_RTX (mode))
18821 /* Force constants into memory if we are loading a (nonzero) constant into
18822 an MMX or SSE register. This is because there are no MMX/SSE instructions
18823 to load from a constant. */
18825 && (MAYBE_MMX_CLASS_P (class) || MAYBE_SSE_CLASS_P (class)))
18828 /* Prefer SSE regs only, if we can use them for math. */
18829 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
18830 return SSE_CLASS_P (class) ? class : NO_REGS;
18832 /* Floating-point constants need more complex checks. */
18833 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
18835 /* General regs can load everything. */
18836 if (reg_class_subset_p (class, GENERAL_REGS))
18839 /* Floats can load 0 and 1 plus some others. Note that we eliminated
18840 zero above. We only want to wind up preferring 80387 registers if
18841 we plan on doing computation with them. */
      if (TARGET_80387
	  && standard_80387_constant_p (x))
	{
	  /* Limit class to non-SSE.  */
	  if (class == FLOAT_SSE_REGS)
	    return FLOAT_REGS;
	  if (class == FP_TOP_SSE_REGS)
	    return FP_TOP_REG;
	  if (class == FP_SECOND_SSE_REGS)
	    return FP_SECOND_REG;
	  if (class == FLOAT_INT_REGS || class == FLOAT_REGS)
	    return class;
	}
  /* Generally when we see PLUS here, it's the function invariant
     (plus soft-fp const_int), which can only be computed into general
     regs.  */
18862 if (GET_CODE (x) == PLUS)
18863 return reg_class_subset_p (class, GENERAL_REGS) ? class : NO_REGS;
18865 /* QImode constants are easy to load, but non-constant QImode data
18866 must go into Q_REGS. */
18867 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
      if (reg_class_subset_p (class, Q_REGS))
	return class;
      if (reg_class_subset_p (Q_REGS, class))
	return Q_REGS;
18879 /* Discourage putting floating-point values in SSE registers unless
18880 SSE math is being used, and likewise for the 387 registers. */
18882 ix86_preferred_output_reload_class (rtx x, enum reg_class class)
18884 enum machine_mode mode = GET_MODE (x);
  /* Restrict the output reload class to the register bank that we are doing
     math on.  If we would otherwise not return a subclass of CLASS, reject
     this alternative by returning NO_REGS: if reload cannot honor that, it
     will still use its own choice.  */
18890 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
18891 return MAYBE_SSE_CLASS_P (class) ? SSE_REGS : NO_REGS;
18893 if (X87_FLOAT_MODE_P (mode))
      if (class == FP_TOP_SSE_REGS)
	return FP_TOP_REG;
      else if (class == FP_SECOND_SSE_REGS)
	return FP_SECOND_REG;
18900 return FLOAT_CLASS_P (class) ? class : NO_REGS;
18906 /* If we are copying between general and FP registers, we need a memory
18907 location. The same is true for SSE and MMX registers.
   The macro can't work reliably when one of the CLASSES is a class containing
   registers from multiple units (SSE, MMX, integer).  We avoid this by never
   combining those units in a single alternative in the machine description.
   Ensure that this constraint holds to avoid unexpected surprises.
18914 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
18915 enforce these sanity checks. */
18918 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
18919 enum machine_mode mode, int strict)
18921 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
18922 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
18923 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
18924 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
18925 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
18926 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
18928 gcc_assert (!strict);
18932 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
  /* ??? This is a lie.  We do have moves between mmx/general, and between
     mmx/sse2.  But by saying we need secondary memory we discourage the
     register allocator from using the mmx registers unless needed.  */
18938 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
18941 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
18943 /* SSE1 doesn't have any direct moves from other classes. */
18947 /* If the target says that inter-unit moves are more expensive
18948 than moving through memory, then don't generate them. */
18949 if (!TARGET_INTER_UNIT_MOVES)
18952 /* Between SSE and general, we have moves no larger than word size. */
18953 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
18960 /* Return true if the registers in CLASS cannot represent the change from
18961 modes FROM to TO. */
18964 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
18965 enum reg_class class)
18970 /* x87 registers can't do subreg at all, as all values are reformatted
18971 to extended precision. */
18972 if (MAYBE_FLOAT_CLASS_P (class))
18975 if (MAYBE_SSE_CLASS_P (class) || MAYBE_MMX_CLASS_P (class))
18977 /* Vector registers do not support QI or HImode loads. If we don't
18978 disallow a change to these modes, reload will assume it's ok to
18979 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
18980 the vec_dupv4hi pattern. */
18981 if (GET_MODE_SIZE (from) < 4)
18984 /* Vector registers do not support subreg with nonzero offsets, which
18985 are otherwise valid for integer registers. Since we can't see
18986 whether we have a nonzero offset from here, prohibit all
18987 nonparadoxical subregs changing size. */
18988 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
18995 /* Return the cost of moving data from a register in class CLASS1 to
18996 one in class CLASS2.
18998 It is not required that the cost always equal 2 when FROM is the same as TO;
18999 on some machines it is expensive to move between registers if they are not
19000 general registers. */
19003 ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
19004 enum reg_class class2)
19006 /* In case we require secondary memory, compute cost of the store followed
19007 by load. In order to avoid bad register allocation choices, we need
19008 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
19010 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
19014 cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
19015 MEMORY_MOVE_COST (mode, class1, 1));
19016 cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
19017 MEMORY_MOVE_COST (mode, class2, 1));
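      /* Editorial worked example (assumed tuning): moving SImode between
	 GENERAL_REGS and SSE_REGS with inter-unit moves disabled needs
	 secondary memory, so the cost computed here is
	   MAX (int load, int store) + MAX (SSE load, SSE store),
	 deliberately no cheaper than the two memory moves it stands for.  */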
      /* In the case of copying from a general purpose register we may emit
	 multiple stores followed by a single load, causing a memory size
	 mismatch stall.  Count this as an arbitrarily high cost of 20.  */
19022 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
19025 /* In the case of FP/MMX moves, the registers actually overlap, and we
19026 have to switch modes in order to treat them differently. */
19027 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
19028 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
19034 /* Moves between SSE/MMX and integer unit are expensive. */
19035 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
19036 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
19037 return ix86_cost->mmxsse_to_integer;
19038 if (MAYBE_FLOAT_CLASS_P (class1))
19039 return ix86_cost->fp_move;
19040 if (MAYBE_SSE_CLASS_P (class1))
19041 return ix86_cost->sse_move;
19042 if (MAYBE_MMX_CLASS_P (class1))
19043 return ix86_cost->mmx_move;
19047 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
19050 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
  /* Flags, and only flags, can hold CCmode values.  */
19053 if (CC_REGNO_P (regno))
19054 return GET_MODE_CLASS (mode) == MODE_CC;
19055 if (GET_MODE_CLASS (mode) == MODE_CC
19056 || GET_MODE_CLASS (mode) == MODE_RANDOM
19057 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
19059 if (FP_REGNO_P (regno))
19060 return VALID_FP_MODE_P (mode);
19061 if (SSE_REGNO_P (regno))
      /* We implement the move patterns for all vector modes into and
	 out of SSE registers, even when no operation instructions
	 are available.  */
19066 return (VALID_SSE_REG_MODE (mode)
19067 || VALID_SSE2_REG_MODE (mode)
19068 || VALID_MMX_REG_MODE (mode)
19069 || VALID_MMX_REG_MODE_3DNOW (mode));
19071 if (MMX_REGNO_P (regno))
19073 /* We implement the move patterns for 3DNOW modes even in MMX mode,
19074 so if the register is available at all, then we can move data of
19075 the given mode into or out of it. */
19076 return (VALID_MMX_REG_MODE (mode)
19077 || VALID_MMX_REG_MODE_3DNOW (mode));
  if (mode == QImode)
    {
      /* Take care with QImode values - they can be in non-QI regs,
	 but then they do cause partial register stalls.  */
      if (regno < 4 || TARGET_64BIT)
	return 1;
      if (!TARGET_PARTIAL_REG_STALL)
	return 1;
      return reload_in_progress || reload_completed;
    }
  /* We handle both integers and floats in the general purpose registers.  */
19091 else if (VALID_INT_MODE_P (mode))
19093 else if (VALID_FP_MODE_P (mode))
19095 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
19096 on to use that value in smaller contexts, this can easily force a
19097 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
19098 supporting DImode, allow it. */
19099 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
19105 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
19106 tieable integer mode. */
19109 ix86_tieable_integer_mode_p (enum machine_mode mode)
19118 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
19121 return TARGET_64BIT;
19128 /* Return true if MODE1 is accessible in a register that can hold MODE2
19129 without copying. That is, all register classes that can hold MODE2
19130 can also hold MODE1. */
19133 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
19135 if (mode1 == mode2)
19138 if (ix86_tieable_integer_mode_p (mode1)
19139 && ix86_tieable_integer_mode_p (mode2))
19142 /* MODE2 being XFmode implies fp stack or general regs, which means we
19143 can tie any smaller floating point modes to it. Note that we do not
19144 tie this with TFmode. */
19145 if (mode2 == XFmode)
19146 return mode1 == SFmode || mode1 == DFmode;
19148 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
19149 that we can tie it with SFmode. */
19150 if (mode2 == DFmode)
19151 return mode1 == SFmode;
19153 /* If MODE2 is only appropriate for an SSE register, then tie with
19154 any other mode acceptable to SSE registers. */
19155 if (GET_MODE_SIZE (mode2) == 16
19156 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
19157 return (GET_MODE_SIZE (mode1) == 16
19158 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
19160 /* If MODE2 is appropriate for an MMX register, then tie
19161 with any other mode acceptable to MMX registers. */
19162 if (GET_MODE_SIZE (mode2) == 8
19163 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
19164 return (GET_MODE_SIZE (mode1) == 8
19165 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
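  /* Editorial example: V8QImode and V2SImode are both 8 bytes wide and both
     acceptable to MMX registers, so they are tieable here and a lowpart
     subreg between them never forces a register copy.  */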
/* Return the cost of moving data of mode M between a
   register and memory.  A value of 2 is the default; this cost is
   relative to those in `REGISTER_MOVE_COST'.

   If moving between registers and memory is more expensive than
   between two registers, you should define this macro to express the
   relative cost.

   Model also the increased cost of moving QImode registers in classes
   other than Q_REGS.  */
19182 ix86_memory_move_cost (enum machine_mode mode, enum reg_class class, int in)
19184 if (FLOAT_CLASS_P (class))
19201 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
19203 if (SSE_CLASS_P (class))
19206 switch (GET_MODE_SIZE (mode))
19220 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
19222 if (MMX_CLASS_P (class))
19225 switch (GET_MODE_SIZE (mode))
19236 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
19238 switch (GET_MODE_SIZE (mode))
19242 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
19243 : ix86_cost->movzbl_load);
19245 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
19246 : ix86_cost->int_store[0] + 4);
19249 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
      /* Compute the number of 32-bit moves needed.  TFmode is moved
	 as XFmode.  */
      if (mode == TFmode)
	mode = XFmode;
19254 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
19255 * (((int) GET_MODE_SIZE (mode)
19256 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
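  /* Editorial worked example: a DImode load on a 32-bit target is 8 bytes,
     i.e. (8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD == 2 word-sized moves,
     so the reported cost is 2 * int_load[2].  */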
19260 /* Compute a (partial) cost for rtx X. Return true if the complete
19261 cost has been computed, and false if subexpressions should be
19262 scanned. In either case, *TOTAL contains the cost result. */
19265 ix86_rtx_costs (rtx x, int code, int outer_code, int *total)
19267 enum machine_mode mode = GET_MODE (x);
19275 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
19277 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
      else if (flag_pic && SYMBOLIC_CONST (x)
	       && (!TARGET_64BIT
		   || (GET_CODE (x) != LABEL_REF
		       && (GET_CODE (x) != SYMBOL_REF
			   || !SYMBOL_REF_LOCAL_P (x)))))
19290 if (mode == VOIDmode)
19293 switch (standard_80387_constant_p (x))
19298 default: /* Other constants */
19303 /* Start with (MEM (SYMBOL_REF)), since that's where
19304 it'll probably end up. Add a penalty for size. */
19305 *total = (COSTS_N_INSNS (1)
19306 + (flag_pic != 0 && !TARGET_64BIT)
19307 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
      /* The zero extension is often completely free on x86_64, so make
	 it as cheap as possible.  */
19315 if (TARGET_64BIT && mode == DImode
19316 && GET_MODE (XEXP (x, 0)) == SImode)
      else if (TARGET_ZERO_EXTEND_WITH_AND)
	*total = ix86_cost->add;
      else
	*total = ix86_cost->movzx;
19325 *total = ix86_cost->movsx;
19329 if (CONST_INT_P (XEXP (x, 1))
19330 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
19332 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
19335 *total = ix86_cost->add;
19338 if ((value == 2 || value == 3)
19339 && ix86_cost->lea <= ix86_cost->shift_const)
19341 *total = ix86_cost->lea;
19351 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
19353 if (CONST_INT_P (XEXP (x, 1)))
	      if (INTVAL (XEXP (x, 1)) > 32)
		*total = ix86_cost->shift_const + COSTS_N_INSNS (2);
	      else
		*total = ix86_cost->shift_const * 2;
	      if (GET_CODE (XEXP (x, 1)) == AND)
		*total = ix86_cost->shift_var * 2;
	      else
		*total = ix86_cost->shift_var * 6 + COSTS_N_INSNS (2);
	  if (CONST_INT_P (XEXP (x, 1)))
	    *total = ix86_cost->shift_const;
	  else
	    *total = ix86_cost->shift_var;
19378 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
19380 /* ??? SSE scalar cost should be used here. */
19381 *total = ix86_cost->fmul;
19384 else if (X87_FLOAT_MODE_P (mode))
19386 *total = ix86_cost->fmul;
19389 else if (FLOAT_MODE_P (mode))
19391 /* ??? SSE vector cost should be used here. */
19392 *total = ix86_cost->fmul;
19397 rtx op0 = XEXP (x, 0);
19398 rtx op1 = XEXP (x, 1);
19400 if (CONST_INT_P (XEXP (x, 1)))
19402 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
19403 for (nbits = 0; value != 0; value &= value - 1)
19407 /* This is arbitrary. */
19410 /* Compute costs correctly for widening multiplication. */
	  if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
	      && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
		 == GET_MODE_SIZE (mode))
19415 int is_mulwiden = 0;
19416 enum machine_mode inner_mode = GET_MODE (op0);
19418 if (GET_CODE (op0) == GET_CODE (op1))
19419 is_mulwiden = 1, op1 = XEXP (op1, 0);
19420 else if (CONST_INT_P (op1))
19422 if (GET_CODE (op0) == SIGN_EXTEND)
		    is_mulwiden = (trunc_int_for_mode (INTVAL (op1), inner_mode)
				   == INTVAL (op1));
		  else
		    is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
19430 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
19433 *total = (ix86_cost->mult_init[MODE_INDEX (mode)]
19434 + nbits * ix86_cost->mult_bit
19435 + rtx_cost (op0, outer_code) + rtx_cost (op1, outer_code));
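	  /* Editorial example: for
	       (mult:DI (sign_extend:DI (reg:SI)) (sign_extend:DI (reg:SI)))
	     the widening-multiply check above strips the extensions and
	     charges the cheaper SImode mult_init/mult_bit costs instead of
	     the full DImode ones.  */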
19444 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
19445 /* ??? SSE cost should be used here. */
19446 *total = ix86_cost->fdiv;
19447 else if (X87_FLOAT_MODE_P (mode))
19448 *total = ix86_cost->fdiv;
19449 else if (FLOAT_MODE_P (mode))
19450 /* ??? SSE vector cost should be used here. */
	*total = ix86_cost->fdiv;
      else
	*total = ix86_cost->divide[MODE_INDEX (mode)];
19457 if (GET_MODE_CLASS (mode) == MODE_INT
19458 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
19460 if (GET_CODE (XEXP (x, 0)) == PLUS
19461 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
19462 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
19463 && CONSTANT_P (XEXP (x, 1)))
19465 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
19466 if (val == 2 || val == 4 || val == 8)
19468 *total = ix86_cost->lea;
19469 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
19470 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
19472 *total += rtx_cost (XEXP (x, 1), outer_code);
19476 else if (GET_CODE (XEXP (x, 0)) == MULT
19477 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
19479 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
19480 if (val == 2 || val == 4 || val == 8)
19482 *total = ix86_cost->lea;
19483 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
19484 *total += rtx_cost (XEXP (x, 1), outer_code);
19488 else if (GET_CODE (XEXP (x, 0)) == PLUS)
19490 *total = ix86_cost->lea;
19491 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
19492 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
19493 *total += rtx_cost (XEXP (x, 1), outer_code);
19500 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
19502 /* ??? SSE cost should be used here. */
19503 *total = ix86_cost->fadd;
19506 else if (X87_FLOAT_MODE_P (mode))
19508 *total = ix86_cost->fadd;
19511 else if (FLOAT_MODE_P (mode))
19513 /* ??? SSE vector cost should be used here. */
19514 *total = ix86_cost->fadd;
19522 if (!TARGET_64BIT && mode == DImode)
19524 *total = (ix86_cost->add * 2
19525 + (rtx_cost (XEXP (x, 0), outer_code)
19526 << (GET_MODE (XEXP (x, 0)) != DImode))
19527 + (rtx_cost (XEXP (x, 1), outer_code)
19528 << (GET_MODE (XEXP (x, 1)) != DImode)));
19534 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
19536 /* ??? SSE cost should be used here. */
19537 *total = ix86_cost->fchs;
19540 else if (X87_FLOAT_MODE_P (mode))
19542 *total = ix86_cost->fchs;
19545 else if (FLOAT_MODE_P (mode))
19547 /* ??? SSE vector cost should be used here. */
19548 *total = ix86_cost->fchs;
      if (!TARGET_64BIT && mode == DImode)
	*total = ix86_cost->add * 2;
      else
	*total = ix86_cost->add;
19561 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
19562 && XEXP (XEXP (x, 0), 1) == const1_rtx
19563 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
19564 && XEXP (x, 1) == const0_rtx)
19566 /* This kind of construct is implemented using test[bwl].
19567 Treat it as if we had an AND. */
19568 *total = (ix86_cost->add
19569 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code)
19570 + rtx_cost (const1_rtx, outer_code));
19576 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
19581 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
19582 /* ??? SSE cost should be used here. */
19583 *total = ix86_cost->fabs;
19584 else if (X87_FLOAT_MODE_P (mode))
19585 *total = ix86_cost->fabs;
19586 else if (FLOAT_MODE_P (mode))
19587 /* ??? SSE vector cost should be used here. */
19588 *total = ix86_cost->fabs;
19592 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
19593 /* ??? SSE cost should be used here. */
19594 *total = ix86_cost->fsqrt;
19595 else if (X87_FLOAT_MODE_P (mode))
19596 *total = ix86_cost->fsqrt;
19597 else if (FLOAT_MODE_P (mode))
19598 /* ??? SSE vector cost should be used here. */
19599 *total = ix86_cost->fsqrt;
19603 if (XINT (x, 1) == UNSPEC_TP)
19614 static int current_machopic_label_num;
19616 /* Given a symbol name and its associated stub, write out the
19617 definition of the stub. */
19620 machopic_output_stub (FILE *file, const char *symb, const char *stub)
19622 unsigned int length;
19623 char *binder_name, *symbol_name, lazy_ptr_name[32];
19624 int label = ++current_machopic_label_num;
19626 /* For 64-bit we shouldn't get here. */
19627 gcc_assert (!TARGET_64BIT);
19629 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
19630 symb = (*targetm.strip_name_encoding) (symb);
19632 length = strlen (stub);
19633 binder_name = alloca (length + 32);
19634 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
19636 length = strlen (symb);
19637 symbol_name = alloca (length + 32);
19638 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
19640 sprintf (lazy_ptr_name, "L%d$lz", label);
  if (MACHOPIC_PURE)
    switch_to_section (darwin_sections[machopic_picsymbol_stub_section]);
  else
    switch_to_section (darwin_sections[machopic_symbol_stub_section]);
19647 fprintf (file, "%s:\n", stub);
19648 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
19652 fprintf (file, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label, label);
19653 fprintf (file, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
19654 fprintf (file, "\tjmp\t*%%edx\n");
19657 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
19659 fprintf (file, "%s:\n", binder_name);
19663 fprintf (file, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
19664 fprintf (file, "\tpushl\t%%eax\n");
19667 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
19669 fprintf (file, "\tjmp\tdyld_stub_binding_helper\n");
19671 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
19672 fprintf (file, "%s:\n", lazy_ptr_name);
19673 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
19674 fprintf (file, "\t.long %s\n", binder_name);
19678 darwin_x86_file_end (void)
19680 darwin_file_end ();
19683 #endif /* TARGET_MACHO */
/* Order the registers for the register allocator.  */
19688 x86_order_regs_for_local_alloc (void)
19693 /* First allocate the local general purpose registers. */
19694 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
19695 if (GENERAL_REGNO_P (i) && call_used_regs[i])
19696 reg_alloc_order [pos++] = i;
19698 /* Global general purpose registers. */
19699 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
19700 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
19701 reg_alloc_order [pos++] = i;
  /* x87 registers come first in case we are doing FP math
     using them.  */
19705 if (!TARGET_SSE_MATH)
19706 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
19707 reg_alloc_order [pos++] = i;
19709 /* SSE registers. */
19710 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
19711 reg_alloc_order [pos++] = i;
19712 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
19713 reg_alloc_order [pos++] = i;
19715 /* x87 registers. */
19716 if (TARGET_SSE_MATH)
19717 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
19718 reg_alloc_order [pos++] = i;
19720 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
19721 reg_alloc_order [pos++] = i;
  /* Initialize the rest of the array, as we do not allocate some registers
     at all.  */
19725 while (pos < FIRST_PSEUDO_REGISTER)
19726 reg_alloc_order [pos++] = 0;
19729 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
19730 struct attribute_spec.handler. */
19732 ix86_handle_struct_attribute (tree *node, tree name,
19733 tree args ATTRIBUTE_UNUSED,
19734 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
19737 if (DECL_P (*node))
19739 if (TREE_CODE (*node) == TYPE_DECL)
19740 type = &TREE_TYPE (*node);
19745 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
19746 || TREE_CODE (*type) == UNION_TYPE)))
19748 warning (OPT_Wattributes, "%qs attribute ignored",
19749 IDENTIFIER_POINTER (name));
19750 *no_add_attrs = true;
19753 else if ((is_attribute_p ("ms_struct", name)
19754 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
19755 || ((is_attribute_p ("gcc_struct", name)
19756 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
19758 warning (OPT_Wattributes, "%qs incompatible attribute ignored",
19759 IDENTIFIER_POINTER (name));
19760 *no_add_attrs = true;
19767 ix86_ms_bitfield_layout_p (tree record_type)
19769 return (TARGET_MS_BITFIELD_LAYOUT &&
19770 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
19771 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
19774 /* Returns an expression indicating where the this parameter is
19775 located on entry to the FUNCTION. */
19778 x86_this_parameter (tree function)
19780 tree type = TREE_TYPE (function);
19781 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
19785 const int *parm_regs;
19787 if (TARGET_64BIT_MS_ABI)
19788 parm_regs = x86_64_ms_abi_int_parameter_registers;
19790 parm_regs = x86_64_int_parameter_registers;
19791 return gen_rtx_REG (DImode, parm_regs[aggr]);
19794 if (ix86_function_regparm (type, function) > 0
19795 && !type_has_variadic_args_p (type))
19798 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
19800 return gen_rtx_REG (SImode, regno);
19803 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, aggr ? 8 : 4));
19806 /* Determine whether x86_output_mi_thunk can succeed. */
19809 x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED,
19810 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
19811 HOST_WIDE_INT vcall_offset, tree function)
19813 /* 64-bit can handle anything. */
19817 /* For 32-bit, everything's fine if we have one free register. */
19818 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
19821 /* Need a free register for vcall_offset. */
19825 /* Need a free register for GOT references. */
19826 if (flag_pic && !(*targetm.binds_local_p) (function))
19829 /* Otherwise ok. */
19833 /* Output the assembler code for a thunk function. THUNK_DECL is the
19834 declaration for the thunk function itself, FUNCTION is the decl for
19835 the target function. DELTA is an immediate constant offset to be
19836 added to THIS. If VCALL_OFFSET is nonzero, the word at
19837 *(*this + vcall_offset) should be added to THIS. */
19840 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
19841 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
19842 HOST_WIDE_INT vcall_offset, tree function)
19845 rtx this = x86_this_parameter (function);
19848 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
19849 pull it in now and let DELTA benefit. */
19852 else if (vcall_offset)
19854 /* Put the this parameter into %eax. */
19856 xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
19857 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
19860 this_reg = NULL_RTX;
19862 /* Adjust the this parameter by a fixed constant. */
19865 xops[0] = GEN_INT (delta);
19866 xops[1] = this_reg ? this_reg : this;
19869 if (!x86_64_general_operand (xops[0], DImode))
19871 tmp = gen_rtx_REG (DImode, R10_REG);
19873 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
19877 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
19880 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
19883 /* Adjust the this parameter by a value stored in the vtable. */
19887 tmp = gen_rtx_REG (DImode, R10_REG);
19890 int tmp_regno = 2 /* ECX */;
19891 if (lookup_attribute ("fastcall",
19892 TYPE_ATTRIBUTES (TREE_TYPE (function))))
19893 tmp_regno = 0 /* EAX */;
19894 tmp = gen_rtx_REG (SImode, tmp_regno);
19897 xops[0] = gen_rtx_MEM (Pmode, this_reg);
19900 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
19902 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
19904 /* Adjust the this parameter. */
19905 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
19906 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
19908 rtx tmp2 = gen_rtx_REG (DImode, R11_REG);
19909 xops[0] = GEN_INT (vcall_offset);
19911 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
19912 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
19914 xops[1] = this_reg;
19916 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
19918 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
19921 /* If necessary, drop THIS back to its stack slot. */
19922 if (this_reg && this_reg != this)
19924 xops[0] = this_reg;
19926 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
19929 xops[0] = XEXP (DECL_RTL (function), 0);
19932 if (!flag_pic || (*targetm.binds_local_p) (function))
19933 output_asm_insn ("jmp\t%P0", xops);
19934 /* All thunks should be in the same object as their target,
19935 and thus binds_local_p should be true. */
19936 else if (TARGET_64BIT_MS_ABI)
19937 gcc_unreachable ();
19940 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
19941 tmp = gen_rtx_CONST (Pmode, tmp);
19942 tmp = gen_rtx_MEM (QImode, tmp);
19944 output_asm_insn ("jmp\t%A0", xops);
19949 if (!flag_pic || (*targetm.binds_local_p) (function))
19950 output_asm_insn ("jmp\t%P0", xops);
19955 rtx sym_ref = XEXP (DECL_RTL (function), 0);
19956 tmp = (gen_rtx_SYMBOL_REF
19958 machopic_indirection_name (sym_ref, /*stub_p=*/true)));
19959 tmp = gen_rtx_MEM (QImode, tmp);
19961 output_asm_insn ("jmp\t%0", xops);
19964 #endif /* TARGET_MACHO */
19966 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
19967 output_set_got (tmp, NULL_RTX);
19970 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
19971 output_asm_insn ("jmp\t{*}%1", xops);
19977 x86_file_start (void)
19979 default_file_start ();
19981 darwin_file_start ();
19983 if (X86_FILE_START_VERSION_DIRECTIVE)
19984 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
19985 if (X86_FILE_START_FLTUSED)
19986 fputs ("\t.global\t__fltused\n", asm_out_file);
19987 if (ix86_asm_dialect == ASM_INTEL)
19988 fputs ("\t.intel_syntax\n", asm_out_file);
19992 x86_field_alignment (tree field, int computed)
19994 enum machine_mode mode;
19995 tree type = TREE_TYPE (field);
19997 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
19999 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
20000 ? get_inner_array_type (type) : type);
20001 if (mode == DFmode || mode == DCmode
20002 || GET_MODE_CLASS (mode) == MODE_INT
20003 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
20004 return MIN (32, computed);
20008 /* Output assembler code to FILE to increment profiler label # LABELNO
20009 for profiling a function entry. */
20011 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
20015 #ifndef NO_PROFILE_COUNTERS
20016 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
20019 if (!TARGET_64BIT_MS_ABI && flag_pic)
20020 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
20022 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
20026 #ifndef NO_PROFILE_COUNTERS
20027 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
20028 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
20030 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
20034 #ifndef NO_PROFILE_COUNTERS
20035 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
20036 PROFILE_COUNT_REGISTER);
20038 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
/* We don't have exact information about the insn sizes, but we may assume
   quite safely that we are informed about all 1-byte insns and memory
   address sizes.  This is enough to eliminate unnecessary padding in
   the vast majority of cases.  */
20048 min_insn_size (rtx insn)
20052 if (!INSN_P (insn) || !active_insn_p (insn))
  /* Discard the alignments we have emitted, and jump tables.  */
  if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
      && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
    return 0;
  if (JUMP_P (insn)
      && (GET_CODE (PATTERN (insn)) == ADDR_VEC
	  || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
    return 0;
  /* Important case - calls are always 5 bytes.
     It is common to have many calls in a row.  */
  if (CALL_P (insn)
      && symbolic_reference_mentioned_p (PATTERN (insn))
      && !SIBLING_CALL_P (insn))
    return 5;
  if (get_attr_length (insn) <= 1)
    return 1;
  /* For normal instructions we may rely on the sizes of addresses
     and the presence of a symbol to require 4 bytes of encoding.
     This is not the case for jumps, where references are PC relative.  */
20076 if (!JUMP_P (insn))
20078 l = get_attr_length_address (insn);
20079 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
/* AMD K8 core mispredicts jumps when there are more than 3 jumps in a
   16-byte window.  */
20092 ix86_avoid_jump_misspredicts (void)
20094 rtx insn, start = get_insns ();
20095 int nbytes = 0, njumps = 0;
  /* Look for all minimal intervals of instructions containing 4 jumps.
     The intervals are bounded by START and INSN.  NBYTES is the total
     size of the instructions in the interval, including INSN and not
     including START.  When NBYTES is smaller than 16, it is possible
     that the end of START and INSN end up in the same 16-byte page.

     The smallest offset in the page at which INSN can start is the case
     where START ends on offset 0.  The offset of INSN is then
     NBYTES - sizeof (INSN).  We add p2align to a 16-byte window with
     maxskip 17 - NBYTES + sizeof (INSN).  */
20108 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
20111 nbytes += min_insn_size (insn);
      if (dump_file)
	fprintf (dump_file, "Insn %i estimated to %i bytes\n",
		 INSN_UID (insn), min_insn_size (insn));
      if (JUMP_P (insn)
	  && GET_CODE (PATTERN (insn)) != ADDR_VEC
	  && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
	njumps++;
      else
	continue;
20125 start = NEXT_INSN (start);
20126 if ((JUMP_P (start)
20127 && GET_CODE (PATTERN (start)) != ADDR_VEC
	       && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
	      || CALL_P (start))
	    njumps--, isjump = 1;
	  else
	    isjump = 0;
	  nbytes -= min_insn_size (start);
20135 gcc_assert (njumps >= 0);
20137 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
20138 INSN_UID (start), INSN_UID (insn), nbytes);
20140 if (njumps == 3 && isjump && nbytes < 16)
20142 int padsize = 15 - nbytes + min_insn_size (insn);
20145 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
20146 INSN_UID (insn), padsize);
20147 emit_insn_before (gen_align (GEN_INT (padsize)), insn);
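	  /* Editorial worked example: if INSN is the fourth jump and the
	     interval so far has nbytes == 12 with min_insn_size (insn) == 2,
	     all four jumps could share one 16-byte line, so we pad by
	     padsize == 15 - 12 + 2 == 5 bytes to push INSN out of it.  */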
/* AMD Athlon works faster
   when RET is not the destination of a conditional jump or directly
   preceded by another jump instruction.  We avoid the penalty by inserting
   a NOP just before the RET instructions in such cases.  */
20157 ix86_pad_returns (void)
20162 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
20164 basic_block bb = e->src;
20165 rtx ret = BB_END (bb);
20167 bool replace = false;
20169 if (!JUMP_P (ret) || GET_CODE (PATTERN (ret)) != RETURN
20170 || !maybe_hot_bb_p (bb))
20172 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
20173 if (active_insn_p (prev) || LABEL_P (prev))
20175 if (prev && LABEL_P (prev))
20180 FOR_EACH_EDGE (e, ei, bb->preds)
20181 if (EDGE_FREQUENCY (e) && e->src->index >= 0
20182 && !(e->flags & EDGE_FALLTHRU))
20187 prev = prev_active_insn (ret);
      if (prev
	  && ((JUMP_P (prev) && any_condjump_p (prev))
	      || CALL_P (prev)))
	replace = true;
20192 /* Empty functions get branch mispredict even when the jump destination
20193 is not visible to us. */
20194 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
20199 emit_insn_before (gen_return_internal_long (), ret);
/* Implement machine specific optimizations.  We implement padding of returns
   for K8 CPUs and a pass to avoid 4 jumps in a single 16-byte window.  */
20210 if (TARGET_PAD_RETURNS && optimize && !optimize_size)
20211 ix86_pad_returns ();
20212 if (TARGET_FOUR_JUMP_LIMIT && optimize && !optimize_size)
20213 ix86_avoid_jump_misspredicts ();
/* Return nonzero when a QImode register that must be represented via a
   REX prefix is used.  */
20219 x86_extended_QIreg_mentioned_p (rtx insn)
20222 extract_insn_cached (insn);
20223 for (i = 0; i < recog_data.n_operands; i++)
20224 if (REG_P (recog_data.operand[i])
20225 && REGNO (recog_data.operand[i]) >= 4)
/* Return nonzero when P points to a register encoded via a REX prefix.
   Called via for_each_rtx.  */
20233 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
20235 unsigned int regno;
20238 regno = REGNO (*p);
20239 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
20242 /* Return true when INSN mentions register that must be encoded using REX
20245 x86_extended_reg_mentioned_p (rtx insn)
20247 return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
20250 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
20251 optabs would emit if we didn't have TFmode patterns. */
20254 x86_emit_floatuns (rtx operands[2])
20256 rtx neglab, donelab, i0, i1, f0, in, out;
20257 enum machine_mode mode, inmode;
20259 inmode = GET_MODE (operands[1]);
20260 gcc_assert (inmode == SImode || inmode == DImode);
20263 in = force_reg (inmode, operands[1]);
20264 mode = GET_MODE (out);
20265 neglab = gen_label_rtx ();
20266 donelab = gen_label_rtx ();
20267 f0 = gen_reg_rtx (mode);
20269 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
20271 expand_float (out, in, 0);
20273 emit_jump_insn (gen_jump (donelab));
20276 emit_label (neglab);
20278 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
20280 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
20282 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
20284 expand_float (f0, i0, 0);
20286 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
20288 emit_label (donelab);
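  /* Editorial sketch, assuming round-to-nearest: the expansion above is
     equivalent to

       if (in >= 0)
	 out = (fp) in;
       else
	 out = 2 * (fp) ((in >> 1) | (in & 1));

     where oring the discarded low bit back in keeps the halved value
     rounding to the same result after the final doubling.  */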
20291 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
20292 with all elements equal to VAR. Return true if successful. */
20295 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
20296 rtx target, rtx val)
20298 enum machine_mode smode, wsmode, wvmode;
20313 val = force_reg (GET_MODE_INNER (mode), val);
20314 x = gen_rtx_VEC_DUPLICATE (mode, val);
20315 emit_insn (gen_rtx_SET (VOIDmode, target, x));
20321 if (TARGET_SSE || TARGET_3DNOW_A)
20323 val = gen_lowpart (SImode, val);
20324 x = gen_rtx_TRUNCATE (HImode, val);
20325 x = gen_rtx_VEC_DUPLICATE (mode, x);
20326 emit_insn (gen_rtx_SET (VOIDmode, target, x));
20348 /* Extend HImode to SImode using a paradoxical SUBREG. */
20349 tmp1 = gen_reg_rtx (SImode);
20350 emit_move_insn (tmp1, gen_lowpart (SImode, val));
20351 /* Insert the SImode value as low element of V4SImode vector. */
20352 tmp2 = gen_reg_rtx (V4SImode);
20353 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
20354 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
20355 CONST0_RTX (V4SImode),
20357 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
20358 /* Cast the V4SImode vector back to a V8HImode vector. */
20359 tmp1 = gen_reg_rtx (V8HImode);
20360 emit_move_insn (tmp1, gen_lowpart (V8HImode, tmp2));
20361 /* Duplicate the low short through the whole low SImode word. */
20362 emit_insn (gen_sse2_punpcklwd (tmp1, tmp1, tmp1));
20363 /* Cast the V8HImode vector back to a V4SImode vector. */
20364 tmp2 = gen_reg_rtx (V4SImode);
20365 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
20366 /* Replicate the low element of the V4SImode vector. */
20367 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
	/* Cast the V4SImode vector back to V8HImode, and store in target.  */
20369 emit_move_insn (target, gen_lowpart (V8HImode, tmp2));
20380 /* Extend QImode to SImode using a paradoxical SUBREG. */
20381 tmp1 = gen_reg_rtx (SImode);
20382 emit_move_insn (tmp1, gen_lowpart (SImode, val));
20383 /* Insert the SImode value as low element of V4SImode vector. */
20384 tmp2 = gen_reg_rtx (V4SImode);
20385 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
20386 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
20387 CONST0_RTX (V4SImode),
20389 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
20390 /* Cast the V4SImode vector back to a V16QImode vector. */
20391 tmp1 = gen_reg_rtx (V16QImode);
20392 emit_move_insn (tmp1, gen_lowpart (V16QImode, tmp2));
20393 /* Duplicate the low byte through the whole low SImode word. */
20394 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
20395 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
20396 /* Cast the V16QImode vector back to a V4SImode vector. */
20397 tmp2 = gen_reg_rtx (V4SImode);
20398 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
20399 /* Replicate the low element of the V4SImode vector. */
20400 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
	/* Cast the V4SImode vector back to V16QImode, and store in target.  */
20402 emit_move_insn (target, gen_lowpart (V16QImode, tmp2));
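	/* Editorial summary of the V8HImode/V16QImode paths above: starting
	   from the scalar in the low element, punpcklwd once (shorts) or
	   punpcklbw twice (bytes) replicates it through the low 32-bit word,
	   and pshufd $0 then broadcasts that word to the whole register,
	   e.g. x... -> xx.. -> xxxx -> xxxx xxxx xxxx xxxx for bytes.  */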
20410 /* Replicate the value once into the next wider mode and recurse. */
20411 val = convert_modes (wsmode, smode, val, true);
20412 x = expand_simple_binop (wsmode, ASHIFT, val,
20413 GEN_INT (GET_MODE_BITSIZE (smode)),
20414 NULL_RTX, 1, OPTAB_LIB_WIDEN);
20415 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
20417 x = gen_reg_rtx (wvmode);
20418 if (!ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val))
20419 gcc_unreachable ();
20420 emit_move_insn (target, gen_lowpart (mode, x));
/* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
   whose ONE_VAR element is VAR, and whose other elements are zero.  Return
   true if successful.  */
20433 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
20434 rtx target, rtx var, int one_var)
20436 enum machine_mode vsimode;
20452 var = force_reg (GET_MODE_INNER (mode), var);
20453 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
20454 emit_insn (gen_rtx_SET (VOIDmode, target, x));
20459 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
20460 new_target = gen_reg_rtx (mode);
20462 new_target = target;
20463 var = force_reg (GET_MODE_INNER (mode), var);
20464 x = gen_rtx_VEC_DUPLICATE (mode, var);
20465 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
20466 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
20469 /* We need to shuffle the value to the correct position, so
20470 create a new pseudo to store the intermediate result. */
20472 /* With SSE2, we can use the integer shuffle insns. */
20473 if (mode != V4SFmode && TARGET_SSE2)
20475 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
20477 GEN_INT (one_var == 1 ? 0 : 1),
20478 GEN_INT (one_var == 2 ? 0 : 1),
20479 GEN_INT (one_var == 3 ? 0 : 1)));
20480 if (target != new_target)
20481 emit_move_insn (target, new_target);
20485 /* Otherwise convert the intermediate result to V4SFmode and
20486 use the SSE1 shuffle instructions. */
20487 if (mode != V4SFmode)
20489 tmp = gen_reg_rtx (V4SFmode);
20490 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
20495 emit_insn (gen_sse_shufps_1 (tmp, tmp, tmp,
20497 GEN_INT (one_var == 1 ? 0 : 1),
20498 GEN_INT (one_var == 2 ? 0+4 : 1+4),
20499 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
20501 if (mode != V4SFmode)
20502 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
20503 else if (tmp != target)
20504 emit_move_insn (target, tmp);
20506 else if (target != new_target)
20507 emit_move_insn (target, new_target);
20512 vsimode = V4SImode;
20518 vsimode = V2SImode;
20524 /* Zero extend the variable element to SImode and recurse. */
20525 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
20527 x = gen_reg_rtx (vsimode);
20528 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
20530 gcc_unreachable ();
20532 emit_move_insn (target, gen_lowpart (mode, x));
20540 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
20541 consisting of the values in VALS. It is known that all elements
20542 except ONE_VAR are constants. Return true if successful. */
20545 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
20546 rtx target, rtx vals, int one_var)
20548 rtx var = XVECEXP (vals, 0, one_var);
20549 enum machine_mode wmode;
20552 const_vec = copy_rtx (vals);
20553 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
20554 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
20562 /* For the two element vectors, it's just as easy to use
20563 the general case. */
20579 /* There's no way to set one QImode entry easily. Combine
20580 the variable value with its adjacent constant value, and
20581 promote to an HImode set. */
20582 x = XVECEXP (vals, 0, one_var ^ 1);
20585 var = convert_modes (HImode, QImode, var, true);
20586 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
20587 NULL_RTX, 1, OPTAB_LIB_WIDEN);
20588 x = GEN_INT (INTVAL (x) & 0xff);
20592 var = convert_modes (HImode, QImode, var, true);
20593 x = gen_int_mode (INTVAL (x) << 8, HImode);
20595 if (x != const0_rtx)
20596 var = expand_simple_binop (HImode, IOR, var, x, var,
20597 1, OPTAB_LIB_WIDEN);
20599 x = gen_reg_rtx (wmode);
20600 emit_move_insn (x, gen_lowpart (wmode, const_vec));
20601 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
20603 emit_move_insn (target, gen_lowpart (mode, x));
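      /* Editorial example: initializing a V16QImode vector whose only
	 variable element is index 5 fuses elements 4 and 5 into one HImode
	 value, (var << 8) | element4, and stores it with an HImode vec_set
	 at halfword index 5 >> 1 == 2 of the V8HImode view.  */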
20610 emit_move_insn (target, const_vec);
20611 ix86_expand_vector_set (mmx_ok, target, var, one_var);
20615 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
20616 all values variable, and none identical. */
20619 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
20620 rtx target, rtx vals)
20622 enum machine_mode half_mode = GET_MODE_INNER (mode);
20623 rtx op0 = NULL, op1 = NULL;
20624 bool use_vec_concat = false;
20630 if (!mmx_ok && !TARGET_SSE)
20636 /* For the two element vectors, we always implement VEC_CONCAT. */
20637 op0 = XVECEXP (vals, 0, 0);
20638 op1 = XVECEXP (vals, 0, 1);
20639 use_vec_concat = true;
20643 half_mode = V2SFmode;
20646 half_mode = V2SImode;
20652 /* For V4SF and V4SI, we implement a concat of two V2 vectors.
20653 Recurse to load the two halves. */
20655 op0 = gen_reg_rtx (half_mode);
20656 v = gen_rtvec (2, XVECEXP (vals, 0, 0), XVECEXP (vals, 0, 1));
20657 ix86_expand_vector_init (false, op0, gen_rtx_PARALLEL (half_mode, v));
20659 op1 = gen_reg_rtx (half_mode);
20660 v = gen_rtvec (2, XVECEXP (vals, 0, 2), XVECEXP (vals, 0, 3));
20661 ix86_expand_vector_init (false, op1, gen_rtx_PARALLEL (half_mode, v));
20663 use_vec_concat = true;
20674 gcc_unreachable ();
20677 if (use_vec_concat)
20679 if (!register_operand (op0, half_mode))
20680 op0 = force_reg (half_mode, op0);
20681 if (!register_operand (op1, half_mode))
20682 op1 = force_reg (half_mode, op1);
20684 emit_insn (gen_rtx_SET (VOIDmode, target,
20685 gen_rtx_VEC_CONCAT (mode, op0, op1)));
20689 int i, j, n_elts, n_words, n_elt_per_word;
20690 enum machine_mode inner_mode;
20691 rtx words[4], shift;
20693 inner_mode = GET_MODE_INNER (mode);
20694 n_elts = GET_MODE_NUNITS (mode);
20695 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
20696 n_elt_per_word = n_elts / n_words;
20697 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
20699 for (i = 0; i < n_words; ++i)
20701 rtx word = NULL_RTX;
20703 for (j = 0; j < n_elt_per_word; ++j)
20705 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
20706 elt = convert_modes (word_mode, inner_mode, elt, true);
20712 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
20713 word, 1, OPTAB_LIB_WIDEN);
20714 word = expand_simple_binop (word_mode, IOR, word, elt,
20715 word, 1, OPTAB_LIB_WIDEN);
20723 emit_move_insn (target, gen_lowpart (mode, words[0]));
20724 else if (n_words == 2)
20726 rtx tmp = gen_reg_rtx (mode);
20727 emit_insn (gen_rtx_CLOBBER (VOIDmode, tmp));
20728 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
20729 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
20730 emit_move_insn (target, tmp);
20732 else if (n_words == 4)
20734 rtx tmp = gen_reg_rtx (V4SImode);
20735 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
20736 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
20737 emit_move_insn (target, gen_lowpart (mode, tmp));
20740 gcc_unreachable ();
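  /* Editorial example of the word-building fallback above: a V8HImode
     vector on a 32-bit target gives n_words == 4 and n_elt_per_word == 2;
     each 32-bit word is assembled high element first as (hi << 16) | lo
     via shift/ior, and the four words are then recombined through a
     recursive V4SImode build.  */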
20744 /* Initialize vector TARGET via VALS. Suppress the use of MMX
20745 instructions unless MMX_OK is true. */
20748 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
20750 enum machine_mode mode = GET_MODE (target);
20751 enum machine_mode inner_mode = GET_MODE_INNER (mode);
20752 int n_elts = GET_MODE_NUNITS (mode);
20753 int n_var = 0, one_var = -1;
20754 bool all_same = true, all_const_zero = true;
20758 for (i = 0; i < n_elts; ++i)
20760 x = XVECEXP (vals, 0, i);
20761 if (!CONSTANT_P (x))
20762 n_var++, one_var = i;
20763 else if (x != CONST0_RTX (inner_mode))
20764 all_const_zero = false;
20765 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
20769 /* Constants are best loaded from the constant pool. */
20772 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
20776 /* If all values are identical, broadcast the value. */
20778 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
20779 XVECEXP (vals, 0, 0)))
  /* Values where only one field is non-constant are best loaded from
     the pool and overwritten via a move later.  */
20787 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
20788 XVECEXP (vals, 0, one_var),
20792 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
20796 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);

/* Set element ELT of vector TARGET to VAL, using MMX instructions
   only if MMX_OK is true.  */

void
ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
{
  enum machine_mode mode = GET_MODE (target);
  enum machine_mode inner_mode = GET_MODE_INNER (mode);
  bool use_vec_merge = false;
  rtx tmp;

  switch (mode)
    {
    case V2SFmode:
    case V2SImode:
      if (mmx_ok)
	{
	  tmp = gen_reg_rtx (GET_MODE_INNER (mode));
	  ix86_expand_vector_extract (true, tmp, target, 1 - elt);
	  if (elt == 0)
	    tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
	  else
	    tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
	  emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
	  return;
	}
      break;

    case V2DFmode:
    case V2DImode:
      {
	rtx op0, op1;

	/* For the two element vectors, we implement a VEC_CONCAT with
	   the extraction of the other element.  */

	tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
	tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);

	if (elt == 0)
	  op0 = val, op1 = tmp;
	else
	  op0 = tmp, op1 = val;

	tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
	emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
      }
      return;

    case V4SFmode:
      switch (elt)
	{
	case 0:
	  use_vec_merge = true;
	  break;

	case 1:
	  /* tmp = target = A B C D */
	  tmp = copy_to_reg (target);
	  /* target = A A B B */
	  emit_insn (gen_sse_unpcklps (target, target, target));
	  /* target = X A B B */
	  ix86_expand_vector_set (false, target, val, 0);
	  /* target = A X C D */
	  emit_insn (gen_sse_shufps_1 (target, target, tmp,
				       GEN_INT (1), GEN_INT (0),
				       GEN_INT (2+4), GEN_INT (3+4)));
	  return;

	case 2:
	  /* tmp = target = A B C D */
	  tmp = copy_to_reg (target);
	  /* tmp = X B C D */
	  ix86_expand_vector_set (false, tmp, val, 0);
	  /* target = A B X D */
	  emit_insn (gen_sse_shufps_1 (target, target, tmp,
				       GEN_INT (0), GEN_INT (1),
				       GEN_INT (0+4), GEN_INT (3+4)));
	  return;

	case 3:
	  /* tmp = target = A B C D */
	  tmp = copy_to_reg (target);
	  /* tmp = X B C D */
	  ix86_expand_vector_set (false, tmp, val, 0);
	  /* target = A B C X */
	  emit_insn (gen_sse_shufps_1 (target, target, tmp,
				       GEN_INT (0), GEN_INT (1),
				       GEN_INT (2+4), GEN_INT (0+4)));
	  return;

	default:
	  gcc_unreachable ();
	}
      break;

    case V4SImode:
      /* Element 0 handled by vec_merge below.  */
      if (elt == 0)
	{
	  use_vec_merge = true;
	  break;
	}

      if (TARGET_SSE2)
	{
	  /* With SSE2, use integer shuffles to swap element 0 and ELT,
	     store into element 0, then shuffle them back.  */
	  rtx order[4];

	  order[0] = GEN_INT (elt);
	  order[1] = const1_rtx;
	  order[2] = const2_rtx;
	  order[3] = GEN_INT (3);
	  order[elt] = const0_rtx;

	  emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
					order[1], order[2], order[3]));

	  ix86_expand_vector_set (false, target, val, 0);

	  emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
					order[1], order[2], order[3]));
	}
      else
	{
	  /* For SSE1, we have to reuse the V4SF code.  */
	  ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
				  gen_lowpart (SFmode, val), elt);
	}
      return;

    case V8HImode:
      use_vec_merge = TARGET_SSE2;
      break;
    case V4HImode:
      use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
      break;

    default:
      break;
    }

  if (use_vec_merge)
    {
      tmp = gen_rtx_VEC_DUPLICATE (mode, val);
      tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
      emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
    }
  else
    {
      rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);

      emit_move_insn (mem, target);

      tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
      emit_move_insn (tmp, val);

      emit_move_insn (target, mem);
    }
}
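
/* A sketch of the two generic fallbacks above: with USE_VEC_MERGE the
   insertion is expressed directly in RTL as

     (set target (vec_merge (vec_duplicate val) target (1 << elt)))

   and otherwise the vector takes a round trip through a stack slot:

     mem = target;  mem[elt] = val;  target = mem;  */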

/* Extract element ELT of vector VEC into TARGET, using MMX
   instructions only if MMX_OK is true.  */

void
ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
{
  enum machine_mode mode = GET_MODE (vec);
  enum machine_mode inner_mode = GET_MODE_INNER (mode);
  bool use_vec_extr = false;
  rtx tmp;

  switch (mode)
    {
    case V2SImode:
    case V2SFmode:
      if (!mmx_ok)
	break;
      /* FALLTHRU */
    case V2DFmode:
    case V2DImode:
      use_vec_extr = true;
      break;

    case V4SFmode:
      switch (elt)
	{
	case 0:
	  tmp = vec;
	  break;
	case 1:
	case 3:
	  tmp = gen_reg_rtx (mode);
	  emit_insn (gen_sse_shufps_1 (tmp, vec, vec,
				       GEN_INT (elt), GEN_INT (elt),
				       GEN_INT (elt+4), GEN_INT (elt+4)));
	  break;
	case 2:
	  tmp = gen_reg_rtx (mode);
	  emit_insn (gen_sse_unpckhps (tmp, vec, vec));
	  break;
	default:
	  gcc_unreachable ();
	}
      vec = tmp;
      use_vec_extr = true;
      elt = 0;
      break;

    case V4SImode:
      if (TARGET_SSE2)
	{
	  switch (elt)
	    {
	    case 0:
	      tmp = vec;
	      break;
	    case 1:
	    case 3:
	      tmp = gen_reg_rtx (mode);
	      emit_insn (gen_sse2_pshufd_1 (tmp, vec,
					    GEN_INT (elt), GEN_INT (elt),
					    GEN_INT (elt), GEN_INT (elt)));
	      break;
	    case 2:
	      tmp = gen_reg_rtx (mode);
	      emit_insn (gen_sse2_punpckhdq (tmp, vec, vec));
	      break;
	    default:
	      gcc_unreachable ();
	    }
	  vec = tmp;
	  use_vec_extr = true;
	  elt = 0;
	}
      else
	{
	  /* For SSE1, we have to reuse the V4SF code.  */
	  ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
				      gen_lowpart (V4SFmode, vec), elt);
	  return;
	}
      break;

    case V8HImode:
      use_vec_extr = TARGET_SSE2;
      break;
    case V4HImode:
      use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
      break;

    case V16QImode:
    case V8QImode:
      /* ??? Could extract the appropriate HImode element and shift.  */
    default:
      break;
    }

  if (use_vec_extr)
    {
      tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
      tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);

      /* Let the rtl optimizers know about the zero extension performed.  */
      if (inner_mode == HImode)
	{
	  tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
	  target = gen_lowpart (SImode, target);
	}

      emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
    }
  else
    {
      rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);

      emit_move_insn (mem, vec);

      tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
      emit_move_insn (target, tmp);
    }
}

/* Expand a vector reduction on V4SFmode for SSE1.  FN is the binary
   pattern to reduce; DEST is the destination; IN is the input vector.  */

void
ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
{
  rtx tmp1, tmp2, tmp3;

  tmp1 = gen_reg_rtx (V4SFmode);
  tmp2 = gen_reg_rtx (V4SFmode);
  tmp3 = gen_reg_rtx (V4SFmode);

  emit_insn (gen_sse_movhlps (tmp1, in, in));
  emit_insn (fn (tmp2, tmp1, in));

  emit_insn (gen_sse_shufps_1 (tmp3, tmp2, tmp2,
			       GEN_INT (1), GEN_INT (1),
			       GEN_INT (1+4), GEN_INT (1+4)));
  emit_insn (fn (dest, tmp2, tmp3));
}
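
/* Assuming the usual lane numbering, the sequence above computes

     tmp1 = { in[2], in[3], in[2], in[3] };		(movhlps)
     tmp2 = fn (tmp1, in);
     tmp3 = { tmp2[1], tmp2[1], tmp2[1], tmp2[1] };	(shufps)
     dest = fn (tmp2, tmp3);

   so dest[0] = in[0] FN in[1] FN in[2] FN in[3] for any FN that is
   associative and commutative.  */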

/* Target hook for scalar_mode_supported_p.  */
static bool
ix86_scalar_mode_supported_p (enum machine_mode mode)
{
  if (DECIMAL_FLOAT_MODE_P (mode))
    return true;
  else
    return default_scalar_mode_supported_p (mode);
}

/* Implements target hook vector_mode_supported_p.  */
static bool
ix86_vector_mode_supported_p (enum machine_mode mode)
{
  if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
    return true;
  if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
    return true;
  if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
    return true;
  if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
    return true;
  return false;
}

/* Worker function for TARGET_MD_ASM_CLOBBERS.

   We do this in the new i386 backend to maintain source compatibility
   with the old cc0-based compiler.  */

static tree
ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
		      tree inputs ATTRIBUTE_UNUSED,
		      tree clobbers)
{
  clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
			clobbers);
  clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
			clobbers);
  return clobbers;
}

/* Implements the target hook targetm.encode_section_info.  This
   is not used by NetWare.  */

static void ATTRIBUTE_UNUSED
ix86_encode_section_info (tree decl, rtx rtl, int first)
{
  default_encode_section_info (decl, rtl, first);

  if (TREE_CODE (decl) == VAR_DECL
      && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
      && ix86_in_large_data_p (decl))
    SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
}

/* Worker function for REVERSE_CONDITION.  */

enum rtx_code
ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
{
  return (mode != CCFPmode && mode != CCFPUmode
	  ? reverse_condition (code)
	  : reverse_condition_maybe_unordered (code));
}
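
/* For the integer CC modes this is the usual reversal, e.g. GT -> LE.
   For IEEE floating-point comparisons, however, !(a > b) must also hold
   when the operands are unordered (one of them is a NaN), so GT reverses
   to UNLE rather than LE; reverse_condition_maybe_unordered does that.  */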

/* Output code to perform an x87 FP register move, from OPERANDS[1]
   to OPERANDS[0].  */

const char *
output_387_reg_move (rtx insn, rtx *operands)
{
  if (REG_P (operands[0]))
    {
      if (REG_P (operands[1])
	  && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
	{
	  if (REGNO (operands[0]) == FIRST_STACK_REG)
	    return output_387_ffreep (operands, 0);
	  return "fstp\t%y0";
	}
      if (STACK_TOP_P (operands[0]))
	return "fld%z1\t%y1";
      return "fst\t%y0";
    }
  else if (MEM_P (operands[0]))
    {
      gcc_assert (REG_P (operands[1]));
      if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
	return "fstp%z0\t%y0";
      else
	{
	  /* There is no non-popping store to memory for XFmode.
	     So if we need one, follow the store with a load.  */
	  if (GET_MODE (operands[0]) == XFmode)
	    return "fstp%z0\t%y0\n\tfld%z0\t%y0";
	  else
	    return "fst%z0\t%y0";
	}
    }
  else
    gcc_unreachable ();
}

/* Output code to perform a conditional jump to LABEL, if C2 flag in
   FP status register is set.  */

void
ix86_emit_fp_unordered_jump (rtx label)
{
  rtx reg = gen_reg_rtx (HImode);
  rtx temp;

  emit_insn (gen_x86_fnstsw_1 (reg));

  if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_size))
    {
      emit_insn (gen_x86_sahf_1 (reg));

      temp = gen_rtx_REG (CCmode, FLAGS_REG);
      temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
    }
  else
    {
      emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));

      temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
    }

  temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
			       gen_rtx_LABEL_REF (VOIDmode, label),
			       pc_rtx);
  temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);

  emit_jump_insn (temp);
  predict_jump (REG_BR_PROB_BASE * 10 / 100);
}
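
/* Both branches above test the C2 condition bit (bit 10 of the FPU
   status word, which fnstsw leaves in bit 2 of the high byte, hence
   the 0x04 mask).  With sahf that byte is copied into EFLAGS, where
   C2 lands in PF and is observed by the UNORDERED test.  */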

/* Output code to perform a log1p XFmode calculation.  */

void ix86_emit_i387_log1p (rtx op0, rtx op1)
{
  rtx label1 = gen_label_rtx ();
  rtx label2 = gen_label_rtx ();

  rtx tmp = gen_reg_rtx (XFmode);
  rtx tmp2 = gen_reg_rtx (XFmode);

  emit_insn (gen_absxf2 (tmp, op1));
  emit_insn (gen_cmpxf (tmp,
    CONST_DOUBLE_FROM_REAL_VALUE (
       REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
       XFmode)));
  emit_jump_insn (gen_bge (label1));

  emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
  emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
  emit_jump (label2);

  emit_label (label1);
  emit_move_insn (tmp, CONST1_RTX (XFmode));
  emit_insn (gen_addxf3 (tmp, op1, tmp));
  emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
  emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));

  emit_label (label2);
}
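
/* The threshold compared against above is 1 - sqrt(2)/2 ~= 0.2928932,
   the operand bound documented for fyl2xp1.  Within it the sequence
   computes op0 = log(2) * log2 (1 + op1) via fyl2xp1, which keeps
   precision for small op1; outside it 1 + op1 loses nothing, so an
   explicit addition followed by fyl2x yields the same value.  */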

/* Solaris implementation of TARGET_ASM_NAMED_SECTION.  */

static void ATTRIBUTE_UNUSED
i386_solaris_elf_named_section (const char *name, unsigned int flags,
				tree decl)
{
  /* With Binutils 2.15, the "@unwind" marker must be specified on
     every occurrence of the ".eh_frame" section, not just the first
     one.  */
  if (TARGET_64BIT
      && strcmp (name, ".eh_frame") == 0)
    {
      fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
	       flags & SECTION_WRITE ? "aw" : "a");
      return;
    }
  default_elf_asm_named_section (name, flags, decl);
}

/* Return the mangling of TYPE if it is an extended fundamental type.  */

static const char *
ix86_mangle_fundamental_type (tree type)
{
  switch (TYPE_MODE (type))
    {
    case TFmode:
      /* __float128 is "g".  */
      return "g";
    case XFmode:
      /* "long double" or __float80 is "e".  */
      return "e";
    default:
      return NULL;
    }
}

/* For 32-bit code we can save PIC register setup by using
   __stack_chk_fail_local hidden function instead of calling
   __stack_chk_fail directly.  64-bit code doesn't need to set up any PIC
   register, so it is better to call __stack_chk_fail directly.  */

static tree
ix86_stack_protect_fail (void)
{
  return TARGET_64BIT
	 ? default_external_stack_protect_fail ()
	 : default_hidden_stack_protect_fail ();
}

/* Select a format to encode pointers in exception handling data.  CODE
   is 0 for data, 1 for code labels, 2 for function pointers.  GLOBAL is
   true if the symbol may be affected by dynamic relocations.

   ??? All x86 object file formats are capable of representing this.
   After all, the relocation needed is the same as for the call insn.
   Whether or not a particular assembler allows us to enter such, I
   guess we'll have to see.  */

int
asm_preferred_eh_data_format (int code, int global)
{
  if (flag_pic)
    {
      int type = DW_EH_PE_sdata8;
      if (!TARGET_64BIT
	  || ix86_cmodel == CM_SMALL_PIC
	  || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
	type = DW_EH_PE_sdata4;
      return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
    }
  if (ix86_cmodel == CM_SMALL
      || (ix86_cmodel == CM_MEDIUM && code))
    return DW_EH_PE_udata4;
  return DW_EH_PE_absptr;
}
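
/* A sketch of the resulting encodings: 32-bit PIC gets
   DW_EH_PE_pcrel | DW_EH_PE_sdata4, with DW_EH_PE_indirect added for
   global symbols; 64-bit medium-model PIC keeps sdata8 only for local
   data; non-PIC small-model code uses plain DW_EH_PE_udata4; anything
   else falls back to DW_EH_PE_absptr.  */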

/* Expand copysign from SIGN to the positive value ABS_VALUE
   storing in RESULT.  If MASK is non-null, it shall be a mask to mask out
   the sign-bit.  */

static void
ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
{
  enum machine_mode mode = GET_MODE (sign);
  rtx sgn = gen_reg_rtx (mode);
  if (mask == NULL_RTX)
    {
      mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), false);
      if (!VECTOR_MODE_P (mode))
	{
	  /* We need to generate a scalar mode mask in this case.  */
	  rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
	  tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
	  mask = gen_reg_rtx (mode);
	  emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
	}
    }
  else
    mask = gen_rtx_NOT (mode, mask);

  emit_insn (gen_rtx_SET (VOIDmode, sgn,
			  gen_rtx_AND (mode, mask, sign)));
  emit_insn (gen_rtx_SET (VOIDmode, result,
			  gen_rtx_IOR (mode, abs_value, sgn)));
}
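
/* In bitwise terms the sequence above computes

     result = abs_value | (sign & SIGNBIT)

   where SIGNBIT is the sign-bit mask for MODE.  When the caller passes
   MASK in, it is the fabs mask ~SIGNBIT (as produced by
   ix86_expand_sse_fabs below), which is why it gets inverted first.  */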

/* Expand fabs (OP0) and return a new rtx that holds the result.  The
   mask for masking out the sign-bit is stored in *SMASK, if that is
   non-null.  */

static rtx
ix86_expand_sse_fabs (rtx op0, rtx *smask)
{
  enum machine_mode mode = GET_MODE (op0);
  rtx xa, mask;

  xa = gen_reg_rtx (mode);
  mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), true);
  if (!VECTOR_MODE_P (mode))
    {
      /* We need to generate a scalar mode mask in this case.  */
      rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
      tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
      mask = gen_reg_rtx (mode);
      emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
    }
  emit_insn (gen_rtx_SET (VOIDmode, xa,
			  gen_rtx_AND (mode, op0, mask)));

  if (smask)
    *smask = mask;

  return xa;
}

/* Expands a comparison of OP0 with OP1 using comparison code CODE,
   swapping the operands if SWAP_OPERANDS is true.  The expanded
   code is a forward jump to a newly created label in case the
   comparison is true.  The generated label rtx is returned.  */

static rtx
ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
				  bool swap_operands)
{
  rtx label, tmp;

  if (swap_operands)
    {
      tmp = op0;
      op0 = op1;
      op1 = tmp;
    }

  label = gen_label_rtx ();
  tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
  emit_insn (gen_rtx_SET (VOIDmode, tmp,
			  gen_rtx_COMPARE (CCFPUmode, op0, op1)));
  tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
  tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
			      gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
  tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
  JUMP_LABEL (tmp) = label;

  return label;
}

/* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
   using comparison code CODE.  Operands are swapped for the comparison if
   SWAP_OPERANDS is true.  Returns a rtx for the generated mask.  */

static rtx
ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
			      bool swap_operands)
{
  enum machine_mode mode = GET_MODE (op0);
  rtx mask = gen_reg_rtx (mode);

  if (swap_operands)
    {
      rtx tmp = op0;
      op0 = op1;
      op1 = tmp;
    }

  if (mode == DFmode)
    emit_insn (gen_sse2_maskcmpdf3 (mask, op0, op1,
				    gen_rtx_fmt_ee (code, mode, op0, op1)));
  else
    emit_insn (gen_sse_maskcmpsf3 (mask, op0, op1,
				   gen_rtx_fmt_ee (code, mode, op0, op1)));

  return mask;
}

/* Generate and return a rtx of mode MODE for 2**n where n is the number
   of bits of the mantissa of MODE, which must be one of DFmode or SFmode.  */

static rtx
ix86_gen_TWO52 (enum machine_mode mode)
{
  REAL_VALUE_TYPE TWO52r;
  rtx TWO52;

  real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
  TWO52 = const_double_from_real_value (TWO52r, mode);
  TWO52 = force_reg (mode, TWO52);

  return TWO52;
}
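
/* TWO52 drives the usual add-and-subtract rounding trick: for
   0 <= x < 2**52 the sum x + 2**52 has an ulp of 1.0 in DFmode, so the
   addition itself rounds x to an integer and the subtraction recovers
   it, e.g. (3.7 + 2**52) - 2**52 == 4.0 and (2.5 + 2**52) - 2**52 ==
   2.0 (round-to-nearest-even).  SFmode uses 2**23 analogously.  */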

/* Expand SSE sequence for computing lround from OP1 storing
   into OP0.  */

void
ix86_expand_lround (rtx op0, rtx op1)
{
  /* C code for the stuff we're doing below:
       tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
       op0 = (long)tmp;
   */
  enum machine_mode mode = GET_MODE (op1);
  const struct real_format *fmt;
  REAL_VALUE_TYPE pred_half, half_minus_pred_half;
  rtx adj;

  /* load nextafter (0.5, 0.0) */
  fmt = REAL_MODE_FORMAT (mode);
  real_2expN (&half_minus_pred_half, -(fmt->p) - 1);
  REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);

  /* adj = copysign (0.5, op1) */
  adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
  ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);

  /* adj = op1 + adj */
  adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);

  /* op0 = (imode)adj */
  expand_fix (op0, adj, 0);
}
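
/* Using nextafter (0.5, 0.0) instead of 0.5 avoids a double rounding:
   for the largest double below 0.5, x + 0.5 would round up to 1.0 and
   truncate to 1, whereas x + pred_half stays below 1.0 and truncates
   to 0.  Halfway cases still round away from zero, since e.g.
   0.5 + pred_half rounds up to 1.0.  */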

/* Expand SSE2 sequence for computing lfloor or lceil from OP1 storing
   into OP0.  */

void
ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
{
  /* C code for the stuff we're doing below (for do_floor):
	xi = (long)op1;
	xi -= (double)xi > op1 ? 1 : 0;
	return xi;
   */
  enum machine_mode fmode = GET_MODE (op1);
  enum machine_mode imode = GET_MODE (op0);
  rtx ireg, freg, label, tmp;

  /* reg = (long)op1 */
  ireg = gen_reg_rtx (imode);
  expand_fix (ireg, op1, 0);

  /* freg = (double)reg */
  freg = gen_reg_rtx (fmode);
  expand_float (freg, ireg, 0);

  /* ireg = (freg > op1) ? ireg - 1 : ireg */
  label = ix86_expand_sse_compare_and_jump (UNLE,
					    freg, op1, !do_floor);
  tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
			     ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
  emit_move_insn (ireg, tmp);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (op0, ireg);
}

/* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
   result in OPERAND0.  */

void
ix86_expand_rint (rtx operand0, rtx operand1)
{
  /* C code for the stuff we're doing below:
	xa = fabs (operand1);
	if (!isless (xa, 2**52))
	  return operand1;
	xa = xa + 2**52 - 2**52;
	return copysign (xa, operand1);
   */
  enum machine_mode mode = GET_MODE (operand0);
  rtx res, xa, label, TWO52, mask;

  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  /* xa = abs (operand1) */
  xa = ix86_expand_sse_fabs (res, &mask);

  /* if (!isless (xa, TWO52)) goto label; */
  TWO52 = ix86_gen_TWO52 (mode);
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
  xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);

  ix86_sse_copysign_to_positive (res, xa, res, mask);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
}

/* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
   into OPERAND0.  */

void
ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
{
  /* C code for the stuff we expand below.
	double xa = fabs (x), x2;
	if (!isless (xa, TWO52))
	  return x;
	xa = xa + TWO52 - TWO52;
	x2 = copysign (xa, x);
     Compensate.  Floor:
	if (x2 > x)
	  x2 -= 1;
     Compensate.  Ceil:
	if (x2 < x)
	  x2 -= -1;
	return x2;
   */
  enum machine_mode mode = GET_MODE (operand0);
  rtx xa, TWO52, tmp, label, one, res, mask;

  TWO52 = ix86_gen_TWO52 (mode);

  /* Temporary for holding the result, initialized to the input
     operand to ease control flow.  */
  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  /* xa = abs (operand1) */
  xa = ix86_expand_sse_fabs (res, &mask);

  /* if (!isless (xa, TWO52)) goto label; */
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  /* xa = xa + TWO52 - TWO52; */
  xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
  xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);

  /* xa = copysign (xa, operand1) */
  ix86_sse_copysign_to_positive (xa, xa, res, mask);

  /* generate 1.0 or -1.0 */
  one = force_reg (mode,
		   const_double_from_real_value (do_floor
						 ? dconst1 : dconstm1, mode));

  /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
  tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
  emit_insn (gen_rtx_SET (VOIDmode, tmp,
			  gen_rtx_AND (mode, one, tmp)));
  /* We always need to subtract here to preserve signed zero.  */
  tmp = expand_simple_binop (mode, MINUS,
			     xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
  emit_move_insn (res, tmp);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
}
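
/* The MINUS with a +/-1.0 adjustment (rather than a PLUS for ceil) is
   what keeps signed zeros intact: when no compensation is needed the
   masked adjustment is +0.0, and e.g. for ceil (-0.25) the rounded
   value is -0.0, where -0.0 - +0.0 == -0.0 but -0.0 + +0.0 would
   yield +0.0.  */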

/* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
   into OPERAND0.  */

void
ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
{
  /* C code for the stuff we expand below.
	double xa = fabs (x), x2;
	if (!isless (xa, TWO52))
	  return x;
	x2 = (double)(long)x;
     Compensate.  Floor:
	if (x2 > x)
	  x2 -= 1;
     Compensate.  Ceil:
	if (x2 < x)
	  x2 += 1;
	if (HONOR_SIGNED_ZEROS (mode))
	  return copysign (x2, x);
	return x2;
   */
  enum machine_mode mode = GET_MODE (operand0);
  rtx xa, xi, TWO52, tmp, label, one, res, mask;

  TWO52 = ix86_gen_TWO52 (mode);

  /* Temporary for holding the result, initialized to the input
     operand to ease control flow.  */
  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  /* xa = abs (operand1) */
  xa = ix86_expand_sse_fabs (res, &mask);

  /* if (!isless (xa, TWO52)) goto label; */
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  /* xa = (double)(long)x */
  xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
  expand_fix (xi, res, 0);
  expand_float (xa, xi, 0);

  /* generate 1.0 */
  one = force_reg (mode, const_double_from_real_value (dconst1, mode));

  /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
  tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
  emit_insn (gen_rtx_SET (VOIDmode, tmp,
			  gen_rtx_AND (mode, one, tmp)));
  tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
			     xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
  emit_move_insn (res, tmp);

  if (HONOR_SIGNED_ZEROS (mode))
    ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
}

/* Expand SSE sequence for computing round from OPERAND1 storing
   into OPERAND0.  Sequence that works without relying on DImode truncation
   via cvttsd2siq that is only available on 64bit targets.  */

void
ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
{
  /* C code for the stuff we expand below.
	double xa = fabs (x), xa2, x2;
	if (!isless (xa, TWO52))
	  return x;
     Using the absolute value and copying back sign makes
     -0.0 -> -0.0 correct.
	xa2 = xa + TWO52 - TWO52;
     Compensate.
	dxa = xa2 - xa;
	if (dxa <= -0.5)
	  xa2 += 1;
	else if (dxa > 0.5)
	  xa2 -= 1;
	x2 = copysign (xa2, x);
	return x2;
   */
  enum machine_mode mode = GET_MODE (operand0);
  rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;

  TWO52 = ix86_gen_TWO52 (mode);

  /* Temporary for holding the result, initialized to the input
     operand to ease control flow.  */
  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  /* xa = abs (operand1) */
  xa = ix86_expand_sse_fabs (res, &mask);

  /* if (!isless (xa, TWO52)) goto label; */
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  /* xa2 = xa + TWO52 - TWO52; */
  xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
  xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);

  /* dxa = xa2 - xa; */
  dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);

  /* generate 0.5, 1.0 and -0.5 */
  half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
  one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
  mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
			       0, OPTAB_DIRECT);

  /* Compensate.  */
  tmp = gen_reg_rtx (mode);
  /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
  tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
  emit_insn (gen_rtx_SET (VOIDmode, tmp,
			  gen_rtx_AND (mode, one, tmp)));
  xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
  /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
  tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
  emit_insn (gen_rtx_SET (VOIDmode, tmp,
			  gen_rtx_AND (mode, one, tmp)));
  xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);

  /* res = copysign (xa2, operand1) */
  ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
}

/* Expand SSE sequence for computing trunc from OPERAND1 storing
   into OPERAND0.  */

void
ix86_expand_trunc (rtx operand0, rtx operand1)
{
  /* C code for SSE variant we expand below.
	double xa = fabs (x), x2;
	if (!isless (xa, TWO52))
	  return x;
	x2 = (double)(long)x;
	if (HONOR_SIGNED_ZEROS (mode))
	  return copysign (x2, x);
	return x2;
   */
  enum machine_mode mode = GET_MODE (operand0);
  rtx xa, xi, TWO52, label, res, mask;

  TWO52 = ix86_gen_TWO52 (mode);

  /* Temporary for holding the result, initialized to the input
     operand to ease control flow.  */
  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  /* xa = abs (operand1) */
  xa = ix86_expand_sse_fabs (res, &mask);

  /* if (!isless (xa, TWO52)) goto label; */
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  /* x = (double)(long)x */
  xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
  expand_fix (xi, res, 0);
  expand_float (res, xi, 0);

  if (HONOR_SIGNED_ZEROS (mode))
    ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
}

/* Expand SSE sequence for computing trunc from OPERAND1 storing
   into OPERAND0.  Sequence that works without relying on DImode truncation
   via cvttsd2siq that is only available on 64bit targets.  */

void
ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
{
  enum machine_mode mode = GET_MODE (operand0);
  rtx xa, mask, TWO52, label, one, res, smask, tmp;

  /* C code for SSE variant we expand below.
	double xa = fabs (x), x2;
	if (!isless (xa, TWO52))
	  return x;
	xa2 = xa + TWO52 - TWO52;
     Compensate:
	if (xa2 > xa)
	  xa2 -= 1.0;
	x2 = copysign (xa2, x);
	return x2;
   */

  TWO52 = ix86_gen_TWO52 (mode);

  /* Temporary for holding the result, initialized to the input
     operand to ease control flow.  */
  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  /* xa = abs (operand1) */
  xa = ix86_expand_sse_fabs (res, &smask);

  /* if (!isless (xa, TWO52)) goto label; */
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  /* res = xa + TWO52 - TWO52; */
  tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
  tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
  emit_move_insn (res, tmp);

  /* generate 1.0 */
  one = force_reg (mode, const_double_from_real_value (dconst1, mode));

  /* Compensate: res = xa2 - (res > xa ? 1 : 0)  */
  mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
  emit_insn (gen_rtx_SET (VOIDmode, mask,
			  gen_rtx_AND (mode, mask, one)));
  tmp = expand_simple_binop (mode, MINUS,
			     res, mask, NULL_RTX, 0, OPTAB_DIRECT);
  emit_move_insn (res, tmp);

  /* res = copysign (res, operand1) */
  ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
}

/* Expand SSE sequence for computing round from OPERAND1 storing
   into OPERAND0.  */

void
ix86_expand_round (rtx operand0, rtx operand1)
{
  /* C code for the stuff we're doing below:
	double xa = fabs (x);
	if (!isless (xa, TWO52))
	  return x;
	xa = (double)(long)(xa + nextafter (0.5, 0.0));
	return copysign (xa, x);
   */
  enum machine_mode mode = GET_MODE (operand0);
  rtx res, TWO52, xa, label, xi, half, mask;
  const struct real_format *fmt;
  REAL_VALUE_TYPE pred_half, half_minus_pred_half;

  /* Temporary for holding the result, initialized to the input
     operand to ease control flow.  */
  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  TWO52 = ix86_gen_TWO52 (mode);
  xa = ix86_expand_sse_fabs (res, &mask);
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  /* load nextafter (0.5, 0.0) */
  fmt = REAL_MODE_FORMAT (mode);
  real_2expN (&half_minus_pred_half, -(fmt->p) - 1);
  REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);

  /* xa = xa + 0.5 */
  half = force_reg (mode, const_double_from_real_value (pred_half, mode));
  xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);

  /* xa = (double)(int64_t)xa */
  xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
  expand_fix (xi, xa, 0);
  expand_float (xa, xi, 0);

  /* res = copysign (xa, operand1) */
  ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
}

/* Table of valid machine attributes.  */
static const struct attribute_spec ix86_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
  /* Stdcall attribute says callee is responsible for popping arguments
     if they are not variable.  */
  { "stdcall",   0, 0, false, true,  true,  ix86_handle_cconv_attribute },
  /* Fastcall attribute says callee is responsible for popping arguments
     if they are not variable.  */
  { "fastcall",  0, 0, false, true,  true,  ix86_handle_cconv_attribute },
  /* Cdecl attribute says the callee is a normal C declaration.  */
  { "cdecl",     0, 0, false, true,  true,  ix86_handle_cconv_attribute },
  /* Regparm attribute specifies how many integer arguments are to be
     passed in registers.  */
  { "regparm",   1, 1, false, true,  true,  ix86_handle_cconv_attribute },
  /* Sseregparm attribute says we are using x86_64 calling conventions
     for FP arguments.  */
  { "sseregparm", 0, 0, false, true, true,  ix86_handle_cconv_attribute },
  /* force_align_arg_pointer says this function realigns the stack at entry.  */
  { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
    false, true,  true, ix86_handle_cconv_attribute },
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
  { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
  { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
  { "shared",    0, 0, true,  false, false, ix86_handle_shared_attribute },
#endif
  { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
  { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
#ifdef SUBTARGET_ATTRIBUTE_TABLE
  SUBTARGET_ATTRIBUTE_TABLE,
#endif
  { NULL,        0, 0, false, false, false, NULL }
};

/* Initialize the GCC target structure.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
#  undef TARGET_MERGE_DECL_ATTRIBUTES
#  define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
#endif

#undef TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS ix86_init_builtins
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN ix86_expand_builtin

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION ix86_builtin_vectorized_function
#undef TARGET_VECTORIZE_BUILTIN_CONVERSION
#define TARGET_VECTORIZE_BUILTIN_CONVERSION ix86_builtin_conversion

#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue

#undef TARGET_ENCODE_SECTION_INFO
#ifndef SUBTARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
#else
#define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
#endif

#undef TARGET_ASM_OPEN_PAREN
#define TARGET_ASM_OPEN_PAREN ""
#undef TARGET_ASM_CLOSE_PAREN
#define TARGET_ASM_CLOSE_PAREN ""

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
#ifdef ASM_QUAD
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
#endif

#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
  ia32_multipass_dfa_lookahead

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall

#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif
#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
#define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_rtx_true

#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address

#undef TARGET_MS_BITFIELD_LAYOUT_P
#define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p

#if TARGET_MACHO
#undef TARGET_BINDS_LOCAL_P
#define TARGET_BINDS_LOCAL_P darwin_binds_local_p
#endif
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
#undef TARGET_BINDS_LOCAL_P
#define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
#endif

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START x86_file_start

#undef TARGET_DEFAULT_TARGET_FLAGS
#define TARGET_DEFAULT_TARGET_FLAGS	\
  (TARGET_DEFAULT			\
   | TARGET_64BIT_DEFAULT		\
   | TARGET_SUBTARGET_DEFAULT		\
   | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)

#undef TARGET_HANDLE_OPTION
#define TARGET_HANDLE_OPTION ix86_handle_option

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS ix86_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST ix86_address_cost

#undef TARGET_FIXED_CONDITION_CODE_REGS
#define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
#undef TARGET_CC_MODES_COMPATIBLE
#define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list

#undef TARGET_MD_ASM_CLOBBERS
#define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers

#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
#undef TARGET_INTERNAL_ARG_POINTER
#define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
#undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
#define TARGET_DWARF_HANDLE_FRAME_UNSPEC ix86_dwarf_handle_frame_unspec
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p

#ifdef HAVE_AS_TLS
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
#endif

#ifdef SUBTARGET_INSERT_ATTRIBUTES
#undef TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
#endif

#undef TARGET_MANGLE_FUNDAMENTAL_TYPE
#define TARGET_MANGLE_FUNDAMENTAL_TYPE ix86_mangle_fundamental_type

#undef TARGET_STACK_PROTECT_FAIL
#define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE ix86_function_value

struct gcc_target targetm = TARGET_INITIALIZER;

#include "gt-i386.h"