/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
   2002, 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to
the Free Software Foundation, 51 Franklin Street, Fifth Floor,
Boston, MA 02110-1301, USA.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "tree.h"
#include "tm_p.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "real.h"
#include "insn-config.h"
#include "conditions.h"
#include "output.h"
#include "insn-codes.h"
#include "insn-attr.h"
#include "flags.h"
#include "except.h"
#include "function.h"
#include "recog.h"
#include "expr.h"
#include "optabs.h"
#include "toplev.h"
#include "basic-block.h"
#include "ggc.h"
#include "target.h"
#include "target-def.h"
#include "langhooks.h"
#include "cgraph.h"
#include "tree-gimple.h"
#include "tm-constrs.h"
#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif

/* Return index of given mode in mult and division cost tables.  */
#define MODE_INDEX(mode)			\
  ((mode) == QImode ? 0				\
   : (mode) == HImode ? 1			\
   : (mode) == SImode ? 2			\
   : (mode) == DImode ? 3			\
   : 4)
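/* Illustrative only (not part of the original file): a minimal sketch of
   how MODE_INDEX picks a slot in the five-entry per-mode cost arrays
   below.  The helper name is hypothetical; the field name follows struct
   processor_costs in i386.h.  Kept under #if 0 so it stays out of the
   build.  */
#if 0
static int
example_mult_start_cost (const struct processor_costs *c,
			 enum machine_mode mode)
{
  /* Slots 0-3 are QImode..DImode; every other mode falls into the
     trailing "other" slot 4.  */
  return c->mult_init[MODE_INDEX (mode)];
}
#endif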
/* Processor costs (relative to an add) */
/* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes.  */
#define COSTS_N_BYTES(N) ((N) * 2)
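/* Illustrative arithmetic (not in the original): with COSTS_N_INSNS (N)
   defined as (N) * 4 and an add taking 2 bytes, COSTS_N_BYTES (2) == 4 ==
   COSTS_N_INSNS (1), so byte counts used when tuning for size and insn
   counts used when tuning for speed land on one common scale.  */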
#define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
static const
struct processor_costs size_cost = {	/* costs for tuning for size */
  COSTS_N_BYTES (2),	/* cost of an add instruction */
  COSTS_N_BYTES (3),	/* cost of a lea instruction */
  COSTS_N_BYTES (2),	/* variable shift costs */
  COSTS_N_BYTES (3),	/* constant shift costs */
  {COSTS_N_BYTES (3),	/* cost of starting multiply for QI */
   COSTS_N_BYTES (3),	/* HI */
   COSTS_N_BYTES (3),	/* SI */
   COSTS_N_BYTES (3),	/* DI */
   COSTS_N_BYTES (5)},	/* other */
  0,			/* cost of multiply per each bit set */
  {COSTS_N_BYTES (3),	/* cost of a divide/mod for QI */
   COSTS_N_BYTES (3),	/* HI */
   COSTS_N_BYTES (3),	/* SI */
   COSTS_N_BYTES (3),	/* DI */
   COSTS_N_BYTES (5)},	/* other */
  COSTS_N_BYTES (3),	/* cost of movsx */
  COSTS_N_BYTES (3),	/* cost of movzx */
  2,			/* cost for loading QImode using movzbl */
  {2, 2, 2},		/* cost of loading integer registers
			   in QImode, HImode and SImode.
			   Relative to reg-reg move (2).  */
  {2, 2, 2},		/* cost of storing integer registers */
  2,			/* cost of reg,reg fld/fst */
  {2, 2, 2},		/* cost of loading fp registers
			   in SFmode, DFmode and XFmode */
  {2, 2, 2},		/* cost of storing fp registers
			   in SFmode, DFmode and XFmode */
  3,			/* cost of moving MMX register */
  {3, 3},		/* cost of loading MMX registers
			   in SImode and DImode */
  {3, 3},		/* cost of storing MMX registers
			   in SImode and DImode */
  3,			/* cost of moving SSE register */
  {3, 3, 3},		/* cost of loading SSE registers
			   in SImode, DImode and TImode */
  {3, 3, 3},		/* cost of storing SSE registers
			   in SImode, DImode and TImode */
  3,			/* MMX or SSE register to integer */
  0,			/* size of prefetch block */
  0,			/* number of parallel prefetches */
  COSTS_N_BYTES (2),	/* cost of FADD and FSUB insns.  */
  COSTS_N_BYTES (2),	/* cost of FMUL instruction.  */
  COSTS_N_BYTES (2),	/* cost of FDIV instruction.  */
  COSTS_N_BYTES (2),	/* cost of FABS instruction.  */
  COSTS_N_BYTES (2),	/* cost of FCHS instruction.  */
  COSTS_N_BYTES (2),	/* cost of FSQRT instruction.  */
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}}
};
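/* Illustrative only (not in the original): each memcpy/memset descriptor
   above pairs a fallback algorithm with {max_size, alg} steps; selection
   walks the steps until one covers the requested size (-1 meaning "any
   size").  A minimal sketch, assuming the stringop_algs layout from
   i386.h; the helper name is hypothetical and the sketch is kept out of
   the build.  */
#if 0
static enum stringop_alg
example_pick_stringop (const struct stringop_algs *algs, HOST_WIDE_INT nbytes)
{
  int i;
  for (i = 0; i < MAX_STRINGOP_ALGS && algs->size[i].max != 0; i++)
    if (algs->size[i].max == -1 || nbytes <= algs->size[i].max)
      return algs->size[i].alg;
  return algs->unknown_size;	/* No step matched; use the fallback.  */
}
#endif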
/* Processor costs (relative to an add) */
static const
struct processor_costs i386_cost = {	/* 386 specific costs */
  COSTS_N_INSNS (1),	/* cost of an add instruction */
  COSTS_N_INSNS (1),	/* cost of a lea instruction */
  COSTS_N_INSNS (3),	/* variable shift costs */
  COSTS_N_INSNS (2),	/* constant shift costs */
  {COSTS_N_INSNS (6),	/* cost of starting multiply for QI */
   COSTS_N_INSNS (6),	/* HI */
   COSTS_N_INSNS (6),	/* SI */
   COSTS_N_INSNS (6),	/* DI */
   COSTS_N_INSNS (6)},	/* other */
  COSTS_N_INSNS (1),	/* cost of multiply per each bit set */
  {COSTS_N_INSNS (23),	/* cost of a divide/mod for QI */
   COSTS_N_INSNS (23),	/* HI */
   COSTS_N_INSNS (23),	/* SI */
   COSTS_N_INSNS (23),	/* DI */
   COSTS_N_INSNS (23)},	/* other */
  COSTS_N_INSNS (3),	/* cost of movsx */
  COSTS_N_INSNS (2),	/* cost of movzx */
  15,			/* "large" insn */
  4,			/* cost for loading QImode using movzbl */
  {2, 4, 2},		/* cost of loading integer registers
			   in QImode, HImode and SImode.
			   Relative to reg-reg move (2).  */
  {2, 4, 2},		/* cost of storing integer registers */
  2,			/* cost of reg,reg fld/fst */
  {8, 8, 8},		/* cost of loading fp registers
			   in SFmode, DFmode and XFmode */
  {8, 8, 8},		/* cost of storing fp registers
			   in SFmode, DFmode and XFmode */
  2,			/* cost of moving MMX register */
  {4, 8},		/* cost of loading MMX registers
			   in SImode and DImode */
  {4, 8},		/* cost of storing MMX registers
			   in SImode and DImode */
  2,			/* cost of moving SSE register */
  {4, 8, 16},		/* cost of loading SSE registers
			   in SImode, DImode and TImode */
  {4, 8, 16},		/* cost of storing SSE registers
			   in SImode, DImode and TImode */
  3,			/* MMX or SSE register to integer */
  0,			/* size of prefetch block */
  0,			/* number of parallel prefetches */
  COSTS_N_INSNS (23),	/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (27),	/* cost of FMUL instruction.  */
  COSTS_N_INSNS (88),	/* cost of FDIV instruction.  */
  COSTS_N_INSNS (22),	/* cost of FABS instruction.  */
  COSTS_N_INSNS (24),	/* cost of FCHS instruction.  */
  COSTS_N_INSNS (122),	/* cost of FSQRT instruction.  */
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
};
static const
struct processor_costs i486_cost = {	/* 486 specific costs */
  COSTS_N_INSNS (1),	/* cost of an add instruction */
  COSTS_N_INSNS (1),	/* cost of a lea instruction */
  COSTS_N_INSNS (3),	/* variable shift costs */
  COSTS_N_INSNS (2),	/* constant shift costs */
  {COSTS_N_INSNS (12),	/* cost of starting multiply for QI */
   COSTS_N_INSNS (12),	/* HI */
   COSTS_N_INSNS (12),	/* SI */
   COSTS_N_INSNS (12),	/* DI */
   COSTS_N_INSNS (12)},	/* other */
  1,			/* cost of multiply per each bit set */
  {COSTS_N_INSNS (40),	/* cost of a divide/mod for QI */
   COSTS_N_INSNS (40),	/* HI */
   COSTS_N_INSNS (40),	/* SI */
   COSTS_N_INSNS (40),	/* DI */
   COSTS_N_INSNS (40)},	/* other */
  COSTS_N_INSNS (3),	/* cost of movsx */
  COSTS_N_INSNS (2),	/* cost of movzx */
  15,			/* "large" insn */
  4,			/* cost for loading QImode using movzbl */
  {2, 4, 2},		/* cost of loading integer registers
			   in QImode, HImode and SImode.
			   Relative to reg-reg move (2).  */
  {2, 4, 2},		/* cost of storing integer registers */
  2,			/* cost of reg,reg fld/fst */
  {8, 8, 8},		/* cost of loading fp registers
			   in SFmode, DFmode and XFmode */
  {8, 8, 8},		/* cost of storing fp registers
			   in SFmode, DFmode and XFmode */
  2,			/* cost of moving MMX register */
  {4, 8},		/* cost of loading MMX registers
			   in SImode and DImode */
  {4, 8},		/* cost of storing MMX registers
			   in SImode and DImode */
  2,			/* cost of moving SSE register */
  {4, 8, 16},		/* cost of loading SSE registers
			   in SImode, DImode and TImode */
  {4, 8, 16},		/* cost of storing SSE registers
			   in SImode, DImode and TImode */
  3,			/* MMX or SSE register to integer */
  0,			/* size of prefetch block */
  0,			/* number of parallel prefetches */
  COSTS_N_INSNS (8),	/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (16),	/* cost of FMUL instruction.  */
  COSTS_N_INSNS (73),	/* cost of FDIV instruction.  */
  COSTS_N_INSNS (3),	/* cost of FABS instruction.  */
  COSTS_N_INSNS (3),	/* cost of FCHS instruction.  */
  COSTS_N_INSNS (83),	/* cost of FSQRT instruction.  */
  {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
};
static const
struct processor_costs pentium_cost = {
  COSTS_N_INSNS (1),	/* cost of an add instruction */
  COSTS_N_INSNS (1),	/* cost of a lea instruction */
  COSTS_N_INSNS (4),	/* variable shift costs */
  COSTS_N_INSNS (1),	/* constant shift costs */
  {COSTS_N_INSNS (11),	/* cost of starting multiply for QI */
   COSTS_N_INSNS (11),	/* HI */
   COSTS_N_INSNS (11),	/* SI */
   COSTS_N_INSNS (11),	/* DI */
   COSTS_N_INSNS (11)},	/* other */
  0,			/* cost of multiply per each bit set */
  {COSTS_N_INSNS (25),	/* cost of a divide/mod for QI */
   COSTS_N_INSNS (25),	/* HI */
   COSTS_N_INSNS (25),	/* SI */
   COSTS_N_INSNS (25),	/* DI */
   COSTS_N_INSNS (25)},	/* other */
  COSTS_N_INSNS (3),	/* cost of movsx */
  COSTS_N_INSNS (2),	/* cost of movzx */
  8,			/* "large" insn */
  6,			/* cost for loading QImode using movzbl */
  {2, 4, 2},		/* cost of loading integer registers
			   in QImode, HImode and SImode.
			   Relative to reg-reg move (2).  */
  {2, 4, 2},		/* cost of storing integer registers */
  2,			/* cost of reg,reg fld/fst */
  {2, 2, 6},		/* cost of loading fp registers
			   in SFmode, DFmode and XFmode */
  {4, 4, 6},		/* cost of storing fp registers
			   in SFmode, DFmode and XFmode */
  8,			/* cost of moving MMX register */
  {8, 8},		/* cost of loading MMX registers
			   in SImode and DImode */
  {8, 8},		/* cost of storing MMX registers
			   in SImode and DImode */
  2,			/* cost of moving SSE register */
  {4, 8, 16},		/* cost of loading SSE registers
			   in SImode, DImode and TImode */
  {4, 8, 16},		/* cost of storing SSE registers
			   in SImode, DImode and TImode */
  3,			/* MMX or SSE register to integer */
  0,			/* size of prefetch block */
  0,			/* number of parallel prefetches */
  COSTS_N_INSNS (3),	/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (3),	/* cost of FMUL instruction.  */
  COSTS_N_INSNS (39),	/* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),	/* cost of FABS instruction.  */
  COSTS_N_INSNS (1),	/* cost of FCHS instruction.  */
  COSTS_N_INSNS (70),	/* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
};
static const
struct processor_costs pentiumpro_cost = {
  COSTS_N_INSNS (1),	/* cost of an add instruction */
  COSTS_N_INSNS (1),	/* cost of a lea instruction */
  COSTS_N_INSNS (1),	/* variable shift costs */
  COSTS_N_INSNS (1),	/* constant shift costs */
  {COSTS_N_INSNS (4),	/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),	/* HI */
   COSTS_N_INSNS (4),	/* SI */
   COSTS_N_INSNS (4),	/* DI */
   COSTS_N_INSNS (4)},	/* other */
  0,			/* cost of multiply per each bit set */
  {COSTS_N_INSNS (17),	/* cost of a divide/mod for QI */
   COSTS_N_INSNS (17),	/* HI */
   COSTS_N_INSNS (17),	/* SI */
   COSTS_N_INSNS (17),	/* DI */
   COSTS_N_INSNS (17)},	/* other */
  COSTS_N_INSNS (1),	/* cost of movsx */
  COSTS_N_INSNS (1),	/* cost of movzx */
  8,			/* "large" insn */
  2,			/* cost for loading QImode using movzbl */
  {4, 4, 4},		/* cost of loading integer registers
			   in QImode, HImode and SImode.
			   Relative to reg-reg move (2).  */
  {2, 2, 2},		/* cost of storing integer registers */
  2,			/* cost of reg,reg fld/fst */
  {2, 2, 6},		/* cost of loading fp registers
			   in SFmode, DFmode and XFmode */
  {4, 4, 6},		/* cost of storing fp registers
			   in SFmode, DFmode and XFmode */
  2,			/* cost of moving MMX register */
  {2, 2},		/* cost of loading MMX registers
			   in SImode and DImode */
  {2, 2},		/* cost of storing MMX registers
			   in SImode and DImode */
  2,			/* cost of moving SSE register */
  {2, 2, 8},		/* cost of loading SSE registers
			   in SImode, DImode and TImode */
  {2, 2, 8},		/* cost of storing SSE registers
			   in SImode, DImode and TImode */
  3,			/* MMX or SSE register to integer */
  32,			/* size of prefetch block */
  6,			/* number of parallel prefetches */
  COSTS_N_INSNS (3),	/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (5),	/* cost of FMUL instruction.  */
  COSTS_N_INSNS (56),	/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),	/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),	/* cost of FCHS instruction.  */
  COSTS_N_INSNS (56),	/* cost of FSQRT instruction.  */
  /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes
     (we ensure the alignment).  For small blocks the inline loop is still a
     noticeable win; for bigger blocks either rep movsl or rep movsb is the
     way to go.  Rep movsb apparently has a more expensive startup time in the
     CPU, but after 4K the difference is down in the noise.  */
  {{rep_prefix_4_byte, {{128, loop}, {1024, unrolled_loop},
			{8192, rep_prefix_4_byte}, {-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_4_byte, {{1024, unrolled_loop},
			{8192, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
};
static const
struct processor_costs geode_cost = {
  COSTS_N_INSNS (1),	/* cost of an add instruction */
  COSTS_N_INSNS (1),	/* cost of a lea instruction */
  COSTS_N_INSNS (2),	/* variable shift costs */
  COSTS_N_INSNS (1),	/* constant shift costs */
  {COSTS_N_INSNS (3),	/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),	/* HI */
   COSTS_N_INSNS (7),	/* SI */
   COSTS_N_INSNS (7),	/* DI */
   COSTS_N_INSNS (7)},	/* other */
  0,			/* cost of multiply per each bit set */
  {COSTS_N_INSNS (15),	/* cost of a divide/mod for QI */
   COSTS_N_INSNS (23),	/* HI */
   COSTS_N_INSNS (39),	/* SI */
   COSTS_N_INSNS (39),	/* DI */
   COSTS_N_INSNS (39)},	/* other */
  COSTS_N_INSNS (1),	/* cost of movsx */
  COSTS_N_INSNS (1),	/* cost of movzx */
  8,			/* "large" insn */
  1,			/* cost for loading QImode using movzbl */
  {1, 1, 1},		/* cost of loading integer registers
			   in QImode, HImode and SImode.
			   Relative to reg-reg move (2).  */
  {1, 1, 1},		/* cost of storing integer registers */
  1,			/* cost of reg,reg fld/fst */
  {1, 1, 1},		/* cost of loading fp registers
			   in SFmode, DFmode and XFmode */
  {4, 6, 6},		/* cost of storing fp registers
			   in SFmode, DFmode and XFmode */
  1,			/* cost of moving MMX register */
  {1, 1},		/* cost of loading MMX registers
			   in SImode and DImode */
  {1, 1},		/* cost of storing MMX registers
			   in SImode and DImode */
  1,			/* cost of moving SSE register */
  {1, 1, 1},		/* cost of loading SSE registers
			   in SImode, DImode and TImode */
  {1, 1, 1},		/* cost of storing SSE registers
			   in SImode, DImode and TImode */
  1,			/* MMX or SSE register to integer */
  32,			/* size of prefetch block */
  1,			/* number of parallel prefetches */
  COSTS_N_INSNS (6),	/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (11),	/* cost of FMUL instruction.  */
  COSTS_N_INSNS (47),	/* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),	/* cost of FABS instruction.  */
  COSTS_N_INSNS (1),	/* cost of FCHS instruction.  */
  COSTS_N_INSNS (54),	/* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
};
static const
struct processor_costs k6_cost = {
  COSTS_N_INSNS (1),	/* cost of an add instruction */
  COSTS_N_INSNS (2),	/* cost of a lea instruction */
  COSTS_N_INSNS (1),	/* variable shift costs */
  COSTS_N_INSNS (1),	/* constant shift costs */
  {COSTS_N_INSNS (3),	/* cost of starting multiply for QI */
   COSTS_N_INSNS (3),	/* HI */
   COSTS_N_INSNS (3),	/* SI */
   COSTS_N_INSNS (3),	/* DI */
   COSTS_N_INSNS (3)},	/* other */
  0,			/* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),	/* cost of a divide/mod for QI */
   COSTS_N_INSNS (18),	/* HI */
   COSTS_N_INSNS (18),	/* SI */
   COSTS_N_INSNS (18),	/* DI */
   COSTS_N_INSNS (18)},	/* other */
  COSTS_N_INSNS (2),	/* cost of movsx */
  COSTS_N_INSNS (2),	/* cost of movzx */
  8,			/* "large" insn */
  3,			/* cost for loading QImode using movzbl */
  {4, 5, 4},		/* cost of loading integer registers
			   in QImode, HImode and SImode.
			   Relative to reg-reg move (2).  */
  {2, 3, 2},		/* cost of storing integer registers */
  4,			/* cost of reg,reg fld/fst */
  {6, 6, 6},		/* cost of loading fp registers
			   in SFmode, DFmode and XFmode */
  {4, 4, 4},		/* cost of storing fp registers
			   in SFmode, DFmode and XFmode */
  2,			/* cost of moving MMX register */
  {2, 2},		/* cost of loading MMX registers
			   in SImode and DImode */
  {2, 2},		/* cost of storing MMX registers
			   in SImode and DImode */
  2,			/* cost of moving SSE register */
  {2, 2, 8},		/* cost of loading SSE registers
			   in SImode, DImode and TImode */
  {2, 2, 8},		/* cost of storing SSE registers
			   in SImode, DImode and TImode */
  6,			/* MMX or SSE register to integer */
  32,			/* size of prefetch block */
  1,			/* number of parallel prefetches */
  COSTS_N_INSNS (2),	/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (2),	/* cost of FMUL instruction.  */
  COSTS_N_INSNS (56),	/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),	/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),	/* cost of FCHS instruction.  */
  COSTS_N_INSNS (56),	/* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
};
static const
struct processor_costs athlon_cost = {
  COSTS_N_INSNS (1),	/* cost of an add instruction */
  COSTS_N_INSNS (2),	/* cost of a lea instruction */
  COSTS_N_INSNS (1),	/* variable shift costs */
  COSTS_N_INSNS (1),	/* constant shift costs */
  {COSTS_N_INSNS (5),	/* cost of starting multiply for QI */
   COSTS_N_INSNS (5),	/* HI */
   COSTS_N_INSNS (5),	/* SI */
   COSTS_N_INSNS (5),	/* DI */
   COSTS_N_INSNS (5)},	/* other */
  0,			/* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),	/* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),	/* HI */
   COSTS_N_INSNS (42),	/* SI */
   COSTS_N_INSNS (74),	/* DI */
   COSTS_N_INSNS (74)},	/* other */
  COSTS_N_INSNS (1),	/* cost of movsx */
  COSTS_N_INSNS (1),	/* cost of movzx */
  8,			/* "large" insn */
  4,			/* cost for loading QImode using movzbl */
  {3, 4, 3},		/* cost of loading integer registers
			   in QImode, HImode and SImode.
			   Relative to reg-reg move (2).  */
  {3, 4, 3},		/* cost of storing integer registers */
  4,			/* cost of reg,reg fld/fst */
  {4, 4, 12},		/* cost of loading fp registers
			   in SFmode, DFmode and XFmode */
  {6, 6, 8},		/* cost of storing fp registers
			   in SFmode, DFmode and XFmode */
  2,			/* cost of moving MMX register */
  {4, 4},		/* cost of loading MMX registers
			   in SImode and DImode */
  {4, 4},		/* cost of storing MMX registers
			   in SImode and DImode */
  2,			/* cost of moving SSE register */
  {4, 4, 6},		/* cost of loading SSE registers
			   in SImode, DImode and TImode */
  {4, 4, 5},		/* cost of storing SSE registers
			   in SImode, DImode and TImode */
  5,			/* MMX or SSE register to integer */
  64,			/* size of prefetch block */
  6,			/* number of parallel prefetches */
  COSTS_N_INSNS (4),	/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),	/* cost of FMUL instruction.  */
  COSTS_N_INSNS (24),	/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),	/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),	/* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),	/* cost of FSQRT instruction.  */
  /* For some reason, Athlon deals better with the REP prefix (relative to
     loops) than K8 does.  Alignment becomes important after 8 bytes for
     memcpy and 128 bytes for memset.  */
  {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
};
static const
struct processor_costs k8_cost = {
  COSTS_N_INSNS (1),	/* cost of an add instruction */
  COSTS_N_INSNS (2),	/* cost of a lea instruction */
  COSTS_N_INSNS (1),	/* variable shift costs */
  COSTS_N_INSNS (1),	/* constant shift costs */
  {COSTS_N_INSNS (3),	/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),	/* HI */
   COSTS_N_INSNS (3),	/* SI */
   COSTS_N_INSNS (4),	/* DI */
   COSTS_N_INSNS (5)},	/* other */
  0,			/* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),	/* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),	/* HI */
   COSTS_N_INSNS (42),	/* SI */
   COSTS_N_INSNS (74),	/* DI */
   COSTS_N_INSNS (74)},	/* other */
  COSTS_N_INSNS (1),	/* cost of movsx */
  COSTS_N_INSNS (1),	/* cost of movzx */
  8,			/* "large" insn */
  4,			/* cost for loading QImode using movzbl */
  {3, 4, 3},		/* cost of loading integer registers
			   in QImode, HImode and SImode.
			   Relative to reg-reg move (2).  */
  {3, 4, 3},		/* cost of storing integer registers */
  4,			/* cost of reg,reg fld/fst */
  {4, 4, 12},		/* cost of loading fp registers
			   in SFmode, DFmode and XFmode */
  {6, 6, 8},		/* cost of storing fp registers
			   in SFmode, DFmode and XFmode */
  2,			/* cost of moving MMX register */
  {3, 3},		/* cost of loading MMX registers
			   in SImode and DImode */
  {4, 4},		/* cost of storing MMX registers
			   in SImode and DImode */
  2,			/* cost of moving SSE register */
  {4, 3, 6},		/* cost of loading SSE registers
			   in SImode, DImode and TImode */
  {4, 4, 5},		/* cost of storing SSE registers
			   in SImode, DImode and TImode */
  5,			/* MMX or SSE register to integer */
  64,			/* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100,			/* number of parallel prefetches */
  COSTS_N_INSNS (4),	/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),	/* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),	/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),	/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),	/* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),	/* cost of FSQRT instruction.  */
  /* K8 has optimized REP instruction for medium sized blocks, but for very
     small blocks it is better to use a loop.  For large blocks, a libcall
     can do nontemporal accesses and beat inline code considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
	      {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}}
};
static const
struct processor_costs amdfam10_cost = {
  COSTS_N_INSNS (1),	/* cost of an add instruction */
  COSTS_N_INSNS (2),	/* cost of a lea instruction */
  COSTS_N_INSNS (1),	/* variable shift costs */
  COSTS_N_INSNS (1),	/* constant shift costs */
  {COSTS_N_INSNS (3),	/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),	/* HI */
   COSTS_N_INSNS (3),	/* SI */
   COSTS_N_INSNS (4),	/* DI */
   COSTS_N_INSNS (5)},	/* other */
  0,			/* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),	/* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),	/* HI */
   COSTS_N_INSNS (51),	/* SI */
   COSTS_N_INSNS (83),	/* DI */
   COSTS_N_INSNS (83)},	/* other */
  COSTS_N_INSNS (1),	/* cost of movsx */
  COSTS_N_INSNS (1),	/* cost of movzx */
  8,			/* "large" insn */
  4,			/* cost for loading QImode using movzbl */
  {3, 4, 3},		/* cost of loading integer registers
			   in QImode, HImode and SImode.
			   Relative to reg-reg move (2).  */
  {3, 4, 3},		/* cost of storing integer registers */
  4,			/* cost of reg,reg fld/fst */
  {4, 4, 12},		/* cost of loading fp registers
			   in SFmode, DFmode and XFmode */
  {6, 6, 8},		/* cost of storing fp registers
			   in SFmode, DFmode and XFmode */
  2,			/* cost of moving MMX register */
  {3, 3},		/* cost of loading MMX registers
			   in SImode and DImode */
  {4, 4},		/* cost of storing MMX registers
			   in SImode and DImode */
  2,			/* cost of moving SSE register */
  {4, 4, 3},		/* cost of loading SSE registers
			   in SImode, DImode and TImode */
  {4, 4, 5},		/* cost of storing SSE registers
			   in SImode, DImode and TImode */
  3,			/* MMX or SSE register to integer */
			/* On K8:
			     MOVD reg64, xmmreg  Double  FSTORE 4
			     MOVD reg32, xmmreg  Double  FSTORE 4
			   On AMDFAM10:
			     MOVD reg64, xmmreg  Double  FADD 3
			     MOVD reg32, xmmreg  Double  FADD 3  */
  64,			/* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100,			/* number of parallel prefetches */
  COSTS_N_INSNS (4),	/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),	/* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),	/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),	/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),	/* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),	/* cost of FSQRT instruction.  */
  /* AMDFAM10 has optimized REP instruction for medium sized blocks, but for
     very small blocks it is better to use a loop.  For large blocks, a
     libcall can do nontemporal accesses and beat inline code considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
	      {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}}
};
static const
struct processor_costs pentium4_cost = {
  COSTS_N_INSNS (1),	/* cost of an add instruction */
  COSTS_N_INSNS (3),	/* cost of a lea instruction */
  COSTS_N_INSNS (4),	/* variable shift costs */
  COSTS_N_INSNS (4),	/* constant shift costs */
  {COSTS_N_INSNS (15),	/* cost of starting multiply for QI */
   COSTS_N_INSNS (15),	/* HI */
   COSTS_N_INSNS (15),	/* SI */
   COSTS_N_INSNS (15),	/* DI */
   COSTS_N_INSNS (15)},	/* other */
  0,			/* cost of multiply per each bit set */
  {COSTS_N_INSNS (56),	/* cost of a divide/mod for QI */
   COSTS_N_INSNS (56),	/* HI */
   COSTS_N_INSNS (56),	/* SI */
   COSTS_N_INSNS (56),	/* DI */
   COSTS_N_INSNS (56)},	/* other */
  COSTS_N_INSNS (1),	/* cost of movsx */
  COSTS_N_INSNS (1),	/* cost of movzx */
  16,			/* "large" insn */
  2,			/* cost for loading QImode using movzbl */
  {4, 5, 4},		/* cost of loading integer registers
			   in QImode, HImode and SImode.
			   Relative to reg-reg move (2).  */
  {2, 3, 2},		/* cost of storing integer registers */
  2,			/* cost of reg,reg fld/fst */
  {2, 2, 6},		/* cost of loading fp registers
			   in SFmode, DFmode and XFmode */
  {4, 4, 6},		/* cost of storing fp registers
			   in SFmode, DFmode and XFmode */
  2,			/* cost of moving MMX register */
  {2, 2},		/* cost of loading MMX registers
			   in SImode and DImode */
  {2, 2},		/* cost of storing MMX registers
			   in SImode and DImode */
  12,			/* cost of moving SSE register */
  {12, 12, 12},		/* cost of loading SSE registers
			   in SImode, DImode and TImode */
  {2, 2, 8},		/* cost of storing SSE registers
			   in SImode, DImode and TImode */
  10,			/* MMX or SSE register to integer */
  64,			/* size of prefetch block */
  6,			/* number of parallel prefetches */
  COSTS_N_INSNS (5),	/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (7),	/* cost of FMUL instruction.  */
  COSTS_N_INSNS (43),	/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),	/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),	/* cost of FCHS instruction.  */
  COSTS_N_INSNS (43),	/* cost of FSQRT instruction.  */
  {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
	      {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
};
static const
struct processor_costs nocona_cost = {
  COSTS_N_INSNS (1),	/* cost of an add instruction */
  COSTS_N_INSNS (1),	/* cost of a lea instruction */
  COSTS_N_INSNS (1),	/* variable shift costs */
  COSTS_N_INSNS (1),	/* constant shift costs */
  {COSTS_N_INSNS (10),	/* cost of starting multiply for QI */
   COSTS_N_INSNS (10),	/* HI */
   COSTS_N_INSNS (10),	/* SI */
   COSTS_N_INSNS (10),	/* DI */
   COSTS_N_INSNS (10)},	/* other */
  0,			/* cost of multiply per each bit set */
  {COSTS_N_INSNS (66),	/* cost of a divide/mod for QI */
   COSTS_N_INSNS (66),	/* HI */
   COSTS_N_INSNS (66),	/* SI */
   COSTS_N_INSNS (66),	/* DI */
   COSTS_N_INSNS (66)},	/* other */
  COSTS_N_INSNS (1),	/* cost of movsx */
  COSTS_N_INSNS (1),	/* cost of movzx */
  16,			/* "large" insn */
  4,			/* cost for loading QImode using movzbl */
  {4, 4, 4},		/* cost of loading integer registers
			   in QImode, HImode and SImode.
			   Relative to reg-reg move (2).  */
  {4, 4, 4},		/* cost of storing integer registers */
  3,			/* cost of reg,reg fld/fst */
  {12, 12, 12},		/* cost of loading fp registers
			   in SFmode, DFmode and XFmode */
  {4, 4, 4},		/* cost of storing fp registers
			   in SFmode, DFmode and XFmode */
  6,			/* cost of moving MMX register */
  {12, 12},		/* cost of loading MMX registers
			   in SImode and DImode */
  {12, 12},		/* cost of storing MMX registers
			   in SImode and DImode */
  6,			/* cost of moving SSE register */
  {12, 12, 12},		/* cost of loading SSE registers
			   in SImode, DImode and TImode */
  {12, 12, 12},		/* cost of storing SSE registers
			   in SImode, DImode and TImode */
  8,			/* MMX or SSE register to integer */
  128,			/* size of prefetch block */
  8,			/* number of parallel prefetches */
  COSTS_N_INSNS (6),	/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),	/* cost of FMUL instruction.  */
  COSTS_N_INSNS (40),	/* cost of FDIV instruction.  */
  COSTS_N_INSNS (3),	/* cost of FABS instruction.  */
  COSTS_N_INSNS (3),	/* cost of FCHS instruction.  */
  COSTS_N_INSNS (44),	/* cost of FSQRT instruction.  */
  {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
   {libcall, {{32, loop}, {20000, rep_prefix_8_byte},
	      {100000, unrolled_loop}, {-1, libcall}}}},
  {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
	      {-1, libcall}}},
   {libcall, {{24, loop}, {64, unrolled_loop},
	      {8192, rep_prefix_8_byte}, {-1, libcall}}}}
};
static const
struct processor_costs core2_cost = {
  COSTS_N_INSNS (1),	/* cost of an add instruction */
  COSTS_N_INSNS (1) + 1,	/* cost of a lea instruction */
  COSTS_N_INSNS (1),	/* variable shift costs */
  COSTS_N_INSNS (1),	/* constant shift costs */
  {COSTS_N_INSNS (3),	/* cost of starting multiply for QI */
   COSTS_N_INSNS (3),	/* HI */
   COSTS_N_INSNS (3),	/* SI */
   COSTS_N_INSNS (3),	/* DI */
   COSTS_N_INSNS (3)},	/* other */
  0,			/* cost of multiply per each bit set */
  {COSTS_N_INSNS (22),	/* cost of a divide/mod for QI */
   COSTS_N_INSNS (22),	/* HI */
   COSTS_N_INSNS (22),	/* SI */
   COSTS_N_INSNS (22),	/* DI */
   COSTS_N_INSNS (22)},	/* other */
  COSTS_N_INSNS (1),	/* cost of movsx */
  COSTS_N_INSNS (1),	/* cost of movzx */
  8,			/* "large" insn */
  2,			/* cost for loading QImode using movzbl */
  {6, 6, 6},		/* cost of loading integer registers
			   in QImode, HImode and SImode.
			   Relative to reg-reg move (2).  */
  {4, 4, 4},		/* cost of storing integer registers */
  2,			/* cost of reg,reg fld/fst */
  {6, 6, 6},		/* cost of loading fp registers
			   in SFmode, DFmode and XFmode */
  {4, 4, 4},		/* cost of storing fp registers
			   in SFmode, DFmode and XFmode */
  2,			/* cost of moving MMX register */
  {6, 6},		/* cost of loading MMX registers
			   in SImode and DImode */
  {4, 4},		/* cost of storing MMX registers
			   in SImode and DImode */
  2,			/* cost of moving SSE register */
  {6, 6, 6},		/* cost of loading SSE registers
			   in SImode, DImode and TImode */
  {4, 4, 4},		/* cost of storing SSE registers
			   in SImode, DImode and TImode */
  2,			/* MMX or SSE register to integer */
  128,			/* size of prefetch block */
  8,			/* number of parallel prefetches */
  COSTS_N_INSNS (3),	/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (5),	/* cost of FMUL instruction.  */
  COSTS_N_INSNS (32),	/* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),	/* cost of FABS instruction.  */
  COSTS_N_INSNS (1),	/* cost of FCHS instruction.  */
  COSTS_N_INSNS (58),	/* cost of FSQRT instruction.  */
  {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{32, loop}, {64, rep_prefix_4_byte},
	      {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {15, unrolled_loop},
	      {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{24, loop}, {32, unrolled_loop},
	      {8192, rep_prefix_8_byte}, {-1, libcall}}}}
};
/* Generic64 should produce code tuned for Nocona and K8.  */
static const
struct processor_costs generic64_cost = {
  COSTS_N_INSNS (1),	/* cost of an add instruction */
  /* On all chips taken into consideration, lea is 2 cycles or more.  With
     this cost, however, our current implementation of synth_mult results in
     use of unnecessary temporary registers, causing a regression on several
     SPECfp benchmarks.  */
  COSTS_N_INSNS (1) + 1,	/* cost of a lea instruction */
  COSTS_N_INSNS (1),	/* variable shift costs */
  COSTS_N_INSNS (1),	/* constant shift costs */
  {COSTS_N_INSNS (3),	/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),	/* HI */
   COSTS_N_INSNS (3),	/* SI */
   COSTS_N_INSNS (4),	/* DI */
   COSTS_N_INSNS (2)},	/* other */
  0,			/* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),	/* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),	/* HI */
   COSTS_N_INSNS (42),	/* SI */
   COSTS_N_INSNS (74),	/* DI */
   COSTS_N_INSNS (74)},	/* other */
  COSTS_N_INSNS (1),	/* cost of movsx */
  COSTS_N_INSNS (1),	/* cost of movzx */
  8,			/* "large" insn */
  4,			/* cost for loading QImode using movzbl */
  {4, 4, 4},		/* cost of loading integer registers
			   in QImode, HImode and SImode.
			   Relative to reg-reg move (2).  */
  {4, 4, 4},		/* cost of storing integer registers */
  4,			/* cost of reg,reg fld/fst */
  {12, 12, 12},		/* cost of loading fp registers
			   in SFmode, DFmode and XFmode */
  {6, 6, 8},		/* cost of storing fp registers
			   in SFmode, DFmode and XFmode */
  2,			/* cost of moving MMX register */
  {8, 8},		/* cost of loading MMX registers
			   in SImode and DImode */
  {8, 8},		/* cost of storing MMX registers
			   in SImode and DImode */
  2,			/* cost of moving SSE register */
  {8, 8, 8},		/* cost of loading SSE registers
			   in SImode, DImode and TImode */
  {8, 8, 8},		/* cost of storing SSE registers
			   in SImode, DImode and TImode */
  5,			/* MMX or SSE register to integer */
  64,			/* size of prefetch block */
  6,			/* number of parallel prefetches */
  /* Benchmarks show large regressions on the K8 sixtrack benchmark when this
     value is increased to the perhaps more appropriate value of 5.  */
  COSTS_N_INSNS (8),	/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),	/* cost of FMUL instruction.  */
  COSTS_N_INSNS (20),	/* cost of FDIV instruction.  */
  COSTS_N_INSNS (8),	/* cost of FABS instruction.  */
  COSTS_N_INSNS (8),	/* cost of FCHS instruction.  */
  COSTS_N_INSNS (40),	/* cost of FSQRT instruction.  */
  {DUMMY_STRINGOP_ALGS,
   {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {DUMMY_STRINGOP_ALGS,
   {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}}
};
/* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona
   and K8.  */
static const
struct processor_costs generic32_cost = {
  COSTS_N_INSNS (1),	/* cost of an add instruction */
  COSTS_N_INSNS (1) + 1,	/* cost of a lea instruction */
  COSTS_N_INSNS (1),	/* variable shift costs */
  COSTS_N_INSNS (1),	/* constant shift costs */
  {COSTS_N_INSNS (3),	/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),	/* HI */
   COSTS_N_INSNS (3),	/* SI */
   COSTS_N_INSNS (4),	/* DI */
   COSTS_N_INSNS (2)},	/* other */
  0,			/* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),	/* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),	/* HI */
   COSTS_N_INSNS (42),	/* SI */
   COSTS_N_INSNS (74),	/* DI */
   COSTS_N_INSNS (74)},	/* other */
  COSTS_N_INSNS (1),	/* cost of movsx */
  COSTS_N_INSNS (1),	/* cost of movzx */
  8,			/* "large" insn */
  4,			/* cost for loading QImode using movzbl */
  {4, 4, 4},		/* cost of loading integer registers
			   in QImode, HImode and SImode.
			   Relative to reg-reg move (2).  */
  {4, 4, 4},		/* cost of storing integer registers */
  4,			/* cost of reg,reg fld/fst */
  {12, 12, 12},		/* cost of loading fp registers
			   in SFmode, DFmode and XFmode */
  {6, 6, 8},		/* cost of storing fp registers
			   in SFmode, DFmode and XFmode */
  2,			/* cost of moving MMX register */
  {8, 8},		/* cost of loading MMX registers
			   in SImode and DImode */
  {8, 8},		/* cost of storing MMX registers
			   in SImode and DImode */
  2,			/* cost of moving SSE register */
  {8, 8, 8},		/* cost of loading SSE registers
			   in SImode, DImode and TImode */
  {8, 8, 8},		/* cost of storing SSE registers
			   in SImode, DImode and TImode */
  5,			/* MMX or SSE register to integer */
  64,			/* size of prefetch block */
  6,			/* number of parallel prefetches */
  COSTS_N_INSNS (8),	/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),	/* cost of FMUL instruction.  */
  COSTS_N_INSNS (20),	/* cost of FDIV instruction.  */
  COSTS_N_INSNS (8),	/* cost of FABS instruction.  */
  COSTS_N_INSNS (8),	/* cost of FCHS instruction.  */
  COSTS_N_INSNS (40),	/* cost of FSQRT instruction.  */
  {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
};
const struct processor_costs *ix86_cost = &pentium_cost;
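/* Note (added for clarity): ix86_cost starts out pointing at pentium_cost;
   override_options later repoints it at the processor_target_table entry
   matching the selected -mtune CPU, so cost queries elsewhere simply read
   ix86_cost->field.  */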
/* Processor feature/optimization bitmasks.  */
#define m_386 (1<<PROCESSOR_I386)
#define m_486 (1<<PROCESSOR_I486)
#define m_PENT (1<<PROCESSOR_PENTIUM)
#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
#define m_PENT4 (1<<PROCESSOR_PENTIUM4)
#define m_NOCONA (1<<PROCESSOR_NOCONA)
#define m_CORE2 (1<<PROCESSOR_CORE2)

#define m_GEODE (1<<PROCESSOR_GEODE)
#define m_K6 (1<<PROCESSOR_K6)
#define m_K6_GEODE (m_K6 | m_GEODE)
#define m_K8 (1<<PROCESSOR_K8)
#define m_ATHLON (1<<PROCESSOR_ATHLON)
#define m_ATHLON_K8 (m_K8 | m_ATHLON)
#define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
#define m_ATHLON_K8_AMDFAM10 (m_K8 | m_ATHLON | m_AMDFAM10)

#define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
#define m_GENERIC64 (1<<PROCESSOR_GENERIC64)

/* Generic instruction choice should be a common subset of supported CPUs
   (PPro/PENT4/NOCONA/CORE2/Athlon/K8).  */
#define m_GENERIC (m_GENERIC32 | m_GENERIC64)
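/* Illustrative only (not part of the original file): each m_* macro is a
   bit (or union of bits) indexed by PROCESSOR_*, so testing whether the
   active tuning belongs to a feature set is a single AND against a mask
   like (1u << ix86_tune).  A sketch with a hypothetical helper, kept out
   of the build.  */
#if 0
static int
example_tuned_for (unsigned int feature_set)
{
  return (feature_set & (1u << ix86_tune)) != 0;
}
#endif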
/* Feature tests against the various tunings.  */
unsigned int ix86_tune_features[X86_TUNE_LAST] = {
  /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
     negatively, so enabling it for Generic64 seems like a good code-size
     tradeoff.  We can't enable it for 32bit generic because it does not
     work well with PPro base chips.  */
  m_386 | m_K6_GEODE | m_ATHLON_K8_AMDFAM10 | m_CORE2 | m_GENERIC64,

  /* X86_TUNE_PUSH_MEMORY */
  m_386 | m_K6_GEODE | m_ATHLON_K8_AMDFAM10 | m_PENT4
  | m_NOCONA | m_CORE2 | m_GENERIC,

  /* X86_TUNE_ZERO_EXTEND_WITH_AND */

  /* X86_TUNE_USE_BIT_TEST */

  /* X86_TUNE_UNROLL_STRLEN */
  m_486 | m_PENT | m_PPRO | m_ATHLON_K8_AMDFAM10 | m_K6 | m_CORE2 | m_GENERIC,

  /* X86_TUNE_DEEP_BRANCH_PREDICTION */
  m_PPRO | m_K6_GEODE | m_ATHLON_K8_AMDFAM10 | m_PENT4
  | m_NOCONA | m_CORE2 | m_GENERIC,

  /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
     on simulation results.  But after P4 was made, no performance benefit
     was observed with branch hints.  It also increases the code size.
     As a result, icc never generates branch hints.  */

  /* X86_TUNE_DOUBLE_WITH_ADD */

  /* X86_TUNE_USE_SAHF */
  m_PPRO | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_PENT4
  | m_NOCONA | m_CORE2 | m_GENERIC,

  /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
     partial dependencies.  */
  m_ATHLON_K8_AMDFAM10 | m_PPRO | m_PENT4 | m_NOCONA
  | m_CORE2 | m_GENERIC | m_GEODE /* m_386 | m_K6 */,

  /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
     register stalls on the Generic32 compilation setting as well.  However,
     in the current implementation partial register stalls are not eliminated
     very well: they can be introduced via subregs synthesized by combine,
     and can happen in caller/callee saving sequences.  Because this option
     pays back little on PPro based chips and conflicts with the partial-reg
     dependencies used by Athlon/P4 based chips, it is better to leave it off
     for generic32 for now.  */

  /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
  m_CORE2 | m_GENERIC,

  /* X86_TUNE_USE_HIMODE_FIOP */
  m_386 | m_486 | m_K6_GEODE,

  /* X86_TUNE_USE_SIMODE_FIOP */
  ~(m_PPRO | m_ATHLON_K8_AMDFAM10 | m_PENT | m_CORE2 | m_GENERIC),

  /* X86_TUNE_USE_MOV0 */

  /* X86_TUNE_USE_CLTD */
  ~(m_PENT | m_K6 | m_CORE2 | m_GENERIC),

  /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx.  */

  /* X86_TUNE_SPLIT_LONG_MOVES */

  /* X86_TUNE_READ_MODIFY_WRITE */

  /* X86_TUNE_READ_MODIFY */

  /* X86_TUNE_PROMOTE_QIMODE */
  m_K6_GEODE | m_PENT | m_386 | m_486 | m_ATHLON_K8_AMDFAM10 | m_CORE2
  | m_GENERIC /* | m_PENT4 ? */,

  /* X86_TUNE_FAST_PREFIX */
  ~(m_PENT | m_486 | m_386),

  /* X86_TUNE_SINGLE_STRINGOP */
  m_386 | m_PENT4 | m_NOCONA,

  /* X86_TUNE_QIMODE_MATH */

  /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
     register stalls.  Just like X86_TUNE_PARTIAL_REG_STALL, this option
     might be considered for Generic32 if our scheme for avoiding partial
     stalls were more effective.  */

  /* X86_TUNE_PROMOTE_QI_REGS */

  /* X86_TUNE_PROMOTE_HI_REGS */

  /* X86_TUNE_ADD_ESP_4: Enable if add/sub is preferred over 1/2 push/pop.  */
  m_ATHLON_K8_AMDFAM10 | m_K6_GEODE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,

  /* X86_TUNE_ADD_ESP_8 */
  m_ATHLON_K8_AMDFAM10 | m_PPRO | m_K6_GEODE | m_386
  | m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,

  /* X86_TUNE_SUB_ESP_4 */
  m_ATHLON_K8_AMDFAM10 | m_PPRO | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,

  /* X86_TUNE_SUB_ESP_8 */
  m_ATHLON_K8_AMDFAM10 | m_PPRO | m_386 | m_486
  | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,

  /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
     for DFmode copies */
  ~(m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
    | m_GENERIC | m_GEODE),

  /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
  m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,

  /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
     conflict here between PPro/Pentium4 based chips that treat 128bit
     SSE registers as single units and K8 based chips that divide SSE
     registers into two 64bit halves.  This knob promotes all store
     destinations to 128bit to allow register renaming on 128bit SSE units,
     but usually results in one extra microop on 64bit SSE units.
     Experimental results show that disabling this option on P4 brings over
     20% SPECfp regression, while enabling it on K8 brings roughly 2.4%
     regression that can be partly masked by careful scheduling of moves.  */
  m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC | m_AMDFAM10,

  /* X86_TUNE_SSE_UNALIGNED_MOVE_OPTIMAL */

  /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
     are resolved on SSE register parts instead of whole registers, so we may
     maintain just the lower part of scalar values in proper format, leaving
     the upper part undefined.  */

  /* X86_TUNE_SSE_TYPELESS_STORES */
  m_ATHLON_K8_AMDFAM10,

  /* X86_TUNE_SSE_LOAD0_BY_PXOR */
  m_PPRO | m_PENT4 | m_NOCONA,

  /* X86_TUNE_MEMORY_MISMATCH_STALL */
  m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,

  /* X86_TUNE_PROLOGUE_USING_MOVE */
  m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC,

  /* X86_TUNE_EPILOGUE_USING_MOVE */
  m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC,

  /* X86_TUNE_SHIFT1 */

  /* X86_TUNE_USE_FFREEP */
  m_ATHLON_K8_AMDFAM10,

  /* X86_TUNE_INTER_UNIT_MOVES */
  ~(m_ATHLON_K8_AMDFAM10 | m_GENERIC),

  /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
     than 4 branch instructions in the 16 byte window.  */
  m_PPRO | m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,

  /* X86_TUNE_SCHEDULE */
  m_PPRO | m_ATHLON_K8_AMDFAM10 | m_K6_GEODE | m_PENT | m_CORE2 | m_GENERIC,

  /* X86_TUNE_USE_BT */
  m_ATHLON_K8_AMDFAM10,

  /* X86_TUNE_USE_INCDEC */
  ~(m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC),

  /* X86_TUNE_PAD_RETURNS */
  m_ATHLON_K8_AMDFAM10 | m_CORE2 | m_GENERIC,

  /* X86_TUNE_EXT_80387_CONSTANTS */
  m_K6_GEODE | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC,

  /* X86_TUNE_SHORTEN_X87_SSE */

  /* X86_TUNE_AVOID_VECTOR_DECODE */

  /* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have the same latency for
     HImode and SImode multiply, but 386 and 486 do HImode multiply
     faster.  */

  /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of 32-bit constant and memory is
     vector path on AMD machines.  */
  m_K8 | m_GENERIC64 | m_AMDFAM10,

  /* X86_TUNE_SLOW_IMUL_IMM8: Imul of 8-bit constant is vector path on AMD
     machines.  */
  m_K8 | m_GENERIC64 | m_AMDFAM10,

  /* X86_TUNE_MOVE_M1_VIA_OR: On pentiums, it is faster to load -1 via OR
     than a mov.  */

  /* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is,
     but one byte longer.  */

  /* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector decoded with memory
     operand that cannot be represented using a modRM byte.  The XOR
     replacement is long decoded, so this split helps here as well.  */
};
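/* Illustrative only (not in the original): the TARGET_* tuning macros in
   i386.h test one slot of this array against the mask of the active CPU,
   shaped roughly like the hypothetical check below (kept out of the
   build).  */
#if 0
  if (ix86_tune_features[X86_TUNE_USE_LEAVE] & (1u << ix86_tune))
    /* ... e.g. prefer the "leave" instruction in the epilogue ... */;
#endif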
/* Feature tests against the various architecture variations.  */
unsigned int ix86_arch_features[X86_ARCH_LAST] = {
  /* X86_ARCH_CMOVE */
  m_PPRO | m_GEODE | m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA,

  /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486.  */

  /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for
     Pentium.  */

  /* X86_ARCH_XADD: Exchange and add was added for 80486.  */

  /* X86_ARCH_BSWAP: Byteswap was added for 80486.  */
};
static const unsigned int x86_accumulate_outgoing_args
  = m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC;

static const unsigned int x86_arch_always_fancy_math_387
  = m_PENT | m_PPRO | m_ATHLON_K8_AMDFAM10 | m_PENT4
    | m_NOCONA | m_CORE2 | m_GENERIC;

static enum stringop_alg stringop_alg = no_stringop;

/* In case the average insn count for single function invocation is
   lower than this constant, emit fast (but longer) prologue and
   epilogue code.  */
#define FAST_PROLOGUE_INSN_COUNT 20
/* Names for 8 (low), 8 (high), and 16-bit registers, respectively.  */
static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
static const char *const hi_reg_name[] = HI_REGISTER_NAMES;

/* Array of the smallest class containing reg number REGNO, indexed by
   REGNO.  Used by REGNO_REG_CLASS in i386.h.  */
enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
{
  /* ax, dx, cx, bx */
  AREG, DREG, CREG, BREG,
  /* si, di, bp, sp */
  SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
  /* FP registers */
  FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
  FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
  /* arg pointer */
  NON_Q_REGS,
  /* flags, fpsr, fpcr, frame */
  NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
  /* SSE registers */
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  SSE_REGS, SSE_REGS,
  /* MMX registers */
  MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
  MMX_REGS, MMX_REGS,
  /* REX integer registers */
  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
  /* REX SSE registers */
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  SSE_REGS, SSE_REGS,
};
/* The "default" register map used in 32bit mode.  */

int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 2, 1, 3, 6, 7, 4, 5,		/* general regs */
  12, 13, 14, 15, 16, 17, 18, 19,	/* fp regs */
  -1, -1, -1, -1, -1,			/* arg, flags, fpsr, fpcr, frame */
  21, 22, 23, 24, 25, 26, 27, 28,	/* SSE */
  29, 30, 31, 32, 33, 34, 35, 36,	/* MMX */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended integer registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended SSE registers */
};
static int const x86_64_int_parameter_registers[6] =
{
  5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
  FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
};

static int const x86_64_int_return_registers[4] =
{
  0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
};
/* The "default" register map used in 64bit mode.  */
int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 1, 2, 3, 4, 5, 6, 7,		/* general regs */
  33, 34, 35, 36, 37, 38, 39, 40,	/* fp regs */
  -1, -1, -1, -1, -1,			/* arg, flags, fpsr, fpcr, frame */
  17, 18, 19, 20, 21, 22, 23, 24,	/* SSE */
  41, 42, 43, 44, 45, 46, 47, 48,	/* MMX */
  8, 9, 10, 11, 12, 13, 14, 15,		/* extended integer registers */
  25, 26, 27, 28, 29, 30, 31, 32,	/* extended SSE registers */
};
/* Define the register numbers to be used in Dwarf debugging information.
   The SVR4 reference port C compiler uses the following register numbers
   in its Dwarf output code:
	0 for %eax (gcc regno = 0)
	1 for %ecx (gcc regno = 2)
	2 for %edx (gcc regno = 1)
	3 for %ebx (gcc regno = 3)
	4 for %esp (gcc regno = 7)
	5 for %ebp (gcc regno = 6)
	6 for %esi (gcc regno = 4)
	7 for %edi (gcc regno = 5)
   The following three DWARF register numbers are never generated by
   the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
   believes these numbers have these meanings.
	8  for %eip    (no gcc equivalent)
	9  for %eflags (gcc regno = 17)
	10 for %trapno (no gcc equivalent)
   It is not at all clear how we should number the FP stack registers
   for the x86 architecture.  If the version of SDB on x86/svr4 were
   a bit less brain dead with respect to floating-point then we would
   have a precedent to follow with respect to DWARF register numbers
   for x86 FP registers, but the SDB on x86/svr4 is so completely
   broken with respect to FP registers that it is hardly worth thinking
   of it as something to strive for compatibility with.
   The version of x86/svr4 SDB I have at the moment does (partially)
   seem to believe that DWARF register number 11 is associated with
   the x86 register %st(0), but that's about all.  Higher DWARF
   register numbers don't seem to be associated with anything in
   particular, and even for DWARF regno 11, SDB only seems to understand
   that it should say that a variable lives in %st(0) (when asked via an
   `=' command) if we said it was in DWARF regno 11, but SDB still prints
   garbage when asked for the value of the variable in question (via a
   `/' command).
   (Also note that the labels SDB prints for various FP stack regs
   when doing an `x' command are all wrong.)
   Note that these problems generally don't affect the native SVR4
   C compiler because it doesn't allow the use of -O with -g and
   because when it is *not* optimizing, it allocates a memory
   location for each floating-point variable, and the memory
   location is what gets described in the DWARF AT_location
   attribute for the variable in question.
   Regardless of the severe mental illness of the x86/svr4 SDB, we
   do something sensible here and we use the following DWARF
   register numbers.  Note that these are all stack-top-relative
   numbers.
	11 for %st(0) (gcc regno = 8)
	12 for %st(1) (gcc regno = 9)
	13 for %st(2) (gcc regno = 10)
	14 for %st(3) (gcc regno = 11)
	15 for %st(4) (gcc regno = 12)
	16 for %st(5) (gcc regno = 13)
	17 for %st(6) (gcc regno = 14)
	18 for %st(7) (gcc regno = 15)
  */
int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 2, 1, 3, 6, 7, 5, 4,		/* general regs */
  11, 12, 13, 14, 15, 16, 17, 18,	/* fp regs */
  -1, 9, -1, -1, -1,			/* arg, flags, fpsr, fpcr, frame */
  21, 22, 23, 24, 25, 26, 27, 28,	/* SSE registers */
  29, 30, 31, 32, 33, 34, 35, 36,	/* MMX registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended integer registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended SSE registers */
};
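/* Illustrative only (not in the original): translating a gcc hard register
   number to its SVR4 DWARF number is a plain table lookup; e.g. gcc regno 2
   (%ecx) maps to DWARF regno 1 above, and -1 marks registers with no DWARF
   number.  The helper name is hypothetical and the sketch is kept out of
   the build.  */
#if 0
static int
example_svr4_dwarf_regno (int gcc_regno)
{
  return svr4_dbx_register_map[gcc_regno];
}
#endif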
/* Test and compare insns in i386.md store the information needed to
   generate branch and scc insns here.  */

rtx ix86_compare_op0 = NULL_RTX;
rtx ix86_compare_op1 = NULL_RTX;
rtx ix86_compare_emitted = NULL_RTX;

/* Size of the register save area.  */
#define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
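/* Illustrative arithmetic (not in the original): with the 64-bit ABI values
   REGPARM_MAX == 6, UNITS_PER_WORD == 8 and SSE_REGPARM_MAX == 8, this is
   6*8 + 8*16 = 176 bytes, the size of the va_arg register save area.  */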
/* Define the structure for the machine field in struct function.  */

struct stack_local_entry GTY(())
{
  unsigned short mode;
  unsigned short n;
  rtx rtl;
  struct stack_local_entry *next;
};
/* Structure describing stack frame layout.
   Stack grows downward:

   [arguments]
					      <- ARG_POINTER
   saved pc

   saved frame pointer if frame_pointer_needed
					      <- HARD_FRAME_POINTER
   [saved regs]

   [padding1]          \
			)
   [va_arg registers]  (
			> to_allocate	      <- FRAME_POINTER
   [frame]	       (
			)
   [padding2]	       /
  */
struct ix86_frame
{
  int nregs;
  int padding1;
  int va_arg_size;
  HOST_WIDE_INT frame;
  int padding2;
  int outgoing_arguments_size;
  int red_zone_size;

  HOST_WIDE_INT to_allocate;
  /* The offsets relative to ARG_POINTER.  */
  HOST_WIDE_INT frame_pointer_offset;
  HOST_WIDE_INT hard_frame_pointer_offset;
  HOST_WIDE_INT stack_pointer_offset;

  /* When save_regs_using_mov is set, emit prologue using
     move instead of push instructions.  */
  bool save_regs_using_mov;
};
/* Code model option.  */
enum cmodel ix86_cmodel;
/* Asm dialect.  */
enum asm_dialect ix86_asm_dialect = ASM_ATT;
/* TLS dialect.  */
enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;

/* Which unit we are generating floating point math for.  */
enum fpmath_unit ix86_fpmath;

/* Which CPU we are scheduling for.  */
enum processor_type ix86_tune;

/* Which instruction set architecture to use.  */
enum processor_type ix86_arch;

/* true if sse prefetch instruction is not NOOP.  */
int x86_prefetch_sse;

/* ix86_regparm_string as a number */
static int ix86_regparm;

/* -mstackrealign option */
extern int ix86_force_align_arg_pointer;
static const char ix86_force_align_arg_pointer_string[]
  = "force_align_arg_pointer";

/* Preferred alignment for stack boundary in bits.  */
unsigned int ix86_preferred_stack_boundary;

/* Values 1-5: see jump.c */
int ix86_branch_cost;

/* Variables which are this size or smaller are put in the data/bss
   or ldata/lbss sections.  */

int ix86_section_threshold = 65536;

/* Prefix built by ASM_GENERATE_INTERNAL_LABEL.  */
char internal_label_prefix[16];
int internal_label_prefix_len;
/* Register class used for passing given 64bit part of the argument.
   These represent classes as documented by the PS ABI, with the exception
   of SSESF and SSEDF, which are basically SSE class; gcc just uses SFmode
   or DFmode moves instead of DImode ones to avoid reformatting penalties.

   Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
   whenever possible (the upper half does contain padding).  */
enum x86_64_reg_class
  {
    X86_64_NO_CLASS,
    X86_64_INTEGER_CLASS,
    X86_64_INTEGERSI_CLASS,
    X86_64_SSE_CLASS,
    X86_64_SSESF_CLASS,
    X86_64_SSEDF_CLASS,
    X86_64_SSEUP_CLASS,
    X86_64_X87_CLASS,
    X86_64_X87UP_CLASS,
    X86_64_COMPLEX_X87_CLASS,
    X86_64_MEMORY_CLASS
  };
static const char * const x86_64_reg_class_name[] =
{
  "no", "integer", "integerSI", "sse", "sseSF", "sseDF",
  "sseup", "x87", "x87up", "cplx87", "no"
};

#define MAX_CLASSES 4
/* Table of constants used by fldpi, fldln2, etc.  */
static REAL_VALUE_TYPE ext_80387_constants_table [5];
static bool ext_80387_constants_init = 0;
1528 static struct machine_function * ix86_init_machine_status (void);
1529 static rtx ix86_function_value (tree, tree, bool);
1530 static int ix86_function_regparm (tree, tree);
1531 static void ix86_compute_frame_layout (struct ix86_frame *);
1532 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
1536 /* The svr4 ABI for the i386 says that records and unions are returned in memory.  */
1538 #ifndef DEFAULT_PCC_STRUCT_RETURN
1539 #define DEFAULT_PCC_STRUCT_RETURN 1
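/* An illustrative consequence of this default (the struct and function
   names below are hypothetical):

     struct small { int x; };
     struct small f (void);

   With pcc-struct-return in effect, f returns its value through a
   hidden pointer to caller-provided memory rather than in %eax.  */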
1542 /* Implement TARGET_HANDLE_OPTION. */
1545 ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
1552 target_flags &= ~MASK_3DNOW_A;
1553 target_flags_explicit |= MASK_3DNOW_A;
1560 target_flags &= ~(MASK_3DNOW | MASK_3DNOW_A);
1561 target_flags_explicit |= MASK_3DNOW | MASK_3DNOW_A;
1568 target_flags &= ~(MASK_SSE2 | MASK_SSE3 | MASK_SSE4A);
1569 target_flags_explicit |= MASK_SSE2 | MASK_SSE3 | MASK_SSE4A;
1576 target_flags &= ~(MASK_SSE3 | MASK_SSE4A);
1577 target_flags_explicit |= MASK_SSE3 | MASK_SSE4A;
1584 target_flags &= ~MASK_SSE4A;
1585 target_flags_explicit |= MASK_SSE4A;
1594 /* Sometimes certain combinations of command options do not make
1595 sense on a particular target machine. You can define a macro
1596 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1597 defined, is executed once just after all the command options have been processed.
1600 Don't use this macro to turn on various extra optimizations for
1601 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1604 override_options (void)
1607 int ix86_tune_defaulted = 0;
1608 unsigned int ix86_arch_mask, ix86_tune_mask;
1610 /* Comes from final.c -- no real reason to change it. */
1611 #define MAX_CODE_ALIGN 16
1615 const struct processor_costs *cost; /* Processor costs */
1616 const int target_enable; /* Target flags to enable. */
1617 const int target_disable; /* Target flags to disable. */
1618 const int align_loop; /* Default alignments. */
1619 const int align_loop_max_skip;
1620 const int align_jump;
1621 const int align_jump_max_skip;
1622 const int align_func;
1624 const processor_target_table[PROCESSOR_max] =
1626 {&i386_cost, 0, 0, 4, 3, 4, 3, 4},
1627 {&i486_cost, 0, 0, 16, 15, 16, 15, 16},
1628 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
1629 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
1630 {&geode_cost, 0, 0, 0, 0, 0, 0, 0},
1631 {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
1632 {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
1633 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
1634 {&k8_cost, 0, 0, 16, 7, 16, 7, 16},
1635 {&nocona_cost, 0, 0, 0, 0, 0, 0, 0},
1636 {&core2_cost, 0, 0, 16, 7, 16, 7, 16},
1637 {&generic32_cost, 0, 0, 16, 7, 16, 7, 16},
1638 {&generic64_cost, 0, 0, 16, 7, 16, 7, 16},
1639 {&amdfam10_cost, 0, 0, 32, 7, 32, 7, 32}
1642 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
1645 const char *const name; /* processor name or nickname. */
1646 const enum processor_type processor;
1647 const enum pta_flags
1653 PTA_PREFETCH_SSE = 1 << 4,
1655 PTA_3DNOW_A = 1 << 6,
1659 PTA_POPCNT = 1 << 10,
1661 PTA_SSE4A = 1 << 12,
1662 PTA_NO_SAHF = 1 << 13
1665 const processor_alias_table[] =
1667 {"i386", PROCESSOR_I386, 0},
1668 {"i486", PROCESSOR_I486, 0},
1669 {"i586", PROCESSOR_PENTIUM, 0},
1670 {"pentium", PROCESSOR_PENTIUM, 0},
1671 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
1672 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
1673 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1674 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1675 {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_PREFETCH_SSE | PTA_SSE},
1676 {"i686", PROCESSOR_PENTIUMPRO, 0},
1677 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
1678 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
1679 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1680 {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1681 {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE | PTA_SSE2},
1682 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1683 | PTA_MMX | PTA_PREFETCH_SSE},
1684 {"pentium4m", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1685 | PTA_MMX | PTA_PREFETCH_SSE},
1686 {"prescott", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3
1687 | PTA_MMX | PTA_PREFETCH_SSE},
1688 {"nocona", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_64BIT
1689 | PTA_MMX | PTA_PREFETCH_SSE
1690 | PTA_CX16 | PTA_NO_SAHF},
1691 {"core2", PROCESSOR_CORE2, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_SSSE3
1692 | PTA_64BIT | PTA_MMX
1693 | PTA_PREFETCH_SSE | PTA_CX16},
1694 {"geode", PROCESSOR_GEODE, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1696 {"k6", PROCESSOR_K6, PTA_MMX},
1697 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1698 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1699 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1701 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
1702 | PTA_3DNOW | PTA_3DNOW_A},
1703 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1704 | PTA_3DNOW_A | PTA_SSE},
1705 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1706 | PTA_3DNOW_A | PTA_SSE},
1707 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1708 | PTA_3DNOW_A | PTA_SSE},
1709 {"x86-64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_64BIT
1710 | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF},
1711 {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1712 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
1714 {"opteron", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1715 | PTA_64BIT | PTA_3DNOW_A | PTA_SSE
1716 | PTA_SSE2 | PTA_NO_SAHF},
1717 {"athlon64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1718 | PTA_64BIT | PTA_3DNOW_A | PTA_SSE
1719 | PTA_SSE2 | PTA_NO_SAHF},
1720 {"athlon-fx", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1721 | PTA_64BIT | PTA_3DNOW_A | PTA_SSE
1722 | PTA_SSE2 | PTA_NO_SAHF},
1723 {"amdfam10", PROCESSOR_AMDFAM10, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1724 | PTA_64BIT | PTA_3DNOW_A | PTA_SSE
1725 | PTA_SSE2 | PTA_SSE3 | PTA_POPCNT
1726 | PTA_ABM | PTA_SSE4A | PTA_CX16},
1727 {"barcelona", PROCESSOR_AMDFAM10, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1728 | PTA_64BIT | PTA_3DNOW_A | PTA_SSE
1729 | PTA_SSE2 | PTA_SSE3 | PTA_POPCNT
1730 | PTA_ABM | PTA_SSE4A | PTA_CX16},
1731 {"generic32", PROCESSOR_GENERIC32, 0 /* flags are only used for -march switch. */ },
1732 {"generic64", PROCESSOR_GENERIC64, PTA_64BIT /* flags are only used for -march switch. */ },
1735 int const pta_size = ARRAY_SIZE (processor_alias_table);
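/* Reading the table above (illustrative): -march=k8 selects
   PROCESSOR_K8 and, unless explicitly disabled on the command line,
   enables MMX, 3DNow!, 3DNow!A, SSE and SSE2 through the PTA_* flags;
   PTA_64BIT additionally marks the CPU as x86-64 capable.  */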
1737 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1738 SUBTARGET_OVERRIDE_OPTIONS;
1741 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
1742 SUBSUBTARGET_OVERRIDE_OPTIONS;
1745 /* -fPIC is the default for x86_64. */
1746 if (TARGET_MACHO && TARGET_64BIT)
1749 /* Set the default values for switches whose default depends on TARGET_64BIT
1750 in case they weren't overwritten by command line options. */
1753 /* Mach-O doesn't support omitting the frame pointer for now. */
1754 if (flag_omit_frame_pointer == 2)
1755 flag_omit_frame_pointer = (TARGET_MACHO ? 0 : 1);
1756 if (flag_asynchronous_unwind_tables == 2)
1757 flag_asynchronous_unwind_tables = 1;
1758 if (flag_pcc_struct_return == 2)
1759 flag_pcc_struct_return = 0;
1763 if (flag_omit_frame_pointer == 2)
1764 flag_omit_frame_pointer = 0;
1765 if (flag_asynchronous_unwind_tables == 2)
1766 flag_asynchronous_unwind_tables = 0;
1767 if (flag_pcc_struct_return == 2)
1768 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
1771 /* Need to check -mtune=generic first. */
1772 if (ix86_tune_string)
1774 if (!strcmp (ix86_tune_string, "generic")
1775 || !strcmp (ix86_tune_string, "i686")
1776 /* As special support for cross compilers we read -mtune=native
1777 as -mtune=generic. With native compilers we won't see the
1778 -mtune=native, as it was changed by the driver. */
1779 || !strcmp (ix86_tune_string, "native"))
1782 ix86_tune_string = "generic64";
1784 ix86_tune_string = "generic32";
1786 else if (!strncmp (ix86_tune_string, "generic", 7))
1787 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
1791 if (ix86_arch_string)
1792 ix86_tune_string = ix86_arch_string;
1793 if (!ix86_tune_string)
1795 ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT];
1796 ix86_tune_defaulted = 1;
1799 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
1800 need to use a sensible tune option. */
1801 if (!strcmp (ix86_tune_string, "generic")
1802 || !strcmp (ix86_tune_string, "x86-64")
1803 || !strcmp (ix86_tune_string, "i686"))
1806 ix86_tune_string = "generic64";
1808 ix86_tune_string = "generic32";
1811 if (ix86_stringop_string)
1813 if (!strcmp (ix86_stringop_string, "rep_byte"))
1814 stringop_alg = rep_prefix_1_byte;
1815 else if (!strcmp (ix86_stringop_string, "libcall"))
1816 stringop_alg = libcall;
1817 else if (!strcmp (ix86_stringop_string, "rep_4byte"))
1818 stringop_alg = rep_prefix_4_byte;
1819 else if (!strcmp (ix86_stringop_string, "rep_8byte"))
1820 stringop_alg = rep_prefix_8_byte;
1821 else if (!strcmp (ix86_stringop_string, "byte_loop"))
1822 stringop_alg = loop_1_byte;
1823 else if (!strcmp (ix86_stringop_string, "loop"))
1824 stringop_alg = loop;
1825 else if (!strcmp (ix86_stringop_string, "unrolled_loop"))
1826 stringop_alg = unrolled_loop;
1828 error ("bad value (%s) for -mstringop-strategy= switch", ix86_stringop_string);
1830 if (!strcmp (ix86_tune_string, "x86-64"))
1831 warning (OPT_Wdeprecated, "-mtune=x86-64 is deprecated. Use -mtune=k8 or "
1832 "-mtune=generic instead as appropriate.");
1834 if (!ix86_arch_string)
1835 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
1836 if (!strcmp (ix86_arch_string, "generic"))
1837 error ("generic CPU can be used only for -mtune= switch");
1838 if (!strncmp (ix86_arch_string, "generic", 7))
1839 error ("bad value (%s) for -march= switch", ix86_arch_string);
1841 if (ix86_cmodel_string != 0)
1843 if (!strcmp (ix86_cmodel_string, "small"))
1844 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1845 else if (!strcmp (ix86_cmodel_string, "medium"))
1846 ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
1847 else if (!strcmp (ix86_cmodel_string, "large"))
1848 ix86_cmodel = flag_pic ? CM_LARGE_PIC : CM_LARGE;
1850 error ("code model %s does not support PIC mode", ix86_cmodel_string);
1851 else if (!strcmp (ix86_cmodel_string, "32"))
1852 ix86_cmodel = CM_32;
1853 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
1854 ix86_cmodel = CM_KERNEL;
1856 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
1860 ix86_cmodel = CM_32;
1862 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1864 if (ix86_asm_string != 0)
1867 && !strcmp (ix86_asm_string, "intel"))
1868 ix86_asm_dialect = ASM_INTEL;
1869 else if (!strcmp (ix86_asm_string, "att"))
1870 ix86_asm_dialect = ASM_ATT;
1872 error ("bad value (%s) for -masm= switch", ix86_asm_string);
1874 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
1875 error ("code model %qs not supported in the %s bit mode",
1876 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
1877 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
1878 sorry ("%i-bit mode not compiled in",
1879 (target_flags & MASK_64BIT) ? 64 : 32);
1881 for (i = 0; i < pta_size; i++)
1882 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
1884 ix86_arch = processor_alias_table[i].processor;
1885 /* Default cpu tuning to the architecture. */
1886 ix86_tune = ix86_arch;
1887 if (processor_alias_table[i].flags & PTA_MMX
1888 && !(target_flags_explicit & MASK_MMX))
1889 target_flags |= MASK_MMX;
1890 if (processor_alias_table[i].flags & PTA_3DNOW
1891 && !(target_flags_explicit & MASK_3DNOW))
1892 target_flags |= MASK_3DNOW;
1893 if (processor_alias_table[i].flags & PTA_3DNOW_A
1894 && !(target_flags_explicit & MASK_3DNOW_A))
1895 target_flags |= MASK_3DNOW_A;
1896 if (processor_alias_table[i].flags & PTA_SSE
1897 && !(target_flags_explicit & MASK_SSE))
1898 target_flags |= MASK_SSE;
1899 if (processor_alias_table[i].flags & PTA_SSE2
1900 && !(target_flags_explicit & MASK_SSE2))
1901 target_flags |= MASK_SSE2;
1902 if (processor_alias_table[i].flags & PTA_SSE3
1903 && !(target_flags_explicit & MASK_SSE3))
1904 target_flags |= MASK_SSE3;
1905 if (processor_alias_table[i].flags & PTA_SSSE3
1906 && !(target_flags_explicit & MASK_SSSE3))
1907 target_flags |= MASK_SSSE3;
1908 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1909 x86_prefetch_sse = true;
1910 if (processor_alias_table[i].flags & PTA_CX16)
1911 x86_cmpxchg16b = true;
1912 if (processor_alias_table[i].flags & PTA_POPCNT
1913 && !(target_flags_explicit & MASK_POPCNT))
1914 target_flags |= MASK_POPCNT;
1915 if (processor_alias_table[i].flags & PTA_ABM
1916 && !(target_flags_explicit & MASK_ABM))
1917 target_flags |= MASK_ABM;
1918 if (processor_alias_table[i].flags & PTA_SSE4A
1919 && !(target_flags_explicit & MASK_SSE4A))
1920 target_flags |= MASK_SSE4A;
1921 if (!(TARGET_64BIT && (processor_alias_table[i].flags & PTA_NO_SAHF)))
1923 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1924 error ("CPU you selected does not support x86-64 "
1930 error ("bad value (%s) for -march= switch", ix86_arch_string);
1932 ix86_arch_mask = 1u << ix86_arch;
1933 for (i = 0; i < X86_ARCH_LAST; ++i)
1934 ix86_arch_features[i] &= ix86_arch_mask;
1936 for (i = 0; i < pta_size; i++)
1937 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
1939 ix86_tune = processor_alias_table[i].processor;
1940 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1942 if (ix86_tune_defaulted)
1944 ix86_tune_string = "x86-64";
1945 for (i = 0; i < pta_size; i++)
1946 if (! strcmp (ix86_tune_string,
1947 processor_alias_table[i].name))
1949 ix86_tune = processor_alias_table[i].processor;
1952 error ("CPU you selected does not support x86-64 "
1955 /* Intel CPUs have always interpreted SSE prefetch instructions as
1956 NOPs; so, we can enable SSE prefetch instructions even when
1957 -mtune (rather than -march) points us to a processor that has them.
1958 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
1959 higher processors. */
1960 if (TARGET_CMOVE && (processor_alias_table[i].flags & PTA_PREFETCH_SSE))
1961 x86_prefetch_sse = true;
1965 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
1967 ix86_tune_mask = 1u << ix86_tune;
1968 for (i = 0; i < X86_TUNE_LAST; ++i)
1969 ix86_tune_features[i] &= ix86_tune_mask;
1972 ix86_cost = &size_cost;
1974 ix86_cost = processor_target_table[ix86_tune].cost;
1975 target_flags |= processor_target_table[ix86_tune].target_enable;
1976 target_flags &= ~processor_target_table[ix86_tune].target_disable;
1978 /* Arrange to set up i386_stack_locals for all functions. */
1979 init_machine_status = ix86_init_machine_status;
1981 /* Validate -mregparm= value. */
1982 if (ix86_regparm_string)
1984 i = atoi (ix86_regparm_string);
1985 if (i < 0 || i > REGPARM_MAX)
1986 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
1992 ix86_regparm = REGPARM_MAX;
1994 /* If the user has provided any of the -malign-* options,
1995 warn and use that value only if -falign-* is not set.
1996 Remove this code in GCC 3.2 or later. */
1997 if (ix86_align_loops_string)
1999 warning (0, "-malign-loops is obsolete, use -falign-loops");
2000 if (align_loops == 0)
2002 i = atoi (ix86_align_loops_string);
2003 if (i < 0 || i > MAX_CODE_ALIGN)
2004 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2006 align_loops = 1 << i;
2010 if (ix86_align_jumps_string)
2012 warning (0, "-malign-jumps is obsolete, use -falign-jumps");
2013 if (align_jumps == 0)
2015 i = atoi (ix86_align_jumps_string);
2016 if (i < 0 || i > MAX_CODE_ALIGN)
2017 error ("-malign-jumps=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2019 align_jumps = 1 << i;
2023 if (ix86_align_funcs_string)
2025 warning (0, "-malign-functions is obsolete, use -falign-functions");
2026 if (align_functions == 0)
2028 i = atoi (ix86_align_funcs_string);
2029 if (i < 0 || i > MAX_CODE_ALIGN)
2030 error ("-malign-functions=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2032 align_functions = 1 << i;
2036 /* Default align_* from the processor table. */
2037 if (align_loops == 0)
2039 align_loops = processor_target_table[ix86_tune].align_loop;
2040 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
2042 if (align_jumps == 0)
2044 align_jumps = processor_target_table[ix86_tune].align_jump;
2045 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
2047 if (align_functions == 0)
2049 align_functions = processor_target_table[ix86_tune].align_func;
2052 /* Validate -mbranch-cost= value, or provide default. */
2053 ix86_branch_cost = ix86_cost->branch_cost;
2054 if (ix86_branch_cost_string)
2056 i = atoi (ix86_branch_cost_string);
2058 error ("-mbranch-cost=%d is not between 0 and 5", i);
2060 ix86_branch_cost = i;
2062 if (ix86_section_threshold_string)
2064 i = atoi (ix86_section_threshold_string);
2066 error ("-mlarge-data-threshold=%d is negative", i);
2068 ix86_section_threshold = i;
2071 if (ix86_tls_dialect_string)
2073 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
2074 ix86_tls_dialect = TLS_DIALECT_GNU;
2075 else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
2076 ix86_tls_dialect = TLS_DIALECT_GNU2;
2077 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
2078 ix86_tls_dialect = TLS_DIALECT_SUN;
2080 error ("bad value (%s) for -mtls-dialect= switch",
2081 ix86_tls_dialect_string);
2084 /* Keep nonleaf frame pointers. */
2085 if (flag_omit_frame_pointer)
2086 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
2087 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
2088 flag_omit_frame_pointer = 1;
2090 /* If we're doing fast math, we don't care about comparison order
2091 wrt NaNs. This lets us use a shorter comparison sequence. */
2092 if (flag_finite_math_only)
2093 target_flags &= ~MASK_IEEE_FP;
2095 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
2096 since the insns won't need emulation. */
2097 if (x86_arch_always_fancy_math_387 & ix86_arch_mask)
2098 target_flags &= ~MASK_NO_FANCY_MATH_387;
2100 /* Likewise, if the target doesn't have a 387, or we've specified
2101 software floating point, don't use 387 inline intrinsics. */
2103 target_flags |= MASK_NO_FANCY_MATH_387;
2105 /* Turn on SSE3 builtins for -mssse3. */
2107 target_flags |= MASK_SSE3;
2109 /* Turn on SSE3 builtins for -msse4a. */
2111 target_flags |= MASK_SSE3;
2113 /* Turn on SSE2 builtins for -msse3. */
2115 target_flags |= MASK_SSE2;
2117 /* Turn on SSE builtins for -msse2. */
2119 target_flags |= MASK_SSE;
2121 /* Turn on MMX builtins for -msse. */
2124 target_flags |= MASK_MMX & ~target_flags_explicit;
2125 x86_prefetch_sse = true;
2128 /* Turn on MMX builtins for 3Dnow. */
2130 target_flags |= MASK_MMX;
2132 /* Turn on POPCNT builtins for -mabm. */
2134 target_flags |= MASK_POPCNT;
2138 if (TARGET_ALIGN_DOUBLE)
2139 error ("-malign-double makes no sense in the 64bit mode");
2141 error ("-mrtd calling convention not supported in the 64bit mode");
2143 /* Enable by default the SSE and MMX builtins. Do allow the user to
2144 explicitly disable any of these. In particular, disabling SSE and
2145 MMX for kernel code is extremely useful. */
2147 |= ((MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE)
2148 & ~target_flags_explicit);
2152 /* The i386 ABI does not specify a red zone.  It still makes sense to use one
2153 when the programmer takes care to keep the stack from being destroyed. */
2154 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
2155 target_flags |= MASK_NO_RED_ZONE;
2158 /* Validate -mpreferred-stack-boundary= value, or provide default.
2159 The default of 128 bits is for Pentium III's SSE __m128. We can't
2160 change it because of optimize_size. Otherwise, we can't mix object
2161 files compiled with -Os and -On. */
2162 ix86_preferred_stack_boundary = 128;
2163 if (ix86_preferred_stack_boundary_string)
2165 i = atoi (ix86_preferred_stack_boundary_string);
2166 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
2167 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
2168 TARGET_64BIT ? 4 : 2);
2170 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
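/* Worked example (BITS_PER_UNIT is 8 on x86):
   -mpreferred-stack-boundary=4 yields (1 << 4) * 8 = 128 bits,
   i.e. the 16-byte alignment required for SSE __m128 values.  */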
2173 /* Accept -msseregparm only if at least SSE support is enabled. */
2174 if (TARGET_SSEREGPARM
2176 error ("-msseregparm used without SSE enabled");
2178 ix86_fpmath = TARGET_FPMATH_DEFAULT;
2179 if (ix86_fpmath_string != 0)
2181 if (! strcmp (ix86_fpmath_string, "387"))
2182 ix86_fpmath = FPMATH_387;
2183 else if (! strcmp (ix86_fpmath_string, "sse"))
2187 warning (0, "SSE instruction set disabled, using 387 arithmetic");
2188 ix86_fpmath = FPMATH_387;
2191 ix86_fpmath = FPMATH_SSE;
2193 else if (! strcmp (ix86_fpmath_string, "387,sse")
2194 || ! strcmp (ix86_fpmath_string, "sse,387"))
2198 warning (0, "SSE instruction set disabled, using 387 arithmetic");
2199 ix86_fpmath = FPMATH_387;
2201 else if (!TARGET_80387)
2203 warning (0, "387 instruction set disabled, using SSE arithmetic");
2204 ix86_fpmath = FPMATH_SSE;
2207 ix86_fpmath = FPMATH_SSE | FPMATH_387;
2210 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
2213 /* If the i387 is disabled, then do not return values in it. */
2215 target_flags &= ~MASK_FLOAT_RETURNS;
2217 if ((x86_accumulate_outgoing_args & ix86_tune_mask)
2218 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
2220 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
2222 /* ??? Unwind info is not correct around the CFG unless either a frame
2223 pointer is present or M_A_O_A is set. Fixing this requires rewriting
2224 unwind info generation to be aware of the CFG and propagating states around edges.  */
2226 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
2227 || flag_exceptions || flag_non_call_exceptions)
2228 && flag_omit_frame_pointer
2229 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
2231 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
2232 warning (0, "unwind tables currently require either a frame pointer "
2233 "or -maccumulate-outgoing-args for correctness");
2234 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
2237 /* For sane SSE instruction set generation we need the fcomi instruction.
2238 It is safe to enable all CMOVE instructions. */
2242 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
2245 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
2246 p = strchr (internal_label_prefix, 'X');
2247 internal_label_prefix_len = p - internal_label_prefix;
2251 /* When the scheduling description is not available, disable the scheduler pass
2252 so it won't slow down compilation and make x87 code slower. */
2253 if (!TARGET_SCHEDULE)
2254 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
2256 if (!PARAM_SET_P (PARAM_SIMULTANEOUS_PREFETCHES))
2257 set_param_value ("simultaneous-prefetches",
2258 ix86_cost->simultaneous_prefetches);
2259 if (!PARAM_SET_P (PARAM_L1_CACHE_LINE_SIZE))
2260 set_param_value ("l1-cache-line-size", ix86_cost->prefetch_block);
2263 /* Return true if this goes in large data/bss. */
2266 ix86_in_large_data_p (tree exp)
2268 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
2271 /* Functions are never large data. */
2272 if (TREE_CODE (exp) == FUNCTION_DECL)
2275 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
2277 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
2278 if (strcmp (section, ".ldata") == 0
2279 || strcmp (section, ".lbss") == 0)
2285 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
2287 /* If this is an incomplete type with size 0, then we can't put it
2288 in data because it might be too big when completed. */
2289 if (!size || size > ix86_section_threshold)
2296 /* Switch to the appropriate section for output of DECL.
2297 DECL is either a `VAR_DECL' node or a constant of some sort.
2298 RELOC indicates whether forming the initial value of DECL requires
2299 link-time relocations. */
2301 static section * x86_64_elf_select_section (tree, int, unsigned HOST_WIDE_INT)
2305 x86_64_elf_select_section (tree decl, int reloc,
2306 unsigned HOST_WIDE_INT align)
2308 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2309 && ix86_in_large_data_p (decl))
2311 const char *sname = NULL;
2312 unsigned int flags = SECTION_WRITE;
2313 switch (categorize_decl_for_section (decl, reloc))
2318 case SECCAT_DATA_REL:
2319 sname = ".ldata.rel";
2321 case SECCAT_DATA_REL_LOCAL:
2322 sname = ".ldata.rel.local";
2324 case SECCAT_DATA_REL_RO:
2325 sname = ".ldata.rel.ro";
2327 case SECCAT_DATA_REL_RO_LOCAL:
2328 sname = ".ldata.rel.ro.local";
2332 flags |= SECTION_BSS;
2335 case SECCAT_RODATA_MERGE_STR:
2336 case SECCAT_RODATA_MERGE_STR_INIT:
2337 case SECCAT_RODATA_MERGE_CONST:
2341 case SECCAT_SRODATA:
2348 /* We don't split these for medium model.  Place them into
2349 default sections and hope for the best. */
2354 /* We might get called with string constants, but get_named_section
2355 doesn't like them as they are not DECLs. Also, we need to set
2356 flags in that case. */
2358 return get_section (sname, flags, NULL);
2359 return get_named_section (decl, sname, reloc);
2362 return default_elf_select_section (decl, reloc, align);
2365 /* Build up a unique section name, expressed as a
2366 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
2367 RELOC indicates whether the initial value of EXP requires
2368 link-time relocations. */
2370 static void ATTRIBUTE_UNUSED
2371 x86_64_elf_unique_section (tree decl, int reloc)
2373 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2374 && ix86_in_large_data_p (decl))
2376 const char *prefix = NULL;
2377 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
2378 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
2380 switch (categorize_decl_for_section (decl, reloc))
2383 case SECCAT_DATA_REL:
2384 case SECCAT_DATA_REL_LOCAL:
2385 case SECCAT_DATA_REL_RO:
2386 case SECCAT_DATA_REL_RO_LOCAL:
2387 prefix = one_only ? ".gnu.linkonce.ld." : ".ldata.";
2390 prefix = one_only ? ".gnu.linkonce.lb." : ".lbss.";
2393 case SECCAT_RODATA_MERGE_STR:
2394 case SECCAT_RODATA_MERGE_STR_INIT:
2395 case SECCAT_RODATA_MERGE_CONST:
2396 prefix = one_only ? ".gnu.linkonce.lr." : ".lrodata.";
2398 case SECCAT_SRODATA:
2405 /* We don't split these for medium model.  Place them into
2406 default sections and hope for the best. */
2414 plen = strlen (prefix);
2416 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
2417 name = targetm.strip_name_encoding (name);
2418 nlen = strlen (name);
2420 string = alloca (nlen + plen + 1);
2421 memcpy (string, prefix, plen);
2422 memcpy (string + plen, name, nlen + 1);
2424 DECL_SECTION_NAME (decl) = build_string (nlen + plen, string);
2428 default_unique_section (decl, reloc);
2431 #ifdef COMMON_ASM_OP
2432 /* This says how to output assembler code to declare an
2433 uninitialized external linkage data object.
2435 For medium model x86-64 we need to use the .largecomm directive for large objects.
2438 x86_elf_aligned_common (FILE *file,
2439 const char *name, unsigned HOST_WIDE_INT size,
2442 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2443 && size > (unsigned int)ix86_section_threshold)
2444 fprintf (file, ".largecomm\t");
2446 fprintf (file, "%s", COMMON_ASM_OP);
2447 assemble_name (file, name);
2448 fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
2449 size, align / BITS_PER_UNIT);
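/* Illustrative output (the symbol name is hypothetical): a 100000-byte
   object aligned to 32 bytes in the medium code model is emitted as

     .largecomm	big_array,100000,32

   while smaller objects fall back to the plain COMMON_ASM_OP form.  */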
2453 /* Utility function for targets to use in implementing
2454 ASM_OUTPUT_ALIGNED_BSS. */
2457 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
2458 const char *name, unsigned HOST_WIDE_INT size,
2461 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2462 && size > (unsigned int)ix86_section_threshold)
2463 switch_to_section (get_named_section (decl, ".lbss", 0));
2465 switch_to_section (bss_section);
2466 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
2467 #ifdef ASM_DECLARE_OBJECT_NAME
2468 last_assemble_variable_decl = decl;
2469 ASM_DECLARE_OBJECT_NAME (file, name, decl);
2471 /* The standard thing is just to output a label for the object. */
2472 ASM_OUTPUT_LABEL (file, name);
2473 #endif /* ASM_DECLARE_OBJECT_NAME */
2474 ASM_OUTPUT_SKIP (file, size ? size : 1);
2478 optimization_options (int level, int size ATTRIBUTE_UNUSED)
2480 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
2481 make the problem with not enough registers even worse. */
2482 #ifdef INSN_SCHEDULING
2484 flag_schedule_insns = 0;
2488 /* The Darwin libraries never set errno, so we might as well
2489 avoid calling them when that's the only reason we would. */
2490 flag_errno_math = 0;
2492 /* The default values of these switches depend on TARGET_64BIT,
2493 which is not known at this moment.  Mark these values with 2 and
2494 let the user override them.  In case there is no command line option
2495 specifying them, we will set the defaults in override_options. */
2497 flag_omit_frame_pointer = 2;
2498 flag_pcc_struct_return = 2;
2499 flag_asynchronous_unwind_tables = 2;
2500 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
2501 SUBTARGET_OPTIMIZATION_OPTIONS;
2505 /* Decide whether we can make a sibling call to a function. DECL is the
2506 declaration of the function being targeted by the call and EXP is the
2507 CALL_EXPR representing the call. */
2510 ix86_function_ok_for_sibcall (tree decl, tree exp)
2515 /* If we are generating position-independent code, we cannot sibcall
2516 optimize any indirect call, or a direct call to a global function,
2517 as the PLT requires %ebx be live. */
2518 if (!TARGET_64BIT && flag_pic && (!decl || !targetm.binds_local_p (decl)))
2525 func = TREE_TYPE (CALL_EXPR_FN (exp));
2526 if (POINTER_TYPE_P (func))
2527 func = TREE_TYPE (func);
2530 /* Check that the return value locations are the same. Like
2531 if we are returning floats on the 80387 register stack, we cannot
2532 make a sibcall from a function that doesn't return a float to a
2533 function that does or, conversely, from a function that does return
2534 a float to a function that doesn't; the necessary stack adjustment
2535 would not be executed. This is also the place we notice
2536 differences in the return value ABI. Note that it is ok for one
2537 of the functions to have void return type as long as the return
2538 value of the other is passed in a register. */
2539 a = ix86_function_value (TREE_TYPE (exp), func, false);
2540 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
2542 if (STACK_REG_P (a) || STACK_REG_P (b))
2544 if (!rtx_equal_p (a, b))
2547 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
2549 else if (!rtx_equal_p (a, b))
2552 /* If this call is indirect, we'll need to be able to use a call-clobbered
2553 register for the address of the target function. Make sure that all
2554 such registers are not used for passing parameters. */
2555 if (!decl && !TARGET_64BIT)
2559 /* We're looking at the CALL_EXPR; we need the type of the function. */
2560 type = CALL_EXPR_FN (exp); /* pointer expression */
2561 type = TREE_TYPE (type); /* pointer type */
2562 type = TREE_TYPE (type); /* function type */
2564 if (ix86_function_regparm (type, NULL) >= 3)
2566 /* ??? Need to count the actual number of registers to be used,
2567 not the possible number of registers. Fix later. */
2572 /* Dllimport'd functions are also called indirectly. */
2573 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
2574 && decl && DECL_DLLIMPORT_P (decl)
2575 && ix86_function_regparm (TREE_TYPE (decl), NULL) >= 3)
2578 /* If we force-aligned the stack, then sibcalling would unalign the
2579 stack, which may break the called function. */
2580 if (cfun->machine->force_align_arg_pointer)
2583 /* Otherwise okay. That also includes certain types of indirect calls. */
2587 /* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm"
2588 calling convention attributes;
2589 arguments as in struct attribute_spec.handler. */
2592 ix86_handle_cconv_attribute (tree *node, tree name,
2594 int flags ATTRIBUTE_UNUSED,
2597 if (TREE_CODE (*node) != FUNCTION_TYPE
2598 && TREE_CODE (*node) != METHOD_TYPE
2599 && TREE_CODE (*node) != FIELD_DECL
2600 && TREE_CODE (*node) != TYPE_DECL)
2602 warning (OPT_Wattributes, "%qs attribute only applies to functions",
2603 IDENTIFIER_POINTER (name));
2604 *no_add_attrs = true;
2608 /* Can combine regparm with all attributes but fastcall. */
2609 if (is_attribute_p ("regparm", name))
2613 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2615 error ("fastcall and regparm attributes are not compatible");
2618 cst = TREE_VALUE (args);
2619 if (TREE_CODE (cst) != INTEGER_CST)
2621 warning (OPT_Wattributes,
2622 "%qs attribute requires an integer constant argument",
2623 IDENTIFIER_POINTER (name));
2624 *no_add_attrs = true;
2626 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
2628 warning (OPT_Wattributes, "argument to %qs attribute larger than %d",
2629 IDENTIFIER_POINTER (name), REGPARM_MAX);
2630 *no_add_attrs = true;
2634 && lookup_attribute (ix86_force_align_arg_pointer_string,
2635 TYPE_ATTRIBUTES (*node))
2636 && compare_tree_int (cst, REGPARM_MAX-1))
2638 error ("%s functions limited to %d register parameters",
2639 ix86_force_align_arg_pointer_string, REGPARM_MAX-1);
2647 warning (OPT_Wattributes, "%qs attribute ignored",
2648 IDENTIFIER_POINTER (name));
2649 *no_add_attrs = true;
2653 /* Can combine fastcall with sseregparm; cdecl, stdcall and regparm are not compatible. */
2654 if (is_attribute_p ("fastcall", name))
2656 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
2658 error ("fastcall and cdecl attributes are not compatible");
2660 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
2662 error ("fastcall and stdcall attributes are not compatible");
2664 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
2666 error ("fastcall and regparm attributes are not compatible");
2670 /* Can combine stdcall with regparm and sseregparm; fastcall and cdecl are not compatible. */
2672 else if (is_attribute_p ("stdcall", name))
2674 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
2676 error ("stdcall and cdecl attributes are not compatible");
2678 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2680 error ("stdcall and fastcall attributes are not compatible");
2684 /* Can combine cdecl with regparm and sseregparm. */
2685 else if (is_attribute_p ("cdecl", name))
2687 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
2689 error ("stdcall and cdecl attributes are not compatible");
2691 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2693 error ("fastcall and cdecl attributes are not compatible");
2697 /* Can combine sseregparm with all attributes. */
2702 /* Return 0 if the attributes for two types are incompatible, 1 if they
2703 are compatible, and 2 if they are nearly compatible (which causes a
2704 warning to be generated). */
2707 ix86_comp_type_attributes (tree type1, tree type2)
2709 /* Check for mismatch of non-default calling convention. */
2710 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
2712 if (TREE_CODE (type1) != FUNCTION_TYPE)
2715 /* Check for mismatched fastcall/regparm types. */
2716 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
2717 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
2718 || (ix86_function_regparm (type1, NULL)
2719 != ix86_function_regparm (type2, NULL)))
2722 /* Check for mismatched sseregparm types. */
2723 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
2724 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
2727 /* Check for mismatched return types (cdecl vs stdcall). */
2728 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
2729 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
2735 /* Return the regparm value for a function with the indicated TYPE and DECL.
2736 DECL may be NULL when calling function indirectly
2737 or considering a libcall. */
2740 ix86_function_regparm (tree type, tree decl)
2743 int regparm = ix86_regparm;
2748 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
2750 return TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
2752 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
2755 /* Use register calling convention for local functions when possible. */
2756 if (decl && flag_unit_at_a_time && !profile_flag)
2758 struct cgraph_local_info *i = cgraph_local_info (decl);
2761 int local_regparm, globals = 0, regno;
2764 /* Make sure no regparm register is taken by a
2765 global register variable. */
2766 for (local_regparm = 0; local_regparm < 3; local_regparm++)
2767 if (global_regs[local_regparm])
2770 /* We can't use regparm(3) for nested functions, as these use the
2771 static chain pointer in the third argument. */
2772 if (local_regparm == 3
2773 && decl_function_context (decl)
2774 && !DECL_NO_STATIC_CHAIN (decl))
2777 /* If the function realigns its stack pointer, the prologue will
2778 clobber %ecx. If we've already generated code for the callee,
2779 the callee DECL_STRUCT_FUNCTION is gone, so we fall back to
2780 scanning the attributes for the self-realigning property. */
2781 f = DECL_STRUCT_FUNCTION (decl);
2782 if (local_regparm == 3
2783 && (f ? !!f->machine->force_align_arg_pointer
2784 : !!lookup_attribute (ix86_force_align_arg_pointer_string,
2785 TYPE_ATTRIBUTES (TREE_TYPE (decl)))))
2788 /* Each global register variable increases register pressure, so the
2789 more global register variables there are, the less useful the regparm
2790 optimization is, unless explicitly requested by the user. */
2791 for (regno = 0; regno < 6; regno++)
2792 if (global_regs[regno])
2795 = globals < local_regparm ? local_regparm - globals : 0;
2797 if (local_regparm > regparm)
2798 regparm = local_regparm;
2805 /* Return 1 or 2 if we can pass up to SSE_REGPARM_MAX SFmode (1) and
2806 DFmode (2) arguments in SSE registers for a function with the
2807 indicated TYPE and DECL. DECL may be NULL when calling function
2808 indirectly or considering a libcall. Otherwise return 0. */
2811 ix86_function_sseregparm (tree type, tree decl)
2813 gcc_assert (!TARGET_64BIT);
2815 /* Use SSE registers to pass SFmode and DFmode arguments if requested
2816 by the sseregparm attribute. */
2817 if (TARGET_SSEREGPARM
2818 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
2823 error ("calling %qD with attribute sseregparm without "
2824 "SSE/SSE2 enabled", decl);
2826 error ("calling %qT with attribute sseregparm without "
2827 "SSE/SSE2 enabled", type);
2834 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
2835 (and DFmode for SSE2) arguments in SSE registers. */
2836 if (decl && TARGET_SSE_MATH && flag_unit_at_a_time && !profile_flag)
2838 struct cgraph_local_info *i = cgraph_local_info (decl);
2840 return TARGET_SSE2 ? 2 : 1;
2846 /* Return true if EAX is live at the start of the function. Used by
2847 ix86_expand_prologue to determine if we need special help before
2848 calling allocate_stack_worker. */
2851 ix86_eax_live_at_start_p (void)
2853 /* Cheat. Don't bother working forward from ix86_function_regparm
2854 to the function type to whether an actual argument is located in
2855 eax. Instead just look at cfg info, which is still close enough
2856 to correct at this point. This gives false positives for broken
2857 functions that might use uninitialized data that happens to be
2858 allocated in eax, but who cares? */
2859 return REGNO_REG_SET_P (ENTRY_BLOCK_PTR->il.rtl->global_live_at_end, 0);
2862 /* Return true if TYPE has a variable argument list. */
2865 type_has_variadic_args_p (tree type)
2869 for (t = TYPE_ARG_TYPES (type); t; t = TREE_CHAIN (t))
2870 if (t == void_list_node)
2875 /* Value is the number of bytes of arguments automatically
2876 popped when returning from a subroutine call.
2877 FUNDECL is the declaration node of the function (as a tree),
2878 FUNTYPE is the data type of the function (as a tree),
2879 or for a library call it is an identifier node for the subroutine name.
2880 SIZE is the number of bytes of arguments passed on the stack.
2882 On the 80386, the RTD insn may be used to pop them if the number
2883 of args is fixed, but if the number is variable then the caller
2884 must pop them all. RTD can't be used for library calls now
2885 because the library is compiled with the Unix compiler.
2886 Use of RTD is a selectable option, since it is incompatible with
2887 standard Unix calling sequences. If the option is not selected,
2888 the caller must always pop the args.
2890 The attribute stdcall is equivalent to RTD on a per module basis. */
2893 ix86_return_pops_args (tree fundecl, tree funtype, int size)
2897 /* None of the 64-bit ABIs pop arguments. */
2901 rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
2903 /* Cdecl functions override -mrtd, and never pop the stack. */
2904 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype)))
2906 /* Stdcall and fastcall functions will pop the stack if not variable args.  */
2908 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
2909 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
2912 if (rtd && ! type_has_variadic_args_p (funtype))
2916 /* Lose any fake structure return argument if it is passed on the stack. */
2917 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
2918 && !KEEP_AGGREGATE_RETURN_POINTER)
2920 int nregs = ix86_function_regparm (funtype, fundecl);
2922 return GET_MODE_SIZE (Pmode);
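/* Sketch of the stdcall case handled above (illustrative; the function
   name is hypothetical):

     int __attribute__ ((stdcall)) f (int a, int b);

   The caller pushes 8 bytes of arguments and f pops them itself on
   return ("ret $8"), so this hook reports 8 for such a call.  */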
2928 /* Argument support functions. */
2930 /* Return true when register may be used to pass function parameters. */
2932 ix86_function_arg_regno_p (int regno)
2939 return (regno < REGPARM_MAX
2940 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
2942 return (regno < REGPARM_MAX
2943 || (TARGET_MMX && MMX_REGNO_P (regno)
2944 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
2945 || (TARGET_SSE && SSE_REGNO_P (regno)
2946 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
2951 if (SSE_REGNO_P (regno) && TARGET_SSE)
2956 if (TARGET_SSE && SSE_REGNO_P (regno)
2957 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
2961 /* RAX is used as hidden argument to va_arg functions. */
2965 for (i = 0; i < REGPARM_MAX; i++)
2966 if (regno == x86_64_int_parameter_registers[i])
2971 /* Return true if we do not know how to pass TYPE solely in registers. */
2974 ix86_must_pass_in_stack (enum machine_mode mode, tree type)
2976 if (must_pass_in_stack_var_size_or_pad (mode, type))
2979 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
2980 The layout_type routine is crafty and tries to trick us into passing
2981 currently unsupported vector types on the stack by using TImode. */
2982 return (!TARGET_64BIT && mode == TImode
2983 && type && TREE_CODE (type) != VECTOR_TYPE);
2986 /* Initialize a variable CUM of type CUMULATIVE_ARGS
2987 for a call to a function whose data type is FNTYPE.
2988 For a library call, FNTYPE is 0. */
2991 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
2992 tree fntype, /* tree ptr for function decl */
2993 rtx libname, /* SYMBOL_REF of library name or 0 */
2996 memset (cum, 0, sizeof (*cum));
2998 /* Set up the number of registers to use for passing arguments. */
2999 cum->nregs = ix86_regparm;
3001 cum->sse_nregs = SSE_REGPARM_MAX;
3003 cum->mmx_nregs = MMX_REGPARM_MAX;
3004 cum->warn_sse = true;
3005 cum->warn_mmx = true;
3006 cum->maybe_vaarg = (fntype ? type_has_variadic_args_p (fntype) : !libname);
3010 /* If there are variable arguments, then we won't pass anything
3011 in registers in 32-bit mode. */
3012 if (cum->maybe_vaarg)
3022 /* Use the ecx and edx registers if the function has the fastcall attribute,
3023 else look for regparm information. */
3026 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
3032 cum->nregs = ix86_function_regparm (fntype, fndecl);
3035 /* Set up the number of SSE registers used for passing SFmode
3036 and DFmode arguments. Warn for mismatching ABI. */
3037 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl);
3041 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
3042 But in the case of vector types, it is some vector mode.
3044 When we have only some of our vector isa extensions enabled, then there
3045 are some modes for which vector_mode_supported_p is false. For these
3046 modes, the generic vector support in gcc will choose some non-vector mode
3047 in order to implement the type. By computing the natural mode, we'll
3048 select the proper ABI location for the operand and not depend on whatever
3049 the middle-end decides to do with these vector types. */
3051 static enum machine_mode
3052 type_natural_mode (tree type)
3054 enum machine_mode mode = TYPE_MODE (type);
3056 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
3058 HOST_WIDE_INT size = int_size_in_bytes (type);
3059 if ((size == 8 || size == 16)
3060 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
3061 && TYPE_VECTOR_SUBPARTS (type) > 1)
3063 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
3065 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
3066 mode = MIN_MODE_VECTOR_FLOAT;
3068 mode = MIN_MODE_VECTOR_INT;
3070 /* Get the mode which has this inner mode and number of units. */
3071 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
3072 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
3073 && GET_MODE_INNER (mode) == innermode)
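/* Example of the search above (illustrative): for a type such as

     typedef float v4sf __attribute__ ((vector_size (16)));

   the middle end may have picked a non-vector mode when SSE is
   disabled, but the loop still finds V4SFmode (four SFmode units), so
   the ABI location is computed from the true vector mode.  */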
3083 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
3084 this may not agree with the mode that the type system has chosen for the
3085 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
3086 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
3089 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
3094 if (orig_mode != BLKmode)
3095 tmp = gen_rtx_REG (orig_mode, regno);
3098 tmp = gen_rtx_REG (mode, regno);
3099 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
3100 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
3106 /* x86-64 register passing implementation.  See the x86-64 ABI for details.  The
3107 goal of this code is to classify each 8 bytes of the incoming argument by
3108 register class and assign registers accordingly. */
3110 /* Return the union class of CLASS1 and CLASS2.
3111 See the x86-64 PS ABI for details. */
3113 static enum x86_64_reg_class
3114 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
3116 /* Rule #1: If both classes are equal, this is the resulting class. */
3117 if (class1 == class2)
3120 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is the other class.  */
3122 if (class1 == X86_64_NO_CLASS)
3124 if (class2 == X86_64_NO_CLASS)
3127 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
3128 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
3129 return X86_64_MEMORY_CLASS;
3131 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
3132 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
3133 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
3134 return X86_64_INTEGERSI_CLASS;
3135 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
3136 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
3137 return X86_64_INTEGER_CLASS;
3139 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class, MEMORY is used.  */
3141 if (class1 == X86_64_X87_CLASS
3142 || class1 == X86_64_X87UP_CLASS
3143 || class1 == X86_64_COMPLEX_X87_CLASS
3144 || class2 == X86_64_X87_CLASS
3145 || class2 == X86_64_X87UP_CLASS
3146 || class2 == X86_64_COMPLEX_X87_CLASS)
3147 return X86_64_MEMORY_CLASS;
3149 /* Rule #6: Otherwise class SSE is used. */
3150 return X86_64_SSE_CLASS;
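/* Two illustrative merges under the rules above: INTEGERSI merged with
   SSESF yields INTEGERSI (rule #4), while X87 merged with any class
   other than itself or NO_CLASS yields MEMORY (rule #5).  */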
3153 /* Classify the argument of type TYPE and mode MODE.
3154 CLASSES will be filled by the register class used to pass each word
3155 of the operand. The number of words is returned. In case the parameter
3156 should be passed in memory, 0 is returned. As a special case for zero
3157 sized containers, classes[0] will be NO_CLASS and 1 is returned.
3159 BIT_OFFSET is used internally for handling records and specifies the
3160 offset in bits modulo 256 to avoid overflow cases.
3162 See the x86-64 PS ABI for details.
3166 classify_argument (enum machine_mode mode, tree type,
3167 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
3169 HOST_WIDE_INT bytes =
3170 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3171 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
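/* E.g. (illustrative, 64-bit): a 12-byte argument at bit_offset 0 with
   UNITS_PER_WORD == 8 gives words = (12 + 0 + 7) / 8 = 2.  */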
3173 /* Variable sized entities are always passed/returned in memory. */
3177 if (mode != VOIDmode
3178 && targetm.calls.must_pass_in_stack (mode, type))
3181 if (type && AGGREGATE_TYPE_P (type))
3185 enum x86_64_reg_class subclasses[MAX_CLASSES];
3187 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
3191 for (i = 0; i < words; i++)
3192 classes[i] = X86_64_NO_CLASS;
3194 /* Zero-sized arrays or structures are NO_CLASS.  We return 0 to
3195 signal the memory class, so handle this as a special case. */
3198 classes[0] = X86_64_NO_CLASS;
3202 /* Classify each field of the record and merge the classes. */
3203 switch (TREE_CODE (type))
3206 /* And now merge the fields of the structure. */
3207 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3209 if (TREE_CODE (field) == FIELD_DECL)
3213 if (TREE_TYPE (field) == error_mark_node)
3216 /* Bitfields are always classified as integer. Handle them
3217 early, since later code would consider them to be
3218 misaligned integers. */
3219 if (DECL_BIT_FIELD (field))
3221 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
3222 i < ((int_bit_position (field) + (bit_offset % 64))
3223 + tree_low_cst (DECL_SIZE (field), 0)
3226 merge_classes (X86_64_INTEGER_CLASS,
3231 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3232 TREE_TYPE (field), subclasses,
3233 (int_bit_position (field)
3234 + bit_offset) % 256);
3237 for (i = 0; i < num; i++)
3240 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
3242 merge_classes (subclasses[i], classes[i + pos]);
3250 /* Arrays are handled as small records. */
3253 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
3254 TREE_TYPE (type), subclasses, bit_offset);
3258 /* The partial classes are now full classes. */
3259 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
3260 subclasses[0] = X86_64_SSE_CLASS;
3261 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
3262 subclasses[0] = X86_64_INTEGER_CLASS;
3264 for (i = 0; i < words; i++)
3265 classes[i] = subclasses[i % num];
3270 case QUAL_UNION_TYPE:
3271 /* Unions are similar to RECORD_TYPE but the offset is always 0.
3273 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3275 if (TREE_CODE (field) == FIELD_DECL)
3279 if (TREE_TYPE (field) == error_mark_node)
3282 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3283 TREE_TYPE (field), subclasses,
3287 for (i = 0; i < num; i++)
3288 classes[i] = merge_classes (subclasses[i], classes[i]);
3297 /* Final merger cleanup. */
3298 for (i = 0; i < words; i++)
3300 /* If one class is MEMORY, everything should be passed in memory.  */
3302 if (classes[i] == X86_64_MEMORY_CLASS)
3305 /* The X86_64_SSEUP_CLASS should always be preceded by
3306 X86_64_SSE_CLASS. */
3307 if (classes[i] == X86_64_SSEUP_CLASS
3308 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
3309 classes[i] = X86_64_SSE_CLASS;
3311 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
3312 if (classes[i] == X86_64_X87UP_CLASS
3313 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
3314 classes[i] = X86_64_SSE_CLASS;
3319 /* Compute the alignment needed.  We align all types to natural boundaries, with
3320 the exception of XFmode, which is aligned to 64 bits. */
3321 if (mode != VOIDmode && mode != BLKmode)
3323 int mode_alignment = GET_MODE_BITSIZE (mode);
3326 mode_alignment = 128;
3327 else if (mode == XCmode)
3328 mode_alignment = 256;
3329 if (COMPLEX_MODE_P (mode))
3330 mode_alignment /= 2;
3331 /* Misaligned fields are always returned in memory. */
3332 if (bit_offset % mode_alignment)
3336 /* For V1xx modes, just use the base mode.  */
3337 if (VECTOR_MODE_P (mode)
3338 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
3339 mode = GET_MODE_INNER (mode);
3341 /* Classification of atomic types. */
3346 classes[0] = X86_64_SSE_CLASS;
3349 classes[0] = X86_64_SSE_CLASS;
3350 classes[1] = X86_64_SSEUP_CLASS;
3359 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3360 classes[0] = X86_64_INTEGERSI_CLASS;
3362 classes[0] = X86_64_INTEGER_CLASS;
3366 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
3371 if (!(bit_offset % 64))
3372 classes[0] = X86_64_SSESF_CLASS;
3374 classes[0] = X86_64_SSE_CLASS;
3377 classes[0] = X86_64_SSEDF_CLASS;
3380 classes[0] = X86_64_X87_CLASS;
3381 classes[1] = X86_64_X87UP_CLASS;
3384 classes[0] = X86_64_SSE_CLASS;
3385 classes[1] = X86_64_SSEUP_CLASS;
3388 classes[0] = X86_64_SSE_CLASS;
3391 classes[0] = X86_64_SSEDF_CLASS;
3392 classes[1] = X86_64_SSEDF_CLASS;
3395 classes[0] = X86_64_COMPLEX_X87_CLASS;
3398 /* These modes are larger than 16 bytes. */
3406 classes[0] = X86_64_SSE_CLASS;
3407 classes[1] = X86_64_SSEUP_CLASS;
3413 classes[0] = X86_64_SSE_CLASS;
3419 gcc_assert (VECTOR_MODE_P (mode));
3424 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
3426 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3427 classes[0] = X86_64_INTEGERSI_CLASS;
3429 classes[0] = X86_64_INTEGER_CLASS;
3430 classes[1] = X86_64_INTEGER_CLASS;
3431 return 1 + (bytes > 8);
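/* A worked classification (illustrative; the struct is hypothetical):

     struct s { double d; int i; };

   occupies two eightbytes; the code above classifies the first as
   X86_64_SSEDF_CLASS (the double) and the second as
   X86_64_INTEGER_CLASS (the int plus tail padding), so the value is
   passed in one SSE register and one general-purpose register.  */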
3435 /* Examine the argument and return the number of registers required in each
3436 class.  Return 0 iff the parameter should be passed in memory. */
3438 examine_argument (enum machine_mode mode, tree type, int in_return,
3439 int *int_nregs, int *sse_nregs)
3441 enum x86_64_reg_class class[MAX_CLASSES];
3442 int n = classify_argument (mode, type, class, 0);
3448 for (n--; n >= 0; n--)
3451 case X86_64_INTEGER_CLASS:
3452 case X86_64_INTEGERSI_CLASS:
3455 case X86_64_SSE_CLASS:
3456 case X86_64_SSESF_CLASS:
3457 case X86_64_SSEDF_CLASS:
3460 case X86_64_NO_CLASS:
3461 case X86_64_SSEUP_CLASS:
3463 case X86_64_X87_CLASS:
3464 case X86_64_X87UP_CLASS:
3468 case X86_64_COMPLEX_X87_CLASS:
3469 return in_return ? 2 : 0;
3470 case X86_64_MEMORY_CLASS:
3476 /* Construct a container for the argument as used by the GCC interface.  See
3477 FUNCTION_ARG for the detailed description. */
3480 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
3481 tree type, int in_return, int nintregs, int nsseregs,
3482 const int *intreg, int sse_regno)
3484 /* The following variables hold the static issued_error state. */
3485 static bool issued_sse_arg_error;
3486 static bool issued_sse_ret_error;
3487 static bool issued_x87_ret_error;
3489 enum machine_mode tmpmode;
3491 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3492 enum x86_64_reg_class class[MAX_CLASSES];
3496 int needed_sseregs, needed_intregs;
3497 rtx exp[MAX_CLASSES];
3500 n = classify_argument (mode, type, class, 0);
3503 if (!examine_argument (mode, type, in_return, &needed_intregs,
3506 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
3509 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
3510 some less clueful developer tries to use floating-point anyway. */
3511 if (needed_sseregs && !TARGET_SSE)
3515 if (!issued_sse_ret_error)
3517 error ("SSE register return with SSE disabled");
3518 issued_sse_ret_error = true;
3521 else if (!issued_sse_arg_error)
3523 error ("SSE register argument with SSE disabled");
3524 issued_sse_arg_error = true;
3529 /* Likewise, error if the ABI requires us to return values in the
3530 x87 registers and the user specified -mno-80387. */
3531 if (!TARGET_80387 && in_return)
3532 for (i = 0; i < n; i++)
3533 if (class[i] == X86_64_X87_CLASS
3534 || class[i] == X86_64_X87UP_CLASS
3535 || class[i] == X86_64_COMPLEX_X87_CLASS)
3537 if (!issued_x87_ret_error)
3539 error ("x87 register return with x87 disabled");
3540 issued_x87_ret_error = true;
3545 /* First construct the simple cases.  Avoid SCmode, since we want to use a
3546 single register to pass this type. */
3547 if (n == 1 && mode != SCmode)
3550 case X86_64_INTEGER_CLASS:
3551 case X86_64_INTEGERSI_CLASS:
3552 return gen_rtx_REG (mode, intreg[0]);
3553 case X86_64_SSE_CLASS:
3554 case X86_64_SSESF_CLASS:
3555 case X86_64_SSEDF_CLASS:
3556 return gen_reg_or_parallel (mode, orig_mode, SSE_REGNO (sse_regno));
3557 case X86_64_X87_CLASS:
3558 case X86_64_COMPLEX_X87_CLASS:
3559 return gen_rtx_REG (mode, FIRST_STACK_REG);
3560 case X86_64_NO_CLASS:
3561 /* Zero sized array, struct or class. */
3566 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS
3568 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
3571 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
3572 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
3573 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
3574 && class[1] == X86_64_INTEGER_CLASS
3575 && (mode == CDImode || mode == TImode || mode == TFmode)
3576 && intreg[0] + 1 == intreg[1])
3577 return gen_rtx_REG (mode, intreg[0]);
3579 /* Otherwise figure out the entries of the PARALLEL. */
3580 for (i = 0; i < n; i++)
3584 case X86_64_NO_CLASS:
3586 case X86_64_INTEGER_CLASS:
3587 case X86_64_INTEGERSI_CLASS:
3588 /* Merge TImodes on aligned occasions here too. */
3589 if (i * 8 + 8 > bytes)
3590 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
3591 else if (class[i] == X86_64_INTEGERSI_CLASS)
3595 /* We've requested 24 bytes for which we don't have a mode. Use DImode. */
3596 if (tmpmode == BLKmode)
3598 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3599 gen_rtx_REG (tmpmode, *intreg),
3603 case X86_64_SSESF_CLASS:
3604 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3605 gen_rtx_REG (SFmode,
3606 SSE_REGNO (sse_regno)),
3610 case X86_64_SSEDF_CLASS:
3611 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3612 gen_rtx_REG (DFmode,
3613 SSE_REGNO (sse_regno)),
3617 case X86_64_SSE_CLASS:
3618 if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
3622 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3623 gen_rtx_REG (tmpmode,
3624 SSE_REGNO (sse_regno)),
3626 if (tmpmode == TImode)
3635 /* Empty aligned struct, union or class. */
3639 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
3640 for (i = 0; i < nexps; i++)
3641 XVECEXP (ret, 0, i) = exp [i];
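/* Editor's illustrative note (a sketch of one common case, not part of
   the original sources): for a 16-byte aggregate such as

       struct s { long l; double d; };

   classification yields an INTEGER class for the first eightbyte and an
   SSE class for the second, so the loop above builds a PARALLEL holding
   a DImode piece in the next free integer register and a DFmode piece
   in the next free SSE register.  */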
3645 /* Update the data in CUM to advance over an argument of mode MODE
3646 and data type TYPE. (TYPE is null for libcalls where that information
3647 may not be available.) */
3650 function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3651 tree type, HOST_WIDE_INT bytes, HOST_WIDE_INT words)
3667 cum->words += words;
3668 cum->nregs -= words;
3669 cum->regno += words;
3671 if (cum->nregs <= 0)
3679 if (cum->float_in_sse < 2)
3682 if (cum->float_in_sse < 1)
3693 if (!type || !AGGREGATE_TYPE_P (type))
3695 cum->sse_words += words;
3696 cum->sse_nregs -= 1;
3697 cum->sse_regno += 1;
3698 if (cum->sse_nregs <= 0)
3710 if (!type || !AGGREGATE_TYPE_P (type))
3712 cum->mmx_words += words;
3713 cum->mmx_nregs -= 1;
3714 cum->mmx_regno += 1;
3715 if (cum->mmx_nregs <= 0)
3726 function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3727 tree type, HOST_WIDE_INT words)
3729 int int_nregs, sse_nregs;
3731 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
3732 cum->words += words;
3733 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
3735 cum->nregs -= int_nregs;
3736 cum->sse_nregs -= sse_nregs;
3737 cum->regno += int_nregs;
3738 cum->sse_regno += sse_nregs;
3741 cum->words += words;
3745 function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3746 tree type, int named ATTRIBUTE_UNUSED)
3748 HOST_WIDE_INT bytes, words;
3750 if (mode == BLKmode)
3751 bytes = int_size_in_bytes (type);
3753 bytes = GET_MODE_SIZE (mode);
3754 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3757 mode = type_natural_mode (type);
3760 function_arg_advance_64 (cum, mode, type, words);
3762 function_arg_advance_32 (cum, mode, type, bytes, words);
3765 /* Define where to put the arguments to a function.
3766 Value is zero to push the argument on the stack,
3767 or a hard register in which to store the argument.
3769 MODE is the argument's machine mode.
3770 TYPE is the data type of the argument (as a tree).
3771 This is null for libcalls where that information may not be available.
3773 CUM is a variable of type CUMULATIVE_ARGS which gives info about
3774 the preceding args and about the function being called.
3775 NAMED is nonzero if this argument is a named parameter
3776 (otherwise it is an extra parameter matching an ellipsis). */
3779 function_arg_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3780 enum machine_mode orig_mode, tree type,
3781 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
3783 static bool warnedsse, warnedmmx;
3785 /* Avoid the AL settings for the Unix64 ABI. */
3786 if (mode == VOIDmode)
3802 if (words <= cum->nregs)
3804 int regno = cum->regno;
3806 /* Fastcall allocates the first two DWORD (SImode) or
3807 smaller arguments to ECX and EDX. */
3810 if (mode == BLKmode || mode == DImode)
3813 /* ECX not EAX is the first allocated register. */
3817 return gen_rtx_REG (mode, regno);
3822 if (cum->float_in_sse < 2)
3825 if (cum->float_in_sse < 1)
3835 if (!type || !AGGREGATE_TYPE_P (type))
3837 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
3840 warning (0, "SSE vector argument without SSE enabled "
3844 return gen_reg_or_parallel (mode, orig_mode,
3845 cum->sse_regno + FIRST_SSE_REG);
3853 if (!type || !AGGREGATE_TYPE_P (type))
3855 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
3858 warning (0, "MMX vector argument without MMX enabled "
3862 return gen_reg_or_parallel (mode, orig_mode,
3863 cum->mmx_regno + FIRST_MMX_REG);
3872 function_arg_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3873 enum machine_mode orig_mode, tree type)
3875 /* Handle a hidden AL argument containing the number of SSE registers used
3876 by a call to a varargs x86-64 function. */
3877 if (mode == VOIDmode)
3878 return GEN_INT (cum->maybe_vaarg
3879 ? (cum->sse_nregs < 0
3884 return construct_container (mode, orig_mode, type, 0, cum->nregs,
3886 &x86_64_int_parameter_registers [cum->regno],
3891 function_arg (CUMULATIVE_ARGS *cum, enum machine_mode omode,
3892 tree type, int named ATTRIBUTE_UNUSED)
3894 enum machine_mode mode = omode;
3895 HOST_WIDE_INT bytes, words;
3897 if (mode == BLKmode)
3898 bytes = int_size_in_bytes (type);
3900 bytes = GET_MODE_SIZE (mode);
3901 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3903 /* To simplify the code below, represent vector types with a vector mode
3904 even if MMX/SSE are not active. */
3905 if (type && TREE_CODE (type) == VECTOR_TYPE)
3906 mode = type_natural_mode (type);
3909 return function_arg_64 (cum, mode, omode, type);
3911 return function_arg_32 (cum, mode, omode, type, bytes, words);
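/* Editor's illustrative note: for a prototype such as

       void f (int i, double d);

   the 64-bit path above places i in the first integer parameter register
   and d in the first SSE register, while the plain 32-bit cdecl path
   returns NULL for both arguments, i.e. they are pushed on the stack.  */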
3914 /* A C expression that indicates when an argument must be passed by
3915 reference. If nonzero for an argument, a copy of that argument is
3916 made in memory and a pointer to the argument is passed instead of
3917 the argument itself. The pointer is passed in whatever way is
3918 appropriate for passing a pointer to that type. */
3921 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
3922 enum machine_mode mode ATTRIBUTE_UNUSED,
3923 tree type, bool named ATTRIBUTE_UNUSED)
3925 if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
3931 /* Return true when TYPE should be 128-bit aligned for the 32-bit argument
3932 passing ABI. Only called if TARGET_SSE. */
3934 contains_128bit_aligned_vector_p (tree type)
3936 enum machine_mode mode = TYPE_MODE (type);
3937 if (SSE_REG_MODE_P (mode)
3938 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
3940 if (TYPE_ALIGN (type) < 128)
3943 if (AGGREGATE_TYPE_P (type))
3945 /* Walk the aggregates recursively. */
3946 switch (TREE_CODE (type))
3950 case QUAL_UNION_TYPE:
3954 /* Walk all the structure fields. */
3955 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3957 if (TREE_CODE (field) == FIELD_DECL
3958 && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
3965 /* Just for use if some languages pass arrays by value. */
3966 if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
3977 /* Gives the alignment boundary, in bits, of an argument with the
3978 specified mode and type. */
3981 ix86_function_arg_boundary (enum machine_mode mode, tree type)
3985 align = TYPE_ALIGN (type);
3987 align = GET_MODE_ALIGNMENT (mode);
3988 if (align < PARM_BOUNDARY)
3989 align = PARM_BOUNDARY;
3992 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
3993 make an exception for SSE modes since these require 128-bit alignment.
3996 The handling here differs from field_alignment. ICC aligns MMX
3997 arguments to 4 byte boundaries, while structure fields are aligned
3998 to 8 byte boundaries. */
4000 align = PARM_BOUNDARY;
4003 if (!SSE_REG_MODE_P (mode))
4004 align = PARM_BOUNDARY;
4008 if (!contains_128bit_aligned_vector_p (type))
4009 align = PARM_BOUNDARY;
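/* Editor's illustrative note: the net effect of the 32-bit logic above is
   that an SSE-mode argument such as __m128 keeps its 128-bit alignment
   (likewise an aggregate containing a 128-bit aligned vector), while an
   ordinary scalar such as double is demoted to the 4-byte PARM_BOUNDARY,
   matching the i386 ABI rule stated above.  */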
4017 /* Return true if N is a possible register number for a function value. */
4020 ix86_function_value_regno_p (int regno)
4027 case FIRST_FLOAT_REG:
4028 return TARGET_FLOAT_RETURNS_IN_80387;
4034 if (TARGET_MACHO || TARGET_64BIT)
4042 /* Define how to find the value returned by a function.
4043 VALTYPE is the data type of the value (as a tree).
4044 If the precise function being called is known, FUNC is its FUNCTION_DECL;
4045 otherwise, FUNC is 0. */
4048 function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
4049 tree fntype, tree fn)
4053 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
4054 we normally prevent this case when mmx is not available. However
4055 some ABIs may require the result to be returned like DImode. */
4056 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
4057 regno = TARGET_MMX ? FIRST_MMX_REG : 0;
4059 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
4060 we prevent this case when sse is not available. However some ABIs
4061 may require the result to be returned like integer TImode. */
4062 else if (mode == TImode
4063 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
4064 regno = TARGET_SSE ? FIRST_SSE_REG : 0;
4066 /* Decimal floating point values can go in %eax, unlike other float modes. */
4067 else if (DECIMAL_FLOAT_MODE_P (mode))
4070 /* Most things go in %eax, except (unless -mno-fp-ret-in-387) fp values. */
4071 else if (!SCALAR_FLOAT_MODE_P (mode) || !TARGET_FLOAT_RETURNS_IN_80387)
4074 /* Floating point return values in %st(0), except for local functions when
4075 SSE math is enabled or for functions with sseregparm attribute. */
4078 regno = FIRST_FLOAT_REG;
4080 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
4082 int sse_level = ix86_function_sseregparm (fntype, fn);
4083 if ((sse_level >= 1 && mode == SFmode)
4084 || (sse_level == 2 && mode == DFmode))
4085 regno = FIRST_SSE_REG;
4089 return gen_rtx_REG (orig_mode, regno);
4093 function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
4098 /* Handle libcalls, which don't provide a type node. */
4099 if (valtype == NULL)
4111 return gen_rtx_REG (mode, FIRST_SSE_REG);
4114 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
4118 return gen_rtx_REG (mode, 0);
4122 ret = construct_container (mode, orig_mode, valtype, 1,
4123 REGPARM_MAX, SSE_REGPARM_MAX,
4124 x86_64_int_return_registers, 0);
4126 /* For zero sized structures, construct_container returns NULL, but we
4127 need to keep the rest of the compiler happy by returning a meaningful value. */
4129 ret = gen_rtx_REG (orig_mode, 0);
4135 ix86_function_value_1 (tree valtype, tree fntype_or_decl,
4136 enum machine_mode orig_mode, enum machine_mode mode)
4141 if (fntype_or_decl && DECL_P (fntype_or_decl))
4142 fn = fntype_or_decl;
4143 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
4146 return function_value_64 (orig_mode, mode, valtype);
4148 return function_value_32 (orig_mode, mode, fntype, fn);
4152 ix86_function_value (tree valtype, tree fntype_or_decl,
4153 bool outgoing ATTRIBUTE_UNUSED)
4155 enum machine_mode mode, orig_mode;
4157 orig_mode = TYPE_MODE (valtype);
4158 mode = type_natural_mode (valtype);
4159 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
4163 ix86_libcall_value (enum machine_mode mode)
4165 return ix86_function_value_1 (NULL, NULL, mode, mode);
4168 /* Return true iff type is returned in memory. */
4171 return_in_memory_32 (tree type, enum machine_mode mode)
4175 if (mode == BLKmode)
4178 size = int_size_in_bytes (type);
4180 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
4183 if (VECTOR_MODE_P (mode) || mode == TImode)
4185 /* User-created vectors small enough to fit in EAX. */
4189 /* MMX/3dNow values are returned in MM0,
4190 except when it doesn't exist. */
4192 return (TARGET_MMX ? 0 : 1);
4194 /* SSE values are returned in XMM0, except when it doesn't exist. */
4196 return (TARGET_SSE ? 0 : 1);
4211 return_in_memory_64 (tree type, enum machine_mode mode)
4213 int needed_intregs, needed_sseregs;
4214 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
4218 ix86_return_in_memory (tree type)
4220 enum machine_mode mode = type_natural_mode (type);
4223 return return_in_memory_64 (type, mode);
4225 return return_in_memory_32 (type, mode);
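/* Editor's illustrative note (a sketch under the 64-bit rules above): a
   16-byte struct of two longs needs two integer return registers, so
   examine_argument succeeds and the struct comes back in registers; a
   32-byte struct of four longs cannot be classified into return
   registers, so examine_argument fails and it is returned in memory.  */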
4228 /* When returning SSE vector types, we have a choice of either
4229 (1) being ABI incompatible with a -march switch, or
4230 (2) generating an error.
4231 Given no good solution, I think the safest thing is one warning.
4232 The user won't be able to use -Werror, but....
4234 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
4235 called in response to actually generating a caller or callee that
4236 uses such a type. As opposed to RETURN_IN_MEMORY, which is called
4237 via aggregate_value_p for general type probing from tree-ssa. */
4240 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
4242 static bool warnedsse, warnedmmx;
4244 if (!TARGET_64BIT && type)
4246 /* Look at the return type of the function, not the function type. */
4247 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
4249 if (!TARGET_SSE && !warnedsse)
4252 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
4255 warning (0, "SSE vector return without SSE enabled "
4260 if (!TARGET_MMX && !warnedmmx)
4262 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
4265 warning (0, "MMX vector return without MMX enabled "
4275 /* Create the va_list data type. */
4278 ix86_build_builtin_va_list (void)
4280 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
4282 /* For i386 we use a plain pointer to the argument area. */
4284 return build_pointer_type (char_type_node);
4286 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
4287 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
4289 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
4290 unsigned_type_node);
4291 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
4292 unsigned_type_node);
4293 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
4295 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
4298 va_list_gpr_counter_field = f_gpr;
4299 va_list_fpr_counter_field = f_fpr;
4301 DECL_FIELD_CONTEXT (f_gpr) = record;
4302 DECL_FIELD_CONTEXT (f_fpr) = record;
4303 DECL_FIELD_CONTEXT (f_ovf) = record;
4304 DECL_FIELD_CONTEXT (f_sav) = record;
4306 TREE_CHAIN (record) = type_decl;
4307 TYPE_NAME (record) = type_decl;
4308 TYPE_FIELDS (record) = f_gpr;
4309 TREE_CHAIN (f_gpr) = f_fpr;
4310 TREE_CHAIN (f_fpr) = f_ovf;
4311 TREE_CHAIN (f_ovf) = f_sav;
4313 layout_type (record);
4315 /* The correct type is an array type of one element. */
4316 return build_array_type (record, build_index_type (size_zero_node));
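/* Editor's illustrative note: the record laid out above corresponds to
   the familiar x86-64 ABI declaration (sketch only, not compiled here):

       typedef struct __va_list_tag {
         unsigned int gp_offset;
         unsigned int fp_offset;
         void *overflow_arg_area;
         void *reg_save_area;
       } __builtin_va_list[1];
*/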
4319 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
4322 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
4332 if (! cfun->va_list_gpr_size && ! cfun->va_list_fpr_size)
4335 /* Indicate that we need to allocate space on the stack for the varargs save area. */
4336 ix86_save_varrargs_registers = 1;
4337 cfun->stack_alignment_needed = 128;
4339 save_area = frame_pointer_rtx;
4340 set = get_varargs_alias_set ();
4342 for (i = cum->regno;
4344 && i < cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
4347 mem = gen_rtx_MEM (Pmode,
4348 plus_constant (save_area, i * UNITS_PER_WORD));
4349 MEM_NOTRAP_P (mem) = 1;
4350 set_mem_alias_set (mem, set);
4351 emit_move_insn (mem, gen_rtx_REG (Pmode,
4352 x86_64_int_parameter_registers[i]));
4355 if (cum->sse_nregs && cfun->va_list_fpr_size)
4357 /* Now emit code to save SSE registers. The AX parameter contains the number
4358 of SSE parameter registers used to call this function. We use the
4359 sse_prologue_save insn template, which produces a computed jump across
4360 the SSE saves. We need some preparation work to get this working. */
4362 label = gen_label_rtx ();
4363 label_ref = gen_rtx_LABEL_REF (Pmode, label);
4365 /* Compute address to jump to :
4366 label - 5*eax + nnamed_sse_arguments*5 */
4367 tmp_reg = gen_reg_rtx (Pmode);
4368 nsse_reg = gen_reg_rtx (Pmode);
4369 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
4370 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
4371 gen_rtx_MULT (Pmode, nsse_reg,
4376 gen_rtx_CONST (DImode,
4377 gen_rtx_PLUS (DImode,
4379 GEN_INT (cum->sse_regno * 4))));
4381 emit_move_insn (nsse_reg, label_ref);
4382 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
4384 /* Compute the address of the memory block we save into. We always use a
4385 pointer pointing 127 bytes after the first byte to store - this is needed
4386 to keep the instruction size limited to 4 bytes. */
4387 tmp_reg = gen_reg_rtx (Pmode);
4388 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
4389 plus_constant (save_area,
4390 8 * REGPARM_MAX + 127)));
4391 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
4392 MEM_NOTRAP_P (mem) = 1;
4393 set_mem_alias_set (mem, set);
4394 set_mem_align (mem, BITS_PER_WORD);
4396 /* And finally do the dirty job! */
4397 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
4398 GEN_INT (cum->sse_regno), label));
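/* Editor's illustrative note: the register save area written above is
   laid out as REGPARM_MAX 8-byte slots for the integer parameter
   registers followed by SSE_REGPARM_MAX 16-byte slots for the SSE
   registers:

       offset 0                   first integer parameter register
       ...
       offset 8*(REGPARM_MAX-1)   last integer parameter register
       offset 8*REGPARM_MAX       first SSE parameter register

   which is why the SSE block is addressed relative to 8 * REGPARM_MAX
   above (plus the 127-byte bias explained there).  */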
4403 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4404 tree type, int *pretend_size ATTRIBUTE_UNUSED,
4407 CUMULATIVE_ARGS next_cum;
4411 /* This argument doesn't appear to be used anymore, which is good,
4412 because the old code here didn't suppress rtl generation. */
4413 gcc_assert (!no_rtl);
4418 fntype = TREE_TYPE (current_function_decl);
4419 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
4420 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
4421 != void_type_node));
4423 /* For varargs, we do not want to skip the dummy va_dcl argument.
4424 For stdargs, we do want to skip the last named argument. */
4427 function_arg_advance (&next_cum, mode, type, 1);
4429 setup_incoming_varargs_64 (&next_cum);
4432 /* Implement va_start. */
4435 ix86_va_start (tree valist, rtx nextarg)
4437 HOST_WIDE_INT words, n_gpr, n_fpr;
4438 tree f_gpr, f_fpr, f_ovf, f_sav;
4439 tree gpr, fpr, ovf, sav, t;
4442 /* Only the 64-bit target needs something special. */
4445 std_expand_builtin_va_start (valist, nextarg);
4449 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4450 f_fpr = TREE_CHAIN (f_gpr);
4451 f_ovf = TREE_CHAIN (f_fpr);
4452 f_sav = TREE_CHAIN (f_ovf);
4454 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
4455 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
4456 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
4457 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
4458 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
4460 /* Count the number of gp and fp argument registers used. */
4461 words = current_function_args_info.words;
4462 n_gpr = current_function_args_info.regno;
4463 n_fpr = current_function_args_info.sse_regno;
4465 if (cfun->va_list_gpr_size)
4467 type = TREE_TYPE (gpr);
4468 t = build2 (GIMPLE_MODIFY_STMT, type, gpr,
4469 build_int_cst (type, n_gpr * 8));
4470 TREE_SIDE_EFFECTS (t) = 1;
4471 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4474 if (cfun->va_list_fpr_size)
4476 type = TREE_TYPE (fpr);
4477 t = build2 (GIMPLE_MODIFY_STMT, type, fpr,
4478 build_int_cst (type, n_fpr * 16 + 8*REGPARM_MAX));
4479 TREE_SIDE_EFFECTS (t) = 1;
4480 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4483 /* Find the overflow area. */
4484 type = TREE_TYPE (ovf);
4485 t = make_tree (type, virtual_incoming_args_rtx);
4487 t = build2 (PLUS_EXPR, type, t,
4488 build_int_cst (type, words * UNITS_PER_WORD));
4489 t = build2 (GIMPLE_MODIFY_STMT, type, ovf, t);
4490 TREE_SIDE_EFFECTS (t) = 1;
4491 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4493 if (cfun->va_list_gpr_size || cfun->va_list_fpr_size)
4495 /* Find the register save area.
4496 The function prologue saves it right above the stack frame. */
4497 type = TREE_TYPE (sav);
4498 t = make_tree (type, frame_pointer_rtx);
4499 t = build2 (GIMPLE_MODIFY_STMT, type, sav, t);
4500 TREE_SIDE_EFFECTS (t) = 1;
4501 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
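/* Editor's illustrative note: for a variadic function such as

       int f (const char *fmt, ...);

   one integer register is consumed by the named argument, so the code
   above initializes gp_offset to 1 * 8 and fp_offset to
   8 * REGPARM_MAX + 0 * 16, points overflow_arg_area just past any named
   stack arguments (words * UNITS_PER_WORD bytes in), and sets
   reg_save_area from the frame pointer.  */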
4505 /* Implement va_arg. */
4508 ix86_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p)
4510 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
4511 tree f_gpr, f_fpr, f_ovf, f_sav;
4512 tree gpr, fpr, ovf, sav, t;
4514 tree lab_false, lab_over = NULL_TREE;
4519 enum machine_mode nat_mode;
4521 /* Only the 64-bit target needs something special. */
4523 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
4525 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4526 f_fpr = TREE_CHAIN (f_gpr);
4527 f_ovf = TREE_CHAIN (f_fpr);
4528 f_sav = TREE_CHAIN (f_ovf);
4530 valist = build_va_arg_indirect_ref (valist);
4531 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
4532 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
4533 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
4534 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
4536 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
4538 type = build_pointer_type (type);
4539 size = int_size_in_bytes (type);
4540 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4542 nat_mode = type_natural_mode (type);
4543 container = construct_container (nat_mode, TYPE_MODE (type), type, 0,
4544 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
4546 /* Pull the value out of the saved registers. */
4548 addr = create_tmp_var (ptr_type_node, "addr");
4549 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
4553 int needed_intregs, needed_sseregs;
4555 tree int_addr, sse_addr;
4557 lab_false = create_artificial_label ();
4558 lab_over = create_artificial_label ();
4560 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
4562 need_temp = (!REG_P (container)
4563 && ((needed_intregs && TYPE_ALIGN (type) > 64)
4564 || TYPE_ALIGN (type) > 128));
4566 /* If we are passing a structure, verify that it forms a consecutive block
4567 in the register save area. If not, we need to do moves. */
4568 if (!need_temp && !REG_P (container))
4570 /* Verify that all registers are strictly consecutive. */
4571 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
4575 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
4577 rtx slot = XVECEXP (container, 0, i);
4578 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
4579 || INTVAL (XEXP (slot, 1)) != i * 16)
4587 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
4589 rtx slot = XVECEXP (container, 0, i);
4590 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
4591 || INTVAL (XEXP (slot, 1)) != i * 8)
4603 int_addr = create_tmp_var (ptr_type_node, "int_addr");
4604 DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
4605 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
4606 DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();
4609 /* First ensure that we fit completely in registers. */
4612 t = build_int_cst (TREE_TYPE (gpr),
4613 (REGPARM_MAX - needed_intregs + 1) * 8);
4614 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
4615 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
4616 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
4617 gimplify_and_add (t, pre_p);
4621 t = build_int_cst (TREE_TYPE (fpr),
4622 (SSE_REGPARM_MAX - needed_sseregs + 1) * 16
4624 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
4625 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
4626 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
4627 gimplify_and_add (t, pre_p);
4630 /* Compute index to start of area used for integer regs. */
4633 /* int_addr = gpr + sav; */
4634 t = fold_convert (ptr_type_node, gpr);
4635 t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
4636 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, int_addr, t);
4637 gimplify_and_add (t, pre_p);
4641 /* sse_addr = fpr + sav; */
4642 t = fold_convert (ptr_type_node, fpr);
4643 t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
4644 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, sse_addr, t);
4645 gimplify_and_add (t, pre_p);
4650 tree temp = create_tmp_var (type, "va_arg_tmp");
4653 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
4654 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, addr, t);
4655 gimplify_and_add (t, pre_p);
4657 for (i = 0; i < XVECLEN (container, 0); i++)
4659 rtx slot = XVECEXP (container, 0, i);
4660 rtx reg = XEXP (slot, 0);
4661 enum machine_mode mode = GET_MODE (reg);
4662 tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
4663 tree addr_type = build_pointer_type (piece_type);
4666 tree dest_addr, dest;
4668 if (SSE_REGNO_P (REGNO (reg)))
4670 src_addr = sse_addr;
4671 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
4675 src_addr = int_addr;
4676 src_offset = REGNO (reg) * 8;
4678 src_addr = fold_convert (addr_type, src_addr);
4679 src_addr = fold_build2 (PLUS_EXPR, addr_type, src_addr,
4680 size_int (src_offset));
4681 src = build_va_arg_indirect_ref (src_addr);
4683 dest_addr = fold_convert (addr_type, addr);
4684 dest_addr = fold_build2 (PLUS_EXPR, addr_type, dest_addr,
4685 size_int (INTVAL (XEXP (slot, 1))));
4686 dest = build_va_arg_indirect_ref (dest_addr);
4688 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, dest, src);
4689 gimplify_and_add (t, pre_p);
4695 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
4696 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
4697 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (gpr), gpr, t);
4698 gimplify_and_add (t, pre_p);
4702 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
4703 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
4704 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (fpr), fpr, t);
4705 gimplify_and_add (t, pre_p);
4708 t = build1 (GOTO_EXPR, void_type_node, lab_over);
4709 gimplify_and_add (t, pre_p);
4711 t = build1 (LABEL_EXPR, void_type_node, lab_false);
4712 append_to_statement_list (t, pre_p);
4715 /* ... otherwise out of the overflow area. */
4717 /* Care for on-stack alignment if needed. */
4718 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64
4719 || integer_zerop (TYPE_SIZE (type)))
4723 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
4724 t = build2 (PLUS_EXPR, TREE_TYPE (ovf), ovf,
4725 build_int_cst (TREE_TYPE (ovf), align - 1));
4726 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
4727 build_int_cst (TREE_TYPE (t), -align));
4729 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
4731 t2 = build2 (GIMPLE_MODIFY_STMT, void_type_node, addr, t);
4732 gimplify_and_add (t2, pre_p);
4734 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
4735 build_int_cst (TREE_TYPE (t), rsize * UNITS_PER_WORD));
4736 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (ovf), ovf, t);
4737 gimplify_and_add (t, pre_p);
4741 t = build1 (LABEL_EXPR, void_type_node, lab_over);
4742 append_to_statement_list (t, pre_p);
4745 ptrtype = build_pointer_type (type);
4746 addr = fold_convert (ptrtype, addr);
4749 addr = build_va_arg_indirect_ref (addr);
4750 return build_va_arg_indirect_ref (addr);
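/* Editor's illustrative note: a C-level sketch of what the
   gimplification above amounts to for one 8-byte integer argument.  The
   function name and the hard-coded 6 * 8 byte GP area are illustrative
   assumptions, not part of GCC.  */
#if 0
static void *
va_arg_gp_sketch (struct __va_list_tag *ap)
{
  void *p;
  if (ap->gp_offset < 6 * 8)
    {
      /* The argument still fits in the register save area.  */
      p = (char *) ap->reg_save_area + ap->gp_offset;
      ap->gp_offset += 8;
    }
  else
    {
      /* Otherwise take it from the overflow (stack) area.  */
      p = ap->overflow_arg_area;
      ap->overflow_arg_area = (char *) p + 8;
    }
  return p;
}
#endif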
4753 /* Return nonzero if OPNUM's MEM should be matched
4754 in movabs* patterns. */
4757 ix86_check_movabs (rtx insn, int opnum)
4761 set = PATTERN (insn);
4762 if (GET_CODE (set) == PARALLEL)
4763 set = XVECEXP (set, 0, 0);
4764 gcc_assert (GET_CODE (set) == SET);
4765 mem = XEXP (set, opnum);
4766 while (GET_CODE (mem) == SUBREG)
4767 mem = SUBREG_REG (mem);
4768 gcc_assert (MEM_P (mem));
4769 return (volatile_ok || !MEM_VOLATILE_P (mem));
4772 /* Initialize the table of extra 80387 mathematical constants. */
4775 init_ext_80387_constants (void)
4777 static const char * cst[5] =
4779 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
4780 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
4781 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
4782 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
4783 "3.1415926535897932385128089594061862044", /* 4: fldpi */
4787 for (i = 0; i < 5; i++)
4789 real_from_string (&ext_80387_constants_table[i], cst[i]);
4790 /* Ensure each constant is rounded to XFmode precision. */
4791 real_convert (&ext_80387_constants_table[i],
4792 XFmode, &ext_80387_constants_table[i]);
4795 ext_80387_constants_init = 1;
4798 /* Return true if the constant is something that can be loaded with
4799 a special instruction. */
4802 standard_80387_constant_p (rtx x)
4806 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
4809 if (x == CONST0_RTX (GET_MODE (x)))
4811 if (x == CONST1_RTX (GET_MODE (x)))
4814 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
4816 /* For XFmode constants, try to find a special 80387 instruction when
4817 optimizing for size or on those CPUs that benefit from them. */
4818 if (GET_MODE (x) == XFmode
4819 && (optimize_size || TARGET_EXT_80387_CONSTANTS))
4823 if (! ext_80387_constants_init)
4824 init_ext_80387_constants ();
4826 for (i = 0; i < 5; i++)
4827 if (real_identical (&r, &ext_80387_constants_table[i]))
4831 /* A load of the constant -0.0 or -1.0 will be split into an
4832 fldz;fchs or fld1;fchs sequence. */
4833 if (real_isnegzero (&r))
4835 if (real_identical (&r, &dconstm1))
4841 /* Return the opcode of the special instruction to be used to load the constant X. */
4845 standard_80387_constant_opcode (rtx x)
4847 switch (standard_80387_constant_p (x))
4871 /* Return the CONST_DOUBLE representing the 80387 constant that is
4872 loaded by the specified special instruction. The argument IDX
4873 matches the return value from standard_80387_constant_p. */
4876 standard_80387_constant_rtx (int idx)
4880 if (! ext_80387_constants_init)
4881 init_ext_80387_constants ();
4897 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
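/* Editor's illustrative note: putting the routines above together, a
   CONST_DOUBLE equal to pi in XFmode is recognized by
   standard_80387_constant_p (via the table initialized in
   init_ext_80387_constants) and can then be materialized as

       fldpi

   instead of a memory load, just as 0.0 and 1.0 map to fldz and fld1 and
   their negations to the fldz;fchs / fld1;fchs splits mentioned above.  */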
4901 /* Return 1 if mode is a valid mode for SSE. */
4903 standard_sse_mode_p (enum machine_mode mode)
4920 /* Return 1 if X is an FP constant we can load into an SSE register without using memory. */
4923 standard_sse_constant_p (rtx x)
4925 enum machine_mode mode = GET_MODE (x);
4927 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
4929 if (vector_all_ones_operand (x, mode)
4930 && standard_sse_mode_p (mode))
4931 return TARGET_SSE2 ? 2 : -1;
4936 /* Return the opcode of the special instruction to be used to load the constant X. */
4940 standard_sse_constant_opcode (rtx insn, rtx x)
4942 switch (standard_sse_constant_p (x))
4945 if (get_attr_mode (insn) == MODE_V4SF)
4946 return "xorps\t%0, %0";
4947 else if (get_attr_mode (insn) == MODE_V2DF)
4948 return "xorpd\t%0, %0";
4950 return "pxor\t%0, %0";
4952 return "pcmpeqd\t%0, %0";
4957 /* Returns 1 if OP contains a symbol reference */
4960 symbolic_reference_mentioned_p (rtx op)
4965 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
4968 fmt = GET_RTX_FORMAT (GET_CODE (op));
4969 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
4975 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
4976 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
4980 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
4987 /* Return 1 if it is appropriate to emit `ret' instructions in the
4988 body of a function. Do this only if the epilogue is simple, needing a
4989 couple of insns. Prior to reloading, we can't tell how many registers
4990 must be saved, so return 0 then. Return 0 if there is no frame
4991 marker to de-allocate. */
4994 ix86_can_use_return_insn_p (void)
4996 struct ix86_frame frame;
4998 if (! reload_completed || frame_pointer_needed)
5001 /* Don't allow more than 32768 bytes of arguments to be popped, since
5002 that's all we can do with one return instruction. */
5003 if (current_function_pops_args
5004 && current_function_args_size >= 32768)
5007 ix86_compute_frame_layout (&frame);
5008 return frame.to_allocate == 0 && frame.nregs == 0;
5011 /* Value should be nonzero if functions must have frame pointers.
5012 Zero means the frame pointer need not be set up (and parms may
5013 be accessed via the stack pointer) in functions that seem suitable. */
5016 ix86_frame_pointer_required (void)
5018 /* If we accessed previous frames, then the generated code expects
5019 to be able to access the saved ebp value in our frame. */
5020 if (cfun->machine->accesses_prev_frame)
5023 /* Several x86 OSes need a frame pointer for other reasons,
5024 usually pertaining to setjmp. */
5025 if (SUBTARGET_FRAME_POINTER_REQUIRED)
5028 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
5029 the frame pointer by default. Turn it back on now if we've not
5030 got a leaf function. */
5031 if (TARGET_OMIT_LEAF_FRAME_POINTER
5032 && (!current_function_is_leaf
5033 || ix86_current_function_calls_tls_descriptor))
5036 if (current_function_profile)
5042 /* Record that the current function accesses previous call frames. */
5045 ix86_setup_frame_addresses (void)
5047 cfun->machine->accesses_prev_frame = 1;
5050 #if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
5051 # define USE_HIDDEN_LINKONCE 1
5053 # define USE_HIDDEN_LINKONCE 0
5056 static int pic_labels_used;
5058 /* Fills in the label name that should be used for a pc thunk for
5059 the given register. */
5062 get_pc_thunk_name (char name[32], unsigned int regno)
5064 gcc_assert (!TARGET_64BIT);
5066 if (USE_HIDDEN_LINKONCE)
5067 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
5069 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
5073 /* This function generates code for -fpic that loads %ebx with
5074 the return address of the caller and then returns. */
5077 ix86_file_end (void)
5082 for (regno = 0; regno < 8; ++regno)
5086 if (! ((pic_labels_used >> regno) & 1))
5089 get_pc_thunk_name (name, regno);
5094 switch_to_section (darwin_sections[text_coal_section]);
5095 fputs ("\t.weak_definition\t", asm_out_file);
5096 assemble_name (asm_out_file, name);
5097 fputs ("\n\t.private_extern\t", asm_out_file);
5098 assemble_name (asm_out_file, name);
5099 fputs ("\n", asm_out_file);
5100 ASM_OUTPUT_LABEL (asm_out_file, name);
5104 if (USE_HIDDEN_LINKONCE)
5108 decl = build_decl (FUNCTION_DECL, get_identifier (name),
5110 TREE_PUBLIC (decl) = 1;
5111 TREE_STATIC (decl) = 1;
5112 DECL_ONE_ONLY (decl) = 1;
5114 (*targetm.asm_out.unique_section) (decl, 0);
5115 switch_to_section (get_named_section (decl, NULL, 0));
5117 (*targetm.asm_out.globalize_label) (asm_out_file, name);
5118 fputs ("\t.hidden\t", asm_out_file);
5119 assemble_name (asm_out_file, name);
5120 fputc ('\n', asm_out_file);
5121 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
5125 switch_to_section (text_section);
5126 ASM_OUTPUT_LABEL (asm_out_file, name);
5129 xops[0] = gen_rtx_REG (SImode, regno);
5130 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
5131 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
5132 output_asm_insn ("ret", xops);
5135 if (NEED_INDICATE_EXEC_STACK)
5136 file_end_indicate_exec_stack ();
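/* Editor's illustrative note: for %ebx the loop above emits a thunk of
   the form

       __i686.get_pc_thunk.bx:
               movl (%esp), %ebx
               ret

   i.e. it copies its own return address - the caller's pc - into the
   destination register.  */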
5139 /* Emit code for the SET_GOT patterns. */
5142 output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
5148 if (TARGET_VXWORKS_RTP && flag_pic)
5150 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
5151 xops[2] = gen_rtx_MEM (Pmode,
5152 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
5153 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
5155 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
5156 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
5157 an unadorned address. */
5158 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
5159 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
5160 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
5164 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
5166 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
5168 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
5171 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
5173 output_asm_insn ("call\t%a2", xops);
5176 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
5177 is what will be referenced by the Mach-O PIC subsystem. */
5179 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
5182 (*targetm.asm_out.internal_label) (asm_out_file, "L",
5183 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
5186 output_asm_insn ("pop{l}\t%0", xops);
5191 get_pc_thunk_name (name, REGNO (dest));
5192 pic_labels_used |= 1 << REGNO (dest);
5194 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
5195 xops[2] = gen_rtx_MEM (QImode, xops[2]);
5196 output_asm_insn ("call\t%X2", xops);
5197 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
5198 is what will be referenced by the Mach-O PIC subsystem. */
5201 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
5203 targetm.asm_out.internal_label (asm_out_file, "L",
5204 CODE_LABEL_NUMBER (label));
5211 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
5212 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
5214 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
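/* Editor's illustrative note: without deep branch prediction the
   routine above prints the classic get-pc sequence

       call .L2
   .L2: popl %ebx
       addl $_GLOBAL_OFFSET_TABLE_+[.-.L2], %ebx

   while with TARGET_DEEP_BRANCH_PREDICTION the call/pop pair is replaced
   by a call to the matching __i686.get_pc_thunk thunk, which keeps the
   return-address predictor stack balanced.  */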
5219 /* Generate a "push" pattern for input ARG. */
5224 return gen_rtx_SET (VOIDmode,
5226 gen_rtx_PRE_DEC (Pmode,
5227 stack_pointer_rtx)),
5231 /* Return >= 0 if there is an unused call-clobbered register available
5232 for the entire function. */
5235 ix86_select_alt_pic_regnum (void)
5237 if (current_function_is_leaf && !current_function_profile
5238 && !ix86_current_function_calls_tls_descriptor)
5241 for (i = 2; i >= 0; --i)
5242 if (!regs_ever_live[i])
5246 return INVALID_REGNUM;
5249 /* Return 1 if we need to save REGNO. */
5251 ix86_save_reg (unsigned int regno, int maybe_eh_return)
5253 if (pic_offset_table_rtx
5254 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
5255 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
5256 || current_function_profile
5257 || current_function_calls_eh_return
5258 || current_function_uses_const_pool))
5260 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
5265 if (current_function_calls_eh_return && maybe_eh_return)
5270 unsigned test = EH_RETURN_DATA_REGNO (i);
5271 if (test == INVALID_REGNUM)
5278 if (cfun->machine->force_align_arg_pointer
5279 && regno == REGNO (cfun->machine->force_align_arg_pointer))
5282 return (regs_ever_live[regno]
5283 && !call_used_regs[regno]
5284 && !fixed_regs[regno]
5285 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
5288 /* Return the number of registers to be saved on the stack. */
5291 ix86_nsaved_regs (void)
5296 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
5297 if (ix86_save_reg (regno, true))
5302 /* Return the offset between two registers, one to be eliminated, and the other
5303 its replacement, at the start of a routine. */
5306 ix86_initial_elimination_offset (int from, int to)
5308 struct ix86_frame frame;
5309 ix86_compute_frame_layout (&frame);
5311 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
5312 return frame.hard_frame_pointer_offset;
5313 else if (from == FRAME_POINTER_REGNUM
5314 && to == HARD_FRAME_POINTER_REGNUM)
5315 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
5318 gcc_assert (to == STACK_POINTER_REGNUM);
5320 if (from == ARG_POINTER_REGNUM)
5321 return frame.stack_pointer_offset;
5323 gcc_assert (from == FRAME_POINTER_REGNUM);
5324 return frame.stack_pointer_offset - frame.frame_pointer_offset;
5328 /* Fill the ix86_frame structure describing the frame of the currently computed function. */
5331 ix86_compute_frame_layout (struct ix86_frame *frame)
5333 HOST_WIDE_INT total_size;
5334 unsigned int stack_alignment_needed;
5335 HOST_WIDE_INT offset;
5336 unsigned int preferred_alignment;
5337 HOST_WIDE_INT size = get_frame_size ();
5339 frame->nregs = ix86_nsaved_regs ();
5342 stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
5343 preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
5345 /* During reload iteration the number of registers saved can change.
5346 Recompute the value as needed. Do not recompute when the number of registers
5347 didn't change, as reload makes multiple calls to this function and does not
5348 expect the decision to change within a single iteration. */
5350 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
5352 int count = frame->nregs;
5354 cfun->machine->use_fast_prologue_epilogue_nregs = count;
5355 /* The fast prologue uses move instead of push to save registers. This
5356 is significantly longer, but also executes faster, as modern hardware
5357 can execute the moves in parallel, but can't do that for push/pop.
5359 Be careful about choosing which prologue to emit: when the function takes
5360 many instructions to execute, we may want the slow version, likewise when
5361 the function is known to be outside a hot spot (this is known with
5362 feedback only). Weight the size of the function by the number of registers
5363 to save, as it is cheap to use one or two push instructions but very
5364 slow to use many of them. */
5366 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
5367 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
5368 || (flag_branch_probabilities
5369 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
5370 cfun->machine->use_fast_prologue_epilogue = false;
5372 cfun->machine->use_fast_prologue_epilogue
5373 = !expensive_function_p (count);
5375 if (TARGET_PROLOGUE_USING_MOVE
5376 && cfun->machine->use_fast_prologue_epilogue)
5377 frame->save_regs_using_mov = true;
5379 frame->save_regs_using_mov = false;
5382 /* Skip return address and saved base pointer. */
5383 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
5385 frame->hard_frame_pointer_offset = offset;
5387 /* Do some sanity checking of stack_alignment_needed and
5388 preferred_alignment, since the i386 port is the only one using those
5389 features, which may break easily. */
5391 gcc_assert (!size || stack_alignment_needed);
5392 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
5393 gcc_assert (preferred_alignment <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
5394 gcc_assert (stack_alignment_needed
5395 <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
5397 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
5398 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
5400 /* Register save area */
5401 offset += frame->nregs * UNITS_PER_WORD;
5404 if (ix86_save_varrargs_registers)
5406 offset += X86_64_VARARGS_SIZE;
5407 frame->va_arg_size = X86_64_VARARGS_SIZE;
5410 frame->va_arg_size = 0;
5412 /* Align start of frame for local function. */
5413 frame->padding1 = ((offset + stack_alignment_needed - 1)
5414 & -stack_alignment_needed) - offset;
5416 offset += frame->padding1;
5418 /* Frame pointer points here. */
5419 frame->frame_pointer_offset = offset;
5423 /* Add the outgoing arguments area. This can be skipped if we eliminated
5424 all the function calls as dead code.
5425 Skipping is however impossible when the function calls alloca: the alloca
5426 expander assumes that the last current_function_outgoing_args_size
5427 bytes of the stack frame are unused. */
5428 if (ACCUMULATE_OUTGOING_ARGS
5429 && (!current_function_is_leaf || current_function_calls_alloca
5430 || ix86_current_function_calls_tls_descriptor))
5432 offset += current_function_outgoing_args_size;
5433 frame->outgoing_arguments_size = current_function_outgoing_args_size;
5436 frame->outgoing_arguments_size = 0;
5438 /* Align the stack boundary. Only needed if we're calling another function or using alloca. */
5440 if (!current_function_is_leaf || current_function_calls_alloca
5441 || ix86_current_function_calls_tls_descriptor)
5442 frame->padding2 = ((offset + preferred_alignment - 1)
5443 & -preferred_alignment) - offset;
5445 frame->padding2 = 0;
5447 offset += frame->padding2;
5449 /* We've reached end of stack frame. */
5450 frame->stack_pointer_offset = offset;
5452 /* The size the prologue needs to allocate. */
5453 frame->to_allocate =
5454 (size + frame->padding1 + frame->padding2
5455 + frame->outgoing_arguments_size + frame->va_arg_size);
5457 if ((!frame->to_allocate && frame->nregs <= 1)
5458 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
5459 frame->save_regs_using_mov = false;
5461 if (TARGET_RED_ZONE && current_function_sp_is_unchanging
5462 && current_function_is_leaf
5463 && !ix86_current_function_calls_tls_descriptor)
5465 frame->red_zone_size = frame->to_allocate;
5466 if (frame->save_regs_using_mov)
5467 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
5468 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
5469 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
5472 frame->red_zone_size = 0;
5473 frame->to_allocate -= frame->red_zone_size;
5474 frame->stack_pointer_offset -= frame->red_zone_size;
5476 fprintf (stderr, "\n");
5477 fprintf (stderr, "nregs: %ld\n", (long)frame->nregs);
5478 fprintf (stderr, "size: %ld\n", (long)size);
5479 fprintf (stderr, "alignment1: %ld\n", (long)stack_alignment_needed);
5480 fprintf (stderr, "padding1: %ld\n", (long)frame->padding1);
5481 fprintf (stderr, "va_arg: %ld\n", (long)frame->va_arg_size);
5482 fprintf (stderr, "padding2: %ld\n", (long)frame->padding2);
5483 fprintf (stderr, "to_allocate: %ld\n", (long)frame->to_allocate);
5484 fprintf (stderr, "red_zone_size: %ld\n", (long)frame->red_zone_size);
5485 fprintf (stderr, "frame_pointer_offset: %ld\n", (long)frame->frame_pointer_offset);
5486 fprintf (stderr, "hard_frame_pointer_offset: %ld\n",
5487 (long)frame->hard_frame_pointer_offset);
5488 fprintf (stderr, "stack_pointer_offset: %ld\n", (long)frame->stack_pointer_offset);
5489 fprintf (stderr, "current_function_is_leaf: %ld\n", (long)current_function_is_leaf);
5490 fprintf (stderr, "current_function_calls_alloca: %ld\n", (long)current_function_calls_alloca);
5491 fprintf (stderr, "x86_current_function_calls_tls_descriptor: %ld\n", (long)ix86_current_function_calls_tls_descriptor);
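/* Editor's illustrative note: the offsets computed above describe a
   frame of the shape

       return address                       <- CFA
       saved frame pointer (if frame_pointer_needed)
                                            <- hard_frame_pointer_offset
       saved registers (nregs words)
       va-arg register save area
       padding1
                                            <- frame_pointer_offset
       local variables (size)
       outgoing argument block
       padding2
                                            <- stack_pointer_offset

   where to_allocate = size + padding1 + padding2 + va_arg_size +
   outgoing_arguments_size, less whatever was moved into the red zone on
   x86-64.  */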
5495 /* Emit code to save registers in the prologue. */
5498 ix86_emit_save_regs (void)
5503 for (regno = FIRST_PSEUDO_REGISTER; regno-- > 0; )
5504 if (ix86_save_reg (regno, true))
5506 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
5507 RTX_FRAME_RELATED_P (insn) = 1;
5511 /* Emit code to save registers using MOV insns. The first register
5512 is stored at POINTER + OFFSET. */
5514 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
5519 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5520 if (ix86_save_reg (regno, true))
5522 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
5524 gen_rtx_REG (Pmode, regno));
5525 RTX_FRAME_RELATED_P (insn) = 1;
5526 offset += UNITS_PER_WORD;
5530 /* Expand prologue or epilogue stack adjustment.
5531 The pattern exists to put a dependency on all ebp-based memory accesses.
5532 STYLE should be negative if instructions should be marked as frame related,
5533 zero if the %r11 register is live and cannot be freely used, and positive otherwise. */
5537 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
5542 insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
5543 else if (x86_64_immediate_operand (offset, DImode))
5544 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
5548 /* r11 is used by indirect sibcall return as well, set before the
5549 epilogue and used after the epilogue. ATM indirect sibcall
5550 shouldn't be used together with huge frame sizes in one
5551 function because of the frame_size check in sibcall.c. */
5553 r11 = gen_rtx_REG (DImode, R11_REG);
5554 insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
5556 RTX_FRAME_RELATED_P (insn) = 1;
5557 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
5561 RTX_FRAME_RELATED_P (insn) = 1;
5564 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
5567 ix86_internal_arg_pointer (void)
5569 bool has_force_align_arg_pointer =
5570 (0 != lookup_attribute (ix86_force_align_arg_pointer_string,
5571 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))));
5572 if ((FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN
5573 && DECL_NAME (current_function_decl)
5574 && MAIN_NAME_P (DECL_NAME (current_function_decl))
5575 && DECL_FILE_SCOPE_P (current_function_decl))
5576 || ix86_force_align_arg_pointer
5577 || has_force_align_arg_pointer)
5579 /* Nested functions can't realign the stack due to a register conflict. */
5581 if (DECL_CONTEXT (current_function_decl)
5582 && TREE_CODE (DECL_CONTEXT (current_function_decl)) == FUNCTION_DECL)
5584 if (ix86_force_align_arg_pointer)
5585 warning (0, "-mstackrealign ignored for nested functions");
5586 if (has_force_align_arg_pointer)
5587 error ("%s not supported for nested functions",
5588 ix86_force_align_arg_pointer_string);
5589 return virtual_incoming_args_rtx;
5591 cfun->machine->force_align_arg_pointer = gen_rtx_REG (Pmode, 2);
5592 return copy_to_reg (cfun->machine->force_align_arg_pointer);
5595 return virtual_incoming_args_rtx;
5598 /* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook.
5599 This is called from dwarf2out.c to emit call frame instructions
5600 for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs. */
5602 ix86_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
5604 rtx unspec = SET_SRC (pattern);
5605 gcc_assert (GET_CODE (unspec) == UNSPEC);
5609 case UNSPEC_REG_SAVE:
5610 dwarf2out_reg_save_reg (label, XVECEXP (unspec, 0, 0),
5611 SET_DEST (pattern));
5613 case UNSPEC_DEF_CFA:
5614 dwarf2out_def_cfa (label, REGNO (SET_DEST (pattern)),
5615 INTVAL (XVECEXP (unspec, 0, 0)));
5622 /* Expand the prologue into a bunch of separate insns. */
5625 ix86_expand_prologue (void)
5629 struct ix86_frame frame;
5630 HOST_WIDE_INT allocate;
5632 ix86_compute_frame_layout (&frame);
5634 if (cfun->machine->force_align_arg_pointer)
5638 /* Grab the argument pointer. */
5639 x = plus_constant (stack_pointer_rtx, 4);
5640 y = cfun->machine->force_align_arg_pointer;
5641 insn = emit_insn (gen_rtx_SET (VOIDmode, y, x));
5642 RTX_FRAME_RELATED_P (insn) = 1;
5644 /* The unwind info consists of two parts: install the fafp as the cfa,
5645 and record the fafp as the "save register" of the stack pointer.
5646 The latter is there so that the unwinder can see where it should
5647 restore the stack pointer across the `and' insn below that aligns the stack. */
5648 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx), UNSPEC_DEF_CFA);
5649 x = gen_rtx_SET (VOIDmode, y, x);
5650 RTX_FRAME_RELATED_P (x) = 1;
5651 y = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, stack_pointer_rtx),
5653 y = gen_rtx_SET (VOIDmode, cfun->machine->force_align_arg_pointer, y);
5654 RTX_FRAME_RELATED_P (y) = 1;
5655 x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y));
5656 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
5657 REG_NOTES (insn) = x;
5659 /* Align the stack. */
5660 emit_insn (gen_andsi3 (stack_pointer_rtx, stack_pointer_rtx,
5663 /* And here we cheat like madmen with the unwind info. We force the
5664 cfa register back to sp+4, which is exactly what it was at the
5665 start of the function. Re-pushing the return address results in
5666 the return at the same spot relative to the cfa, and thus is
5667 correct wrt the unwind info. */
5668 x = cfun->machine->force_align_arg_pointer;
5669 x = gen_frame_mem (Pmode, plus_constant (x, -4));
5670 insn = emit_insn (gen_push (x));
5671 RTX_FRAME_RELATED_P (insn) = 1;
5674 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, x), UNSPEC_DEF_CFA);
5675 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
5676 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
5677 REG_NOTES (insn) = x;
5680 /* Note: AT&T enter does NOT have reversed args. Enter is probably
5681 slower on all targets. Also sdb doesn't like it. */
5683 if (frame_pointer_needed)
5685 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
5686 RTX_FRAME_RELATED_P (insn) = 1;
5688 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
5689 RTX_FRAME_RELATED_P (insn) = 1;
5692 allocate = frame.to_allocate;
5694 if (!frame.save_regs_using_mov)
5695 ix86_emit_save_regs ();
5697 allocate += frame.nregs * UNITS_PER_WORD;
5699 /* When using the red zone we may start register saving before allocating
5700 the stack frame, saving one cycle of the prologue. */
5701 if (TARGET_RED_ZONE && frame.save_regs_using_mov)
5702 ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
5703 : stack_pointer_rtx,
5704 -frame.nregs * UNITS_PER_WORD);
5708 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
5709 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5710 GEN_INT (-allocate), -1);
5713 /* Only valid for Win32. */
5714 rtx eax = gen_rtx_REG (SImode, 0);
5715 bool eax_live = ix86_eax_live_at_start_p ();
5718 gcc_assert (!TARGET_64BIT);
5722 emit_insn (gen_push (eax));
5726 emit_move_insn (eax, GEN_INT (allocate));
5728 insn = emit_insn (gen_allocate_stack_worker (eax));
5729 RTX_FRAME_RELATED_P (insn) = 1;
5730 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
5731 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
5732 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
5733 t, REG_NOTES (insn));
5737 if (frame_pointer_needed)
5738 t = plus_constant (hard_frame_pointer_rtx,
5741 - frame.nregs * UNITS_PER_WORD);
5743 t = plus_constant (stack_pointer_rtx, allocate);
5744 emit_move_insn (eax, gen_rtx_MEM (SImode, t));
5748 if (frame.save_regs_using_mov && !TARGET_RED_ZONE)
5750 if (!frame_pointer_needed || !frame.to_allocate)
5751 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
5753 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
5754 -frame.nregs * UNITS_PER_WORD);
5757 pic_reg_used = false;
5758 if (pic_offset_table_rtx
5759 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
5760 || current_function_profile))
5762 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
5764 if (alt_pic_reg_used != INVALID_REGNUM)
5765 REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
5767 pic_reg_used = true;
5774 if (ix86_cmodel == CM_LARGE_PIC)
5776 rtx tmp_reg = gen_rtx_REG (DImode,
5777 FIRST_REX_INT_REG + 3 /* R11 */);
5778 rtx label = gen_label_rtx ();
5780 LABEL_PRESERVE_P (label) = 1;
5781 gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
5782 insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx, label));
5783 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
5784 insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
5785 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
5786 insn = emit_insn (gen_adddi3 (pic_offset_table_rtx,
5787 pic_offset_table_rtx, tmp_reg));
5790 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
5793 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
5795 /* Even with accurate pre-reload life analysis, we can wind up
5796 deleting all references to the pic register after reload.
5797 Consider if cross-jumping unifies two sides of a branch
5798 controlled by a comparison vs the only read from a global.
5799 In which case, allow the set_got to be deleted, though we're
5800 too late to do anything about the ebx save in the prologue. */
5801 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
5804 /* Prevent function calls from being scheduled before the call to mcount.
5805 In the pic_reg_used case, make sure that the got load isn't deleted. */
5806 if (current_function_profile)
5807 emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
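/* Editor's illustrative note: for an ordinary 32-bit function the insns
   emitted above boil down to the familiar

       pushl %ebp
       movl  %esp, %ebp
       pushl %ebx              # any call-saved registers in use
       subl  $N, %esp          # N == frame.to_allocate

   with the pushes replaced by moves past the sub when
   save_regs_using_mov is in effect.  */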
5810 /* Emit code to restore saved registers using MOV insns. First register
5811 is restored from POINTER + OFFSET. */
5813 ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
5814 int maybe_eh_return)
5817 rtx base_address = gen_rtx_MEM (Pmode, pointer);
5819 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5820 if (ix86_save_reg (regno, maybe_eh_return))
5822 /* Ensure that adjust_address won't be forced to produce a pointer
5823 out of the range allowed by the x86-64 instruction set. */
5824 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
5828 r11 = gen_rtx_REG (DImode, R11_REG);
5829 emit_move_insn (r11, GEN_INT (offset));
5830 emit_insn (gen_adddi3 (r11, r11, pointer));
5831 base_address = gen_rtx_MEM (Pmode, r11);
5834 emit_move_insn (gen_rtx_REG (Pmode, regno),
5835 adjust_address (base_address, Pmode, offset));
5836 offset += UNITS_PER_WORD;
5840 /* Restore function stack, frame, and registers. */
5843 ix86_expand_epilogue (int style)
5846 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
5847 struct ix86_frame frame;
5848 HOST_WIDE_INT offset;
5850 ix86_compute_frame_layout (&frame);
5852 /* Calculate start of saved registers relative to ebp. Special care
5853 must be taken for the normal return case of a function using
5854 eh_return: the eax and edx registers are marked as saved, but not
5855 restored along this path. */
5856 offset = frame.nregs;
5857 if (current_function_calls_eh_return && style != 2)
5859 offset *= -UNITS_PER_WORD;
5861 /* If we're only restoring one register and sp is not valid then
5862 use a move instruction to restore the register, since it's
5863 less work than reloading sp and popping the register.
5865 The default code results in a stack adjustment using an add/lea instruction,
5866 while this code results in a LEAVE instruction (or discrete equivalent),
5867 so it is profitable in some other cases as well. Especially when there
5868 are no registers to restore. We also use this code when TARGET_USE_LEAVE
5869 and there is exactly one register to pop. This heuristic may need some
5870 tuning in the future. */
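/* For illustration only (assuming a frame pointer, AT&T syntax), the
   "move" style epilogue chosen below looks roughly like
       mov  -N(%ebp), %reg    ; restore each saved register
       leave                  ; i.e. mov %ebp, %esp ; pop %ebp
   whereas the default style is roughly
       add  $FRAME, %esp      ; deallocate the frame
       pop  %reg              ; pop each saved register
   so the former wins where the explicit stack adjustment is extra work.  */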
5871 if ((!sp_valid && frame.nregs <= 1)
5872 || (TARGET_EPILOGUE_USING_MOVE
5873 && cfun->machine->use_fast_prologue_epilogue
5874 && (frame.nregs > 1 || frame.to_allocate))
5875 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
5876 || (frame_pointer_needed && TARGET_USE_LEAVE
5877 && cfun->machine->use_fast_prologue_epilogue
5878 && frame.nregs == 1)
5879 || current_function_calls_eh_return)
5881 /* Restore registers. We can use ebp or esp to address the memory
5882 locations. If both are available, default to ebp, since offsets
5883 are known to be small. The only exception is esp pointing directly to the
5884 end of the block of saved registers, where we may simplify the addressing mode. */
5887 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
5888 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
5889 frame.to_allocate, style == 2);
5891 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
5892 offset, style == 2);
5894 /* eh_return epilogues need %ecx added to the stack pointer. */
5897 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
5899 if (frame_pointer_needed)
5901 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
5902 tmp = plus_constant (tmp, UNITS_PER_WORD);
5903 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
5905 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
5906 emit_move_insn (hard_frame_pointer_rtx, tmp);
5908 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
5913 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
5914 tmp = plus_constant (tmp, (frame.to_allocate
5915 + frame.nregs * UNITS_PER_WORD));
5916 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
5919 else if (!frame_pointer_needed)
5920 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5921 GEN_INT (frame.to_allocate
5922 + frame.nregs * UNITS_PER_WORD),
5924 /* If not an i386, mov & pop is faster than "leave". */
5925 else if (TARGET_USE_LEAVE || optimize_size
5926 || !cfun->machine->use_fast_prologue_epilogue)
5927 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5930 pro_epilogue_adjust_stack (stack_pointer_rtx,
5931 hard_frame_pointer_rtx,
5934 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5936 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
5941 /* First step is to deallocate the stack frame so that we can
5942 pop the registers. */
5945 gcc_assert (frame_pointer_needed);
5946 pro_epilogue_adjust_stack (stack_pointer_rtx,
5947 hard_frame_pointer_rtx,
5948 GEN_INT (offset), style);
5950 else if (frame.to_allocate)
5951 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5952 GEN_INT (frame.to_allocate), style);
5954 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5955 if (ix86_save_reg (regno, false))
5958 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
5960 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
5962 if (frame_pointer_needed)
5964 /* Leave results in shorter dependency chains on CPUs that are
5965 able to grok it fast. */
5966 if (TARGET_USE_LEAVE)
5967 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5968 else if (TARGET_64BIT)
5969 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5971 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
5975 if (cfun->machine->force_align_arg_pointer)
5977 emit_insn (gen_addsi3 (stack_pointer_rtx,
5978 cfun->machine->force_align_arg_pointer,
5982 /* Sibcall epilogues don't want a return instruction. */
5986 if (current_function_pops_args && current_function_args_size)
5988 rtx popc = GEN_INT (current_function_pops_args);
5990 /* i386 can only pop 64K bytes. If asked to pop more, pop the
5991 return address, do an explicit add, and jump indirectly to the
5992 caller. */
5994 if (current_function_pops_args >= 65536)
5996 rtx ecx = gen_rtx_REG (SImode, 2);
5998 /* There is no "pascal" calling convention in the 64bit ABI. */
5999 gcc_assert (!TARGET_64BIT);
6001 emit_insn (gen_popsi1 (ecx));
6002 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
6003 emit_jump_insn (gen_return_indirect_internal (ecx));
6006 emit_jump_insn (gen_return_pop_internal (popc));
6009 emit_jump_insn (gen_return_internal ());
6012 /* Reset from the function's potential modifications. */
6015 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
6016 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
6018 if (pic_offset_table_rtx)
6019 REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
6021 /* Mach-O doesn't support labels at the end of objects, so if
6022 it looks like we might want one, insert a NOP. */
6024 rtx insn = get_last_insn ();
6027 && NOTE_LINE_NUMBER (insn) != NOTE_INSN_DELETED_LABEL)
6028 insn = PREV_INSN (insn);
6032 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_DELETED_LABEL)))
6033 fputs ("\tnop\n", file);
6039 /* Extract the parts of an RTL expression that is a valid memory address
6040 for an instruction. Return 0 if the structure of the address is
6041 grossly off. Return -1 if the address contains ASHIFT, so it is not
6042 strictly valid, but is still used for computing the length of an lea instruction. */
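/* For reference, the general form decomposed here is the x86 effective
   address  base + index*scale + disp  (plus an optional segment); e.g.
   16(%eax,%ebx,4) yields base %eax, index %ebx, scale 4 and disp 16.  */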
6045 ix86_decompose_address (rtx addr, struct ix86_address *out)
6047 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
6048 rtx base_reg, index_reg;
6049 HOST_WIDE_INT scale = 1;
6050 rtx scale_rtx = NULL_RTX;
6052 enum ix86_address_seg seg = SEG_DEFAULT;
6054 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
6056 else if (GET_CODE (addr) == PLUS)
6066 addends[n++] = XEXP (op, 1);
6069 while (GET_CODE (op) == PLUS);
6074 for (i = n; i >= 0; --i)
6077 switch (GET_CODE (op))
6082 index = XEXP (op, 0);
6083 scale_rtx = XEXP (op, 1);
6087 if (XINT (op, 1) == UNSPEC_TP
6088 && TARGET_TLS_DIRECT_SEG_REFS
6089 && seg == SEG_DEFAULT)
6090 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
6119 else if (GET_CODE (addr) == MULT)
6121 index = XEXP (addr, 0); /* index*scale */
6122 scale_rtx = XEXP (addr, 1);
6124 else if (GET_CODE (addr) == ASHIFT)
6128 /* We're called for lea too, which implements ashift on occasion. */
6129 index = XEXP (addr, 0);
6130 tmp = XEXP (addr, 1);
6131 if (!CONST_INT_P (tmp))
6133 scale = INTVAL (tmp);
6134 if ((unsigned HOST_WIDE_INT) scale > 3)
6140 disp = addr; /* displacement */
6142 /* Extract the integral value of scale. */
6145 if (!CONST_INT_P (scale_rtx))
6147 scale = INTVAL (scale_rtx);
6150 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
6151 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
6153 /* Allow arg pointer and stack pointer as index if there is no scaling. */
6154 if (base_reg && index_reg && scale == 1
6155 && (index_reg == arg_pointer_rtx
6156 || index_reg == frame_pointer_rtx
6157 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
6160 tmp = base, base = index, index = tmp;
6161 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
6164 /* Special case: %ebp cannot be encoded as a base without a displacement. */
6165 if ((base_reg == hard_frame_pointer_rtx
6166 || base_reg == frame_pointer_rtx
6167 || base_reg == arg_pointer_rtx) && !disp)
6170 /* Special case: on K6, [%esi] makes the instruction vector decoded.
6171 Avoid this by transforming to [%esi+0]. */
6172 if (ix86_tune == PROCESSOR_K6 && !optimize_size
6173 && base_reg && !index_reg && !disp
6175 && REGNO_REG_CLASS (REGNO (base_reg)) == SIREG)
6178 /* Special case: encode reg+reg instead of reg*2. */
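/* (A scaled index with no base must be encoded with a mandatory 32-bit
   displacement, so e.g. [%eax*2] costs four extra bytes compared with
   the equivalent [%eax+%eax] form produced here.)  */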
6179 if (!base && index && scale && scale == 2)
6180 base = index, base_reg = index_reg, scale = 1;
6182 /* Special case: scaling cannot be encoded without base or displacement. */
6183 if (!base && !disp && index && scale != 1)
6195 /* Return cost of the memory address x.
6196 For i386, it is better to use a complex address than let gcc copy
6197 the address into a reg and make a new pseudo. But not if the address
6198 requires two regs, since that would mean more pseudos with longer
6199 lifetimes. */
6201 ix86_address_cost (rtx x)
6203 struct ix86_address parts;
6205 int ok = ix86_decompose_address (x, &parts);
6209 if (parts.base && GET_CODE (parts.base) == SUBREG)
6210 parts.base = SUBREG_REG (parts.base);
6211 if (parts.index && GET_CODE (parts.index) == SUBREG)
6212 parts.index = SUBREG_REG (parts.index);
6214 /* More complex memory references are better. */
6215 if (parts.disp && parts.disp != const0_rtx)
6217 if (parts.seg != SEG_DEFAULT)
6220 /* Attempt to minimize number of registers in the address. */
6222 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
6224 && (!REG_P (parts.index)
6225 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
6229 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
6231 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
6232 && parts.base != parts.index)
6235 /* AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
6236 since its predecode logic can't detect the length of instructions
6237 and the instruction degenerates to vector decoding. Increase the cost of such
6238 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
6239 to split such addresses or even refuse such addresses at all.
6241 The following addressing modes are affected:
6242 [base+scale*index]
6243 [scale*index+disp]
6244 [base+index]
6246 The first and last case may be avoidable by explicitly coding the zero
6247 into the memory address, but I don't have an AMD-K6 machine handy to check
6248 this theory. */
6251 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
6252 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
6253 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
6259 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
6260 this is used to form addresses to local data when -fPIC is in
6261 effect. */
6264 darwin_local_data_pic (rtx disp)
6266 if (GET_CODE (disp) == MINUS)
6268 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
6269 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
6270 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
6272 const char *sym_name = XSTR (XEXP (disp, 1), 0);
6273 if (! strcmp (sym_name, "<pic base>"))
6281 /* Determine if a given RTX is a valid constant. We already know this
6282 satisfies CONSTANT_P. */
6285 legitimate_constant_p (rtx x)
6287 switch (GET_CODE (x))
6292 if (GET_CODE (x) == PLUS)
6294 if (!CONST_INT_P (XEXP (x, 1)))
6299 if (TARGET_MACHO && darwin_local_data_pic (x))
6302 /* Only some unspecs are valid as "constants". */
6303 if (GET_CODE (x) == UNSPEC)
6304 switch (XINT (x, 1))
6309 return TARGET_64BIT;
6312 x = XVECEXP (x, 0, 0);
6313 return (GET_CODE (x) == SYMBOL_REF
6314 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
6316 x = XVECEXP (x, 0, 0);
6317 return (GET_CODE (x) == SYMBOL_REF
6318 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
6323 /* We must have drilled down to a symbol. */
6324 if (GET_CODE (x) == LABEL_REF)
6326 if (GET_CODE (x) != SYMBOL_REF)
6331 /* TLS symbols are never valid. */
6332 if (SYMBOL_REF_TLS_MODEL (x))
6335 /* DLLIMPORT symbols are never valid. */
6336 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
6337 && SYMBOL_REF_DLLIMPORT_P (x))
6342 if (GET_MODE (x) == TImode
6343 && x != CONST0_RTX (TImode)
6349 if (x == CONST0_RTX (GET_MODE (x)))
6357 /* Otherwise we handle everything else in the move patterns. */
6361 /* Determine if it's legal to put X into the constant pool. This
6362 is not possible for the address of thread-local symbols, which
6363 is checked above. */
6366 ix86_cannot_force_const_mem (rtx x)
6368 /* We can always put integral constants and vectors in memory. */
6369 switch (GET_CODE (x))
6379 return !legitimate_constant_p (x);
6382 /* Determine if a given RTX is a valid constant address. */
6385 constant_address_p (rtx x)
6387 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
6390 /* Nonzero if the constant value X is a legitimate general operand
6391 when generating PIC code. It is given that flag_pic is on and
6392 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
6395 legitimate_pic_operand_p (rtx x)
6399 switch (GET_CODE (x))
6402 inner = XEXP (x, 0);
6403 if (GET_CODE (inner) == PLUS
6404 && CONST_INT_P (XEXP (inner, 1)))
6405 inner = XEXP (inner, 0);
6407 /* Only some unspecs are valid as "constants". */
6408 if (GET_CODE (inner) == UNSPEC)
6409 switch (XINT (inner, 1))
6414 return TARGET_64BIT;
6416 x = XVECEXP (inner, 0, 0);
6417 return (GET_CODE (x) == SYMBOL_REF
6418 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
6426 return legitimate_pic_address_disp_p (x);
6433 /* Determine if a given CONST RTX is a valid memory displacement
6434 in PIC mode. */
6437 legitimate_pic_address_disp_p (rtx disp)
6441 /* In 64bit mode we can allow direct addresses of symbols and labels
6442 when they are not dynamic symbols. */
6445 rtx op0 = disp, op1;
6447 switch (GET_CODE (disp))
6453 if (GET_CODE (XEXP (disp, 0)) != PLUS)
6455 op0 = XEXP (XEXP (disp, 0), 0);
6456 op1 = XEXP (XEXP (disp, 0), 1);
6457 if (!CONST_INT_P (op1)
6458 || INTVAL (op1) >= 16*1024*1024
6459 || INTVAL (op1) < -16*1024*1024)
6461 if (GET_CODE (op0) == LABEL_REF)
6463 if (GET_CODE (op0) != SYMBOL_REF)
6468 /* TLS references should always be enclosed in UNSPEC. */
6469 if (SYMBOL_REF_TLS_MODEL (op0))
6471 if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0)
6472 && ix86_cmodel != CM_LARGE_PIC)
6480 if (GET_CODE (disp) != CONST)
6482 disp = XEXP (disp, 0);
6486 /* It is unsafe to allow PLUS expressions here. This limits the allowed
6487 distance of GOT table references, but we should not need these anyway. */
6488 if (GET_CODE (disp) != UNSPEC
6489 || (XINT (disp, 1) != UNSPEC_GOTPCREL
6490 && XINT (disp, 1) != UNSPEC_GOTOFF
6491 && XINT (disp, 1) != UNSPEC_PLTOFF))
6494 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
6495 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
6501 if (GET_CODE (disp) == PLUS)
6503 if (!CONST_INT_P (XEXP (disp, 1)))
6505 disp = XEXP (disp, 0);
6509 if (TARGET_MACHO && darwin_local_data_pic (disp))
6512 if (GET_CODE (disp) != UNSPEC)
6515 switch (XINT (disp, 1))
6520 /* We need to check for both symbols and labels because VxWorks loads
6521 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
6522 details. */
6523 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
6524 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
6526 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
6527 While the ABI also specifies a 32bit relocation, we don't produce it in
6528 the small PIC model at all. */
6529 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
6530 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
6532 return gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
6534 case UNSPEC_GOTTPOFF:
6535 case UNSPEC_GOTNTPOFF:
6536 case UNSPEC_INDNTPOFF:
6539 disp = XVECEXP (disp, 0, 0);
6540 return (GET_CODE (disp) == SYMBOL_REF
6541 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
6543 disp = XVECEXP (disp, 0, 0);
6544 return (GET_CODE (disp) == SYMBOL_REF
6545 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
6547 disp = XVECEXP (disp, 0, 0);
6548 return (GET_CODE (disp) == SYMBOL_REF
6549 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
6555 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
6556 memory address for an instruction. The MODE argument is the machine mode
6557 for the MEM expression that wants to use this address.
6559 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
6560 convert common non-canonical forms to canonical form so that they will
6561 be recognized. */
6564 legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
6565 rtx addr, int strict)
6567 struct ix86_address parts;
6568 rtx base, index, disp;
6569 HOST_WIDE_INT scale;
6570 const char *reason = NULL;
6571 rtx reason_rtx = NULL_RTX;
6573 if (ix86_decompose_address (addr, &parts) <= 0)
6575 reason = "decomposition failed";
6580 index = parts.index;
6582 scale = parts.scale;
6584 /* Validate base register.
6586 Don't allow SUBREGs that span more than a word here. It can lead to spill
6587 failures when the base is one word out of a two-word structure, which is
6588 represented internally as a DImode int. */
6597 else if (GET_CODE (base) == SUBREG
6598 && REG_P (SUBREG_REG (base))
6599 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
6601 reg = SUBREG_REG (base);
6604 reason = "base is not a register";
6608 if (GET_MODE (base) != Pmode)
6610 reason = "base is not in Pmode";
6614 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
6615 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
6617 reason = "base is not valid";
6622 /* Validate index register.
6624 Don't allow SUBREGs that span more than a word here -- same as above. */
6633 else if (GET_CODE (index) == SUBREG
6634 && REG_P (SUBREG_REG (index))
6635 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
6637 reg = SUBREG_REG (index);
6640 reason = "index is not a register";
6644 if (GET_MODE (index) != Pmode)
6646 reason = "index is not in Pmode";
6650 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
6651 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
6653 reason = "index is not valid";
6658 /* Validate scale factor. */
6661 reason_rtx = GEN_INT (scale);
6664 reason = "scale without index";
6668 if (scale != 2 && scale != 4 && scale != 8)
6670 reason = "scale is not a valid multiplier";
6675 /* Validate displacement. */
6680 if (GET_CODE (disp) == CONST
6681 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
6682 switch (XINT (XEXP (disp, 0), 1))
6684 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
6685 used. While the ABI also specifies 32bit relocations, we don't produce
6686 them at all and use IP-relative addressing instead. */
6689 gcc_assert (flag_pic);
6691 goto is_legitimate_pic;
6692 reason = "64bit address unspec";
6695 case UNSPEC_GOTPCREL:
6696 gcc_assert (flag_pic);
6697 goto is_legitimate_pic;
6699 case UNSPEC_GOTTPOFF:
6700 case UNSPEC_GOTNTPOFF:
6701 case UNSPEC_INDNTPOFF:
6707 reason = "invalid address unspec";
6711 else if (SYMBOLIC_CONST (disp)
6715 && MACHOPIC_INDIRECT
6716 && !machopic_operand_p (disp)
6722 if (TARGET_64BIT && (index || base))
6724 /* foo@dtpoff(%rX) is ok. */
6725 if (GET_CODE (disp) != CONST
6726 || GET_CODE (XEXP (disp, 0)) != PLUS
6727 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
6728 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
6729 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
6730 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
6732 reason = "non-constant pic memory reference";
6736 else if (! legitimate_pic_address_disp_p (disp))
6738 reason = "displacement is an invalid pic construct";
6742 /* This code used to verify that a symbolic pic displacement
6743 includes the pic_offset_table_rtx register.
6745 While this is a good idea, unfortunately these constructs may
6746 be created by the "adds using lea" optimization for incorrect
6747 code like:
6749 int a;
6750 int foo (int i)
6751 {
6752 return *(&a + i);
6753 }
6755 This code is nonsensical, but results in addressing the
6756 GOT table with the pic_offset_table_rtx base. We can't
6757 just refuse it easily, since it gets matched by the
6758 "addsi3" pattern, which later gets split to lea when the
6759 output register differs from the input. While this
6760 could be handled by a separate addsi pattern for this case
6761 that never results in lea, disabling this test seems to be
6762 the easier and correct fix for the crash. */
6764 else if (GET_CODE (disp) != LABEL_REF
6765 && !CONST_INT_P (disp)
6766 && (GET_CODE (disp) != CONST
6767 || !legitimate_constant_p (disp))
6768 && (GET_CODE (disp) != SYMBOL_REF
6769 || !legitimate_constant_p (disp)))
6771 reason = "displacement is not constant";
6774 else if (TARGET_64BIT
6775 && !x86_64_immediate_operand (disp, VOIDmode))
6777 reason = "displacement is out of range";
6782 /* Everything looks valid. */
6789 /* Return a unique alias set for the GOT. */
6791 static HOST_WIDE_INT
6792 ix86_GOT_alias_set (void)
6794 static HOST_WIDE_INT set = -1;
6796 set = new_alias_set ();
6800 /* Return a legitimate reference for ORIG (an address) using the
6801 register REG. If REG is 0, a new pseudo is generated.
6803 There are two types of references that must be handled:
6805 1. Global data references must load the address from the GOT, via
6806 the PIC reg. An insn is emitted to do this load, and the reg is
6809 2. Static data references, constant pool addresses, and code labels
6810 compute the address as an offset from the GOT, whose base is in
6811 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
6812 differentiate them from global data objects. The returned
6813 address is the PIC reg + an unspec constant.
6815 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
6816 reg also appears in the address. */
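/* Illustrative sketch (32-bit ELF, AT&T syntax; not literal output):
   a global data reference is loaded through the GOT as
       movl sym@GOT(%ebx), %reg
   while local/static data is formed as an offset from the PIC base:
       leal sym@GOTOFF(%ebx), %reg  */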
6819 legitimize_pic_address (rtx orig, rtx reg)
6826 if (TARGET_MACHO && !TARGET_64BIT)
6829 reg = gen_reg_rtx (Pmode);
6830 /* Use the generic Mach-O PIC machinery. */
6831 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
6835 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
6837 else if (TARGET_64BIT
6838 && ix86_cmodel != CM_SMALL_PIC
6839 && gotoff_operand (addr, Pmode))
6842 /* This symbol may be referenced via a displacement from the PIC
6843 base address (@GOTOFF). */
6845 if (reload_in_progress)
6846 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6847 if (GET_CODE (addr) == CONST)
6848 addr = XEXP (addr, 0);
6849 if (GET_CODE (addr) == PLUS)
6851 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
6852 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
6855 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
6856 new = gen_rtx_CONST (Pmode, new);
6858 tmpreg = gen_reg_rtx (Pmode);
6861 emit_move_insn (tmpreg, new);
6865 new = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
6866 tmpreg, 1, OPTAB_DIRECT);
6869 else new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
6871 else if (!TARGET_64BIT && gotoff_operand (addr, Pmode))
6873 /* This symbol may be referenced via a displacement from the PIC
6874 base address (@GOTOFF). */
6876 if (reload_in_progress)
6877 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6878 if (GET_CODE (addr) == CONST)
6879 addr = XEXP (addr, 0);
6880 if (GET_CODE (addr) == PLUS)
6882 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
6883 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
6886 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
6887 new = gen_rtx_CONST (Pmode, new);
6888 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6892 emit_move_insn (reg, new);
6896 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
6897 /* We can't use @GOTOFF for text labels on VxWorks;
6898 see gotoff_operand. */
6899 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
6901 if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
6903 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
6904 new = gen_rtx_CONST (Pmode, new);
6905 new = gen_const_mem (Pmode, new);
6906 set_mem_alias_set (new, ix86_GOT_alias_set ());
6909 reg = gen_reg_rtx (Pmode);
6910 /* Use gen_movsi directly; otherwise the address is loaded
6911 into a register for CSE. We don't want to CSE these addresses;
6912 instead we CSE the addresses loaded from the GOT table, so skip this. */
6913 emit_insn (gen_movsi (reg, new));
6918 /* This symbol must be referenced via a load from the
6919 Global Offset Table (@GOT). */
6921 if (reload_in_progress)
6922 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6923 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
6924 new = gen_rtx_CONST (Pmode, new);
6926 new = force_reg (Pmode, new);
6927 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6928 new = gen_const_mem (Pmode, new);
6929 set_mem_alias_set (new, ix86_GOT_alias_set ());
6932 reg = gen_reg_rtx (Pmode);
6933 emit_move_insn (reg, new);
6939 if (CONST_INT_P (addr)
6940 && !x86_64_immediate_operand (addr, VOIDmode))
6944 emit_move_insn (reg, addr);
6948 new = force_reg (Pmode, addr);
6950 else if (GET_CODE (addr) == CONST)
6952 addr = XEXP (addr, 0);
6954 /* We must match stuff we generate before. Assume the only
6955 unspecs that can get here are ours. Not that we could do
6956 anything with them anyway.... */
6957 if (GET_CODE (addr) == UNSPEC
6958 || (GET_CODE (addr) == PLUS
6959 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
6961 gcc_assert (GET_CODE (addr) == PLUS);
6963 if (GET_CODE (addr) == PLUS)
6965 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
6967 /* Check first to see if this is a constant offset from a @GOTOFF
6968 symbol reference. */
6969 if (gotoff_operand (op0, Pmode)
6970 && CONST_INT_P (op1))
6974 if (reload_in_progress)
6975 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6976 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
6978 new = gen_rtx_PLUS (Pmode, new, op1);
6979 new = gen_rtx_CONST (Pmode, new);
6980 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6984 emit_move_insn (reg, new);
6990 if (INTVAL (op1) < -16*1024*1024
6991 || INTVAL (op1) >= 16*1024*1024)
6993 if (!x86_64_immediate_operand (op1, Pmode))
6994 op1 = force_reg (Pmode, op1);
6995 new = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
7001 base = legitimize_pic_address (XEXP (addr, 0), reg);
7002 new = legitimize_pic_address (XEXP (addr, 1),
7003 base == reg ? NULL_RTX : reg);
7005 if (CONST_INT_P (new))
7006 new = plus_constant (base, INTVAL (new));
7009 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
7011 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
7012 new = XEXP (new, 1);
7014 new = gen_rtx_PLUS (Pmode, base, new);
7022 /* Load the thread pointer. If TO_REG is true, force it into a register. */
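/* (On x86 the thread pointer is the %gs segment base in 32-bit mode and
   the %fs segment base in 64-bit mode; UNSPEC_TP stands for that value,
   cf. the SEG_FS/SEG_GS choice in ix86_decompose_address.)  */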
7025 get_thread_pointer (int to_reg)
7029 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
7033 reg = gen_reg_rtx (Pmode);
7034 insn = gen_rtx_SET (VOIDmode, reg, tp);
7035 insn = emit_insn (insn);
7040 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
7041 false if we expect this to be used for a memory address and true if
7042 we expect to load the address into a register. */
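/* As a rough illustration of the models handled below (GNU TLS, 32-bit,
   AT&T syntax; not literal output): initial-exec loads the offset from
   the GOT and dereferences it via the thread pointer,
       movl x@gotntpoff(%ebx), %reg
       movl %gs:(%reg), %result
   while local-exec folds the offset into the address outright:
       movl %gs:x@ntpoff, %result  */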
7045 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
7047 rtx dest, base, off, pic, tp;
7052 case TLS_MODEL_GLOBAL_DYNAMIC:
7053 dest = gen_reg_rtx (Pmode);
7054 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
7056 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
7058 rtx rax = gen_rtx_REG (Pmode, 0), insns;
7061 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
7062 insns = get_insns ();
7065 emit_libcall_block (insns, dest, rax, x);
7067 else if (TARGET_64BIT && TARGET_GNU2_TLS)
7068 emit_insn (gen_tls_global_dynamic_64 (dest, x));
7070 emit_insn (gen_tls_global_dynamic_32 (dest, x));
7072 if (TARGET_GNU2_TLS)
7074 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
7076 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
7080 case TLS_MODEL_LOCAL_DYNAMIC:
7081 base = gen_reg_rtx (Pmode);
7082 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
7084 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
7086 rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
7089 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
7090 insns = get_insns ();
7093 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
7094 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
7095 emit_libcall_block (insns, base, rax, note);
7097 else if (TARGET_64BIT && TARGET_GNU2_TLS)
7098 emit_insn (gen_tls_local_dynamic_base_64 (base));
7100 emit_insn (gen_tls_local_dynamic_base_32 (base));
7102 if (TARGET_GNU2_TLS)
7104 rtx x = ix86_tls_module_base ();
7106 set_unique_reg_note (get_last_insn (), REG_EQUIV,
7107 gen_rtx_MINUS (Pmode, x, tp));
7110 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
7111 off = gen_rtx_CONST (Pmode, off);
7113 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
7115 if (TARGET_GNU2_TLS)
7117 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
7119 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
7124 case TLS_MODEL_INITIAL_EXEC:
7128 type = UNSPEC_GOTNTPOFF;
7132 if (reload_in_progress)
7133 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
7134 pic = pic_offset_table_rtx;
7135 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
7137 else if (!TARGET_ANY_GNU_TLS)
7139 pic = gen_reg_rtx (Pmode);
7140 emit_insn (gen_set_got (pic));
7141 type = UNSPEC_GOTTPOFF;
7146 type = UNSPEC_INDNTPOFF;
7149 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
7150 off = gen_rtx_CONST (Pmode, off);
7152 off = gen_rtx_PLUS (Pmode, pic, off);
7153 off = gen_const_mem (Pmode, off);
7154 set_mem_alias_set (off, ix86_GOT_alias_set ());
7156 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7158 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
7159 off = force_reg (Pmode, off);
7160 return gen_rtx_PLUS (Pmode, base, off);
7164 base = get_thread_pointer (true);
7165 dest = gen_reg_rtx (Pmode);
7166 emit_insn (gen_subsi3 (dest, base, off));
7170 case TLS_MODEL_LOCAL_EXEC:
7171 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
7172 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7173 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
7174 off = gen_rtx_CONST (Pmode, off);
7176 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7178 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
7179 return gen_rtx_PLUS (Pmode, base, off);
7183 base = get_thread_pointer (true);
7184 dest = gen_reg_rtx (Pmode);
7185 emit_insn (gen_subsi3 (dest, base, off));
7196 /* Create or return the unique __imp_DECL dllimport symbol corresponding
7197 to DECL. */
7199 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
7200 htab_t dllimport_map;
7203 get_dllimport_decl (tree decl)
7205 struct tree_map *h, in;
7209 size_t namelen, prefixlen;
7215 dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);
7217 in.hash = htab_hash_pointer (decl);
7218 in.base.from = decl;
7219 loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
7224 *loc = h = ggc_alloc (sizeof (struct tree_map));
7226 h->base.from = decl;
7227 h->to = to = build_decl (VAR_DECL, NULL, ptr_type_node);
7228 DECL_ARTIFICIAL (to) = 1;
7229 DECL_IGNORED_P (to) = 1;
7230 DECL_EXTERNAL (to) = 1;
7231 TREE_READONLY (to) = 1;
7233 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
7234 name = targetm.strip_name_encoding (name);
7235 if (name[0] == FASTCALL_PREFIX)
7241 prefix = "*__imp__";
7243 namelen = strlen (name);
7244 prefixlen = strlen (prefix);
7245 imp_name = alloca (namelen + prefixlen + 1);
7246 memcpy (imp_name, prefix, prefixlen);
7247 memcpy (imp_name + prefixlen, name, namelen + 1);
7249 name = ggc_alloc_string (imp_name, namelen + prefixlen);
7250 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
7251 SET_SYMBOL_REF_DECL (rtl, to);
7252 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL;
7254 rtl = gen_const_mem (Pmode, rtl);
7255 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
7257 SET_DECL_RTL (to, rtl);
7262 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
7263 true if we require the result be a register. */
7266 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
7271 gcc_assert (SYMBOL_REF_DECL (symbol));
7272 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol));
7274 x = DECL_RTL (imp_decl);
7276 x = force_reg (Pmode, x);
7280 /* Try machine-dependent ways of modifying an illegitimate address
7281 to be legitimate. If we find one, return the new, valid address.
7282 This macro is used in only one place: `memory_address' in explow.c.
7284 OLDX is the address as it was before break_out_memory_refs was called.
7285 In some cases it is useful to look at this to decide what needs to be done.
7287 MODE and WIN are passed so that this macro can use
7288 GO_IF_LEGITIMATE_ADDRESS.
7290 It is always safe for this macro to do nothing. It exists to recognize
7291 opportunities to optimize the output.
7293 For the 80386, we handle X+REG by loading X into a register R and
7294 using R+REG. R will go in a general reg and indexing will be used.
7295 However, if REG is a broken-out memory address or multiplication,
7296 nothing needs to be done because REG can certainly go in a general reg.
7298 When -fpic is used, special handling is needed for symbolic references.
7299 See comments by legitimize_pic_address in i386.c for details. */
7302 legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
7307 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
7309 return legitimize_tls_address (x, log, false);
7310 if (GET_CODE (x) == CONST
7311 && GET_CODE (XEXP (x, 0)) == PLUS
7312 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
7313 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
7315 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0), log, false);
7316 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
7319 if (flag_pic && SYMBOLIC_CONST (x))
7320 return legitimize_pic_address (x, 0);
7322 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
7324 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (x))
7325 return legitimize_dllimport_symbol (x, true);
7326 if (GET_CODE (x) == CONST
7327 && GET_CODE (XEXP (x, 0)) == PLUS
7328 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
7329 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (x, 0), 0)))
7331 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (x, 0), 0), true);
7332 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
7336 /* Canonicalize shifts by 0, 1, 2, 3 into a multiply. */
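/* (e.g. (ashift %reg 3) becomes (mult %reg 8), matching the scaled-index
   part of an x86 address such as (%base,%reg,8).)  */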
7337 if (GET_CODE (x) == ASHIFT
7338 && CONST_INT_P (XEXP (x, 1))
7339 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
7342 log = INTVAL (XEXP (x, 1));
7343 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
7344 GEN_INT (1 << log));
7347 if (GET_CODE (x) == PLUS)
7349 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
7351 if (GET_CODE (XEXP (x, 0)) == ASHIFT
7352 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
7353 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
7356 log = INTVAL (XEXP (XEXP (x, 0), 1));
7357 XEXP (x, 0) = gen_rtx_MULT (Pmode,
7358 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
7359 GEN_INT (1 << log));
7362 if (GET_CODE (XEXP (x, 1)) == ASHIFT
7363 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
7364 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
7367 log = INTVAL (XEXP (XEXP (x, 1), 1));
7368 XEXP (x, 1) = gen_rtx_MULT (Pmode,
7369 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
7370 GEN_INT (1 << log));
7373 /* Put multiply first if it isn't already. */
7374 if (GET_CODE (XEXP (x, 1)) == MULT)
7376 rtx tmp = XEXP (x, 0);
7377 XEXP (x, 0) = XEXP (x, 1);
7382 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
7383 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
7384 created by virtual register instantiation, register elimination, and
7385 similar optimizations. */
7386 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
7389 x = gen_rtx_PLUS (Pmode,
7390 gen_rtx_PLUS (Pmode, XEXP (x, 0),
7391 XEXP (XEXP (x, 1), 0)),
7392 XEXP (XEXP (x, 1), 1));
7395 /* Canonicalize
7396 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
7397 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
7398 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
7399 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
7400 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
7401 && CONSTANT_P (XEXP (x, 1)))
7404 rtx other = NULL_RTX;
7406 if (CONST_INT_P (XEXP (x, 1)))
7408 constant = XEXP (x, 1);
7409 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
7411 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
7413 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
7414 other = XEXP (x, 1);
7422 x = gen_rtx_PLUS (Pmode,
7423 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
7424 XEXP (XEXP (XEXP (x, 0), 1), 0)),
7425 plus_constant (other, INTVAL (constant)));
7429 if (changed && legitimate_address_p (mode, x, FALSE))
7432 if (GET_CODE (XEXP (x, 0)) == MULT)
7435 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
7438 if (GET_CODE (XEXP (x, 1)) == MULT)
7441 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
7445 && REG_P (XEXP (x, 1))
7446 && REG_P (XEXP (x, 0)))
7449 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
7452 x = legitimize_pic_address (x, 0);
7455 if (changed && legitimate_address_p (mode, x, FALSE))
7458 if (REG_P (XEXP (x, 0)))
7460 rtx temp = gen_reg_rtx (Pmode);
7461 rtx val = force_operand (XEXP (x, 1), temp);
7463 emit_move_insn (temp, val);
7469 else if (REG_P (XEXP (x, 1)))
7471 rtx temp = gen_reg_rtx (Pmode);
7472 rtx val = force_operand (XEXP (x, 0), temp);
7474 emit_move_insn (temp, val);
7484 /* Print an integer constant expression in assembler syntax. Addition
7485 and subtraction are the only arithmetic that may appear in these
7486 expressions. FILE is the stdio stream to write to, X is the rtx, and
7487 CODE is the operand print code from the output string. */
7490 output_pic_addr_const (FILE *file, rtx x, int code)
7494 switch (GET_CODE (x))
7497 gcc_assert (flag_pic);
7502 if (! TARGET_MACHO || TARGET_64BIT)
7503 output_addr_const (file, x);
7506 const char *name = XSTR (x, 0);
7508 /* Mark the decl as referenced so that cgraph will output the function. */
7509 if (SYMBOL_REF_DECL (x))
7510 mark_decl_referenced (SYMBOL_REF_DECL (x));
7513 if (MACHOPIC_INDIRECT
7514 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
7515 name = machopic_indirection_name (x, /*stub_p=*/true);
7517 assemble_name (file, name);
7519 if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
7520 fputs ("@PLT", file);
7527 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
7528 assemble_name (asm_out_file, buf);
7532 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
7536 /* This used to output parentheses around the expression,
7537 but that does not work on the 386 (either ATT or BSD assembler). */
7538 output_pic_addr_const (file, XEXP (x, 0), code);
7542 if (GET_MODE (x) == VOIDmode)
7544 /* We can use %d if the number is <32 bits and positive. */
7545 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
7546 fprintf (file, "0x%lx%08lx",
7547 (unsigned long) CONST_DOUBLE_HIGH (x),
7548 (unsigned long) CONST_DOUBLE_LOW (x));
7550 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
7553 /* We can't handle floating point constants;
7554 PRINT_OPERAND must handle them. */
7555 output_operand_lossage ("floating constant misused");
7559 /* Some assemblers need integer constants to appear first. */
7560 if (CONST_INT_P (XEXP (x, 0)))
7562 output_pic_addr_const (file, XEXP (x, 0), code);
7564 output_pic_addr_const (file, XEXP (x, 1), code);
7568 gcc_assert (CONST_INT_P (XEXP (x, 1)));
7569 output_pic_addr_const (file, XEXP (x, 1), code);
7571 output_pic_addr_const (file, XEXP (x, 0), code);
7577 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
7578 output_pic_addr_const (file, XEXP (x, 0), code);
7580 output_pic_addr_const (file, XEXP (x, 1), code);
7582 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
7586 gcc_assert (XVECLEN (x, 0) == 1);
7587 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
7588 switch (XINT (x, 1))
7591 fputs ("@GOT", file);
7594 fputs ("@GOTOFF", file);
7597 fputs ("@PLTOFF", file);
7599 case UNSPEC_GOTPCREL:
7600 fputs ("@GOTPCREL(%rip)", file);
7602 case UNSPEC_GOTTPOFF:
7603 /* FIXME: This might be @TPOFF in Sun ld too. */
7604 fputs ("@GOTTPOFF", file);
7607 fputs ("@TPOFF", file);
7611 fputs ("@TPOFF", file);
7613 fputs ("@NTPOFF", file);
7616 fputs ("@DTPOFF", file);
7618 case UNSPEC_GOTNTPOFF:
7620 fputs ("@GOTTPOFF(%rip)", file);
7622 fputs ("@GOTNTPOFF", file);
7624 case UNSPEC_INDNTPOFF:
7625 fputs ("@INDNTPOFF", file);
7628 output_operand_lossage ("invalid UNSPEC as operand");
7634 output_operand_lossage ("invalid expression as operand");
7638 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
7639 We need to emit DTP-relative relocations. */
7641 static void ATTRIBUTE_UNUSED
7642 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
7644 fputs (ASM_LONG, file);
7645 output_addr_const (file, x);
7646 fputs ("@DTPOFF", file);
7652 fputs (", 0", file);
7659 /* In the name of slightly smaller debug output, and to cater to
7660 general assembler lossage, recognize PIC+GOTOFF and turn it back
7661 into a direct symbol reference.
7663 On Darwin, this is necessary to avoid a crash, because Darwin
7664 has a different PIC label for each routine but the DWARF debugging
7665 information is not associated with any particular routine, so it's
7666 necessary to remove references to the PIC label from RTL stored by
7667 the DWARF output code. */
7670 ix86_delegitimize_address (rtx orig_x)
7673 /* reg_addend is NULL or a multiple of some register. */
7674 rtx reg_addend = NULL_RTX;
7675 /* const_addend is NULL or a const_int. */
7676 rtx const_addend = NULL_RTX;
7677 /* This is the result, or NULL. */
7678 rtx result = NULL_RTX;
7685 if (GET_CODE (x) != CONST
7686 || GET_CODE (XEXP (x, 0)) != UNSPEC
7687 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
7690 return XVECEXP (XEXP (x, 0), 0, 0);
7693 if (GET_CODE (x) != PLUS
7694 || GET_CODE (XEXP (x, 1)) != CONST)
7697 if (REG_P (XEXP (x, 0))
7698 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
7699 /* %ebx + GOT/GOTOFF */
7701 else if (GET_CODE (XEXP (x, 0)) == PLUS)
7703 /* %ebx + %reg * scale + GOT/GOTOFF */
7704 reg_addend = XEXP (x, 0);
7705 if (REG_P (XEXP (reg_addend, 0))
7706 && REGNO (XEXP (reg_addend, 0)) == PIC_OFFSET_TABLE_REGNUM)
7707 reg_addend = XEXP (reg_addend, 1);
7708 else if (REG_P (XEXP (reg_addend, 1))
7709 && REGNO (XEXP (reg_addend, 1)) == PIC_OFFSET_TABLE_REGNUM)
7710 reg_addend = XEXP (reg_addend, 0);
7713 if (!REG_P (reg_addend)
7714 && GET_CODE (reg_addend) != MULT
7715 && GET_CODE (reg_addend) != ASHIFT)
7721 x = XEXP (XEXP (x, 1), 0);
7722 if (GET_CODE (x) == PLUS
7723 && CONST_INT_P (XEXP (x, 1)))
7725 const_addend = XEXP (x, 1);
7729 if (GET_CODE (x) == UNSPEC
7730 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x))
7731 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))))
7732 result = XVECEXP (x, 0, 0);
7734 if (TARGET_MACHO && darwin_local_data_pic (x)
7736 result = XEXP (x, 0);
7742 result = gen_rtx_PLUS (Pmode, result, const_addend);
7744 result = gen_rtx_PLUS (Pmode, reg_addend, result);
7748 /* If X is a machine specific address (i.e. a symbol or label being
7749 referenced as a displacement from the GOT implemented using an
7750 UNSPEC), then return the base term. Otherwise return X. */
7753 ix86_find_base_term (rtx x)
7759 if (GET_CODE (x) != CONST)
7762 if (GET_CODE (term) == PLUS
7763 && (CONST_INT_P (XEXP (term, 1))
7764 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
7765 term = XEXP (term, 0);
7766 if (GET_CODE (term) != UNSPEC
7767 || XINT (term, 1) != UNSPEC_GOTPCREL)
7770 term = XVECEXP (term, 0, 0);
7772 if (GET_CODE (term) != SYMBOL_REF
7773 && GET_CODE (term) != LABEL_REF)
7779 term = ix86_delegitimize_address (x);
7781 if (GET_CODE (term) != SYMBOL_REF
7782 && GET_CODE (term) != LABEL_REF)
7789 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
7794 if (mode == CCFPmode || mode == CCFPUmode)
7796 enum rtx_code second_code, bypass_code;
7797 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
7798 gcc_assert (bypass_code == UNKNOWN && second_code == UNKNOWN);
7799 code = ix86_fp_compare_code_to_integer (code);
7803 code = reverse_condition (code);
7814 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
7818 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
7819 Those same assemblers have the same but opposite lossage on cmov. */
7820 gcc_assert (mode == CCmode);
7821 suffix = fp ? "nbe" : "a";
7841 gcc_assert (mode == CCmode);
7863 gcc_assert (mode == CCmode);
7864 suffix = fp ? "nb" : "ae";
7867 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
7871 gcc_assert (mode == CCmode);
7875 suffix = fp ? "u" : "p";
7878 suffix = fp ? "nu" : "np";
7883 fputs (suffix, file);
7886 /* Print the name of register X to FILE based on its machine mode and number.
7887 If CODE is 'w', pretend the mode is HImode.
7888 If CODE is 'b', pretend the mode is QImode.
7889 If CODE is 'k', pretend the mode is SImode.
7890 If CODE is 'q', pretend the mode is DImode.
7891 If CODE is 'h', pretend the reg is the 'high' byte register.
7892 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op. */
7895 print_reg (rtx x, int code, FILE *file)
7897 gcc_assert (REGNO (x) != ARG_POINTER_REGNUM
7898 && REGNO (x) != FRAME_POINTER_REGNUM
7899 && REGNO (x) != FLAGS_REG
7900 && REGNO (x) != FPSR_REG
7901 && REGNO (x) != FPCR_REG);
7903 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
7906 if (code == 'w' || MMX_REG_P (x))
7908 else if (code == 'b')
7910 else if (code == 'k')
7912 else if (code == 'q')
7914 else if (code == 'y')
7916 else if (code == 'h')
7919 code = GET_MODE_SIZE (GET_MODE (x));
7921 /* Irritatingly, AMD extended registers use a different naming convention
7922 from the normal registers. */
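/* For example, the first REX register prints as r8b, r8w, r8d or r8
   depending on the operand size (cf. the fprintf formats below), whereas
   a legacy register prints as al, ax, eax or rax.  */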
7923 if (REX_INT_REG_P (x))
7925 gcc_assert (TARGET_64BIT);
7929 error ("extended registers have no high halves");
7932 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
7935 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
7938 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
7941 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
7944 error ("unsupported operand size for extended register");
7952 if (STACK_TOP_P (x))
7954 fputs ("st(0)", file);
7961 if (! ANY_FP_REG_P (x))
7962 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
7967 fputs (hi_reg_name[REGNO (x)], file);
7970 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
7972 fputs (qi_reg_name[REGNO (x)], file);
7975 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
7977 fputs (qi_high_reg_name[REGNO (x)], file);
7984 /* Locate some local-dynamic symbol still in use by this function
7985 so that we can print its name in some tls_local_dynamic_base
7986 pattern. */
7989 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
7993 if (GET_CODE (x) == SYMBOL_REF
7994 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
7996 cfun->machine->some_ld_name = XSTR (x, 0);
8004 get_some_local_dynamic_name (void)
8008 if (cfun->machine->some_ld_name)
8009 return cfun->machine->some_ld_name;
8011 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
8013 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
8014 return cfun->machine->some_ld_name;
8019 /* Meaning of CODE:
8020 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
8021 C -- print opcode suffix for set/cmov insn.
8022 c -- like C, but print reversed condition
8023 F,f -- likewise, but for floating-point.
8024 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
8025 otherwise nothing.
8026 R -- print the prefix for register names.
8027 z -- print the opcode suffix for the size of the current operand.
8028 * -- print a star (in certain assembler syntax)
8029 A -- print an absolute memory reference.
8030 w -- print the operand as if it's a "word" (HImode) even if it isn't.
8031 s -- print a shift double count, followed by the assembler's argument
8032 delimiter.
8033 b -- print the QImode name of the register for the indicated operand.
8034 %b0 would print %al if operands[0] is reg 0.
8035 w -- likewise, print the HImode name of the register.
8036 k -- likewise, print the SImode name of the register.
8037 q -- likewise, print the DImode name of the register.
8038 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
8039 y -- print "st(0)" instead of "st" as a register.
8040 D -- print condition for SSE cmp instruction.
8041 P -- if PIC, print an @PLT suffix.
8042 X -- don't print any sort of PIC '@' suffix for a symbol.
8043 & -- print some in-use local-dynamic symbol name.
8044 H -- print a memory address offset by 8; used for sse high-parts.
8045 + -- print a branch hint as 'cs' or 'ds' prefix. */
8048 print_operand (FILE *file, rtx x, int code)
8055 if (ASSEMBLER_DIALECT == ASM_ATT)
8060 assemble_name (file, get_some_local_dynamic_name ());
8064 switch (ASSEMBLER_DIALECT)
8071 /* Intel syntax. For absolute addresses, registers should not
8072 be surrounded by braces. */
8076 PRINT_OPERAND (file, x, 0);
8086 PRINT_OPERAND (file, x, 0);
8091 if (ASSEMBLER_DIALECT == ASM_ATT)
8096 if (ASSEMBLER_DIALECT == ASM_ATT)
8101 if (ASSEMBLER_DIALECT == ASM_ATT)
8106 if (ASSEMBLER_DIALECT == ASM_ATT)
8111 if (ASSEMBLER_DIALECT == ASM_ATT)
8116 if (ASSEMBLER_DIALECT == ASM_ATT)
8121 /* 387 opcodes don't get size suffixes if the operands are
8122 registers. */
8123 if (STACK_REG_P (x))
8126 /* Likewise if using Intel opcodes. */
8127 if (ASSEMBLER_DIALECT == ASM_INTEL)
8130 /* Derive the opcode suffix from the size of the operand. */
8131 switch (GET_MODE_SIZE (GET_MODE (x)))
8138 #ifdef HAVE_GAS_FILDS_FISTS
8144 if (GET_MODE (x) == SFmode)
8159 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
8161 #ifdef GAS_MNEMONICS
8187 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
8189 PRINT_OPERAND (file, x, 0);
8195 /* Little bit of braindamage here. The SSE compare instructions
8196 use completely different names for the comparisons than the
8197 fp conditional moves do. */
8198 switch (GET_CODE (x))
8213 fputs ("unord", file);
8217 fputs ("neq", file);
8221 fputs ("nlt", file);
8225 fputs ("nle", file);
8228 fputs ("ord", file);
8235 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8236 if (ASSEMBLER_DIALECT == ASM_ATT)
8238 switch (GET_MODE (x))
8240 case HImode: putc ('w', file); break;
8242 case SFmode: putc ('l', file); break;
8244 case DFmode: putc ('q', file); break;
8245 default: gcc_unreachable ();
8252 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
8255 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8256 if (ASSEMBLER_DIALECT == ASM_ATT)
8259 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
8262 /* Like above, but reverse condition */
8264 /* Check to see if argument to %c is really a constant
8265 and not a condition code which needs to be reversed. */
8266 if (!COMPARISON_P (x))
8268 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
8271 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
8274 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8275 if (ASSEMBLER_DIALECT == ASM_ATT)
8278 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
8282 /* It doesn't actually matter what mode we use here, as we're
8283 only going to use this for printing. */
8284 x = adjust_address_nv (x, DImode, 8);
8291 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
8294 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
8297 int pred_val = INTVAL (XEXP (x, 0));
8299 if (pred_val < REG_BR_PROB_BASE * 45 / 100
8300 || pred_val > REG_BR_PROB_BASE * 55 / 100)
8302 int taken = pred_val > REG_BR_PROB_BASE / 2;
8303 int cputaken = final_forward_branch_p (current_output_insn) == 0;
8305 /* Emit hints only where the default branch prediction
8306 heuristics would fail. */
8307 if (taken != cputaken)
8309 /* We use 3e (DS) prefix for taken branches and
8310 2e (CS) prefix for not taken branches. */
8312 fputs ("ds ; ", file);
8314 fputs ("cs ; ", file);
8321 output_operand_lossage ("invalid operand code '%c'", code);
8326 print_reg (x, code, file);
8330 /* No `byte ptr' prefix for call instructions. */
8331 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
8334 switch (GET_MODE_SIZE (GET_MODE (x)))
8336 case 1: size = "BYTE"; break;
8337 case 2: size = "WORD"; break;
8338 case 4: size = "DWORD"; break;
8339 case 8: size = "QWORD"; break;
8340 case 12: size = "XWORD"; break;
8341 case 16: size = "XMMWORD"; break;
8346 /* Check for explicit size override (codes 'b', 'w' and 'k') */
8349 else if (code == 'w')
8351 else if (code == 'k')
8355 fputs (" PTR ", file);
8359 /* Avoid (%rip) for call operands. */
8360 if (CONSTANT_ADDRESS_P (x) && code == 'P'
8361 && !CONST_INT_P (x))
8362 output_addr_const (file, x);
8363 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
8364 output_operand_lossage ("invalid constraints for operand");
8369 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
8374 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8375 REAL_VALUE_TO_TARGET_SINGLE (r, l);
8377 if (ASSEMBLER_DIALECT == ASM_ATT)
8379 fprintf (file, "0x%08lx", l);
8382 /* These float cases don't actually occur as immediate operands. */
8383 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
8387 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
8388 fprintf (file, "%s", dstr);
8391 else if (GET_CODE (x) == CONST_DOUBLE
8392 && GET_MODE (x) == XFmode)
8396 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
8397 fprintf (file, "%s", dstr);
8402 /* We have patterns that allow zero sets of memory, for instance.
8403 In 64-bit mode, we should probably support all 8-byte vectors,
8404 since we can in fact encode that into an immediate. */
8405 if (GET_CODE (x) == CONST_VECTOR)
8407 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
8413 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
8415 if (ASSEMBLER_DIALECT == ASM_ATT)
8418 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
8419 || GET_CODE (x) == LABEL_REF)
8421 if (ASSEMBLER_DIALECT == ASM_ATT)
8424 fputs ("OFFSET FLAT:", file);
8427 if (CONST_INT_P (x))
8428 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
8430 output_pic_addr_const (file, x, code);
8432 output_addr_const (file, x);
8436 /* Print a memory operand whose address is ADDR. */
8439 print_operand_address (FILE *file, rtx addr)
8441 struct ix86_address parts;
8442 rtx base, index, disp;
8444 int ok = ix86_decompose_address (addr, &parts);
8449 index = parts.index;
8451 scale = parts.scale;
8459 if (USER_LABEL_PREFIX[0] == 0)
8461 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
8467 if (!base && !index)
8469 /* A displacement-only address requires special attention. */
8471 if (CONST_INT_P (disp))
8473 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
8475 if (USER_LABEL_PREFIX[0] == 0)
8477 fputs ("ds:", file);
8479 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
8482 output_pic_addr_const (file, disp, 0);
8484 output_addr_const (file, disp);
8486 /* Use the one-byte-shorter RIP-relative addressing in 64bit mode. */
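/* (An absolute disp32 address in 64-bit mode needs a SIB byte, while
   disp32(%rip) does not, hence the one-byte saving.)  */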
8489 if (GET_CODE (disp) == CONST
8490 && GET_CODE (XEXP (disp, 0)) == PLUS
8491 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
8492 disp = XEXP (XEXP (disp, 0), 0);
8493 if (GET_CODE (disp) == LABEL_REF
8494 || (GET_CODE (disp) == SYMBOL_REF
8495 && SYMBOL_REF_TLS_MODEL (disp) == 0))
8496 fputs ("(%rip)", file);
8501 if (ASSEMBLER_DIALECT == ASM_ATT)
8506 output_pic_addr_const (file, disp, 0);
8507 else if (GET_CODE (disp) == LABEL_REF)
8508 output_asm_label (disp);
8510 output_addr_const (file, disp);
8515 print_reg (base, 0, file);
8519 print_reg (index, 0, file);
8521 fprintf (file, ",%d", scale);
8527 rtx offset = NULL_RTX;
8531 /* Pull out the offset of a symbol; print any symbol itself. */
8532 if (GET_CODE (disp) == CONST
8533 && GET_CODE (XEXP (disp, 0)) == PLUS
8534 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
8536 offset = XEXP (XEXP (disp, 0), 1);
8537 disp = gen_rtx_CONST (VOIDmode,
8538 XEXP (XEXP (disp, 0), 0));
8542 output_pic_addr_const (file, disp, 0);
8543 else if (GET_CODE (disp) == LABEL_REF)
8544 output_asm_label (disp);
8545 else if (CONST_INT_P (disp))
8548 output_addr_const (file, disp);
8554 print_reg (base, 0, file);
8557 if (INTVAL (offset) >= 0)
8559 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
8563 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
8570 print_reg (index, 0, file);
8572 fprintf (file, "*%d", scale);
8580 output_addr_const_extra (FILE *file, rtx x)
8584 if (GET_CODE (x) != UNSPEC)
8587 op = XVECEXP (x, 0, 0);
8588 switch (XINT (x, 1))
8590 case UNSPEC_GOTTPOFF:
8591 output_addr_const (file, op);
8592 /* FIXME: This might be @TPOFF in Sun ld. */
8593 fputs ("@GOTTPOFF", file);
8596 output_addr_const (file, op);
8597 fputs ("@TPOFF", file);
8600 output_addr_const (file, op);
8602 fputs ("@TPOFF", file);
8604 fputs ("@NTPOFF", file);
8607 output_addr_const (file, op);
8608 fputs ("@DTPOFF", file);
8610 case UNSPEC_GOTNTPOFF:
8611 output_addr_const (file, op);
8613 fputs ("@GOTTPOFF(%rip)", file);
8615 fputs ("@GOTNTPOFF", file);
8617 case UNSPEC_INDNTPOFF:
8618 output_addr_const (file, op);
8619 fputs ("@INDNTPOFF", file);
8629 /* Split one or more DImode RTL references into pairs of SImode
8630 references. The RTL can be REG, offsettable MEM, integer constant, or
8631 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
8632 split and "num" is its length. lo_half and hi_half are output arrays
8633 that parallel "operands". */
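/* Sketch: on this little-endian target a DImode value 0x1122334455667788
   splits into lo_half 0x55667788 (offset 0) and hi_half 0x11223344
   (offset 4), as the adjust_address offsets below reflect.  */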
8636 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
8640 rtx op = operands[num];
8642 /* simplify_subreg refuses to split volatile memory addresses,
8643 but we still have to handle them. */
8646 lo_half[num] = adjust_address (op, SImode, 0);
8647 hi_half[num] = adjust_address (op, SImode, 4);
8651 lo_half[num] = simplify_gen_subreg (SImode, op,
8652 GET_MODE (op) == VOIDmode
8653 ? DImode : GET_MODE (op), 0);
8654 hi_half[num] = simplify_gen_subreg (SImode, op,
8655 GET_MODE (op) == VOIDmode
8656 ? DImode : GET_MODE (op), 4);
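/* Illustration only (not part of the build): a typical caller splits
both halves of a two-operand DImode move; the "operands" array here is
hypothetical.

  rtx lo[2], hi[2];
  split_di (operands, 2, lo, hi);
  emit_move_insn (lo[0], lo[1]);
  emit_move_insn (hi[0], hi[1]);

split_ti below is the analogous TImode-to-DImode splitter. */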
8660 /* Split one or more TImode RTL references into pairs of DImode
8661 references. The RTL can be REG, offsettable MEM, integer constant, or
8662 CONST_DOUBLE. "operands" is a pointer to an array of TImode RTL to
8663 split and "num" is its length. lo_half and hi_half are output arrays
8664 that parallel "operands". */
8667 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
8671 rtx op = operands[num];
8673 /* simplify_subreg refuses to split volatile memory addresses, but we
8674 still have to handle them. */
8677 lo_half[num] = adjust_address (op, DImode, 0);
8678 hi_half[num] = adjust_address (op, DImode, 8);
8682 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
8683 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
8688 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
8689 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
8690 is the expression of the binary operation. The output may either be
8691 emitted here, or returned to the caller, like all output_* functions.
8693 There is no guarantee that the operands are the same mode, as they
8694 might be within FLOAT or FLOAT_EXTEND expressions. */
8696 #ifndef SYSV386_COMPAT
8697 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
8698 wants to fix the assemblers because that causes incompatibility
8699 with gcc. No-one wants to fix gcc because that causes
8700 incompatibility with assemblers... You can use the option of
8701 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
8702 #define SYSV386_COMPAT 1
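/* A note on the output templates used below: "{att|intel}" selects the
text before the '|' when ASSEMBLER_DIALECT is ASM_ATT and the text
after it for the Intel dialect, so "p\t{%2, %0|%0, %2}" prints the
operands in the order each assembler expects. */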
8706 output_387_binary_op (rtx insn, rtx *operands)
8708 static char buf[30];
8711 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
8713 #ifdef ENABLE_CHECKING
8714 /* Even if we do not want to check the inputs, this documents the
8715 input constraints, which helps in understanding the following code. */
8716 if (STACK_REG_P (operands[0])
8717 && ((REG_P (operands[1])
8718 && REGNO (operands[0]) == REGNO (operands[1])
8719 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
8720 || (REG_P (operands[2])
8721 && REGNO (operands[0]) == REGNO (operands[2])
8722 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
8723 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
8726 gcc_assert (is_sse);
8729 switch (GET_CODE (operands[3]))
8732 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8733 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8741 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8742 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8750 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8751 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8759 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8760 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8774 if (GET_MODE (operands[0]) == SFmode)
8775 strcat (buf, "ss\t{%2, %0|%0, %2}");
8777 strcat (buf, "sd\t{%2, %0|%0, %2}");
8782 switch (GET_CODE (operands[3]))
8786 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
8788 rtx temp = operands[2];
8789 operands[2] = operands[1];
8790 operands[1] = temp;
8793 /* We know operands[0] == operands[1]. */
8795 if (MEM_P (operands[2]))
8801 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
8803 if (STACK_TOP_P (operands[0]))
8804 /* How is it that we are storing to a dead operand[2]?
8805 Well, presumably operands[1] is dead too. We can't
8806 store the result to st(0) as st(0) gets popped on this
8807 instruction. Instead store to operands[2] (which I
8808 think has to be st(1)). st(1) will be popped later.
8809 gcc <= 2.8.1 didn't have this check and generated
8810 assembly code that the Unixware assembler rejected. */
8811 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
8813 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
8817 if (STACK_TOP_P (operands[0]))
8818 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
8820 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
8825 if (MEM_P (operands[1]))
8831 if (MEM_P (operands[2]))
8837 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
8840 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
8841 derived assemblers, confusingly reverse the direction of
8842 the operation for fsub{r} and fdiv{r} when the
8843 destination register is not st(0). The Intel assembler
8844 doesn't have this brain damage. Read !SYSV386_COMPAT to
8845 figure out what the hardware really does. */
8846 if (STACK_TOP_P (operands[0]))
8847 p = "{p\t%0, %2|rp\t%2, %0}";
8849 p = "{rp\t%2, %0|p\t%0, %2}";
8851 if (STACK_TOP_P (operands[0]))
8852 /* As above for fmul/fadd, we can't store to st(0). */
8853 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
8855 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
8860 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
8863 if (STACK_TOP_P (operands[0]))
8864 p = "{rp\t%0, %1|p\t%1, %0}";
8866 p = "{p\t%1, %0|rp\t%0, %1}";
8868 if (STACK_TOP_P (operands[0]))
8869 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
8871 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
8876 if (STACK_TOP_P (operands[0]))
8878 if (STACK_TOP_P (operands[1]))
8879 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
8881 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
8884 else if (STACK_TOP_P (operands[1]))
8887 p = "{\t%1, %0|r\t%0, %1}";
8889 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
8895 p = "{r\t%2, %0|\t%0, %2}";
8897 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
8910 /* Return the mode needed for an entity in the optimize_mode_switching pass. */
8913 ix86_mode_needed (int entity, rtx insn)
8915 enum attr_i387_cw mode;
8917 /* The mode UNINITIALIZED is used to store the control word after a
8918 function call or ASM pattern. The mode ANY specifies that the function
8919 has no requirements on the control word and makes no changes to the
8920 bits we are interested in. */
8923 || (NONJUMP_INSN_P (insn)
8924 && (asm_noperands (PATTERN (insn)) >= 0
8925 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
8926 return I387_CW_UNINITIALIZED;
8928 if (recog_memoized (insn) < 0)
8931 mode = get_attr_i387_cw (insn);
8936 if (mode == I387_CW_TRUNC)
8941 if (mode == I387_CW_FLOOR)
8946 if (mode == I387_CW_CEIL)
8951 if (mode == I387_CW_MASK_PM)
8962 /* Output code to initialize control word copies used by trunc?f?i and
8963 rounding patterns. CURRENT_MODE is set to the current control word,
8964 while NEW_MODE is set to the new one. */
8967 emit_i387_cw_initialization (int mode)
8969 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
8974 rtx reg = gen_reg_rtx (HImode);
8976 emit_insn (gen_x86_fnstcw_1 (stored_mode));
8977 emit_move_insn (reg, copy_rtx (stored_mode));
8979 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL || optimize_size)
8984 /* round toward zero (truncate) */
8985 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
8986 slot = SLOT_CW_TRUNC;
8990 /* round down toward -oo */
8991 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
8992 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
8993 slot = SLOT_CW_FLOOR;
8997 /* round up toward +oo */
8998 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
8999 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
9000 slot = SLOT_CW_CEIL;
9003 case I387_CW_MASK_PM:
9004 /* mask precision exception for nearbyint() */
9005 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
9006 slot = SLOT_CW_MASK_PM;
9018 /* round toward zero (truncate) */
9019 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
9020 slot = SLOT_CW_TRUNC;
9024 /* round down toward -oo */
9025 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
9026 slot = SLOT_CW_FLOOR;
9030 /* round up toward +oo */
9031 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
9032 slot = SLOT_CW_CEIL;
9035 case I387_CW_MASK_PM:
9036 /* mask precision exception for nearbyint() */
9037 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
9038 slot = SLOT_CW_MASK_PM;
9046 gcc_assert (slot < MAX_386_STACK_LOCALS);
9048 new_mode = assign_386_stack_local (HImode, slot);
9049 emit_move_insn (new_mode, reg);
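/* For reference, the bits manipulated above live in the x87 control
word: bits 10-11 form the rounding-control field (00 = to nearest,
01 = down, 10 = up, 11 = toward zero), hence the 0x0400, 0x0800 and
0x0c00 masks, and bit 5 (0x0020) masks the precision exception used
for nearbyint. */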
9052 /* Output code for INSN to convert a float to a signed int. OPERANDS
9053 are the insn operands. The output may be [HSD]Imode and the input
9054 operand may be [SDX]Fmode. */
9057 output_fix_trunc (rtx insn, rtx *operands, int fisttp)
9059 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
9060 int dimode_p = GET_MODE (operands[0]) == DImode;
9061 int round_mode = get_attr_i387_cw (insn);
9063 /* Jump through a hoop or two for DImode, since the hardware has no
9064 non-popping instruction. We used to do this a different way, but
9065 that was somewhat fragile and broke with post-reload splitters. */
9066 if ((dimode_p || fisttp) && !stack_top_dies)
9067 output_asm_insn ("fld\t%y1", operands);
9069 gcc_assert (STACK_TOP_P (operands[1]));
9070 gcc_assert (MEM_P (operands[0]));
9073 output_asm_insn ("fisttp%z0\t%0", operands);
9076 if (round_mode != I387_CW_ANY)
9077 output_asm_insn ("fldcw\t%3", operands);
9078 if (stack_top_dies || dimode_p)
9079 output_asm_insn ("fistp%z0\t%0", operands);
9081 output_asm_insn ("fist%z0\t%0", operands);
9082 if (round_mode != I387_CW_ANY)
9083 output_asm_insn ("fldcw\t%2", operands);
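/* A sketch of the usual emitted sequence for an SImode result with an
explicit rounding mode (illustration only; new_cw and old_cw stand for
the stack slots printed by %3 and %2):

  fldcw  new_cw   # switch to the truncating control word
  fistpl mem      # convert, store and pop
  fldcw  old_cw   # restore the saved control word
*/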
9089 /* Output code for x87 ffreep insn. The OPNO argument, which may only
9090 have the values zero or one, indicates the ffreep insn's operand
9091 from the OPERANDS array. */
9094 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
9096 if (TARGET_USE_FFREEP)
9097 #if HAVE_AS_IX86_FFREEP
9098 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
9101 static char retval[] = ".word\t0xc_df";
9102 int regno = REGNO (operands[opno]);
9104 gcc_assert (FP_REGNO_P (regno));
9106 retval[9] = '0' + (regno - FIRST_STACK_REG);
9111 return opno ? "fstp\t%y1" : "fstp\t%y0";
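/* The ".word" fallback above works because ffreep %st(i) encodes as
the two bytes 0xdf 0xc0+i: on little-endian x86 ".word 0xc0df" emits
exactly those bytes, and patching retval[9] supplies the register
digit. */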
9115 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
9116 should be used. UNORDERED_P is true when fucom should be used. */
9119 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
9122 rtx cmp_op0, cmp_op1;
9123 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
9127 cmp_op0 = operands[0];
9128 cmp_op1 = operands[1];
9132 cmp_op0 = operands[1];
9133 cmp_op1 = operands[2];
9138 if (GET_MODE (operands[0]) == SFmode)
9140 return "ucomiss\t{%1, %0|%0, %1}";
9142 return "comiss\t{%1, %0|%0, %1}";
9145 return "ucomisd\t{%1, %0|%0, %1}";
9147 return "comisd\t{%1, %0|%0, %1}";
9150 gcc_assert (STACK_TOP_P (cmp_op0));
9152 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
9154 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
9158 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
9159 return output_387_ffreep (operands, 1);
9162 return "ftst\n\tfnstsw\t%0";
9165 if (STACK_REG_P (cmp_op1)
9167 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
9168 && REGNO (cmp_op1) != FIRST_STACK_REG)
9170 /* If both the top of the 387 stack and the other operand (also a
9171 stack register) die, then this must be an `fcompp' float compare. */
9176 /* There is no double popping fcomi variant. Fortunately,
9177 eflags is immune from the fstp's cc clobbering. */
9179 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
9181 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
9182 return output_387_ffreep (operands, 0);
9187 return "fucompp\n\tfnstsw\t%0";
9189 return "fcompp\n\tfnstsw\t%0";
9194 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
9196 static const char * const alt[16] =
9198 "fcom%z2\t%y2\n\tfnstsw\t%0",
9199 "fcomp%z2\t%y2\n\tfnstsw\t%0",
9200 "fucom%z2\t%y2\n\tfnstsw\t%0",
9201 "fucomp%z2\t%y2\n\tfnstsw\t%0",
9203 "ficom%z2\t%y2\n\tfnstsw\t%0",
9204 "ficomp%z2\t%y2\n\tfnstsw\t%0",
9208 "fcomi\t{%y1, %0|%0, %y1}",
9209 "fcomip\t{%y1, %0|%0, %y1}",
9210 "fucomi\t{%y1, %0|%0, %y1}",
9211 "fucomip\t{%y1, %0|%0, %y1}",
9222 mask = eflags_p << 3;
9223 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
9224 mask |= unordered_p << 1;
9225 mask |= stack_top_dies;
9227 gcc_assert (mask < 16);
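/* Example of the encoding: eflags_p = 1, an FP operand, unordered_p = 1
and a dying stack top give mask 8|2|1 = 11, which selects "fucomip"
in the table above. */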
9236 ix86_output_addr_vec_elt (FILE *file, int value)
9238 const char *directive = ASM_LONG;
9242 directive = ASM_QUAD;
9244 gcc_assert (!TARGET_64BIT);
9247 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
9251 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
9253 const char *directive = ASM_LONG;
9256 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
9257 directive = ASM_QUAD;
9259 gcc_assert (!TARGET_64BIT);
9261 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
9262 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
9263 fprintf (file, "%s%s%d-%s%d\n",
9264 directive, LPREFIX, value, LPREFIX, rel);
9265 else if (HAVE_AS_GOTOFF_IN_DATA)
9266 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
9268 else if (TARGET_MACHO)
9270 fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
9271 machopic_output_function_base_name (file);
9272 fprintf(file, "\n");
9276 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
9277 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
9280 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate. */
9284 ix86_expand_clear (rtx dest)
9288 /* We play register width games, which are only valid after reload. */
9289 gcc_assert (reload_completed);
9291 /* Avoid HImode and its attendant prefix byte. */
9292 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
9293 dest = gen_rtx_REG (SImode, REGNO (dest));
9295 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
9297 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
9298 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
9300 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
9301 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
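/* The xor form is preferred because "xorl %eax, %eax" is two bytes
against five for "movl $0, %eax", but unlike mov it clobbers the
flags, which is why the CLOBBER is attached in a PARALLEL above. */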
9307 /* X is an unchanging MEM. If it is a constant pool reference, return
9308 the constant pool rtx, else NULL. */
9311 maybe_get_pool_constant (rtx x)
9313 x = ix86_delegitimize_address (XEXP (x, 0));
9315 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
9316 return get_pool_constant (x);
9322 ix86_expand_move (enum machine_mode mode, rtx operands[])
9324 int strict = (reload_in_progress || reload_completed);
9326 enum tls_model model;
9331 if (GET_CODE (op1) == SYMBOL_REF)
9333 model = SYMBOL_REF_TLS_MODEL (op1);
9336 op1 = legitimize_tls_address (op1, model, true);
9337 op1 = force_operand (op1, op0);
9341 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
9342 && SYMBOL_REF_DLLIMPORT_P (op1))
9343 op1 = legitimize_dllimport_symbol (op1, false);
9345 else if (GET_CODE (op1) == CONST
9346 && GET_CODE (XEXP (op1, 0)) == PLUS
9347 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
9349 rtx addend = XEXP (XEXP (op1, 0), 1);
9350 rtx symbol = XEXP (XEXP (op1, 0), 0);
9353 model = SYMBOL_REF_TLS_MODEL (symbol);
9355 tmp = legitimize_tls_address (symbol, model, true);
9356 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
9357 && SYMBOL_REF_DLLIMPORT_P (symbol))
9358 tmp = legitimize_dllimport_symbol (symbol, true);
9362 tmp = force_operand (tmp, NULL);
9363 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
9364 op0, 1, OPTAB_DIRECT);
9370 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
9372 if (TARGET_MACHO && !TARGET_64BIT)
9377 rtx temp = ((reload_in_progress
9378 || ((op0 && REG_P (op0))
9380 ? op0 : gen_reg_rtx (Pmode));
9381 op1 = machopic_indirect_data_reference (op1, temp);
9382 op1 = machopic_legitimize_pic_address (op1, mode,
9383 temp == op1 ? 0 : temp);
9385 else if (MACHOPIC_INDIRECT)
9386 op1 = machopic_indirect_data_reference (op1, 0);
9394 op1 = force_reg (Pmode, op1);
9395 else if (!TARGET_64BIT || !x86_64_movabs_operand (op1, Pmode))
9397 rtx reg = no_new_pseudos ? op0 : NULL_RTX;
9398 op1 = legitimize_pic_address (op1, reg);
9407 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
9408 || !push_operand (op0, mode))
9410 op1 = force_reg (mode, op1);
9412 if (push_operand (op0, mode)
9413 && ! general_no_elim_operand (op1, mode))
9414 op1 = copy_to_mode_reg (mode, op1);
9416 /* Force large constants in 64bit compilation into register
9417 to get them CSEed. */
9418 if (TARGET_64BIT && mode == DImode
9419 && immediate_operand (op1, mode)
9420 && !x86_64_zext_immediate_operand (op1, VOIDmode)
9421 && !register_operand (op0, mode)
9422 && optimize && !reload_completed && !reload_in_progress)
9423 op1 = copy_to_mode_reg (mode, op1);
9425 if (FLOAT_MODE_P (mode))
9427 /* If we are loading a floating point constant to a register,
9428 force the value to memory now, since we'll get better code
9429 out of the back end. */
9433 else if (GET_CODE (op1) == CONST_DOUBLE)
9435 op1 = validize_mem (force_const_mem (mode, op1));
9436 if (!register_operand (op0, mode))
9438 rtx temp = gen_reg_rtx (mode);
9439 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
9440 emit_move_insn (op0, temp);
9447 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
9451 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
9453 rtx op0 = operands[0], op1 = operands[1];
9455 /* Force constants other than zero into memory. We do not know how
9456 the instructions used to build constants modify the upper 64 bits
9457 of the register; once we have that information we may be able
9458 to handle some of them more efficiently. */
9459 if ((reload_in_progress | reload_completed) == 0
9460 && register_operand (op0, mode)
9462 && standard_sse_constant_p (op1) <= 0)
9463 op1 = validize_mem (force_const_mem (mode, op1));
9465 /* Make operand1 a register if it isn't already. */
9467 && !register_operand (op0, mode)
9468 && !register_operand (op1, mode))
9470 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
9474 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
9477 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
9478 straight to ix86_expand_vector_move. */
9479 /* Code generation for scalar reg-reg moves of single and double precision data:
9480 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
9484 if (x86_sse_partial_reg_dependency == true)
9489 Code generation for scalar loads of double precision data:
9490 if (x86_sse_split_regs == true)
9491 movlpd mem, reg (gas syntax)
9495 Code generation for unaligned packed loads of single precision data
9496 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
9497 if (x86_sse_unaligned_move_optimal)
9500 if (x86_sse_partial_reg_dependency == true)
9512 Code generation for unaligned packed loads of double precision data
9513 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
9514 if (x86_sse_unaligned_move_optimal)
9517 if (x86_sse_split_regs == true)
9530 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
9539 /* If we're optimizing for size, movups is the smallest. */
9542 op0 = gen_lowpart (V4SFmode, op0);
9543 op1 = gen_lowpart (V4SFmode, op1);
9544 emit_insn (gen_sse_movups (op0, op1));
9548 /* ??? If we have typed data, then it would appear that using
9549 movdqu is the only way to get unaligned data loaded with
9550 integer registers. */
9551 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
9553 op0 = gen_lowpart (V16QImode, op0);
9554 op1 = gen_lowpart (V16QImode, op1);
9555 emit_insn (gen_sse2_movdqu (op0, op1));
9559 if (TARGET_SSE2 && mode == V2DFmode)
9563 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
9565 op0 = gen_lowpart (V2DFmode, op0);
9566 op1 = gen_lowpart (V2DFmode, op1);
9567 emit_insn (gen_sse2_movupd (op0, op1));
9571 /* When SSE registers are split into halves, we can avoid
9572 writing to the top half twice. */
9573 if (TARGET_SSE_SPLIT_REGS)
9575 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
9580 /* ??? Not sure about the best option for the Intel chips.
9581 The following would seem to satisfy; the register is
9582 entirely cleared, breaking the dependency chain. We
9583 then store to the upper half, with a dependency depth
9584 of one. A rumor has it that Intel recommends two movsd
9585 followed by an unpacklpd, but this is unconfirmed. And
9586 given that the dependency depth of the unpacklpd would
9587 still be one, I'm not sure why this would be better. */
9588 zero = CONST0_RTX (V2DFmode);
9591 m = adjust_address (op1, DFmode, 0);
9592 emit_insn (gen_sse2_loadlpd (op0, zero, m));
9593 m = adjust_address (op1, DFmode, 8);
9594 emit_insn (gen_sse2_loadhpd (op0, op0, m));
9598 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
9600 op0 = gen_lowpart (V4SFmode, op0);
9601 op1 = gen_lowpart (V4SFmode, op1);
9602 emit_insn (gen_sse_movups (op0, op1));
9606 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
9607 emit_move_insn (op0, CONST0_RTX (mode));
9609 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
9611 if (mode != V4SFmode)
9612 op0 = gen_lowpart (V4SFmode, op0);
9613 m = adjust_address (op1, V2SFmode, 0);
9614 emit_insn (gen_sse_loadlps (op0, op0, m));
9615 m = adjust_address (op1, V2SFmode, 8);
9616 emit_insn (gen_sse_loadhps (op0, op0, m));
9619 else if (MEM_P (op0))
9621 /* If we're optimizing for size, movups is the smallest. */
9624 op0 = gen_lowpart (V4SFmode, op0);
9625 op1 = gen_lowpart (V4SFmode, op1);
9626 emit_insn (gen_sse_movups (op0, op1));
9630 /* ??? Similar to above, only less clear because of quote
9631 typeless stores unquote. */
9632 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
9633 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
9635 op0 = gen_lowpart (V16QImode, op0);
9636 op1 = gen_lowpart (V16QImode, op1);
9637 emit_insn (gen_sse2_movdqu (op0, op1));
9641 if (TARGET_SSE2 && mode == V2DFmode)
9643 m = adjust_address (op0, DFmode, 0);
9644 emit_insn (gen_sse2_storelpd (m, op1));
9645 m = adjust_address (op0, DFmode, 8);
9646 emit_insn (gen_sse2_storehpd (m, op1));
9650 if (mode != V4SFmode)
9651 op1 = gen_lowpart (V4SFmode, op1);
9652 m = adjust_address (op0, V2SFmode, 0);
9653 emit_insn (gen_sse_storelps (m, op1));
9654 m = adjust_address (op0, V2SFmode, 8);
9655 emit_insn (gen_sse_storehps (m, op1));
9662 /* Expand a push in MODE. This is some mode for which we do not support
9663 proper push instructions, at least from the registers that we expect
9664 the value to live in. */
9667 ix86_expand_push (enum machine_mode mode, rtx x)
9671 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
9672 GEN_INT (-GET_MODE_SIZE (mode)),
9673 stack_pointer_rtx, 1, OPTAB_DIRECT);
9674 if (tmp != stack_pointer_rtx)
9675 emit_move_insn (stack_pointer_rtx, tmp);
9677 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
9678 emit_move_insn (tmp, x);
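/* In effect this expands to the moral equivalent of

  sub $size, %esp
  mov x, (%esp)

for modes that have no native push instruction. */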
9681 /* Helper function of ix86_fixup_binary_operands to canonicalize
9682 operand order. Returns true if the operands should be swapped. */
9685 ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
9688 rtx dst = operands[0];
9689 rtx src1 = operands[1];
9690 rtx src2 = operands[2];
9692 /* If the operation is not commutative, we can't do anything. */
9693 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
9696 /* Highest priority is that src1 should match dst. */
9697 if (rtx_equal_p (dst, src1))
9699 if (rtx_equal_p (dst, src2))
9702 /* Next highest priority is that immediate constants come second. */
9703 if (immediate_operand (src2, mode))
9705 if (immediate_operand (src1, mode))
9708 /* Lowest priority is that memory references should come second. */
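/* For example, given a commutative PLUS whose operands[2] is the same
register as the destination, swapping the sources lets src1 match dst,
satisfying the matching constraint without an extra move. */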
9718 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
9719 destination to use for the operation. If different from the true
9720 destination in operands[0], a copy operation will be required. */
9723 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
9726 rtx dst = operands[0];
9727 rtx src1 = operands[1];
9728 rtx src2 = operands[2];
9730 /* Canonicalize operand order. */
9731 if (ix86_swap_binary_operands_p (code, mode, operands))
9738 /* Both source operands cannot be in memory. */
9739 if (MEM_P (src1) && MEM_P (src2))
9741 /* Optimization: Only read from memory once. */
9742 if (rtx_equal_p (src1, src2))
9744 src2 = force_reg (mode, src2);
9748 src2 = force_reg (mode, src2);
9751 /* If the destination is memory, and we do not have matching source
9752 operands, do things in registers. */
9753 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
9754 dst = gen_reg_rtx (mode);
9756 /* Source 1 cannot be a constant. */
9757 if (CONSTANT_P (src1))
9758 src1 = force_reg (mode, src1);
9760 /* Source 1 cannot be a non-matching memory. */
9761 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
9762 src1 = force_reg (mode, src1);
9769 /* Similarly, but assume that the destination has already been set up properly. */
9773 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
9774 enum machine_mode mode, rtx operands[])
9776 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
9777 gcc_assert (dst == operands[0]);
9780 /* Attempt to expand a binary operator. Make the expansion closer to the
9781 actual machine than just general_operand, which will allow 3 separate
9782 memory references (one output, two input) in a single insn. */
9785 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
9788 rtx src1, src2, dst, op, clob;
9790 dst = ix86_fixup_binary_operands (code, mode, operands);
9794 /* Emit the instruction. */
9796 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
9797 if (reload_in_progress)
9799 /* Reload doesn't know about the flags register, and doesn't know that
9800 it doesn't want to clobber it. We can only do this with PLUS. */
9801 gcc_assert (code == PLUS);
9806 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
9807 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
9810 /* Fix up the destination if needed. */
9811 if (dst != operands[0])
9812 emit_move_insn (operands[0], dst);
9815 /* Return TRUE or FALSE depending on whether the binary operator meets the
9816 appropriate constraints. */
9819 ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
9822 rtx dst = operands[0];
9823 rtx src1 = operands[1];
9824 rtx src2 = operands[2];
9826 /* Both source operands cannot be in memory. */
9827 if (MEM_P (src1) && MEM_P (src2))
9830 /* Canonicalize operand order for commutative operators. */
9831 if (ix86_swap_binary_operands_p (code, mode, operands))
9838 /* If the destination is memory, we must have a matching source operand. */
9839 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
9842 /* Source 1 cannot be a constant. */
9843 if (CONSTANT_P (src1))
9846 /* Source 1 cannot be a non-matching memory. */
9847 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
9853 /* Attempt to expand a unary operator. Make the expansion closer to the
9854 actual machine than just general_operand, which will allow 2 separate
9855 memory references (one output, one input) in a single insn. */
9858 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
9861 int matching_memory;
9862 rtx src, dst, op, clob;
9867 /* If the destination is memory, and we do not have matching source
9868 operands, do things in registers. */
9869 matching_memory = 0;
9872 if (rtx_equal_p (dst, src))
9873 matching_memory = 1;
9875 dst = gen_reg_rtx (mode);
9878 /* When source operand is memory, destination must match. */
9879 if (MEM_P (src) && !matching_memory)
9880 src = force_reg (mode, src);
9882 /* Emit the instruction. */
9884 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
9885 if (reload_in_progress || code == NOT)
9887 /* Reload doesn't know about the flags register, and doesn't know that
9888 it doesn't want to clobber it. */
9889 gcc_assert (code == NOT);
9894 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
9895 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
9898 /* Fix up the destination if needed. */
9899 if (dst != operands[0])
9900 emit_move_insn (operands[0], dst);
9903 /* Return TRUE or FALSE depending on whether the unary operator meets the
9904 appropriate constraints. */
9907 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
9908 enum machine_mode mode ATTRIBUTE_UNUSED,
9909 rtx operands[2] ATTRIBUTE_UNUSED)
9911 /* If one of operands is memory, source and destination must match. */
9912 if ((MEM_P (operands[0])
9913 || MEM_P (operands[1]))
9914 && ! rtx_equal_p (operands[0], operands[1]))
9919 /* Post-reload splitter for converting an SF or DFmode value in an
9920 SSE register into an unsigned SImode. */
9923 ix86_split_convert_uns_si_sse (rtx operands[])
9925 enum machine_mode vecmode;
9926 rtx value, large, zero_or_two31, input, two31, x;
9928 large = operands[1];
9929 zero_or_two31 = operands[2];
9930 input = operands[3];
9931 two31 = operands[4];
9932 vecmode = GET_MODE (large);
9933 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
9935 /* Load up the value into the low element. We must ensure that the other
9936 elements are valid floats -- zero is the easiest such value. */
9939 if (vecmode == V4SFmode)
9940 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
9942 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
9946 input = gen_rtx_REG (vecmode, REGNO (input));
9947 emit_move_insn (value, CONST0_RTX (vecmode));
9948 if (vecmode == V4SFmode)
9949 emit_insn (gen_sse_movss (value, value, input));
9951 emit_insn (gen_sse2_movsd (value, value, input));
9954 emit_move_insn (large, two31);
9955 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
9957 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
9958 emit_insn (gen_rtx_SET (VOIDmode, large, x));
9960 x = gen_rtx_AND (vecmode, zero_or_two31, large);
9961 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
9963 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
9964 emit_insn (gen_rtx_SET (VOIDmode, value, x));
9966 large = gen_rtx_REG (V4SImode, REGNO (large));
9967 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
9969 x = gen_rtx_REG (V4SImode, REGNO (value));
9970 if (vecmode == V4SFmode)
9971 emit_insn (gen_sse2_cvttps2dq (x, value));
9973 emit_insn (gen_sse2_cvttpd2dq (x, value));
9976 emit_insn (gen_xorv4si3 (value, value, large));
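/* A plain-C model of the sequence above (illustration only; assumes
the input is in range for the conversion):

  #include <stdint.h>
  uint32_t dtoui (double x)
  {
    if (x < 2147483648.0)   /* x < 2^31: plain signed conversion */
      return (uint32_t) (int32_t) x;
    /* otherwise convert x - 2^31 and put the top bit back */
    return ((uint32_t) (int32_t) (x - 2147483648.0)) ^ 0x80000000u;
  }
*/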
9979 /* Convert an unsigned DImode value into a DFmode, using only SSE.
9980 Expects the 64-bit DImode to be supplied in a pair of integral
9981 registers. Requires SSE2; will use SSE3 if available. For x86_32,
9982 -mfpmath=sse, !optimize_size only. */
9985 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
9987 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
9988 rtx int_xmm, fp_xmm;
9989 rtx biases, exponents;
9992 int_xmm = gen_reg_rtx (V4SImode);
9993 if (TARGET_INTER_UNIT_MOVES)
9994 emit_insn (gen_movdi_to_sse (int_xmm, input));
9995 else if (TARGET_SSE_SPLIT_REGS)
9997 emit_insn (gen_rtx_CLOBBER (VOIDmode, int_xmm));
9998 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
10002 x = gen_reg_rtx (V2DImode);
10003 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
10004 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
10007 x = gen_rtx_CONST_VECTOR (V4SImode,
10008 gen_rtvec (4, GEN_INT (0x43300000UL),
10009 GEN_INT (0x45300000UL),
10010 const0_rtx, const0_rtx));
10011 exponents = validize_mem (force_const_mem (V4SImode, x));
10013 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
10014 emit_insn (gen_sse2_punpckldq (int_xmm, int_xmm, exponents));
10016 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
10017 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
10018 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
10019 (0x1.0p84 + double(fp_value_hi_xmm)).
10020 Note these exponents differ by 32. */
10022 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
10024 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
10025 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
10026 real_ldexp (&bias_lo_rvt, &dconst1, 52);
10027 real_ldexp (&bias_hi_rvt, &dconst1, 84);
10028 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
10029 x = const_double_from_real_value (bias_hi_rvt, DFmode);
10030 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
10031 biases = validize_mem (force_const_mem (V2DFmode, biases));
10032 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
10034 /* Add the upper and lower DFmode values together. */
10036 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
10039 x = copy_to_mode_reg (V2DFmode, fp_xmm);
10040 emit_insn (gen_sse2_unpckhpd (fp_xmm, fp_xmm, fp_xmm));
10041 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
10044 ix86_expand_vector_extract (false, target, fp_xmm, 0);
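/* The same trick in plain C, for reference (illustration only):

  #include <stdint.h>
  double uns64_to_double (uint64_t x)
  {
    union { uint64_t u; double d; } lo, hi;
    lo.u = 0x4330000000000000ULL | (x & 0xffffffffULL); /* 2^52 + lo32 */
    hi.u = 0x4530000000000000ULL | (x >> 32);           /* 2^84 + hi32 * 2^32 */
    return (lo.d - 0x1.0p52) + (hi.d - 0x1.0p84);
  }
*/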
10047 /* Convert an unsigned SImode value into a DFmode. Only currently used
10048 for SSE, but applicable anywhere. */
10051 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
10053 REAL_VALUE_TYPE TWO31r;
10056 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
10057 NULL, 1, OPTAB_DIRECT);
10059 fp = gen_reg_rtx (DFmode);
10060 emit_insn (gen_floatsidf2 (fp, x));
10062 real_ldexp (&TWO31r, &dconst1, 31);
10063 x = const_double_from_real_value (TWO31r, DFmode);
10065 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
10067 emit_move_insn (target, x);
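/* Equivalent plain-C model (illustration only):

  #include <stdint.h>
  double uns32_to_double (uint32_t x)
  {
    int32_t biased = (int32_t) (x + 0x80000000u); /* x - 2^31 */
    return (double) biased + 2147483648.0;        /* add 2^31 back */
  }
*/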
10070 /* Convert a signed DImode value into a DFmode. Only used for SSE in
10071 32-bit mode; otherwise we have a direct convert instruction. */
10074 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
10076 REAL_VALUE_TYPE TWO32r;
10077 rtx fp_lo, fp_hi, x;
10079 fp_lo = gen_reg_rtx (DFmode);
10080 fp_hi = gen_reg_rtx (DFmode);
10082 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
10084 real_ldexp (&TWO32r, &dconst1, 32);
10085 x = const_double_from_real_value (TWO32r, DFmode);
10086 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
10088 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
10090 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
10093 emit_move_insn (target, x);
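/* Equivalent plain-C model, splitting the value into a signed high and
an unsigned low half (illustration only, reusing the uns32_to_double
sketch above):

  #include <stdint.h>
  double sint64_to_double (int64_t x)
  {
    double hi = (double) (int32_t) (x >> 32) * 4294967296.0; /* * 2^32 */
    return hi + uns32_to_double ((uint32_t) x);
  }
*/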
10096 /* Convert an unsigned SImode value into a SFmode, using only SSE.
10097 For x86_32, -mfpmath=sse, !optimize_size only. */
10099 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
10101 REAL_VALUE_TYPE ONE16r;
10102 rtx fp_hi, fp_lo, int_hi, int_lo, x;
10104 real_ldexp (&ONE16r, &dconst1, 16);
10105 x = const_double_from_real_value (ONE16r, SFmode);
10106 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
10107 NULL, 0, OPTAB_DIRECT);
10108 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
10109 NULL, 0, OPTAB_DIRECT);
10110 fp_hi = gen_reg_rtx (SFmode);
10111 fp_lo = gen_reg_rtx (SFmode);
10112 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
10113 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
10114 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
10116 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
10118 if (!rtx_equal_p (target, fp_hi))
10119 emit_move_insn (target, fp_hi);
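/* Equivalent plain-C model (illustration only). Both 16-bit halves
convert to float exactly, so only the final addition rounds:

  #include <stdint.h>
  float uns32_to_float (uint32_t x)
  {
    return (float) (x >> 16) * 65536.0f + (float) (x & 0xffff);
  }
*/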
10122 /* A subroutine of ix86_build_signbit_mask_vector. If VECT is true,
10123 then replicate the value for all elements of the vector register. */
10127 ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
10134 v = gen_rtvec (4, value, value, value, value);
10136 v = gen_rtvec (4, value, CONST0_RTX (SFmode),
10137 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
10138 return gen_rtx_CONST_VECTOR (V4SFmode, v);
10142 v = gen_rtvec (2, value, value);
10144 v = gen_rtvec (2, value, CONST0_RTX (DFmode));
10145 return gen_rtx_CONST_VECTOR (V2DFmode, v);
10148 gcc_unreachable ();
10152 /* A subroutine of ix86_expand_fp_absneg_operator and copysign expanders.
10153 Create a mask for the sign bit in MODE for an SSE register. If VECT is
10154 true, then replicate the mask for all elements of the vector register.
10155 If INVERT is true, then create a mask excluding the sign bit. */
10158 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
10160 enum machine_mode vec_mode;
10161 HOST_WIDE_INT hi, lo;
10166 /* Find the sign bit, sign extended to 2*HWI. */
10167 if (mode == SFmode)
10168 lo = 0x80000000, hi = lo < 0;
10169 else if (HOST_BITS_PER_WIDE_INT >= 64)
10170 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
10172 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
10175 lo = ~lo, hi = ~hi;
10177 /* Force this value into the low part of a fp vector constant. */
10178 mask = immed_double_const (lo, hi, mode == SFmode ? SImode : DImode);
10179 mask = gen_lowpart (mode, mask);
10181 v = ix86_build_const_vector (mode, vect, mask);
10182 vec_mode = (mode == SFmode) ? V4SFmode : V2DFmode;
10183 return force_reg (vec_mode, v);
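/* E.g. for DFmode with VECT and INVERT false this yields the constant
{ 0x8000000000000000, 0 } viewed as V2DF: XORing with it flips the
sign of the low element (negation), while ANDing with the inverted
mask clears the sign bit (absolute value). */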
10186 /* Generate code for floating point ABS or NEG. */
10189 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
10192 rtx mask, set, use, clob, dst, src;
10193 bool matching_memory;
10194 bool use_sse = false;
10195 bool vector_mode = VECTOR_MODE_P (mode);
10196 enum machine_mode elt_mode = mode;
10200 elt_mode = GET_MODE_INNER (mode);
10203 else if (TARGET_SSE_MATH)
10204 use_sse = SSE_FLOAT_MODE_P (mode);
10206 /* NEG and ABS performed with SSE use bitwise mask operations.
10207 Create the appropriate mask now. */
10209 mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS);
10216 /* If the destination is memory, and we don't have matching source
10217 operands or we're using the x87, do things in registers. */
10218 matching_memory = false;
10221 if (use_sse && rtx_equal_p (dst, src))
10222 matching_memory = true;
10224 dst = gen_reg_rtx (mode);
10226 if (MEM_P (src) && !matching_memory)
10227 src = force_reg (mode, src);
10231 set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
10232 set = gen_rtx_SET (VOIDmode, dst, set);
10237 set = gen_rtx_fmt_e (code, mode, src);
10238 set = gen_rtx_SET (VOIDmode, dst, set);
10241 use = gen_rtx_USE (VOIDmode, mask);
10242 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
10243 emit_insn (gen_rtx_PARALLEL (VOIDmode,
10244 gen_rtvec (3, set, use, clob)));
10250 if (dst != operands[0])
10251 emit_move_insn (operands[0], dst);
10254 /* Expand a copysign operation. Special case operand 0 being a constant. */
10257 ix86_expand_copysign (rtx operands[])
10259 enum machine_mode mode, vmode;
10260 rtx dest, op0, op1, mask, nmask;
10262 dest = operands[0];
10266 mode = GET_MODE (dest);
10267 vmode = mode == SFmode ? V4SFmode : V2DFmode;
10269 if (GET_CODE (op0) == CONST_DOUBLE)
10273 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
10274 op0 = simplify_unary_operation (ABS, mode, op0, mode);
10276 if (op0 == CONST0_RTX (mode))
10277 op0 = CONST0_RTX (vmode);
10280 if (mode == SFmode)
10281 v = gen_rtvec (4, op0, CONST0_RTX (SFmode),
10282 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
10284 v = gen_rtvec (2, op0, CONST0_RTX (DFmode));
10285 op0 = force_reg (vmode, gen_rtx_CONST_VECTOR (vmode, v));
10288 mask = ix86_build_signbit_mask (mode, 0, 0);
10290 if (mode == SFmode)
10291 emit_insn (gen_copysignsf3_const (dest, op0, op1, mask));
10293 emit_insn (gen_copysigndf3_const (dest, op0, op1, mask));
10297 nmask = ix86_build_signbit_mask (mode, 0, 1);
10298 mask = ix86_build_signbit_mask (mode, 0, 0);
10300 if (mode == SFmode)
10301 emit_insn (gen_copysignsf3_var (dest, NULL, op0, op1, nmask, mask));
10303 emit_insn (gen_copysigndf3_var (dest, NULL, op0, op1, nmask, mask));
10307 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
10308 be a constant, and so has already been expanded into a vector constant. */
10311 ix86_split_copysign_const (rtx operands[])
10313 enum machine_mode mode, vmode;
10314 rtx dest, op0, op1, mask, x;
10316 dest = operands[0];
10319 mask = operands[3];
10321 mode = GET_MODE (dest);
10322 vmode = GET_MODE (mask);
10324 dest = simplify_gen_subreg (vmode, dest, mode, 0);
10325 x = gen_rtx_AND (vmode, dest, mask);
10326 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10328 if (op0 != CONST0_RTX (vmode))
10330 x = gen_rtx_IOR (vmode, dest, op0);
10331 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10335 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
10336 so we have to do two masks. */
10339 ix86_split_copysign_var (rtx operands[])
10341 enum machine_mode mode, vmode;
10342 rtx dest, scratch, op0, op1, mask, nmask, x;
10344 dest = operands[0];
10345 scratch = operands[1];
10348 nmask = operands[4];
10349 mask = operands[5];
10351 mode = GET_MODE (dest);
10352 vmode = GET_MODE (mask);
10354 if (rtx_equal_p (op0, op1))
10356 /* Shouldn't happen often (it's useless, obviously), but when it does
10357 we'd generate incorrect code if we continue below. */
10358 emit_move_insn (dest, op0);
10362 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
10364 gcc_assert (REGNO (op1) == REGNO (scratch));
10366 x = gen_rtx_AND (vmode, scratch, mask);
10367 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
10370 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
10371 x = gen_rtx_NOT (vmode, dest);
10372 x = gen_rtx_AND (vmode, x, op0);
10373 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10377 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
10379 x = gen_rtx_AND (vmode, scratch, mask);
10381 else /* alternative 2,4 */
10383 gcc_assert (REGNO (mask) == REGNO (scratch));
10384 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
10385 x = gen_rtx_AND (vmode, scratch, op1);
10387 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
10389 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
10391 dest = simplify_gen_subreg (vmode, op0, mode, 0);
10392 x = gen_rtx_AND (vmode, dest, nmask);
10394 else /* alternative 3,4 */
10396 gcc_assert (REGNO (nmask) == REGNO (dest));
10398 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
10399 x = gen_rtx_AND (vmode, dest, op0);
10401 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10404 x = gen_rtx_IOR (vmode, dest, scratch);
10405 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10408 /* Return TRUE or FALSE depending on whether the first SET in INSN
10409 has source and destination with matching CC modes, and whether the
10410 CC mode is at least as constrained as REQ_MODE. */
10413 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
10416 enum machine_mode set_mode;
10418 set = PATTERN (insn);
10419 if (GET_CODE (set) == PARALLEL)
10420 set = XVECEXP (set, 0, 0);
10421 gcc_assert (GET_CODE (set) == SET);
10422 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
10424 set_mode = GET_MODE (SET_DEST (set));
10428 if (req_mode != CCNOmode
10429 && (req_mode != CCmode
10430 || XEXP (SET_SRC (set), 1) != const0_rtx))
10434 if (req_mode == CCGCmode)
10438 if (req_mode == CCGOCmode || req_mode == CCNOmode)
10442 if (req_mode == CCZmode)
10449 gcc_unreachable ();
10452 return (GET_MODE (SET_SRC (set)) == set_mode);
10455 /* Generate insn patterns to do an integer compare of OPERANDS. */
10458 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
10460 enum machine_mode cmpmode;
10463 cmpmode = SELECT_CC_MODE (code, op0, op1);
10464 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
10466 /* This is very simple, but making the interface the same as in the
10467 FP case makes the rest of the code easier. */
10468 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
10469 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
10471 /* Return the test that should be put into the flags user, i.e.
10472 the bcc, scc, or cmov instruction. */
10473 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
10476 /* Figure out whether to use ordered or unordered fp comparisons.
10477 Return the appropriate mode to use. */
10480 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
10482 /* ??? In order to make all comparisons reversible, we do all comparisons
10483 non-trapping when compiling for IEEE. Once gcc is able to distinguish
10484 all forms of trapping and nontrapping comparisons, we can make inequality
10485 comparisons trapping again, since it results in better code when using
10486 FCOM based compares. */
10487 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
10491 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
10493 if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
10494 return ix86_fp_compare_mode (code);
10497 /* Only zero flag is needed. */
10498 case EQ: /* ZF=0 */
10499 case NE: /* ZF!=0 */
10501 /* Codes needing carry flag. */
10502 case GEU: /* CF=0 */
10503 case GTU: /* CF=0 & ZF=0 */
10504 case LTU: /* CF=1 */
10505 case LEU: /* CF=1 | ZF=1 */
10507 /* Codes possibly doable only with sign flag when
10508 comparing against zero. */
10509 case GE: /* SF=OF or SF=0 */
10510 case LT: /* SF<>OF or SF=1 */
10511 if (op1 == const0_rtx)
10514 /* For other cases Carry flag is not required. */
10516 /* Codes doable only with the sign flag when comparing
10517 against zero, but for which there is no jump instruction,
10518 so we need to use relational tests against overflow,
10519 which thus needs to be zero. */
10520 case GT: /* ZF=0 & SF=OF */
10521 case LE: /* ZF=1 | SF<>OF */
10522 if (op1 == const0_rtx)
10526 /* strcmp patterns do (use flags), and combine may ask us for the proper mode. */
10531 gcc_unreachable ();
10535 /* Return the fixed registers used for condition codes. */
10538 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
10545 /* If two condition code modes are compatible, return a condition code
10546 mode which is compatible with both. Otherwise, return VOIDmode. */
10549 static enum machine_mode
10550 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
10555 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
10558 if ((m1 == CCGCmode && m2 == CCGOCmode)
10559 || (m1 == CCGOCmode && m2 == CCGCmode))
10565 gcc_unreachable ();
10587 /* These are only compatible with themselves, which we already checked above. */
10593 /* Split comparison code CODE into comparisons we can do using branch
10594 instructions. BYPASS_CODE is the comparison code for the branch that will
10595 branch around FIRST_CODE and SECOND_CODE. If one of the branches
10596 is not required, its value is set to UNKNOWN.
10597 We never require more than two branches. */
10600 ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
10601 enum rtx_code *first_code,
10602 enum rtx_code *second_code)
10604 *first_code = code;
10605 *bypass_code = UNKNOWN;
10606 *second_code = UNKNOWN;
10608 /* The fcomi comparison sets the flags as follows: ">" clears ZF, PF and CF; "<" sets only CF; "=" sets only ZF; an unordered result sets all three. */
10618 case GT: /* GTU - CF=0 & ZF=0 */
10619 case GE: /* GEU - CF=0 */
10620 case ORDERED: /* PF=0 */
10621 case UNORDERED: /* PF=1 */
10622 case UNEQ: /* EQ - ZF=1 */
10623 case UNLT: /* LTU - CF=1 */
10624 case UNLE: /* LEU - CF=1 | ZF=1 */
10625 case LTGT: /* EQ - ZF=0 */
10627 case LT: /* LTU - CF=1 - fails on unordered */
10628 *first_code = UNLT;
10629 *bypass_code = UNORDERED;
10631 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
10632 *first_code = UNLE;
10633 *bypass_code = UNORDERED;
10635 case EQ: /* EQ - ZF=1 - fails on unordered */
10636 *first_code = UNEQ;
10637 *bypass_code = UNORDERED;
10639 case NE: /* NE - ZF=0 - fails on unordered */
10640 *first_code = LTGT;
10641 *second_code = UNORDERED;
10643 case UNGE: /* GEU - CF=0 - fails on unordered */
10645 *second_code = UNORDERED;
10647 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
10649 *second_code = UNORDERED;
10652 gcc_unreachable ();
10654 if (!TARGET_IEEE_FP)
10656 *second_code = UNKNOWN;
10657 *bypass_code = UNKNOWN;
10661 /* Return the cost of a comparison done with fcom + arithmetic operations on AX.
10662 All of the following functions use the number of instructions as a cost metric.
10663 In the future this should be tweaked to compute bytes for optimize_size and
10664 take into account the performance of various instructions on various CPUs. */
10666 ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
10668 if (!TARGET_IEEE_FP)
10670 /* The cost of code output by ix86_expand_fp_compare. */
10694 gcc_unreachable ();
10698 /* Return cost of comparison done using fcomi operation.
10699 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10701 ix86_fp_comparison_fcomi_cost (enum rtx_code code)
10703 enum rtx_code bypass_code, first_code, second_code;
10704 /* Return an arbitrarily high cost when the instruction is not supported -
10705 this prevents gcc from using it. */
10708 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10709 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 2;
10712 /* Return cost of comparison done using sahf operation.
10713 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10715 ix86_fp_comparison_sahf_cost (enum rtx_code code)
10717 enum rtx_code bypass_code, first_code, second_code;
10718 /* Return an arbitrarily high cost when the instruction is not preferred -
10719 this keeps gcc from using it. */
10720 if (!(TARGET_SAHF && (TARGET_USE_SAHF || optimize_size)))
10722 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10723 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 3;
10726 /* Compute cost of the comparison done using any method.
10727 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10729 ix86_fp_comparison_cost (enum rtx_code code)
10731 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
10734 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
10735 sahf_cost = ix86_fp_comparison_sahf_cost (code);
10737 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
10738 if (min > sahf_cost)
10740 if (min > fcomi_cost)
10745 /* Return true if we should use an FCOMI instruction for this
10749 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
10751 enum rtx_code swapped_code = swap_condition (code);
10753 return ((ix86_fp_comparison_cost (code)
10754 == ix86_fp_comparison_fcomi_cost (code))
10755 || (ix86_fp_comparison_cost (swapped_code)
10756 == ix86_fp_comparison_fcomi_cost (swapped_code)));
10759 /* Swap, force into registers, or otherwise massage the two operands
10760 to a fp comparison. The operands are updated in place; the new
10761 comparison code is returned. */
10763 static enum rtx_code
10764 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
10766 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
10767 rtx op0 = *pop0, op1 = *pop1;
10768 enum machine_mode op_mode = GET_MODE (op0);
10769 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
10771 /* All of the unordered compare instructions only work on registers.
10772 The same is true of the fcomi compare instructions. The XFmode
10773 compare instructions require registers except when comparing
10774 against zero or when converting operand 1 from fixed point to
10778 && (fpcmp_mode == CCFPUmode
10779 || (op_mode == XFmode
10780 && ! (standard_80387_constant_p (op0) == 1
10781 || standard_80387_constant_p (op1) == 1)
10782 && GET_CODE (op1) != FLOAT)
10783 || ix86_use_fcomi_compare (code)))
10785 op0 = force_reg (op_mode, op0);
10786 op1 = force_reg (op_mode, op1);
10790 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
10791 things around if they appear profitable, otherwise force op0
10792 into a register. */
10794 if (standard_80387_constant_p (op0) == 0
10796 && ! (standard_80387_constant_p (op1) == 0
10800 tmp = op0, op0 = op1, op1 = tmp;
10801 code = swap_condition (code);
10805 op0 = force_reg (op_mode, op0);
10807 if (CONSTANT_P (op1))
10809 int tmp = standard_80387_constant_p (op1);
10811 op1 = validize_mem (force_const_mem (op_mode, op1));
10815 op1 = force_reg (op_mode, op1);
10818 op1 = force_reg (op_mode, op1);
10822 /* Try to rearrange the comparison to make it cheaper. */
10823 if (ix86_fp_comparison_cost (code)
10824 > ix86_fp_comparison_cost (swap_condition (code))
10825 && (REG_P (op1) || !no_new_pseudos))
10828 tmp = op0, op0 = op1, op1 = tmp;
10829 code = swap_condition (code);
10831 op0 = force_reg (op_mode, op0);
10839 /* Convert comparison codes we use to represent FP comparison to integer
10840 code that will result in a proper branch. Return UNKNOWN if no such code is available. */
10844 ix86_fp_compare_code_to_integer (enum rtx_code code)
10873 /* Generate insn patterns to do a floating point compare of OPERANDS. */
10876 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
10877 rtx *second_test, rtx *bypass_test)
10879 enum machine_mode fpcmp_mode, intcmp_mode;
10881 int cost = ix86_fp_comparison_cost (code);
10882 enum rtx_code bypass_code, first_code, second_code;
10884 fpcmp_mode = ix86_fp_compare_mode (code);
10885 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
10888 *second_test = NULL_RTX;
10890 *bypass_test = NULL_RTX;
10892 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10894 /* Do fcomi/sahf based test when profitable. */
10895 if ((TARGET_CMOVE || TARGET_SAHF)
10896 && (bypass_code == UNKNOWN || bypass_test)
10897 && (second_code == UNKNOWN || second_test)
10898 && ix86_fp_comparison_arithmetics_cost (code) > cost)
10902 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
10903 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
10909 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
10910 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
10912 scratch = gen_reg_rtx (HImode);
10913 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
10914 emit_insn (gen_x86_sahf_1 (scratch));
10917 /* The FP codes work out to act like unsigned. */
10918 intcmp_mode = fpcmp_mode;
10920 if (bypass_code != UNKNOWN)
10921 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
10922 gen_rtx_REG (intcmp_mode, FLAGS_REG),
10924 if (second_code != UNKNOWN)
10925 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
10926 gen_rtx_REG (intcmp_mode, FLAGS_REG),
10931 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
10932 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
10933 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
10935 scratch = gen_reg_rtx (HImode);
10936 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
10938 /* In the unordered case, we have to check C2 for NaN's, which
10939 doesn't happen to work out to anything nice combination-wise.
10940 So do some bit twiddling on the value we've got in AH to come
10941 up with an appropriate set of condition codes. */
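/* For reference: after "fnstsw %ax" the relevant status flags sit in
%ah as C0 = 0x01, C2 = 0x04 and C3 = 0x40, so 0x45 masks all three.
A compare sets none of them for ">", C0 for "<", C3 for "=" and all
three for an unordered result, which is what the tests below pick
apart. */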
10943 intcmp_mode = CCNOmode;
10948 if (code == GT || !TARGET_IEEE_FP)
10950 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
10955 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10956 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
10957 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
10958 intcmp_mode = CCmode;
10964 if (code == LT && TARGET_IEEE_FP)
10966 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10967 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
10968 intcmp_mode = CCmode;
10973 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
10979 if (code == GE || !TARGET_IEEE_FP)
10981 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
10986 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10987 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
10994 if (code == LE && TARGET_IEEE_FP)
10996 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10997 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
10998 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
10999 intcmp_mode = CCmode;
11004 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
11010 if (code == EQ && TARGET_IEEE_FP)
11012 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11013 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
11014 intcmp_mode = CCmode;
11019 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
11026 if (code == NE && TARGET_IEEE_FP)
11028 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11029 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
11035 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
11041 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
11045 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
11050 gcc_unreachable ();
11054 /* Return the test that should be put into the flags user, i.e.
11055 the bcc, scc, or cmov instruction. */
11056 return gen_rtx_fmt_ee (code, VOIDmode,
11057 gen_rtx_REG (intcmp_mode, FLAGS_REG),
11062 ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
11065 op0 = ix86_compare_op0;
11066 op1 = ix86_compare_op1;
11069 *second_test = NULL_RTX;
11071 *bypass_test = NULL_RTX;
11073 if (ix86_compare_emitted)
11075 ret = gen_rtx_fmt_ee (code, VOIDmode, ix86_compare_emitted, const0_rtx);
11076 ix86_compare_emitted = NULL_RTX;
11078 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
11079 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
11080 second_test, bypass_test);
11082 ret = ix86_expand_int_compare (code, op0, op1);
11087 /* Return true if the CODE will result in a nontrivial jump sequence. */
11089 ix86_fp_jump_nontrivial_p (enum rtx_code code)
11091 enum rtx_code bypass_code, first_code, second_code;
11094 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11095 return bypass_code != UNKNOWN || second_code != UNKNOWN;
11099 ix86_expand_branch (enum rtx_code code, rtx label)
11103 /* If we have emitted a compare insn, go straight to simple.
11104 ix86_expand_compare won't emit anything if ix86_compare_emitted is non-NULL. */
11106 if (ix86_compare_emitted)
11109 switch (GET_MODE (ix86_compare_op0))
11115 tmp = ix86_expand_compare (code, NULL, NULL);
11116 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
11117 gen_rtx_LABEL_REF (VOIDmode, label),
11119 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
11128 enum rtx_code bypass_code, first_code, second_code;
11130 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
11131 &ix86_compare_op1);
11133 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11135 /* Check whether we will use the natural sequence with one jump. If
11136 so, we can expand the jump early. Otherwise delay expansion by
11137 creating a compound insn so as not to confuse the optimizers. */
11138 if (bypass_code == UNKNOWN && second_code == UNKNOWN
11141 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
11142 gen_rtx_LABEL_REF (VOIDmode, label),
11143 pc_rtx, NULL_RTX, NULL_RTX);
11147 tmp = gen_rtx_fmt_ee (code, VOIDmode,
11148 ix86_compare_op0, ix86_compare_op1);
11149 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
11150 gen_rtx_LABEL_REF (VOIDmode, label),
11152 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
11154 use_fcomi = ix86_use_fcomi_compare (code);
11155 vec = rtvec_alloc (3 + !use_fcomi);
11156 RTVEC_ELT (vec, 0) = tmp;
11158 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
11160 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
11163 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
11165 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
11174 /* Expand a DImode branch into multiple compare+branch sequences. */
11176 rtx lo[2], hi[2], label2;
11177 enum rtx_code code1, code2, code3;
11178 enum machine_mode submode;
11180 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
11182 tmp = ix86_compare_op0;
11183 ix86_compare_op0 = ix86_compare_op1;
11184 ix86_compare_op1 = tmp;
11185 code = swap_condition (code);
11187 if (GET_MODE (ix86_compare_op0) == DImode)
11189 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
11190 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
11195 split_ti (&ix86_compare_op0, 1, lo+0, hi+0);
11196 split_ti (&ix86_compare_op1, 1, lo+1, hi+1);
11200 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
11201 avoid two branches. This costs one extra insn, so disable when
11202 optimizing for size. */
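/* E.g. a DImode a == b is emitted roughly as
hi(a) ^= hi(b); lo(a) ^= lo(b); test (hi(a) | lo(a)) against zero,
so only one conditional branch is needed. */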
11204 if ((code == EQ || code == NE)
11206 || hi[1] == const0_rtx || lo[1] == const0_rtx))
11211 if (hi[1] != const0_rtx)
11212 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
11213 NULL_RTX, 0, OPTAB_WIDEN);
11216 if (lo[1] != const0_rtx)
11217 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
11218 NULL_RTX, 0, OPTAB_WIDEN);
11220 tmp = expand_binop (submode, ior_optab, xor1, xor0,
11221 NULL_RTX, 0, OPTAB_WIDEN);
11223 ix86_compare_op0 = tmp;
11224 ix86_compare_op1 = const0_rtx;
11225 ix86_expand_branch (code, label);
11229 /* Otherwise, if we are doing a less-than or greater-than-or-equal
11230 comparison and op1 is a constant whose low word is zero, then we can
11231 just examine the high word. */
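/* For instance, a DImode x < C with the low word of C zero is
equivalent to hi(x) < hi(C), since no value of the low word of x can
change the outcome. */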
11233 if (CONST_INT_P (hi[1]) && lo[1] == const0_rtx)
11236 case LT: case LTU: case GE: case GEU:
11237 ix86_compare_op0 = hi[0];
11238 ix86_compare_op1 = hi[1];
11239 ix86_expand_branch (code, label);
11245 /* Otherwise, we need two or three jumps. */
11247 label2 = gen_label_rtx ();
11250 code2 = swap_condition (code);
11251 code3 = unsigned_condition (code);
11255 case LT: case GT: case LTU: case GTU:
11258 case LE: code1 = LT; code2 = GT; break;
11259 case GE: code1 = GT; code2 = LT; break;
11260 case LEU: code1 = LTU; code2 = GTU; break;
11261 case GEU: code1 = GTU; code2 = LTU; break;
11263 case EQ: code1 = UNKNOWN; code2 = NE; break;
11264 case NE: code2 = UNKNOWN; break;
11267 gcc_unreachable ();
11272 * if (hi(a) < hi(b)) goto true;
11273 * if (hi(a) > hi(b)) goto false;
11274 * if (lo(a) < lo(b)) goto true;
11278 ix86_compare_op0 = hi[0];
11279 ix86_compare_op1 = hi[1];
11281 if (code1 != UNKNOWN)
11282 ix86_expand_branch (code1, label);
11283 if (code2 != UNKNOWN)
11284 ix86_expand_branch (code2, label2);
11286 ix86_compare_op0 = lo[0];
11287 ix86_compare_op1 = lo[1];
11288 ix86_expand_branch (code3, label);
11290 if (code2 != UNKNOWN)
11291 emit_label (label2);
11296 gcc_unreachable ();
11300 /* Split a branch based on a floating point condition. */
11302 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
11303 rtx target1, rtx target2, rtx tmp, rtx pushed)
11305 rtx second, bypass;
11306 rtx label = NULL_RTX;
11308 int bypass_probability = -1, second_probability = -1, probability = -1;
11311 if (target2 != pc_rtx)
11314 code = reverse_condition_maybe_unordered (code);
11319 condition = ix86_expand_fp_compare (code, op1, op2,
11320 tmp, &second, &bypass);
11322 /* Remove the pushed operand from the stack. */
11324 ix86_free_from_memory (GET_MODE (pushed));
11326 if (split_branch_probability >= 0)
11328 /* Distribute the probabilities across the jumps.
11329 Assume the BYPASS and SECOND tests to always be for UNORDERED. */
11331 probability = split_branch_probability;
11333 /* A value of 1 is low enough that there is no need for the probability
11334 to be updated. Later we may run some experiments and see
11335 whether unordered values are more frequent in practice. */
11337 bypass_probability = 1;
11339 second_probability = 1;
11341 if (bypass != NULL_RTX)
11343 label = gen_label_rtx ();
11344 i = emit_jump_insn (gen_rtx_SET
11346 gen_rtx_IF_THEN_ELSE (VOIDmode,
11348 gen_rtx_LABEL_REF (VOIDmode,
11351 if (bypass_probability >= 0)
11353 = gen_rtx_EXPR_LIST (REG_BR_PROB,
11354 GEN_INT (bypass_probability),
11357 i = emit_jump_insn (gen_rtx_SET
11359 gen_rtx_IF_THEN_ELSE (VOIDmode,
11360 condition, target1, target2)));
11361 if (probability >= 0)
11363 = gen_rtx_EXPR_LIST (REG_BR_PROB,
11364 GEN_INT (probability),
11366 if (second != NULL_RTX)
11368 i = emit_jump_insn (gen_rtx_SET
11370 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
11372 if (second_probability >= 0)
11374 = gen_rtx_EXPR_LIST (REG_BR_PROB,
11375 GEN_INT (second_probability),
11378 if (label != NULL_RTX)
11379 emit_label (label);
11383 ix86_expand_setcc (enum rtx_code code, rtx dest)
11385 rtx ret, tmp, tmpreg, equiv;
11386 rtx second_test, bypass_test;
11388 if (GET_MODE (ix86_compare_op0) == (TARGET_64BIT ? TImode : DImode))
11389 return 0; /* FAIL */
11391 gcc_assert (GET_MODE (dest) == QImode);
11393 ret = ix86_expand_compare (code, &second_test, &bypass_test);
11394 PUT_MODE (ret, QImode);
11399 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
11400 if (bypass_test || second_test)
11402 rtx test = second_test;
11404 rtx tmp2 = gen_reg_rtx (QImode);
11407 gcc_assert (!second_test);
11408 test = bypass_test;
11410 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
11412 PUT_MODE (test, QImode);
11413 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
11416 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
11418 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
11421 /* Attach a REG_EQUAL note describing the comparison result. */
11422 if (ix86_compare_op0 && ix86_compare_op1)
11424 equiv = simplify_gen_relational (code, QImode,
11425 GET_MODE (ix86_compare_op0),
11426 ix86_compare_op0, ix86_compare_op1);
11427 set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
11430 return 1; /* DONE */
11433 /* Expand a comparison setting or clearing the carry flag. Return true when
11434 successful and set *POP to the comparison for the operation. */
11436 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
11438 enum machine_mode mode =
11439 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
11441 /* Do not handle DImode compares that go through the special path. Also we
11442 can't deal with FP compares yet; it would be possible to add that. */
11443 if (mode == (TARGET_64BIT ? TImode : DImode))
11445 if (FLOAT_MODE_P (mode))
11447 rtx second_test = NULL, bypass_test = NULL;
11448 rtx compare_op, compare_seq;
11450 /* Shortcut: the following common codes never translate into carry flag compares. */
11451 if (code == EQ || code == NE || code == UNEQ || code == LTGT
11452 || code == ORDERED || code == UNORDERED)
11455 /* These comparisons require the zero flag; swap the operands so they won't. */
11456 if ((code == GT || code == UNLE || code == LE || code == UNGT)
11457 && !TARGET_IEEE_FP)
11462 code = swap_condition (code);
11465 /* Try to expand the comparison and verify that we end up with a carry flag
11466 based comparison. This fails to be true only when we decide to expand the
11467 comparison using arithmetic, which is not a common scenario. */
11469 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
11470 &second_test, &bypass_test);
11471 compare_seq = get_insns ();
11474 if (second_test || bypass_test)
11476 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
11477 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
11478 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
11480 code = GET_CODE (compare_op);
11481 if (code != LTU && code != GEU)
11483 emit_insn (compare_seq);
11487 if (!INTEGRAL_MODE_P (mode))
11495 /* Convert a==0 into (unsigned)a<1. */
11498 if (op1 != const0_rtx)
11501 code = (code == EQ ? LTU : GEU);
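/* The rewritten LTU/GEU result lives in the carry flag, which is
exactly what the adc/sbb based sequences in the callers consume. */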
11504 /* Convert a>b into b<a or a>=b+1. */
11507 if (CONST_INT_P (op1))
11509 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
11510 /* Bail out on overflow. We could still swap the operands, but that
11511 would force loading of the constant into a register. */
11512 if (op1 == const0_rtx
11513 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
11515 code = (code == GTU ? GEU : LTU);
11522 code = (code == GTU ? LTU : GEU);
11526 /* Convert a>=0 into (unsigned)a<0x80000000. */
11529 if (mode == DImode || op1 != const0_rtx)
11531 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
11532 code = (code == LT ? GEU : LTU);
11536 if (mode == DImode || op1 != constm1_rtx)
11538 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
11539 code = (code == LE ? GEU : LTU);
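/* Both rewrites exploit the sign bit: values with the sign bit clear
are exactly the values below 0x80000000 when viewed as unsigned. */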
11545 /* Swapping operands may cause a constant to appear as the first operand. */
11546 if (!nonimmediate_operand (op0, VOIDmode))
11548 if (no_new_pseudos)
11550 op0 = force_reg (mode, op0);
11552 ix86_compare_op0 = op0;
11553 ix86_compare_op1 = op1;
11554 *pop = ix86_expand_compare (code, NULL, NULL);
11555 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
11560 ix86_expand_int_movcc (rtx operands[])
11562 enum rtx_code code = GET_CODE (operands[1]), compare_code;
11563 rtx compare_seq, compare_op;
11564 rtx second_test, bypass_test;
11565 enum machine_mode mode = GET_MODE (operands[0]);
11566 bool sign_bit_compare_p = false;
11569 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
11570 compare_seq = get_insns ();
11573 compare_code = GET_CODE (compare_op);
11575 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
11576 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
11577 sign_bit_compare_p = true;
11579 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
11580 HImode insns, we'd be swallowed in word prefix ops. */
11582 if ((mode != HImode || TARGET_FAST_PREFIX)
11583 && (mode != (TARGET_64BIT ? TImode : DImode))
11584 && CONST_INT_P (operands[2])
11585 && CONST_INT_P (operands[3]))
11587 rtx out = operands[0];
11588 HOST_WIDE_INT ct = INTVAL (operands[2]);
11589 HOST_WIDE_INT cf = INTVAL (operands[3]);
11590 HOST_WIDE_INT diff;
11593 /* Sign bit compares are better done using shifts than by using sbb. */
11595 if (sign_bit_compare_p
11596 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
11597 ix86_compare_op1, &compare_op))
11599 /* Detect overlap between destination and compare sources. */
11602 if (!sign_bit_compare_p)
11604 bool fpcmp = false;
11606 compare_code = GET_CODE (compare_op);
11608 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
11609 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
11612 compare_code = ix86_fp_compare_code_to_integer (compare_code);
11615 /* To simplify the rest of the code, restrict to the GEU case. */
11616 if (compare_code == LTU)
11618 HOST_WIDE_INT tmp = ct;
11621 compare_code = reverse_condition (compare_code);
11622 code = reverse_condition (code);
11627 PUT_CODE (compare_op,
11628 reverse_condition_maybe_unordered
11629 (GET_CODE (compare_op)));
11631 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
11635 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
11636 || reg_overlap_mentioned_p (out, ix86_compare_op1))
11637 tmp = gen_reg_rtx (mode);
11639 if (mode == DImode)
11640 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
11642 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
11646 if (code == GT || code == GE)
11647 code = reverse_condition (code);
11650 HOST_WIDE_INT tmp = ct;
11655 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
11656 ix86_compare_op1, VOIDmode, 0, -1);
11669 tmp = expand_simple_binop (mode, PLUS,
11671 copy_rtx (tmp), 1, OPTAB_DIRECT);
11682 tmp = expand_simple_binop (mode, IOR,
11684 copy_rtx (tmp), 1, OPTAB_DIRECT);
11686 else if (diff == -1 && ct)
11696 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
11698 tmp = expand_simple_binop (mode, PLUS,
11699 copy_rtx (tmp), GEN_INT (cf),
11700 copy_rtx (tmp), 1, OPTAB_DIRECT);
11708 * andl cf - ct, dest
11718 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
11721 tmp = expand_simple_binop (mode, AND,
11723 gen_int_mode (cf - ct, mode),
11724 copy_rtx (tmp), 1, OPTAB_DIRECT);
11726 tmp = expand_simple_binop (mode, PLUS,
11727 copy_rtx (tmp), GEN_INT (ct),
11728 copy_rtx (tmp), 1, OPTAB_DIRECT);
11731 if (!rtx_equal_p (tmp, out))
11732 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
11734 return 1; /* DONE */
11740 tmp = ct, ct = cf, cf = tmp;
11742 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
11744 /* We may be reversing an unordered compare to a normal compare, which
11745 is not valid in general (we may convert a non-trapping condition
11746 to a trapping one), however on i386 we currently emit all
11747 comparisons unordered. */
11748 compare_code = reverse_condition_maybe_unordered (compare_code);
11749 code = reverse_condition_maybe_unordered (code);
11753 compare_code = reverse_condition (compare_code);
11754 code = reverse_condition (code);
11758 compare_code = UNKNOWN;
11759 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
11760 && CONST_INT_P (ix86_compare_op1))
11762 if (ix86_compare_op1 == const0_rtx
11763 && (code == LT || code == GE))
11764 compare_code = code;
11765 else if (ix86_compare_op1 == constm1_rtx)
11769 else if (code == GT)
11774 /* Optimize dest = (op0 < 0) ? -1 : cf. */
11775 if (compare_code != UNKNOWN
11776 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
11777 && (cf == -1 || ct == -1))
11779 /* If the lea code below could be used, only optimize
11780 if it results in a 2-insn sequence. */
11782 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
11783 || diff == 3 || diff == 5 || diff == 9)
11784 || (compare_code == LT && ct == -1)
11785 || (compare_code == GE && cf == -1))
11788 * notl op1 (if necessary)
11796 code = reverse_condition (code);
11799 out = emit_store_flag (out, code, ix86_compare_op0,
11800 ix86_compare_op1, VOIDmode, 0, -1);
11802 out = expand_simple_binop (mode, IOR,
11804 out, 1, OPTAB_DIRECT);
11805 if (out != operands[0])
11806 emit_move_insn (operands[0], out);
11808 return 1; /* DONE */
11813 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
11814 || diff == 3 || diff == 5 || diff == 9)
11815 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
11817 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
11823 * lea cf(dest*(ct-cf)),dest
11827 * This also catches the degenerate setcc-only case.
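* For instance, ct = 5 and cf = 2 give diff = 3, so the 0/1 setcc
* result folds into one address computation,
* leal 2(%eax,%eax,2), %eax, which maps 0 to 2 and 1 to 5.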
11833 out = emit_store_flag (out, code, ix86_compare_op0,
11834 ix86_compare_op1, VOIDmode, 0, 1);
11837 /* On x86_64 the lea instruction operates on Pmode, so we need
11838 to get the arithmetic done in the proper mode to match. */
11840 tmp = copy_rtx (out);
11844 out1 = copy_rtx (out);
11845 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
11849 tmp = gen_rtx_PLUS (mode, tmp, out1);
11855 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
11858 if (!rtx_equal_p (tmp, out))
11861 out = force_operand (tmp, copy_rtx (out));
11863 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
11865 if (!rtx_equal_p (out, operands[0]))
11866 emit_move_insn (operands[0], copy_rtx (out));
11868 return 1; /* DONE */
11872 * General case: Jumpful:
11873 * xorl dest,dest cmpl op1, op2
11874 * cmpl op1, op2 movl ct, dest
11875 * setcc dest jcc 1f
11876 * decl dest movl cf, dest
11877 * andl (cf-ct),dest 1:
11880 * Size 20. Size 14.
11882 * This is reasonably steep, but branch mispredict costs are
11883 * high on modern cpus, so consider failing only if optimizing for size.
11887 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
11888 && BRANCH_COST >= 2)
11894 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
11895 /* We may be reversing an unordered compare to a normal compare,
11896 which is not valid in general (we may convert a non-trapping
11897 condition to a trapping one), however on i386 we currently
11898 emit all comparisons unordered. */
11899 code = reverse_condition_maybe_unordered (code);
11902 code = reverse_condition (code);
11903 if (compare_code != UNKNOWN)
11904 compare_code = reverse_condition (compare_code);
11908 if (compare_code != UNKNOWN)
11910 /* notl op1 (if needed)
11915 For x < 0 (resp. x <= -1) there will be no notl,
11916 so if possible swap the constants to get rid of the complement.
11918 True/false will be -1/0 while code below (store flag
11919 followed by decrement) is 0/-1, so the constants need
11920 to be exchanged once more. */
11922 if (compare_code == GE || !cf)
11924 code = reverse_condition (code);
11929 HOST_WIDE_INT tmp = cf;
11934 out = emit_store_flag (out, code, ix86_compare_op0,
11935 ix86_compare_op1, VOIDmode, 0, -1);
11939 out = emit_store_flag (out, code, ix86_compare_op0,
11940 ix86_compare_op1, VOIDmode, 0, 1);
11942 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
11943 copy_rtx (out), 1, OPTAB_DIRECT);
11946 out = expand_simple_binop (mode, AND, copy_rtx (out),
11947 gen_int_mode (cf - ct, mode),
11948 copy_rtx (out), 1, OPTAB_DIRECT);
11950 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
11951 copy_rtx (out), 1, OPTAB_DIRECT);
11952 if (!rtx_equal_p (out, operands[0]))
11953 emit_move_insn (operands[0], copy_rtx (out));
11955 return 1; /* DONE */
11959 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
11961 /* Try a few more things with specific constants and a variable. */
11964 rtx var, orig_out, out, tmp;
11966 if (BRANCH_COST <= 2)
11967 return 0; /* FAIL */
11969 /* If one of the two operands is an interesting constant, load a
11970 constant with the above and mask it in with a logical operation. */
11972 if (CONST_INT_P (operands[2]))
11975 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
11976 operands[3] = constm1_rtx, op = and_optab;
11977 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
11978 operands[3] = const0_rtx, op = ior_optab;
11980 return 0; /* FAIL */
11982 else if (CONST_INT_P (operands[3]))
11985 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
11986 operands[2] = constm1_rtx, op = and_optab;
11987 else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
11988 operands[2] = const0_rtx, op = ior_optab;
11990 return 0; /* FAIL */
11993 return 0; /* FAIL */
11995 orig_out = operands[0];
11996 tmp = gen_reg_rtx (mode);
11999 /* Recurse to get the constant loaded. */
12000 if (ix86_expand_int_movcc (operands) == 0)
12001 return 0; /* FAIL */
12003 /* Mask in the interesting variable. */
12004 out = expand_binop (mode, op, var, tmp, orig_out, 0,
12006 if (!rtx_equal_p (out, orig_out))
12007 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
12009 return 1; /* DONE */
12013 * For comparison with above,
12023 if (! nonimmediate_operand (operands[2], mode))
12024 operands[2] = force_reg (mode, operands[2]);
12025 if (! nonimmediate_operand (operands[3], mode))
12026 operands[3] = force_reg (mode, operands[3]);
12028 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
12030 rtx tmp = gen_reg_rtx (mode);
12031 emit_move_insn (tmp, operands[3]);
12034 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
12036 rtx tmp = gen_reg_rtx (mode);
12037 emit_move_insn (tmp, operands[2]);
12041 if (! register_operand (operands[2], VOIDmode)
12043 || ! register_operand (operands[3], VOIDmode)))
12044 operands[2] = force_reg (mode, operands[2]);
12047 && ! register_operand (operands[3], VOIDmode))
12048 operands[3] = force_reg (mode, operands[3]);
12050 emit_insn (compare_seq);
12051 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
12052 gen_rtx_IF_THEN_ELSE (mode,
12053 compare_op, operands[2],
12056 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
12057 gen_rtx_IF_THEN_ELSE (mode,
12059 copy_rtx (operands[3]),
12060 copy_rtx (operands[0]))));
12062 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
12063 gen_rtx_IF_THEN_ELSE (mode,
12065 copy_rtx (operands[2]),
12066 copy_rtx (operands[0]))));
12068 return 1; /* DONE */
12071 /* Swap, force into registers, or otherwise massage the two operands
12072 to an sse comparison with a mask result. Thus we differ a bit from
12073 ix86_prepare_fp_compare_args which expects to produce a flags result.
12075 The DEST operand exists to help determine whether to commute commutative
12076 operators. The POP0/POP1 operands are updated in place. The new
12077 comparison code is returned, or UNKNOWN if not implementable. */
12079 static enum rtx_code
12080 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
12081 rtx *pop0, rtx *pop1)
12089 /* We have no LTGT as an operator. We could implement it with
12090 NE & ORDERED, but this requires an extra temporary. It's
12091 not clear that it's worth it. */
12098 /* These are supported directly. */
12105 /* For commutative operators, try to canonicalize the destination
12106 operand to be first in the comparison - this helps reload to
12107 avoid extra moves. */
12108 if (!dest || !rtx_equal_p (dest, *pop1))
12116 /* These are not supported directly. Swap the comparison operands
12117 to transform into something that is supported. */
12121 code = swap_condition (code);
12125 gcc_unreachable ();
12131 /* Detect conditional moves that exactly match min/max operational
12132 semantics. Note that this is IEEE safe, as long as we don't
12133 interchange the operands.
12135 Returns FALSE if this conditional move doesn't match a MIN/MAX,
12136 and TRUE if the operation is successful and instructions are emitted. */
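/* The SSE min/max instructions return their second operand when the
operands compare unordered, so keeping the operand order as written
is what makes the emitted code safe for NaNs. */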
12139 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
12140 rtx cmp_op1, rtx if_true, rtx if_false)
12142 enum machine_mode mode;
12148 else if (code == UNGE)
12151 if_true = if_false;
12157 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
12159 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
12164 mode = GET_MODE (dest);
12166 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
12167 but MODE may be a vector mode and thus not appropriate. */
12168 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
12170 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
12173 if_true = force_reg (mode, if_true);
12174 v = gen_rtvec (2, if_true, if_false);
12175 tmp = gen_rtx_UNSPEC (mode, v, u);
12179 code = is_min ? SMIN : SMAX;
12180 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
12183 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
12187 /* Expand an sse vector comparison. Return the register with the result. */
12190 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
12191 rtx op_true, rtx op_false)
12193 enum machine_mode mode = GET_MODE (dest);
12196 cmp_op0 = force_reg (mode, cmp_op0);
12197 if (!nonimmediate_operand (cmp_op1, mode))
12198 cmp_op1 = force_reg (mode, cmp_op1);
12201 || reg_overlap_mentioned_p (dest, op_true)
12202 || reg_overlap_mentioned_p (dest, op_false))
12203 dest = gen_reg_rtx (mode);
12205 x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
12206 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
12211 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
12212 operations. This is used for both scalar and vector conditional moves. */
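/* The general form computed below is
dest = (cmp & op_true) | (~cmp & op_false);
the two special cases simply drop the arm that is known to be zero. */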
12215 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
12217 enum machine_mode mode = GET_MODE (dest);
12220 if (op_false == CONST0_RTX (mode))
12222 op_true = force_reg (mode, op_true);
12223 x = gen_rtx_AND (mode, cmp, op_true);
12224 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
12226 else if (op_true == CONST0_RTX (mode))
12228 op_false = force_reg (mode, op_false);
12229 x = gen_rtx_NOT (mode, cmp);
12230 x = gen_rtx_AND (mode, x, op_false);
12231 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
12235 op_true = force_reg (mode, op_true);
12236 op_false = force_reg (mode, op_false);
12238 t2 = gen_reg_rtx (mode);
12240 t3 = gen_reg_rtx (mode);
12244 x = gen_rtx_AND (mode, op_true, cmp);
12245 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
12247 x = gen_rtx_NOT (mode, cmp);
12248 x = gen_rtx_AND (mode, x, op_false);
12249 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
12251 x = gen_rtx_IOR (mode, t3, t2);
12252 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
12256 /* Expand a floating-point conditional move. Return true if successful. */
12259 ix86_expand_fp_movcc (rtx operands[])
12261 enum machine_mode mode = GET_MODE (operands[0]);
12262 enum rtx_code code = GET_CODE (operands[1]);
12263 rtx tmp, compare_op, second_test, bypass_test;
12265 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
12267 enum machine_mode cmode;
12269 /* Since we have no cmove for sse registers, don't force bad register
12270 allocation just to gain access to it. Deny movcc when the
12271 comparison mode doesn't match the move mode. */
12272 cmode = GET_MODE (ix86_compare_op0);
12273 if (cmode == VOIDmode)
12274 cmode = GET_MODE (ix86_compare_op1);
12278 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
12280 &ix86_compare_op1);
12281 if (code == UNKNOWN)
12284 if (ix86_expand_sse_fp_minmax (operands[0], code, ix86_compare_op0,
12285 ix86_compare_op1, operands[2],
12289 tmp = ix86_expand_sse_cmp (operands[0], code, ix86_compare_op0,
12290 ix86_compare_op1, operands[2], operands[3]);
12291 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
12295 /* The floating point conditional move instructions don't directly
12296 support conditions resulting from a signed integer comparison. */
12298 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
12303 if (!fcmov_comparison_operator (compare_op, VOIDmode))
12305 gcc_assert (!second_test && !bypass_test);
12306 tmp = gen_reg_rtx (QImode);
12307 ix86_expand_setcc (code, tmp);
12309 ix86_compare_op0 = tmp;
12310 ix86_compare_op1 = const0_rtx;
12311 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
12313 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
12315 tmp = gen_reg_rtx (mode);
12316 emit_move_insn (tmp, operands[3]);
12319 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
12321 tmp = gen_reg_rtx (mode);
12322 emit_move_insn (tmp, operands[2]);
12326 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
12327 gen_rtx_IF_THEN_ELSE (mode, compare_op,
12328 operands[2], operands[3])));
12330 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
12331 gen_rtx_IF_THEN_ELSE (mode, bypass_test,
12332 operands[3], operands[0])));
12334 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
12335 gen_rtx_IF_THEN_ELSE (mode, second_test,
12336 operands[2], operands[0])));
12341 /* Expand a floating-point vector conditional move; a vcond operation
12342 rather than a movcc operation. */
12345 ix86_expand_fp_vcond (rtx operands[])
12347 enum rtx_code code = GET_CODE (operands[3]);
12350 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
12351 &operands[4], &operands[5]);
12352 if (code == UNKNOWN)
12355 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
12356 operands[5], operands[1], operands[2]))
12359 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
12360 operands[1], operands[2]);
12361 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
12365 /* Expand a signed integral vector conditional move. */
12368 ix86_expand_int_vcond (rtx operands[])
12370 enum machine_mode mode = GET_MODE (operands[0]);
12371 enum rtx_code code = GET_CODE (operands[3]);
12372 bool negate = false;
12375 cop0 = operands[4];
12376 cop1 = operands[5];
12378 /* Canonicalize the comparison to EQ, GT, GTU. */
12389 code = reverse_condition (code);
12395 code = reverse_condition (code);
12401 code = swap_condition (code);
12402 x = cop0, cop0 = cop1, cop1 = x;
12406 gcc_unreachable ();
12409 /* Unsigned parallel compare is not supported by the hardware. Play some
12410 tricks to turn this into a signed comparison against 0. */
12413 cop0 = force_reg (mode, cop0);
12421 /* Perform a parallel modulo subtraction. */
12422 t1 = gen_reg_rtx (mode);
12423 emit_insn (gen_subv4si3 (t1, cop0, cop1));
12425 /* Extract the original sign bit of op0. */
12426 mask = GEN_INT (-0x80000000);
12427 mask = gen_rtx_CONST_VECTOR (mode,
12428 gen_rtvec (4, mask, mask, mask, mask));
12429 mask = force_reg (mode, mask);
12430 t2 = gen_reg_rtx (mode);
12431 emit_insn (gen_andv4si3 (t2, cop0, mask));
12433 /* XOR it back into the result of the subtraction. This results
12434 in the sign bit set iff we saw unsigned underflow. */
12435 x = gen_reg_rtx (mode);
12436 emit_insn (gen_xorv4si3 (x, t1, t2));
12444 /* Perform a parallel unsigned saturating subtraction. */
12445 x = gen_reg_rtx (mode);
12446 emit_insn (gen_rtx_SET (VOIDmode, x,
12447 gen_rtx_US_MINUS (mode, cop0, cop1)));
12454 gcc_unreachable ();
12458 cop1 = CONST0_RTX (mode);
12461 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
12462 operands[1+negate], operands[2-negate]);
12464 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
12465 operands[2-negate]);
12469 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
12470 true if we should do zero extension, else sign extension. HIGH_P is
12471 true if we want the N/2 high elements, else the low elements. */
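/* The sign-extending case interleaves OP[1] with a mask computed as
0 > OP[1], i.e. all-ones in each lane where OP[1] is negative; the
zero-extending case interleaves with a zero vector instead. */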
12474 ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
12476 enum machine_mode imode = GET_MODE (operands[1]);
12477 rtx (*unpack)(rtx, rtx, rtx);
12484 unpack = gen_vec_interleave_highv16qi;
12486 unpack = gen_vec_interleave_lowv16qi;
12490 unpack = gen_vec_interleave_highv8hi;
12492 unpack = gen_vec_interleave_lowv8hi;
12496 unpack = gen_vec_interleave_highv4si;
12498 unpack = gen_vec_interleave_lowv4si;
12501 gcc_unreachable ();
12504 dest = gen_lowpart (imode, operands[0]);
12507 se = force_reg (imode, CONST0_RTX (imode));
12509 se = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
12510 operands[1], pc_rtx, pc_rtx);
12512 emit_insn (unpack (dest, operands[1], se));
12515 /* Expand a conditional increment or decrement using adc/sbb instructions.
12516 The default case using setcc followed by the conditional move can be
12517 done by generic code. */
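/* Once the compare leaves its result in the carry flag, an
adc $0 or sbb $0 against the destination performs the conditional
increment or decrement in a single instruction, with no branch and no
setcc. */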
12519 ix86_expand_int_addcc (rtx operands[])
12521 enum rtx_code code = GET_CODE (operands[1]);
12523 rtx val = const0_rtx;
12524 bool fpcmp = false;
12525 enum machine_mode mode = GET_MODE (operands[0]);
12527 if (operands[3] != const1_rtx
12528 && operands[3] != constm1_rtx)
12530 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
12531 ix86_compare_op1, &compare_op))
12533 code = GET_CODE (compare_op);
12535 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
12536 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
12539 code = ix86_fp_compare_code_to_integer (code);
12546 PUT_CODE (compare_op,
12547 reverse_condition_maybe_unordered
12548 (GET_CODE (compare_op)));
12550 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
12552 PUT_MODE (compare_op, mode);
12554 /* Construct either adc or sbb insn. */
12555 if ((code == LTU) == (operands[3] == constm1_rtx))
12557 switch (GET_MODE (operands[0]))
12560 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
12563 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
12566 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
12569 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
12572 gcc_unreachable ();
12577 switch (GET_MODE (operands[0]))
12580 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
12583 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
12586 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
12589 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
12592 gcc_unreachable ();
12595 return 1; /* DONE */
12599 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
12600 works for floating point parameters and non-offsettable memories.
12601 For pushes, it returns just stack offsets; the values will be saved
12602 in the right order. At most three parts are generated. */
12605 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
12610 size = mode == XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
12612 size = (GET_MODE_SIZE (mode) + 4) / 8;
12614 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
12615 gcc_assert (size >= 2 && size <= 3);
12617 /* Optimize constant pool references to immediates. This is used by fp
12618 moves, which force all constants to memory to allow combining. */
12619 if (MEM_P (operand) && MEM_READONLY_P (operand))
12621 rtx tmp = maybe_get_pool_constant (operand);
12626 if (MEM_P (operand) && !offsettable_memref_p (operand))
12628 /* The only non-offsettable memories we handle are pushes. */
12629 int ok = push_operand (operand, VOIDmode);
12633 operand = copy_rtx (operand);
12634 PUT_MODE (operand, Pmode);
12635 parts[0] = parts[1] = parts[2] = operand;
12639 if (GET_CODE (operand) == CONST_VECTOR)
12641 enum machine_mode imode = int_mode_for_mode (mode);
12642 /* Caution: if we looked through a constant pool memory above,
12643 the operand may actually have a different mode now. That's
12644 ok, since we want to pun this all the way back to an integer. */
12645 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
12646 gcc_assert (operand != NULL);
12652 if (mode == DImode)
12653 split_di (&operand, 1, &parts[0], &parts[1]);
12656 if (REG_P (operand))
12658 gcc_assert (reload_completed);
12659 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
12660 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
12662 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
12664 else if (offsettable_memref_p (operand))
12666 operand = adjust_address (operand, SImode, 0);
12667 parts[0] = operand;
12668 parts[1] = adjust_address (operand, SImode, 4);
12670 parts[2] = adjust_address (operand, SImode, 8);
12672 else if (GET_CODE (operand) == CONST_DOUBLE)
12677 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
12681 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
12682 parts[2] = gen_int_mode (l[2], SImode);
12685 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
12688 gcc_unreachable ();
12690 parts[1] = gen_int_mode (l[1], SImode);
12691 parts[0] = gen_int_mode (l[0], SImode);
12694 gcc_unreachable ();
12699 if (mode == TImode)
12700 split_ti (&operand, 1, &parts[0], &parts[1]);
12701 if (mode == XFmode || mode == TFmode)
12703 enum machine_mode upper_mode = mode == XFmode ? SImode : DImode;
12704 if (REG_P (operand))
12706 gcc_assert (reload_completed);
12707 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
12708 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
12710 else if (offsettable_memref_p (operand))
12712 operand = adjust_address (operand, DImode, 0);
12713 parts[0] = operand;
12714 parts[1] = adjust_address (operand, upper_mode, 8);
12716 else if (GET_CODE (operand) == CONST_DOUBLE)
12721 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
12722 real_to_target (l, &r, mode);
12724 /* Do not use a shift by 32, to avoid a warning on 32-bit systems. */
12725 if (HOST_BITS_PER_WIDE_INT >= 64)
12728 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
12729 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
12732 parts[0] = immed_double_const (l[0], l[1], DImode);
12734 if (upper_mode == SImode)
12735 parts[1] = gen_int_mode (l[2], SImode);
12736 else if (HOST_BITS_PER_WIDE_INT >= 64)
12739 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
12740 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
12743 parts[1] = immed_double_const (l[2], l[3], DImode);
12746 gcc_unreachable ();
12753 /* Emit insns to perform a move or push of DI, DF, and XF values.
12754 Return false when normal moves are needed; true when all required
12755 insns have been emitted. Operands 2-4 contain the input values
12756 in the correct order; operands 5-7 contain the output values. */
12759 ix86_split_long_move (rtx operands[])
12764 int collisions = 0;
12765 enum machine_mode mode = GET_MODE (operands[0]);
12767 /* The DFmode expanders may ask us to move a double.
12768 For a 64-bit target this is a single move. By hiding the fact
12769 here we simplify the i386.md splitters. */
12770 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
12772 /* Optimize constant pool references to immediates. This is used by
12773 fp moves, which force all constants to memory to allow combining. */
12775 if (MEM_P (operands[1])
12776 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
12777 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
12778 operands[1] = get_pool_constant (XEXP (operands[1], 0));
12779 if (push_operand (operands[0], VOIDmode))
12781 operands[0] = copy_rtx (operands[0]);
12782 PUT_MODE (operands[0], Pmode);
12785 operands[0] = gen_lowpart (DImode, operands[0]);
12786 operands[1] = gen_lowpart (DImode, operands[1]);
12787 emit_move_insn (operands[0], operands[1]);
12791 /* The only non-offsettable memory we handle is a push. */
12792 if (push_operand (operands[0], VOIDmode))
12795 gcc_assert (!MEM_P (operands[0])
12796 || offsettable_memref_p (operands[0]));
12798 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
12799 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
12801 /* When emitting a push, take care of source operands on the stack. */
12802 if (push && MEM_P (operands[1])
12803 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
12806 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
12807 XEXP (part[1][2], 0));
12808 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
12809 XEXP (part[1][1], 0));
12812 /* We need to do the copy in the right order in case an address register
12813 of the source overlaps the destination. */
12814 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
12816 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
12818 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
12821 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
12824 /* A collision in the middle part can be handled by reordering. */
12825 if (collisions == 1 && nparts == 3
12826 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
12829 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
12830 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
12833 /* If there are more collisions, we can't handle them by reordering.
12834 Do an lea to the last part and use only one colliding move. */
12835 else if (collisions > 1)
12841 base = part[0][nparts - 1];
12843 /* Handle the case when the last part isn't valid for lea.
12844 This happens in 64-bit mode when storing the 12-byte XFmode. */
12845 if (GET_MODE (base) != Pmode)
12846 base = gen_rtx_REG (Pmode, REGNO (base));
12848 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
12849 part[1][0] = replace_equiv_address (part[1][0], base);
12850 part[1][1] = replace_equiv_address (part[1][1],
12851 plus_constant (base, UNITS_PER_WORD));
12853 part[1][2] = replace_equiv_address (part[1][2],
12854 plus_constant (base, 8));
12864 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
12865 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
12866 emit_move_insn (part[0][2], part[1][2]);
12871 /* In 64-bit mode we don't have a 32-bit push available. In case this is a
12872 register, it is OK; we will just use the larger counterpart. We also
12873 retype memory; this comes from an attempt to avoid a REX prefix on
12874 moving the second half of a TFmode value. */
12875 if (GET_MODE (part[1][1]) == SImode)
12877 switch (GET_CODE (part[1][1]))
12880 part[1][1] = adjust_address (part[1][1], DImode, 0);
12884 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
12888 gcc_unreachable ();
12891 if (GET_MODE (part[1][0]) == SImode)
12892 part[1][0] = part[1][1];
12895 emit_move_insn (part[0][1], part[1][1]);
12896 emit_move_insn (part[0][0], part[1][0]);
12900 /* Choose the correct order so as not to overwrite the source before it is copied. */
12901 if ((REG_P (part[0][0])
12902 && REG_P (part[1][1])
12903 && (REGNO (part[0][0]) == REGNO (part[1][1])
12905 && REGNO (part[0][0]) == REGNO (part[1][2]))))
12907 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
12911 operands[2] = part[0][2];
12912 operands[3] = part[0][1];
12913 operands[4] = part[0][0];
12914 operands[5] = part[1][2];
12915 operands[6] = part[1][1];
12916 operands[7] = part[1][0];
12920 operands[2] = part[0][1];
12921 operands[3] = part[0][0];
12922 operands[5] = part[1][1];
12923 operands[6] = part[1][0];
12930 operands[2] = part[0][0];
12931 operands[3] = part[0][1];
12932 operands[4] = part[0][2];
12933 operands[5] = part[1][0];
12934 operands[6] = part[1][1];
12935 operands[7] = part[1][2];
12939 operands[2] = part[0][0];
12940 operands[3] = part[0][1];
12941 operands[5] = part[1][0];
12942 operands[6] = part[1][1];
12946 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
12949 if (CONST_INT_P (operands[5])
12950 && operands[5] != const0_rtx
12951 && REG_P (operands[2]))
12953 if (CONST_INT_P (operands[6])
12954 && INTVAL (operands[6]) == INTVAL (operands[5]))
12955 operands[6] = operands[2];
12958 && CONST_INT_P (operands[7])
12959 && INTVAL (operands[7]) == INTVAL (operands[5]))
12960 operands[7] = operands[2];
12964 && CONST_INT_P (operands[6])
12965 && operands[6] != const0_rtx
12966 && REG_P (operands[3])
12967 && CONST_INT_P (operands[7])
12968 && INTVAL (operands[7]) == INTVAL (operands[6]))
12969 operands[7] = operands[3];
12972 emit_move_insn (operands[2], operands[5]);
12973 emit_move_insn (operands[3], operands[6]);
12975 emit_move_insn (operands[4], operands[7]);
12980 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
12981 left shift by a constant, either using a single shift or
12982 a sequence of add instructions. */
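/* E.g. a left shift by 2 becomes two self-additions whenever two adds
are no more expensive than one constant shift on the target. */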
12985 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
12989 emit_insn ((mode == DImode
12991 : gen_adddi3) (operand, operand, operand));
12993 else if (!optimize_size
12994 && count * ix86_cost->add <= ix86_cost->shift_const)
12997 for (i = 0; i < count; i++)
12999 emit_insn ((mode == DImode
13001 : gen_adddi3) (operand, operand, operand));
13005 emit_insn ((mode == DImode
13007 : gen_ashldi3) (operand, operand, GEN_INT (count)));
13011 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
13013 rtx low[2], high[2];
13015 const int single_width = mode == DImode ? 32 : 64;
13017 if (CONST_INT_P (operands[2]))
13019 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
13020 count = INTVAL (operands[2]) & (single_width * 2 - 1);
13022 if (count >= single_width)
13024 emit_move_insn (high[0], low[1]);
13025 emit_move_insn (low[0], const0_rtx);
13027 if (count > single_width)
13028 ix86_expand_ashl_const (high[0], count - single_width, mode);
13032 if (!rtx_equal_p (operands[0], operands[1]))
13033 emit_move_insn (operands[0], operands[1]);
13034 emit_insn ((mode == DImode
13036 : gen_x86_64_shld) (high[0], low[0], GEN_INT (count)));
13037 ix86_expand_ashl_const (low[0], count, mode);
13042 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
13044 if (operands[1] == const1_rtx)
13046 /* Assuming we've chosen QImode-capable registers, 1 << N
13047 can be done with two 32/64-bit shifts, no branches, no cmoves. */
13048 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
13050 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
13052 ix86_expand_clear (low[0]);
13053 ix86_expand_clear (high[0]);
13054 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (single_width)));
13056 d = gen_lowpart (QImode, low[0]);
13057 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
13058 s = gen_rtx_EQ (QImode, flags, const0_rtx);
13059 emit_insn (gen_rtx_SET (VOIDmode, d, s));
13061 d = gen_lowpart (QImode, high[0]);
13062 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
13063 s = gen_rtx_NE (QImode, flags, const0_rtx);
13064 emit_insn (gen_rtx_SET (VOIDmode, d, s));
13067 /* Otherwise, we can get the same results by manually performing
13068 a bit extract operation on bit 5/6, and then performing the two
13069 shifts. The two methods of getting 0/1 into low/high are exactly
13070 the same size. Avoiding the shift in the bit extract case helps
13071 pentium4 a bit; no one else seems to care much either way. */
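/* Concretely, bit 5 (DImode) or bit 6 (TImode) of the shift count
selects which half receives the single set bit; the other half gets
the complementary 0/1 value, and both halves are then shifted by the
count. */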
13076 if (TARGET_PARTIAL_REG_STALL && !optimize_size)
13077 x = gen_rtx_ZERO_EXTEND (mode == DImode ? SImode : DImode, operands[2]);
13079 x = gen_lowpart (mode == DImode ? SImode : DImode, operands[2]);
13080 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
13082 emit_insn ((mode == DImode
13084 : gen_lshrdi3) (high[0], high[0], GEN_INT (mode == DImode ? 5 : 6)));
13085 emit_insn ((mode == DImode
13087 : gen_anddi3) (high[0], high[0], GEN_INT (1)));
13088 emit_move_insn (low[0], high[0]);
13089 emit_insn ((mode == DImode
13091 : gen_xordi3) (low[0], low[0], GEN_INT (1)));
13094 emit_insn ((mode == DImode
13096 : gen_ashldi3) (low[0], low[0], operands[2]));
13097 emit_insn ((mode == DImode
13099 : gen_ashldi3) (high[0], high[0], operands[2]));
13103 if (operands[1] == constm1_rtx)
13105 /* For -1 << N, we can avoid the shld instruction, because we
13106 know that we're shifting 0...31/63 ones into a -1. */
13107 emit_move_insn (low[0], constm1_rtx);
13109 emit_move_insn (high[0], low[0]);
13111 emit_move_insn (high[0], constm1_rtx);
13115 if (!rtx_equal_p (operands[0], operands[1]))
13116 emit_move_insn (operands[0], operands[1]);
13118 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
13119 emit_insn ((mode == DImode
13121 : gen_x86_64_shld) (high[0], low[0], operands[2]));
13124 emit_insn ((mode == DImode ? gen_ashlsi3 : gen_ashldi3) (low[0], low[0], operands[2]));
13126 if (TARGET_CMOVE && scratch)
13128 ix86_expand_clear (scratch);
13129 emit_insn ((mode == DImode
13130 ? gen_x86_shift_adj_1
13131 : gen_x86_64_shift_adj) (high[0], low[0], operands[2], scratch));
13134 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
13138 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
13140 rtx low[2], high[2];
13142 const int single_width = mode == DImode ? 32 : 64;
13144 if (CONST_INT_P (operands[2]))
13146 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
13147 count = INTVAL (operands[2]) & (single_width * 2 - 1);
13149 if (count == single_width * 2 - 1)
13151 emit_move_insn (high[0], high[1]);
13152 emit_insn ((mode == DImode
13154 : gen_ashrdi3) (high[0], high[0],
13155 GEN_INT (single_width - 1)));
13156 emit_move_insn (low[0], high[0]);
13159 else if (count >= single_width)
13161 emit_move_insn (low[0], high[1]);
13162 emit_move_insn (high[0], low[0]);
13163 emit_insn ((mode == DImode
13165 : gen_ashrdi3) (high[0], high[0],
13166 GEN_INT (single_width - 1)));
13167 if (count > single_width)
13168 emit_insn ((mode == DImode
13170 : gen_ashrdi3) (low[0], low[0],
13171 GEN_INT (count - single_width)));
13175 if (!rtx_equal_p (operands[0], operands[1]))
13176 emit_move_insn (operands[0], operands[1]);
13177 emit_insn ((mode == DImode
13179 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
13180 emit_insn ((mode == DImode
13182 : gen_ashrdi3) (high[0], high[0], GEN_INT (count)));
13187 if (!rtx_equal_p (operands[0], operands[1]))
13188 emit_move_insn (operands[0], operands[1]);
13190 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
13192 emit_insn ((mode == DImode
13194 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
13195 emit_insn ((mode == DImode
13197 : gen_ashrdi3) (high[0], high[0], operands[2]));
13199 if (TARGET_CMOVE && scratch)
13201 emit_move_insn (scratch, high[0]);
13202 emit_insn ((mode == DImode
13204 : gen_ashrdi3) (scratch, scratch,
13205 GEN_INT (single_width - 1)));
13206 emit_insn ((mode == DImode
13207 ? gen_x86_shift_adj_1
13208 : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
13212 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
13217 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
13219 rtx low[2], high[2];
13221 const int single_width = mode == DImode ? 32 : 64;
13223 if (CONST_INT_P (operands[2]))
13225 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
13226 count = INTVAL (operands[2]) & (single_width * 2 - 1);
13228 if (count >= single_width)
13230 emit_move_insn (low[0], high[1]);
13231 ix86_expand_clear (high[0]);
13233 if (count > single_width)
13234 emit_insn ((mode == DImode
13236 : gen_lshrdi3) (low[0], low[0],
13237 GEN_INT (count - single_width)));
13241 if (!rtx_equal_p (operands[0], operands[1]))
13242 emit_move_insn (operands[0], operands[1]);
13243 emit_insn ((mode == DImode
13245 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
13246 emit_insn ((mode == DImode
13248 : gen_lshrdi3) (high[0], high[0], GEN_INT (count)));
13253 if (!rtx_equal_p (operands[0], operands[1]))
13254 emit_move_insn (operands[0], operands[1]);
13256 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
13258 emit_insn ((mode == DImode
13260 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
13261 emit_insn ((mode == DImode
13263 : gen_lshrdi3) (high[0], high[0], operands[2]));
13265 /* Heh. By reversing the arguments, we can reuse this pattern. */
13266 if (TARGET_CMOVE && scratch)
13268 ix86_expand_clear (scratch);
13269 emit_insn ((mode == DImode
13270 ? gen_x86_shift_adj_1
13271 : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
13275 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
13279 /* Predict the just emitted jump instruction to be taken with probability PROB. */
13281 predict_jump (int prob)
13283 rtx insn = get_last_insn ();
13284 gcc_assert (JUMP_P (insn));
13286 = gen_rtx_EXPR_LIST (REG_BR_PROB,
13291 /* Helper function for the string operations below. Tests whether VARIABLE
13292 is aligned to VALUE bytes. If so, jump to the label. */
13294 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
13296 rtx label = gen_label_rtx ();
13297 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
13298 if (GET_MODE (variable) == DImode)
13299 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
13301 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
13302 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
13305 predict_jump (REG_BR_PROB_BASE * 50 / 100);
13307 predict_jump (REG_BR_PROB_BASE * 90 / 100);
13311 /* Adjust COUNTREG by VALUE. */
13313 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
13315 if (GET_MODE (countreg) == DImode)
13316 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
13318 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
13321 /* Zero-extend the possibly SImode EXP to a Pmode register. */
13323 ix86_zero_extend_to_Pmode (rtx exp)
13326 if (GET_MODE (exp) == VOIDmode)
13327 return force_reg (Pmode, exp);
13328 if (GET_MODE (exp) == Pmode)
13329 return copy_to_mode_reg (Pmode, exp);
13330 r = gen_reg_rtx (Pmode);
13331 emit_insn (gen_zero_extendsidi2 (r, exp));
13335 /* Divide COUNTREG by SCALE. */
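/* SCALE is the chunk size and always a power of two here, so a
register count is divided by shifting right exact_log2 (SCALE) bits,
while a constant count is folded directly. */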
13337 scale_counter (rtx countreg, int scale)
13340 rtx piece_size_mask;
13344 if (CONST_INT_P (countreg))
13345 return GEN_INT (INTVAL (countreg) / scale);
13346 gcc_assert (REG_P (countreg));
13348 piece_size_mask = GEN_INT (scale - 1);
13349 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
13350 GEN_INT (exact_log2 (scale)),
13351 NULL, 1, OPTAB_DIRECT);
13355 /* Return the mode for the memcpy/memset loop counter. Prefer SImode over
13356 DImode for constant loop counts. */
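/* SImode counter arithmetic avoids the REX.W prefix byte per
instruction on 64-bit targets and suffices whenever the constant
count fits in 32 bits. */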
13358 static enum machine_mode
13359 counter_mode (rtx count_exp)
13361 if (GET_MODE (count_exp) != VOIDmode)
13362 return GET_MODE (count_exp);
13363 if (GET_CODE (count_exp) != CONST_INT)
13365 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
13370 /* When SRCPTR is non-NULL, output a simple loop to move memory
13371 pointed to by SRCPTR to DESTPTR via chunks of MODE unrolled UNROLL times,
13372 with the overall size COUNT specified in bytes. When SRCPTR is NULL, output
13373 the equivalent loop to set memory to VALUE (supposed to be in MODE).
13375 The size is rounded down to a whole number of chunks moved at once.
13376 SRCMEM and DESTMEM provide MEM rtx to feed proper aliasing info. */
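/* The emitted loop is a plain counting loop: ITER runs from 0 up to
COUNT rounded down to a multiple of the chunk size, each iteration
moves or stores UNROLL chunks, and the pointers are advanced by the
final ITER value once the loop exits. */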
13380 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
13381 rtx destptr, rtx srcptr, rtx value,
13382 rtx count, enum machine_mode mode, int unroll,
13385 rtx out_label, top_label, iter, tmp;
13386 enum machine_mode iter_mode = counter_mode (count);
13387 rtx piece_size = GEN_INT (GET_MODE_SIZE (mode) * unroll);
13388 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
13394 top_label = gen_label_rtx ();
13395 out_label = gen_label_rtx ();
13396 iter = gen_reg_rtx (iter_mode);
13398 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
13399 NULL, 1, OPTAB_DIRECT);
13400 /* Those two should combine. */
13401 if (piece_size == const1_rtx)
13403 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
13405 predict_jump (REG_BR_PROB_BASE * 10 / 100);
13407 emit_move_insn (iter, const0_rtx);
13409 emit_label (top_label);
13411 tmp = convert_modes (Pmode, iter_mode, iter, true);
13412 x_addr = gen_rtx_PLUS (Pmode, destptr, tmp);
13413 destmem = change_address (destmem, mode, x_addr);
13417 y_addr = gen_rtx_PLUS (Pmode, srcptr, copy_rtx (tmp));
13418 srcmem = change_address (srcmem, mode, y_addr);
13420 /* When unrolling for chips that reorder memory reads and writes,
13421 we can save registers by using a single temporary.
13422 Using 4 temporaries is also overkill in 32-bit mode. */
13423 if (!TARGET_64BIT && 0)
13425 for (i = 0; i < unroll; i++)
13430 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
13432 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
13434 emit_move_insn (destmem, srcmem);
13440 gcc_assert (unroll <= 4);
13441 for (i = 0; i < unroll; i++)
13443 tmpreg[i] = gen_reg_rtx (mode);
13447 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
13449 emit_move_insn (tmpreg[i], srcmem);
13451 for (i = 0; i < unroll; i++)
13456 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
13458 emit_move_insn (destmem, tmpreg[i]);
13463 for (i = 0; i < unroll; i++)
13467 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
13468 emit_move_insn (destmem, value);
13471 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
13472 true, OPTAB_LIB_WIDEN);
13474 emit_move_insn (iter, tmp);
13476 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
13478 if (expected_size != -1)
13480 expected_size /= GET_MODE_SIZE (mode) * unroll;
13481 if (expected_size == 0)
13483 else if (expected_size > REG_BR_PROB_BASE)
13484 predict_jump (REG_BR_PROB_BASE - 1);
13486 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
13489 predict_jump (REG_BR_PROB_BASE * 80 / 100);
13490 iter = ix86_zero_extend_to_Pmode (iter);
13491 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
13492 true, OPTAB_LIB_WIDEN);
13493 if (tmp != destptr)
13494 emit_move_insn (destptr, tmp);
13497 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
13498 true, OPTAB_LIB_WIDEN);
13500 emit_move_insn (srcptr, tmp);
13502 emit_label (out_label);
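/* Sketch of the structure emitted above (illustrative, memcpy case):

	size = count & ~(GET_MODE_SIZE (mode) * unroll - 1);
	iter = 0;
   top: copy (or set) UNROLL chunks of MODE at dest + iter [and src + iter];
	iter += GET_MODE_SIZE (mode) * unroll;
	if (iter < size) goto top;
	dest += iter;  [src += iter;]
   out:  */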
13505 /* Output "rep; mov" instruction.
13506 Arguments have same meaning as for previous function */
13508 expand_movmem_via_rep_mov (rtx destmem, rtx srcmem,
13509 rtx destptr, rtx srcptr,
13511 enum machine_mode mode)
13517 /* If the size is known, it is shorter to use rep movs. */
13518 if (mode == QImode && CONST_INT_P (count)
13519 && !(INTVAL (count) & 3))
13522 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
13523 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
13524 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
13525 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
13526 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
13527 if (mode != QImode)
13529 destexp = gen_rtx_ASHIFT (Pmode, countreg,
13530 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
13531 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
13532 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
13533 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
13534 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
13538 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
13539 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
13541 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
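/* Illustrative example: for MODE == SImode and a known COUNT of 64 bytes,
   the code above loads the count register with 16 (= 64 / 4) and emits
   `rep movsd'; DESTEXP and SRCEXP describe the final values of the pointer
   registers.  */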
13545 /* Output "rep; stos" instruction.
13546 Arguments have same meaning as for previous function */
13548 expand_setmem_via_rep_stos (rtx destmem, rtx destptr, rtx value,
13550 enum machine_mode mode)
13555 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
13556 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
13557 value = force_reg (mode, gen_lowpart (mode, value));
13558 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
13559 if (mode != QImode)
13561 destexp = gen_rtx_ASHIFT (Pmode, countreg,
13562 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
13563 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
13566 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
13567 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
13571 emit_strmov (rtx destmem, rtx srcmem,
13572 rtx destptr, rtx srcptr, enum machine_mode mode, int offset)
13574 rtx src = adjust_automodify_address_nv (srcmem, mode, srcptr, offset);
13575 rtx dest = adjust_automodify_address_nv (destmem, mode, destptr, offset);
13576 emit_insn (gen_strmov (destptr, dest, srcptr, src));
13579 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
13581 expand_movmem_epilogue (rtx destmem, rtx srcmem,
13582 rtx destptr, rtx srcptr, rtx count, int max_size)
13585 if (CONST_INT_P (count))
13587 HOST_WIDE_INT countval = INTVAL (count);
13590 if ((countval & 0x10) && max_size > 16)
13594 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
13595 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset + 8);
13598 gcc_unreachable ();
13601 if ((countval & 0x08) && max_size > 8)
13604 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
13607 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
13608 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset + 4);
13612 if ((countval & 0x04) && max_size > 4)
13614 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
13617 if ((countval & 0x02) && max_size > 2)
13619 emit_strmov (destmem, srcmem, destptr, srcptr, HImode, offset);
13622 if ((countval & 0x01) && max_size > 1)
13624 emit_strmov (destmem, srcmem, destptr, srcptr, QImode, offset);
13631 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
13632 count, 1, OPTAB_DIRECT);
13633 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
13634 count, QImode, 1, 4);
13638 /* When single stringop insns are available, we can cheaply advance the dest
13639 and src pointers. Otherwise we save code size by maintaining an offset (zero is
13640 readily available from the preceding rep operation) and using x86 addressing modes.
13642 if (TARGET_SINGLE_STRINGOP)
13646 rtx label = ix86_expand_aligntest (count, 4, true);
13647 src = change_address (srcmem, SImode, srcptr);
13648 dest = change_address (destmem, SImode, destptr);
13649 emit_insn (gen_strmov (destptr, dest, srcptr, src));
13650 emit_label (label);
13651 LABEL_NUSES (label) = 1;
13655 rtx label = ix86_expand_aligntest (count, 2, true);
13656 src = change_address (srcmem, HImode, srcptr);
13657 dest = change_address (destmem, HImode, destptr);
13658 emit_insn (gen_strmov (destptr, dest, srcptr, src));
13659 emit_label (label);
13660 LABEL_NUSES (label) = 1;
13664 rtx label = ix86_expand_aligntest (count, 1, true);
13665 src = change_address (srcmem, QImode, srcptr);
13666 dest = change_address (destmem, QImode, destptr);
13667 emit_insn (gen_strmov (destptr, dest, srcptr, src));
13668 emit_label (label);
13669 LABEL_NUSES (label) = 1;
13674 rtx offset = force_reg (Pmode, const0_rtx);
13679 rtx label = ix86_expand_aligntest (count, 4, true);
13680 src = change_address (srcmem, SImode, srcptr);
13681 dest = change_address (destmem, SImode, destptr);
13682 emit_move_insn (dest, src);
13683 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
13684 true, OPTAB_LIB_WIDEN);
13686 emit_move_insn (offset, tmp);
13687 emit_label (label);
13688 LABEL_NUSES (label) = 1;
13692 rtx label = ix86_expand_aligntest (count, 2, true);
13693 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
13694 src = change_address (srcmem, HImode, tmp);
13695 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
13696 dest = change_address (destmem, HImode, tmp);
13697 emit_move_insn (dest, src);
13698 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
13699 true, OPTAB_LIB_WIDEN);
13701 emit_move_insn (offset, tmp);
13702 emit_label (label);
13703 LABEL_NUSES (label) = 1;
13707 rtx label = ix86_expand_aligntest (count, 1, true);
13708 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
13709 src = change_address (srcmem, QImode, tmp);
13710 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
13711 dest = change_address (destmem, QImode, tmp);
13712 emit_move_insn (dest, src);
13713 emit_label (label);
13714 LABEL_NUSES (label) = 1;
13719 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
13721 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
13722 rtx count, int max_size)
13725 expand_simple_binop (counter_mode (count), AND, count,
13726 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
13727 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
13728 gen_lowpart (QImode, value), count, QImode,
13732 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
13734 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx count, int max_size)
13738 if (CONST_INT_P (count))
13740 HOST_WIDE_INT countval = INTVAL (count);
13743 if ((countval & 0x10) && max_size > 16)
13747 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
13748 emit_insn (gen_strset (destptr, dest, value));
13749 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset + 8);
13750 emit_insn (gen_strset (destptr, dest, value));
13753 gcc_unreachable ();
13756 if ((countval & 0x08) && max_size > 8)
13760 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
13761 emit_insn (gen_strset (destptr, dest, value));
13765 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
13766 emit_insn (gen_strset (destptr, dest, value));
13767 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset + 4);
13768 emit_insn (gen_strset (destptr, dest, value));
13772 if ((countval & 0x04) && max_size > 4)
13774 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
13775 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
13778 if ((countval & 0x02) && max_size > 2)
13780 dest = adjust_automodify_address_nv (destmem, HImode, destptr, offset);
13781 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
13784 if ((countval & 0x01) && max_size > 1)
13786 dest = adjust_automodify_address_nv (destmem, QImode, destptr, offset);
13787 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
13794 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
13799 rtx label = ix86_expand_aligntest (count, 16, true);
13802 dest = change_address (destmem, DImode, destptr);
13803 emit_insn (gen_strset (destptr, dest, value));
13804 emit_insn (gen_strset (destptr, dest, value));
13808 dest = change_address (destmem, SImode, destptr);
13809 emit_insn (gen_strset (destptr, dest, value));
13810 emit_insn (gen_strset (destptr, dest, value));
13811 emit_insn (gen_strset (destptr, dest, value));
13812 emit_insn (gen_strset (destptr, dest, value));
13814 emit_label (label);
13815 LABEL_NUSES (label) = 1;
13819 rtx label = ix86_expand_aligntest (count, 8, true);
13822 dest = change_address (destmem, DImode, destptr);
13823 emit_insn (gen_strset (destptr, dest, value));
13827 dest = change_address (destmem, SImode, destptr);
13828 emit_insn (gen_strset (destptr, dest, value));
13829 emit_insn (gen_strset (destptr, dest, value));
13831 emit_label (label);
13832 LABEL_NUSES (label) = 1;
13836 rtx label = ix86_expand_aligntest (count, 4, true);
13837 dest = change_address (destmem, SImode, destptr);
13838 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
13839 emit_label (label);
13840 LABEL_NUSES (label) = 1;
13844 rtx label = ix86_expand_aligntest (count, 2, true);
13845 dest = change_address (destmem, HImode, destptr);
13846 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
13847 emit_label (label);
13848 LABEL_NUSES (label) = 1;
13852 rtx label = ix86_expand_aligntest (count, 1, true);
13853 dest = change_address (destmem, QImode, destptr);
13854 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
13855 emit_label (label);
13856 LABEL_NUSES (label) = 1;
13860 /* Copy enough bytes from SRC to DEST to align DEST, known to be aligned by ALIGN,
13861 to DESIRED_ALIGNMENT. */
13863 expand_movmem_prologue (rtx destmem, rtx srcmem,
13864 rtx destptr, rtx srcptr, rtx count,
13865 int align, int desired_alignment)
13867 if (align <= 1 && desired_alignment > 1)
13869 rtx label = ix86_expand_aligntest (destptr, 1, false);
13870 srcmem = change_address (srcmem, QImode, srcptr);
13871 destmem = change_address (destmem, QImode, destptr);
13872 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
13873 ix86_adjust_counter (count, 1);
13874 emit_label (label);
13875 LABEL_NUSES (label) = 1;
13877 if (align <= 2 && desired_alignment > 2)
13879 rtx label = ix86_expand_aligntest (destptr, 2, false);
13880 srcmem = change_address (srcmem, HImode, srcptr);
13881 destmem = change_address (destmem, HImode, destptr);
13882 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
13883 ix86_adjust_counter (count, 2);
13884 emit_label (label);
13885 LABEL_NUSES (label) = 1;
13887 if (align <= 4 && desired_alignment > 4)
13889 rtx label = ix86_expand_aligntest (destptr, 4, false);
13890 srcmem = change_address (srcmem, SImode, srcptr);
13891 destmem = change_address (destmem, SImode, destptr);
13892 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
13893 ix86_adjust_counter (count, 4);
13894 emit_label (label);
13895 LABEL_NUSES (label) = 1;
13897 gcc_assert (desired_alignment <= 8);
13900 /* Set enough bytes at DEST to align DEST, known to be aligned by ALIGN,
13901 to DESIRED_ALIGNMENT. */
13903 expand_setmem_prologue (rtx destmem, rtx destptr, rtx value, rtx count,
13904 int align, int desired_alignment)
13906 if (align <= 1 && desired_alignment > 1)
13908 rtx label = ix86_expand_aligntest (destptr, 1, false);
13909 destmem = change_address (destmem, QImode, destptr);
13910 emit_insn (gen_strset (destptr, destmem, gen_lowpart (QImode, value)));
13911 ix86_adjust_counter (count, 1);
13912 emit_label (label);
13913 LABEL_NUSES (label) = 1;
13915 if (align <= 2 && desired_alignment > 2)
13917 rtx label = ix86_expand_aligntest (destptr, 2, false);
13918 destmem = change_address (destmem, HImode, destptr);
13919 emit_insn (gen_strset (destptr, destmem, gen_lowpart (HImode, value)));
13920 ix86_adjust_counter (count, 2);
13921 emit_label (label);
13922 LABEL_NUSES (label) = 1;
13924 if (align <= 4 && desired_alignment > 4)
13926 rtx label = ix86_expand_aligntest (destptr, 4, false);
13927 destmem = change_address (destmem, SImode, destptr);
13928 emit_insn (gen_strset (destptr, destmem, gen_lowpart (SImode, value)));
13929 ix86_adjust_counter (count, 4);
13930 emit_label (label);
13931 LABEL_NUSES (label) = 1;
13933 gcc_assert (desired_alignment <= 8);
13936 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
13937 static enum stringop_alg
13938 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
13939 int *dynamic_check)
13941 const struct stringop_algs * algs;
13943 *dynamic_check = -1;
13945 algs = &ix86_cost->memset[TARGET_64BIT != 0];
13947 algs = &ix86_cost->memcpy[TARGET_64BIT != 0];
13948 if (stringop_alg != no_stringop)
13949 return stringop_alg;
13950 /* rep; movq or rep; movl is the smallest variant. */
13951 else if (optimize_size)
13953 if (!count || (count & 3))
13954 return rep_prefix_1_byte;
13956 return rep_prefix_4_byte;
13958 /* Very tiny blocks are best handled via the loop; REP is expensive to set up.
13960 else if (expected_size != -1 && expected_size < 4)
13961 return loop_1_byte;
13962 else if (expected_size != -1)
13965 enum stringop_alg alg = libcall;
13966 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
13968 gcc_assert (algs->size[i].max);
13969 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
13971 if (algs->size[i].alg != libcall)
13972 alg = algs->size[i].alg;
13973 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
13974 last non-libcall inline algorithm. */
13975 if (TARGET_INLINE_ALL_STRINGOPS)
13977 /* When the current size is best copied by a libcall,
13978 but we are still forced to inline, run the heuristic below
13979 that will pick code for medium-sized blocks. */
13980 if (alg != libcall)
13985 return algs->size[i].alg;
13988 gcc_assert (TARGET_INLINE_ALL_STRINGOPS);
13990 /* When asked to inline the call anyway, try to pick a meaningful choice.
13991 We look for the maximal size of a block that is faster to copy by hand and
13992 take blocks of at most that size, guessing that the average size will
13993 be roughly half of the block.
13995 If this turns out to be bad, we might simply specify the preferred
13996 choice in ix86_costs. */
13997 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
13998 && algs->unknown_size == libcall)
14001 enum stringop_alg alg;
14004 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
14005 if (algs->size[i].alg != libcall && algs->size[i].alg)
14006 max = algs->size[i].max;
14009 alg = decide_alg (count, max / 2, memset, dynamic_check);
14010 gcc_assert (*dynamic_check == -1);
14011 gcc_assert (alg != libcall);
14012 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
14013 *dynamic_check = max;
14016 return algs->unknown_size;
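/* Illustrative example: when optimizing for size, a known count that is a
   multiple of 4 selects rep_prefix_4_byte, while an unknown count falls back
   to rep_prefix_1_byte, per the optimize_size branch above.  */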
14019 /* Decide on alignment. We know that the operand is already aligned to ALIGN
14020 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
14022 decide_alignment (int align,
14023 enum stringop_alg alg,
14026 int desired_align = 0;
14030 gcc_unreachable ();
14032 case unrolled_loop:
14033 desired_align = GET_MODE_SIZE (Pmode);
14035 case rep_prefix_8_byte:
14038 case rep_prefix_4_byte:
14039 /* PentiumPro has special logic triggering for 8-byte-aligned blocks,
14040 copying a whole cache line at once. */
14041 if (TARGET_PENTIUMPRO)
14046 case rep_prefix_1_byte:
14047 /* PentiumPro has special logic triggering for 8-byte-aligned blocks,
14048 copying a whole cache line at once. */
14049 if (TARGET_PENTIUMPRO)
14063 if (desired_align < align)
14064 desired_align = align;
14065 if (expected_size != -1 && expected_size < 4)
14066 desired_align = align;
14067 return desired_align;
14070 /* Return the smallest power of 2 greater than VAL. */
14072 smallest_pow2_greater_than (int val)
14080 /* Expand string move (memcpy) operation. Use i386 string operations when
14081 profitable. expand_clrmem contains similar code. The code depends upon
14082 architecture, block size and alignment, but always has the same
14085 1) Prologue guard: Conditional that jumps up to epilogues for small
14086 blocks that can be handled by epilogue alone. This is faster but
14087 also needed for correctness, since the prologue assumes the block is larger
14088 than the desired alignment.
14090 Optional dynamic check for size and libcall for large
14091 blocks is emitted here too, with -minline-stringops-dynamically.
14093 2) Prologue: copy the first few bytes in order to get the destination aligned
14094 to DESIRED_ALIGN. It is emitted only when ALIGN is less than
14095 DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be copied.
14096 We emit either a jump tree (on power-of-two-sized blocks) or a byte loop.
14098 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
14099 with specified algorithm.
14101 4) Epilogue: code copying the tail of the block that is too small to be
14102 handled by the main body (or up to the size guarded by the prologue guard); see the illustrative sketch below. */
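/* Illustrative sketch (not compiled) of the generated shape; labels and
   register choices are arbitrary:

	cmp	count, epilogue_size_needed
	jb	.Lepilogue			; 1) prologue guard
	...copy up to desired_align - align bytes...	; 2) prologue
   .Lloop:
	...copy size_needed bytes...		; 3) main body
	jb	.Lloop
   .Lepilogue:
	...copy count & (epilogue_size_needed - 1) bytes...  ; 4) epilogue  */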
14105 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
14106 rtx expected_align_exp, rtx expected_size_exp)
14112 rtx jump_around_label = NULL;
14113 HOST_WIDE_INT align = 1;
14114 unsigned HOST_WIDE_INT count = 0;
14115 HOST_WIDE_INT expected_size = -1;
14116 int size_needed = 0, epilogue_size_needed;
14117 int desired_align = 0;
14118 enum stringop_alg alg;
14121 if (CONST_INT_P (align_exp))
14122 align = INTVAL (align_exp);
14123 /* i386 can do misaligned accesses at a reasonably increased cost. */
14124 if (CONST_INT_P (expected_align_exp)
14125 && INTVAL (expected_align_exp) > align)
14126 align = INTVAL (expected_align_exp);
14127 if (CONST_INT_P (count_exp))
14128 count = expected_size = INTVAL (count_exp);
14129 if (CONST_INT_P (expected_size_exp) && count == 0)
14130 expected_size = INTVAL (expected_size_exp);
14132 /* Step 0: Decide on preferred algorithm, desired alignment and
14133 size of chunks to be copied by main loop. */
14135 alg = decide_alg (count, expected_size, false, &dynamic_check);
14136 desired_align = decide_alignment (align, alg, expected_size);
14138 if (!TARGET_ALIGN_STRINGOPS)
14139 align = desired_align;
14141 if (alg == libcall)
14143 gcc_assert (alg != no_stringop);
14145 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
14146 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
14147 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
14152 gcc_unreachable ();
14154 size_needed = GET_MODE_SIZE (Pmode);
14156 case unrolled_loop:
14157 size_needed = GET_MODE_SIZE (Pmode) * (TARGET_64BIT ? 4 : 2);
14159 case rep_prefix_8_byte:
14162 case rep_prefix_4_byte:
14165 case rep_prefix_1_byte:
14171 epilogue_size_needed = size_needed;
14173 /* Step 1: Prologue guard. */
14175 /* Alignment code needs count to be in register. */
14176 if (CONST_INT_P (count_exp) && desired_align > align)
14178 enum machine_mode mode = SImode;
14179 if (TARGET_64BIT && (count & ~0xffffffff))
14181 count_exp = force_reg (mode, count_exp);
14183 gcc_assert (desired_align >= 1 && align >= 1);
14185 /* Ensure that alignment prologue won't copy past end of block. */
14186 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
14188 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
14189 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
14190 Make sure it is power of 2. */
14191 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
14193 label = gen_label_rtx ();
14194 emit_cmp_and_jump_insns (count_exp,
14195 GEN_INT (epilogue_size_needed),
14196 LTU, 0, counter_mode (count_exp), 1, label);
14197 if (GET_CODE (count_exp) == CONST_INT)
14199 else if (expected_size == -1 || expected_size < epilogue_size_needed)
14200 predict_jump (REG_BR_PROB_BASE * 60 / 100);
14202 predict_jump (REG_BR_PROB_BASE * 20 / 100);
14204 /* Emit code to decide at runtime whether a library call or inline code should be
14206 if (dynamic_check != -1)
14208 rtx hot_label = gen_label_rtx ();
14209 jump_around_label = gen_label_rtx ();
14210 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
14211 LEU, 0, GET_MODE (count_exp), 1, hot_label);
14212 predict_jump (REG_BR_PROB_BASE * 90 / 100);
14213 emit_block_move_via_libcall (dst, src, count_exp, false);
14214 emit_jump (jump_around_label);
14215 emit_label (hot_label);
14218 /* Step 2: Alignment prologue. */
14220 if (desired_align > align)
14222 /* Except for the first move in the epilogue, we no longer know
14223 the constant offset in the aliasing info. It doesn't seem worth
14224 the pain to maintain it for the first move, so throw away
14226 src = change_address (src, BLKmode, srcreg);
14227 dst = change_address (dst, BLKmode, destreg);
14228 expand_movmem_prologue (dst, src, destreg, srcreg, count_exp, align,
14231 if (label && size_needed == 1)
14233 emit_label (label);
14234 LABEL_NUSES (label) = 1;
14238 /* Step 3: Main loop. */
14244 gcc_unreachable ();
14246 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
14247 count_exp, QImode, 1, expected_size);
14250 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
14251 count_exp, Pmode, 1, expected_size);
14253 case unrolled_loop:
14254 /* Unroll only by a factor of 2 in 32-bit mode, since we don't have enough
14255 registers for 4 temporaries anyway. */
14256 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
14257 count_exp, Pmode, TARGET_64BIT ? 4 : 2,
14260 case rep_prefix_8_byte:
14261 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
14264 case rep_prefix_4_byte:
14265 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
14268 case rep_prefix_1_byte:
14269 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
14273 /* Properly adjust the offset of the src and dest memory for aliasing. */
14274 if (CONST_INT_P (count_exp))
14276 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
14277 (count / size_needed) * size_needed);
14278 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
14279 (count / size_needed) * size_needed);
14283 src = change_address (src, BLKmode, srcreg);
14284 dst = change_address (dst, BLKmode, destreg);
14287 /* Step 4: Epilogue to copy the remaining bytes. */
14291 /* When the main loop is done, COUNT_EXP might hold the original count,
14292 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
14293 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
14294 bytes. Compensate if needed. */
14296 if (size_needed < epilogue_size_needed)
14299 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
14300 GEN_INT (size_needed - 1), count_exp, 1,
14302 if (tmp != count_exp)
14303 emit_move_insn (count_exp, tmp);
14305 emit_label (label);
14306 LABEL_NUSES (label) = 1;
14309 if (count_exp != const0_rtx && epilogue_size_needed > 1)
14310 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
14311 epilogue_size_needed);
14312 if (jump_around_label)
14313 emit_label (jump_around_label);
14317 /* Helper function for memset. For the QImode value 0xXY produce
14318 0xXYXYXYXY of the width specified by MODE. This is essentially
14319 VAL * 0x01010101, but we can do slightly better than
14320 synth_mult by unwinding the sequence by hand on CPUs with
14323 promote_duplicated_reg (enum machine_mode mode, rtx val)
14325 enum machine_mode valmode = GET_MODE (val);
14327 int nops = mode == DImode ? 3 : 2;
14329 gcc_assert (mode == SImode || mode == DImode);
14330 if (val == const0_rtx)
14331 return copy_to_mode_reg (mode, const0_rtx);
14332 if (CONST_INT_P (val))
14334 HOST_WIDE_INT v = INTVAL (val) & 255;
14338 if (mode == DImode)
14339 v |= (v << 16) << 16;
14340 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
14343 if (valmode == VOIDmode)
14345 if (valmode != QImode)
14346 val = gen_lowpart (QImode, val);
14347 if (mode == QImode)
14349 if (!TARGET_PARTIAL_REG_STALL)
14351 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
14352 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
14353 <= (ix86_cost->shift_const + ix86_cost->add) * nops
14354 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
14356 rtx reg = convert_modes (mode, QImode, val, true);
14357 tmp = promote_duplicated_reg (mode, const1_rtx);
14358 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
14363 rtx reg = convert_modes (mode, QImode, val, true);
14365 if (!TARGET_PARTIAL_REG_STALL)
14366 if (mode == SImode)
14367 emit_insn (gen_movsi_insv_1 (reg, reg));
14369 emit_insn (gen_movdi_insv_1_rex64 (reg, reg));
14372 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
14373 NULL, 1, OPTAB_DIRECT);
14375 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
14377 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
14378 NULL, 1, OPTAB_DIRECT);
14379 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
14380 if (mode == SImode)
14382 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
14383 NULL, 1, OPTAB_DIRECT);
14384 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
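#if 0
/* Illustrative host-C equivalent of the SImode shift/IOR sequence above (not
   part of GCC; promote32 is a hypothetical name).  promote32 (0x5a) yields
   0x5a5a5a5a.  */
static unsigned int
promote32 (unsigned char byte)
{
  unsigned int r = byte;	/* 0x0000005a */
  r |= r << 8;			/* 0x00005a5a */
  r |= r << 16;			/* 0x5a5a5a5a */
  return r;
}
#endif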
14389 /* Duplicate the value VAL using promote_duplicated_reg into the maximal size
14390 that will be needed by the main loop copying SIZE_NEEDED chunks and by the
14391 prologue getting the alignment from ALIGN to DESIRED_ALIGN. */
14393 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align, int align)
14398 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
14399 promoted_val = promote_duplicated_reg (DImode, val);
14400 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
14401 promoted_val = promote_duplicated_reg (SImode, val);
14402 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
14403 promoted_val = promote_duplicated_reg (HImode, val);
14405 promoted_val = val;
14407 return promoted_val;
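/* For example (illustrative): with SIZE_NEEDED == 8 on a 64-bit target the
   byte value is promoted all the way to DImode, so the main loop can store
   eight identical bytes per move.  */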
14410 /* Expand the string set operation (memset/bzero). Use i386 string operations
14411 when profitable. See the expand_movmem comment for an explanation of the
14412 individual steps performed. */
14414 ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
14415 rtx expected_align_exp, rtx expected_size_exp)
14420 rtx jump_around_label = NULL;
14421 HOST_WIDE_INT align = 1;
14422 unsigned HOST_WIDE_INT count = 0;
14423 HOST_WIDE_INT expected_size = -1;
14424 int size_needed = 0, epilogue_size_needed;
14425 int desired_align = 0;
14426 enum stringop_alg alg;
14427 rtx promoted_val = NULL;
14428 bool force_loopy_epilogue = false;
14431 if (CONST_INT_P (align_exp))
14432 align = INTVAL (align_exp);
14433 /* i386 can do misaligned accesses at a reasonably increased cost. */
14434 if (CONST_INT_P (expected_align_exp)
14435 && INTVAL (expected_align_exp) > align)
14436 align = INTVAL (expected_align_exp);
14437 if (CONST_INT_P (count_exp))
14438 count = expected_size = INTVAL (count_exp);
14439 if (CONST_INT_P (expected_size_exp) && count == 0)
14440 expected_size = INTVAL (expected_size_exp);
14442 /* Step 0: Decide on preferred algorithm, desired alignment and
14443 size of chunks to be copied by main loop. */
14445 alg = decide_alg (count, expected_size, true, &dynamic_check);
14446 desired_align = decide_alignment (align, alg, expected_size);
14448 if (!TARGET_ALIGN_STRINGOPS)
14449 align = desired_align;
14451 if (alg == libcall)
14453 gcc_assert (alg != no_stringop);
14455 count_exp = copy_to_mode_reg (counter_mode (count_exp), count_exp);
14456 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
14461 gcc_unreachable ();
14463 size_needed = GET_MODE_SIZE (Pmode);
14465 case unrolled_loop:
14466 size_needed = GET_MODE_SIZE (Pmode) * 4;
14468 case rep_prefix_8_byte:
14471 case rep_prefix_4_byte:
14474 case rep_prefix_1_byte:
14479 epilogue_size_needed = size_needed;
14481 /* Step 1: Prologue guard. */
14483 /* Alignment code needs count to be in register. */
14484 if (CONST_INT_P (count_exp) && desired_align > align)
14486 enum machine_mode mode = SImode;
14487 if (TARGET_64BIT && (count & ~0xffffffff))
14489 count_exp = force_reg (mode, count_exp);
14491 /* Do the cheap promotion to allow better CSE across the
14492 main loop and epilogue (i.e. one load of the big constant in
14493 front of all the code). */
14494 if (CONST_INT_P (val_exp))
14495 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
14496 desired_align, align);
14497 /* Ensure that alignment prologue won't copy past end of block. */
14498 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
14500 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
14501 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
14502 Make sure it is power of 2. */
14503 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
14505 /* To improve performance for small blocks, we jump around the VAL-
14506 promoting code. This means that if the promoted VAL is not constant,
14507 we might not use it in the epilogue and have to use the byte
14509 if (epilogue_size_needed > 2 && !promoted_val)
14510 force_loopy_epilogue = true;
14511 label = gen_label_rtx ();
14512 emit_cmp_and_jump_insns (count_exp,
14513 GEN_INT (epilogue_size_needed),
14514 LTU, 0, counter_mode (count_exp), 1, label);
14515 if (GET_CODE (count_exp) == CONST_INT)
14517 else if (expected_size == -1 || expected_size <= epilogue_size_needed)
14518 predict_jump (REG_BR_PROB_BASE * 60 / 100);
14520 predict_jump (REG_BR_PROB_BASE * 20 / 100);
14522 if (dynamic_check != -1)
14524 rtx hot_label = gen_label_rtx ();
14525 jump_around_label = gen_label_rtx ();
14526 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
14527 LEU, 0, counter_mode (count_exp), 1, hot_label);
14528 predict_jump (REG_BR_PROB_BASE * 90 / 100);
14529 set_storage_via_libcall (dst, count_exp, val_exp, false);
14530 emit_jump (jump_around_label);
14531 emit_label (hot_label);
14534 /* Step 2: Alignment prologue. */
14536 /* Do the expensive promotion once we branched off the small blocks. */
14538 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
14539 desired_align, align);
14540 gcc_assert (desired_align >= 1 && align >= 1);
14542 if (desired_align > align)
14544 /* Except for the first move in the epilogue, we no longer know
14545 the constant offset in the aliasing info. It doesn't seem worth
14546 the pain to maintain it for the first move, so throw away
14548 dst = change_address (dst, BLKmode, destreg);
14549 expand_setmem_prologue (dst, destreg, promoted_val, count_exp, align,
14552 if (label && size_needed == 1)
14554 emit_label (label);
14555 LABEL_NUSES (label) = 1;
14559 /* Step 3: Main loop. */
14565 gcc_unreachable ();
14567 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
14568 count_exp, QImode, 1, expected_size);
14571 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
14572 count_exp, Pmode, 1, expected_size);
14574 case unrolled_loop:
14575 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
14576 count_exp, Pmode, 4, expected_size);
14578 case rep_prefix_8_byte:
14579 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
14582 case rep_prefix_4_byte:
14583 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
14586 case rep_prefix_1_byte:
14587 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
14591 /* Properly adjust the offset of the dest memory for aliasing. */
14592 if (CONST_INT_P (count_exp))
14593 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
14594 (count / size_needed) * size_needed);
14596 dst = change_address (dst, BLKmode, destreg);
14598 /* Step 4: Epilogue to copy the remaining bytes. */
14602 /* When the main loop is done, COUNT_EXP might hold the original count,
14603 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
14604 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
14605 bytes. Compensate if needed. */
14607 if (size_needed < desired_align - align)
14610 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
14611 GEN_INT (size_needed - 1), count_exp, 1,
14613 size_needed = desired_align - align + 1;
14614 if (tmp != count_exp)
14615 emit_move_insn (count_exp, tmp);
14617 emit_label (label);
14618 LABEL_NUSES (label) = 1;
14620 if (count_exp != const0_rtx && epilogue_size_needed > 1)
14622 if (force_loopy_epilogue)
14623 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
14626 expand_setmem_epilogue (dst, destreg, promoted_val, count_exp,
14629 if (jump_around_label)
14630 emit_label (jump_around_label);
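/* For example (illustrative): a memset expanded with the unrolled_loop
   algorithm promotes the byte value to a full-word pattern once, before the
   main loop, and both the main loop and the epilogue reuse it, unless the
   small-block branch above skipped the promotion.  */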
14634 /* Expand the appropriate insns for doing strlen if not just doing
14637 out = result, initialized with the start address
14638 align_rtx = alignment of the address.
14639 scratch = scratch register, initialized with the start address when
14640 not aligned, otherwise undefined
14642 This is just the body. It needs the initializations mentioned above and
14643 some address computing at the end. These things are done in i386.md. */
14646 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
14650 rtx align_2_label = NULL_RTX;
14651 rtx align_3_label = NULL_RTX;
14652 rtx align_4_label = gen_label_rtx ();
14653 rtx end_0_label = gen_label_rtx ();
14655 rtx tmpreg = gen_reg_rtx (SImode);
14656 rtx scratch = gen_reg_rtx (SImode);
14660 if (CONST_INT_P (align_rtx))
14661 align = INTVAL (align_rtx);
14663 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
14665 /* Is there a known alignment and is it less than 4? */
14668 rtx scratch1 = gen_reg_rtx (Pmode);
14669 emit_move_insn (scratch1, out);
14670 /* Is there a known alignment and is it not 2? */
14673 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
14674 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
14676 /* Leave just the 3 lower bits. */
14677 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
14678 NULL_RTX, 0, OPTAB_WIDEN);
14680 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
14681 Pmode, 1, align_4_label);
14682 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
14683 Pmode, 1, align_2_label);
14684 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
14685 Pmode, 1, align_3_label);
14689 /* Since the alignment is 2, we have to check 2 or 0 bytes;
14690 check whether it is aligned to a 4-byte boundary. */
14692 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
14693 NULL_RTX, 0, OPTAB_WIDEN);
14695 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
14696 Pmode, 1, align_4_label);
14699 mem = change_address (src, QImode, out);
14701 /* Now compare the bytes. */
14703 /* Compare the first n unaligned bytes on a byte-by-byte basis. */
14704 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
14705 QImode, 1, end_0_label);
14707 /* Increment the address. */
14709 emit_insn (gen_adddi3 (out, out, const1_rtx));
14711 emit_insn (gen_addsi3 (out, out, const1_rtx));
14713 /* Not needed with an alignment of 2. */
14716 emit_label (align_2_label);
14718 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
14722 emit_insn (gen_adddi3 (out, out, const1_rtx));
14724 emit_insn (gen_addsi3 (out, out, const1_rtx));
14726 emit_label (align_3_label);
14729 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
14733 emit_insn (gen_adddi3 (out, out, const1_rtx));
14735 emit_insn (gen_addsi3 (out, out, const1_rtx));
14738 /* Generate a loop to check 4 bytes at a time. It is not a good idea to
14739 align this loop. It only yields larger programs and does not help to
14741 emit_label (align_4_label);
14743 mem = change_address (src, SImode, out);
14744 emit_move_insn (scratch, mem);
14746 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
14748 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
14750 /* This formula yields a nonzero result iff one of the bytes is zero.
14751 This saves three branches inside the loop and many cycles. */
14753 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
14754 emit_insn (gen_one_cmplsi2 (scratch, scratch));
14755 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
14756 emit_insn (gen_andsi3 (tmpreg, tmpreg,
14757 gen_int_mode (0x80808080, SImode)));
14758 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
14763 rtx reg = gen_reg_rtx (SImode);
14764 rtx reg2 = gen_reg_rtx (Pmode);
14765 emit_move_insn (reg, tmpreg);
14766 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
14768 /* If zero is not in the first two bytes, move two bytes forward. */
14769 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
14770 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
14771 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
14772 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
14773 gen_rtx_IF_THEN_ELSE (SImode, tmp,
14776 /* Emit lea manually to avoid clobbering of flags. */
14777 emit_insn (gen_rtx_SET (SImode, reg2,
14778 gen_rtx_PLUS (Pmode, out, const2_rtx)));
14780 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
14781 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
14782 emit_insn (gen_rtx_SET (VOIDmode, out,
14783 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
14790 rtx end_2_label = gen_label_rtx ();
14791 /* Is zero in the first two bytes? */
14793 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
14794 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
14795 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
14796 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
14797 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
14799 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
14800 JUMP_LABEL (tmp) = end_2_label;
14802 /* Not in the first two. Move two bytes forward. */
14803 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
14805 emit_insn (gen_adddi3 (out, out, const2_rtx));
14807 emit_insn (gen_addsi3 (out, out, const2_rtx));
14809 emit_label (end_2_label);
14813 /* Avoid branch in fixing the byte. */
14814 tmpreg = gen_lowpart (QImode, tmpreg);
14815 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
14816 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, FLAGS_REG), const0_rtx);
14818 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
14820 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));
14822 emit_label (end_0_label);
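#if 0
/* Illustrative host-C equivalent of the zero-byte test emitted above (not
   part of GCC; has_zero_byte is a hypothetical name).  The result is nonzero
   iff some byte of X is zero: for x == 0x41414100 it yields 0x00000080,
   while for x == 0x41414141 it yields 0.  */
static unsigned int
has_zero_byte (unsigned int x)
{
  return (x - 0x01010101U) & ~x & 0x80808080U;
}
#endif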
14825 /* Expand strlen. */
14828 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
14830 rtx addr, scratch1, scratch2, scratch3, scratch4;
14832 /* The generic case of the strlen expander is long. Avoid expanding
14833 it unless TARGET_INLINE_ALL_STRINGOPS. */
14835 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
14836 && !TARGET_INLINE_ALL_STRINGOPS
14838 && (!CONST_INT_P (align) || INTVAL (align) < 4))
14841 addr = force_reg (Pmode, XEXP (src, 0));
14842 scratch1 = gen_reg_rtx (Pmode);
14844 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
14847 /* Well it seems that some optimizer does not combine a call like
14848 foo(strlen(bar), strlen(bar));
14849 when the move and the subtraction are done here. It does calculate
14850 the length just once when these instructions are done inside of
14851 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
14852 often used and I use one fewer register for the lifetime of
14853 output_strlen_unroll(), this is better. */
14855 emit_move_insn (out, addr);
14857 ix86_expand_strlensi_unroll_1 (out, src, align);
14859 /* strlensi_unroll_1 returns the address of the zero at the end of
14860 the string, like memchr(), so compute the length by subtracting
14861 the start address. */
14863 emit_insn (gen_subdi3 (out, out, addr));
14865 emit_insn (gen_subsi3 (out, out, addr));
14870 scratch2 = gen_reg_rtx (Pmode);
14871 scratch3 = gen_reg_rtx (Pmode);
14872 scratch4 = force_reg (Pmode, constm1_rtx);
14874 emit_move_insn (scratch3, addr);
14875 eoschar = force_reg (QImode, eoschar);
14877 src = replace_equiv_address_nv (src, scratch3);
14879 /* If .md starts supporting :P, this can be done in .md. */
14880 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
14881 scratch4), UNSPEC_SCAS);
14882 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
14885 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
14886 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
14890 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
14891 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
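/* Illustrative arithmetic: SCRATCH4 starts at -1 and `repnz scasb' scans
   LEN + 1 bytes (including the terminator), decrementing the count register
   once per byte, so it ends at -(LEN + 2).  The NOT and add of -1 above then
   recover ~(-(LEN + 2)) - 1 == LEN.  */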
14897 /* For a given symbol (function), construct code to compute the address of its
14898 PLT entry in the large x86-64 PIC model. */
14900 construct_plt_address (rtx symbol)
14902 rtx tmp = gen_reg_rtx (Pmode);
14903 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
14905 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
14906 gcc_assert (ix86_cmodel == CM_LARGE_PIC);
14908 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
14909 emit_insn (gen_adddi3 (tmp, tmp, pic_offset_table_rtx));
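/* I.e. tmp = <symbol>@PLTOFF plus the PIC base register: the absolute
   address of the symbol's PLT entry.  The large model cannot assume that
   this address fits in a 32-bit displacement (illustrative summary).  */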
14914 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
14915 rtx callarg2 ATTRIBUTE_UNUSED,
14916 rtx pop, int sibcall)
14918 rtx use = NULL, call;
14920 if (pop == const0_rtx)
14922 gcc_assert (!TARGET_64BIT || !pop);
14924 if (TARGET_MACHO && !TARGET_64BIT)
14927 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
14928 fnaddr = machopic_indirect_call_target (fnaddr);
14933 /* Static functions and indirect calls don't need the pic register. */
14934 if (flag_pic && (!TARGET_64BIT || ix86_cmodel == CM_LARGE_PIC)
14935 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
14936 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
14937 use_reg (&use, pic_offset_table_rtx);
14940 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
14942 rtx al = gen_rtx_REG (QImode, 0);
14943 emit_move_insn (al, callarg2);
14944 use_reg (&use, al);
14947 if (ix86_cmodel == CM_LARGE_PIC
14948 && GET_CODE (fnaddr) == MEM
14949 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
14950 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
14951 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
14952 else if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
14954 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
14955 fnaddr = gen_rtx_MEM (QImode, fnaddr);
14957 if (sibcall && TARGET_64BIT
14958 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
14961 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
14962 fnaddr = gen_rtx_REG (Pmode, R11_REG);
14963 emit_move_insn (fnaddr, addr);
14964 fnaddr = gen_rtx_MEM (QImode, fnaddr);
14967 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
14969 call = gen_rtx_SET (VOIDmode, retval, call);
14972 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
14973 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
14974 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
14977 call = emit_call_insn (call);
14979 CALL_INSN_FUNCTION_USAGE (call) = use;
14983 /* Clear stack slot assignments remembered from previous functions.
14984 This is called from INIT_EXPANDERS once before RTL is emitted for each
14987 static struct machine_function *
14988 ix86_init_machine_status (void)
14990 struct machine_function *f;
14992 f = ggc_alloc_cleared (sizeof (struct machine_function));
14993 f->use_fast_prologue_epilogue_nregs = -1;
14994 f->tls_descriptor_call_expanded_p = 0;
14999 /* Return a MEM corresponding to a stack slot with mode MODE.
15000 Allocate a new slot if necessary.
15002 The RTL for a function can have several slots available: N is
15003 which slot to use. */
15006 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
15008 struct stack_local_entry *s;
15010 gcc_assert (n < MAX_386_STACK_LOCALS);
15012 for (s = ix86_stack_locals; s; s = s->next)
15013 if (s->mode == mode && s->n == n)
15014 return copy_rtx (s->rtl);
15016 s = (struct stack_local_entry *)
15017 ggc_alloc (sizeof (struct stack_local_entry));
15020 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
15022 s->next = ix86_stack_locals;
15023 ix86_stack_locals = s;
15027 /* Construct the SYMBOL_REF for the tls_get_addr function. */
15029 static GTY(()) rtx ix86_tls_symbol;
15031 ix86_tls_get_addr (void)
15034 if (!ix86_tls_symbol)
15036 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
15037 (TARGET_ANY_GNU_TLS
15039 ? "___tls_get_addr"
15040 : "__tls_get_addr");
15043 return ix86_tls_symbol;
15046 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
15048 static GTY(()) rtx ix86_tls_module_base_symbol;
15050 ix86_tls_module_base (void)
15053 if (!ix86_tls_module_base_symbol)
15055 ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode,
15056 "_TLS_MODULE_BASE_");
15057 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
15058 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
15061 return ix86_tls_module_base_symbol;
15064 /* Calculate the length of the memory address in the instruction
15065 encoding. Does not include the one-byte modrm, opcode, or prefix. */
15068 memory_address_length (rtx addr)
15070 struct ix86_address parts;
15071 rtx base, index, disp;
15075 if (GET_CODE (addr) == PRE_DEC
15076 || GET_CODE (addr) == POST_INC
15077 || GET_CODE (addr) == PRE_MODIFY
15078 || GET_CODE (addr) == POST_MODIFY)
15081 ok = ix86_decompose_address (addr, &parts);
15084 if (parts.base && GET_CODE (parts.base) == SUBREG)
15085 parts.base = SUBREG_REG (parts.base);
15086 if (parts.index && GET_CODE (parts.index) == SUBREG)
15087 parts.index = SUBREG_REG (parts.index);
15090 index = parts.index;
15095 - esp as the base always wants an index,
15096 - ebp as the base always wants a displacement. */
15098 /* Register Indirect. */
15099 if (base && !index && !disp)
15101 /* esp (for its index) and ebp (for its displacement) need
15102 the two-byte modrm form. */
15103 if (addr == stack_pointer_rtx
15104 || addr == arg_pointer_rtx
15105 || addr == frame_pointer_rtx
15106 || addr == hard_frame_pointer_rtx)
15110 /* Direct Addressing. */
15111 else if (disp && !base && !index)
15116 /* Find the length of the displacement constant. */
15119 if (base && satisfies_constraint_K (disp))
15124 /* ebp always wants a displacement. */
15125 else if (base == hard_frame_pointer_rtx)
15128 /* An index requires the two-byte modrm form.... */
15130 /* ...like esp, which always wants an index. */
15131 || base == stack_pointer_rtx
15132 || base == arg_pointer_rtx
15133 || base == frame_pointer_rtx)
15140 /* Compute the default value for the "length_immediate" attribute. When SHORTFORM
15141 is set, expect that the insn has an 8-bit immediate alternative. */
15143 ix86_attr_length_immediate_default (rtx insn, int shortform)
15147 extract_insn_cached (insn);
15148 for (i = recog_data.n_operands - 1; i >= 0; --i)
15149 if (CONSTANT_P (recog_data.operand[i]))
15152 if (shortform && satisfies_constraint_K (recog_data.operand[i]))
15156 switch (get_attr_mode (insn))
15167 /* Immediates for DImode instructions are encoded as 32-bit sign-extended values. */
15172 fatal_insn ("unknown insn mode", insn);
15178 /* Compute default value for "length_address" attribute. */
15180 ix86_attr_length_address_default (rtx insn)
15184 if (get_attr_type (insn) == TYPE_LEA)
15186 rtx set = PATTERN (insn);
15188 if (GET_CODE (set) == PARALLEL)
15189 set = XVECEXP (set, 0, 0);
15191 gcc_assert (GET_CODE (set) == SET);
15193 return memory_address_length (SET_SRC (set));
15196 extract_insn_cached (insn);
15197 for (i = recog_data.n_operands - 1; i >= 0; --i)
15198 if (MEM_P (recog_data.operand[i]))
15200 return memory_address_length (XEXP (recog_data.operand[i], 0));
15206 /* Return the maximum number of instructions a cpu can issue. */
15209 ix86_issue_rate (void)
15213 case PROCESSOR_PENTIUM:
15217 case PROCESSOR_PENTIUMPRO:
15218 case PROCESSOR_PENTIUM4:
15219 case PROCESSOR_ATHLON:
15221 case PROCESSOR_AMDFAM10:
15222 case PROCESSOR_NOCONA:
15223 case PROCESSOR_GENERIC32:
15224 case PROCESSOR_GENERIC64:
15227 case PROCESSOR_CORE2:
15235 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads the flags set
15236 by DEP_INSN and nothing else set by DEP_INSN. */
15239 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
15243 /* Simplify the test for uninteresting insns. */
15244 if (insn_type != TYPE_SETCC
15245 && insn_type != TYPE_ICMOV
15246 && insn_type != TYPE_FCMOV
15247 && insn_type != TYPE_IBR)
15250 if ((set = single_set (dep_insn)) != 0)
15252 set = SET_DEST (set);
15255 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
15256 && XVECLEN (PATTERN (dep_insn), 0) == 2
15257 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
15258 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
15260 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
15261 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
15266 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
15269 /* This test is true if the dependent insn reads the flags but
15270 not any other potentially set register. */
15271 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
15274 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
15280 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
15281 address with operands set by DEP_INSN. */
15284 ix86_agi_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
15288 if (insn_type == TYPE_LEA
15291 addr = PATTERN (insn);
15293 if (GET_CODE (addr) == PARALLEL)
15294 addr = XVECEXP (addr, 0, 0);
15296 gcc_assert (GET_CODE (addr) == SET);
15298 addr = SET_SRC (addr);
15303 extract_insn_cached (insn);
15304 for (i = recog_data.n_operands - 1; i >= 0; --i)
15305 if (MEM_P (recog_data.operand[i]))
15307 addr = XEXP (recog_data.operand[i], 0);
15314 return modified_in_p (addr, dep_insn);
15318 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
15320 enum attr_type insn_type, dep_insn_type;
15321 enum attr_memory memory;
15323 int dep_insn_code_number;
15325 /* Anti and output dependencies have zero cost on all CPUs. */
15326 if (REG_NOTE_KIND (link) != 0)
15329 dep_insn_code_number = recog_memoized (dep_insn);
15331 /* If we can't recognize the insns, we can't really do anything. */
15332 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
15335 insn_type = get_attr_type (insn);
15336 dep_insn_type = get_attr_type (dep_insn);
15340 case PROCESSOR_PENTIUM:
15341 /* Address Generation Interlock adds a cycle of latency. */
15342 if (ix86_agi_dependent (insn, dep_insn, insn_type))
15345 /* ??? Compares pair with jump/setcc. */
15346 if (ix86_flags_dependent (insn, dep_insn, insn_type))
15349 /* Floating point stores require the value to be ready one cycle earlier. */
15350 if (insn_type == TYPE_FMOV
15351 && get_attr_memory (insn) == MEMORY_STORE
15352 && !ix86_agi_dependent (insn, dep_insn, insn_type))
15356 case PROCESSOR_PENTIUMPRO:
15357 memory = get_attr_memory (insn);
15359 /* INT->FP conversion is expensive. */
15360 if (get_attr_fp_int_src (dep_insn))
15363 /* There is one cycle extra latency between an FP op and a store. */
15364 if (insn_type == TYPE_FMOV
15365 && (set = single_set (dep_insn)) != NULL_RTX
15366 && (set2 = single_set (insn)) != NULL_RTX
15367 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
15368 && MEM_P (SET_DEST (set2)))
15371 /* Show the ability of the reorder buffer to hide the latency of a load by
15372 executing it in parallel with the previous instruction, in case the
15373 previous instruction is not needed to compute the address. */
15374 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
15375 && !ix86_agi_dependent (insn, dep_insn, insn_type))
15377 /* Claim moves to take one cycle, as the core can issue one load
15378 at a time and the next load can start a cycle later. */
15379 if (dep_insn_type == TYPE_IMOV
15380 || dep_insn_type == TYPE_FMOV)
15388 memory = get_attr_memory (insn);
15390 /* The esp dependency is resolved before the instruction is really
15392 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
15393 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
15396 /* INT->FP conversion is expensive. */
15397 if (get_attr_fp_int_src (dep_insn))
15400 /* Show the ability of the reorder buffer to hide the latency of a load by
15401 executing it in parallel with the previous instruction, in case the
15402 previous instruction is not needed to compute the address. */
15403 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
15404 && !ix86_agi_dependent (insn, dep_insn, insn_type))
15406 /* Claim moves to take one cycle, as the core can issue one load
15407 at a time and the next load can start a cycle later. */
15408 if (dep_insn_type == TYPE_IMOV
15409 || dep_insn_type == TYPE_FMOV)
15418 case PROCESSOR_ATHLON:
15420 case PROCESSOR_AMDFAM10:
15421 case PROCESSOR_GENERIC32:
15422 case PROCESSOR_GENERIC64:
15423 memory = get_attr_memory (insn);
15425 /* Show the ability of the reorder buffer to hide the latency of a load by
15426 executing it in parallel with the previous instruction, in case the
15427 previous instruction is not needed to compute the address. */
15428 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
15429 && !ix86_agi_dependent (insn, dep_insn, insn_type))
15431 enum attr_unit unit = get_attr_unit (insn);
15434 /* Because of the difference between the length of integer and
15435 floating unit pipeline preparation stages, the memory operands
15436 for floating point are cheaper.
15438 ??? For Athlon the difference is most probably 2. */
15439 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
15442 loadcost = TARGET_ATHLON ? 2 : 0;
15444 if (cost >= loadcost)
15457 /* How many alternative schedules to try. This should be as wide as the
15458 scheduling freedom in the DFA, but no wider. Making this value too
15459 large results in extra work for the scheduler. */
15462 ia32_multipass_dfa_lookahead (void)
15464 if (ix86_tune == PROCESSOR_PENTIUM)
15467 if (ix86_tune == PROCESSOR_PENTIUMPRO
15468 || ix86_tune == PROCESSOR_K6)
15476 /* Compute the alignment given to a constant that is being placed in memory.
15477 EXP is the constant and ALIGN is the alignment that the object would
15479 The value of this function is used instead of that alignment to align
15483 ix86_constant_alignment (tree exp, int align)
15485 if (TREE_CODE (exp) == REAL_CST)
15487 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
15489 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
15492 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
15493 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
15494 return BITS_PER_WORD;
15499 /* Compute the alignment for a static variable.
15500 TYPE is the data type, and ALIGN is the alignment that
15501 the object would ordinarily have. The value of this function is used
15502 instead of that alignment to align the object. */
15505 ix86_data_alignment (tree type, int align)
15507 int max_align = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
15509 if (AGGREGATE_TYPE_P (type)
15510 && TYPE_SIZE (type)
15511 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
15512 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
15513 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
15514 && align < max_align)
15517 /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
15518 to a 16-byte boundary. */
15521 if (AGGREGATE_TYPE_P (type)
15522 && TYPE_SIZE (type)
15523 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
15524 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
15525 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
15529 if (TREE_CODE (type) == ARRAY_TYPE)
15531 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
15533 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
15536 else if (TREE_CODE (type) == COMPLEX_TYPE)
15539 if (TYPE_MODE (type) == DCmode && align < 64)
15541 if (TYPE_MODE (type) == XCmode && align < 128)
15544 else if ((TREE_CODE (type) == RECORD_TYPE
15545 || TREE_CODE (type) == UNION_TYPE
15546 || TREE_CODE (type) == QUAL_UNION_TYPE)
15547 && TYPE_FIELDS (type))
15549 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
15551 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
15554 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
15555 || TREE_CODE (type) == INTEGER_TYPE)
15557 if (TYPE_MODE (type) == DFmode && align < 64)
15559 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
15566 /* Compute the alignment for a local variable.
15567 TYPE is the data type, and ALIGN is the alignment that
15568 the object would ordinarily have. The value of this macro is used
15569 instead of that alignment to align the object. */
15572 ix86_local_alignment (tree type, int align)
15574 /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
15575 to a 16-byte boundary. */
15578 if (AGGREGATE_TYPE_P (type)
15579 && TYPE_SIZE (type)
15580 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
15581 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
15582 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
15585 if (TREE_CODE (type) == ARRAY_TYPE)
15587 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
15589 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
15592 else if (TREE_CODE (type) == COMPLEX_TYPE)
15594 if (TYPE_MODE (type) == DCmode && align < 64)
15596 if (TYPE_MODE (type) == XCmode && align < 128)
15599 else if ((TREE_CODE (type) == RECORD_TYPE
15600 || TREE_CODE (type) == UNION_TYPE
15601 || TREE_CODE (type) == QUAL_UNION_TYPE)
15602 && TYPE_FIELDS (type))
15604 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
15606 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
15609 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
15610 || TREE_CODE (type) == INTEGER_TYPE)
15613 if (TYPE_MODE (type) == DFmode && align < 64)
15615 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
/* Emit RTL insns to initialize the variable parts of a trampoline.
   FNADDR is an RTX for the address of the function's pure code.
   CXT is an RTX for the static chain value for the function.  */

void
x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
{
  if (!TARGET_64BIT)
    {
      /* Compute offset from the end of the jmp to the target function.  */
      rtx disp = expand_binop (SImode, sub_optab, fnaddr,
			       plus_constant (tramp, 10),
			       NULL_RTX, 1, OPTAB_DIRECT);
      emit_move_insn (gen_rtx_MEM (QImode, tramp),
		      gen_int_mode (0xb9, QImode));
      emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
      emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
		      gen_int_mode (0xe9, QImode));
      emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
    }
  else
    {
      int offset = 0;

      /* Try to load address using shorter movl instead of movabs.
	 We may want to support movq for kernel mode, but kernel does not use
	 trampolines at the moment.  */
      if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
	{
	  fnaddr = copy_to_mode_reg (DImode, fnaddr);
	  emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
			  gen_int_mode (0xbb41, HImode));
	  emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
			  gen_lowpart (SImode, fnaddr));
	  offset += 6;
	}
      else
	{
	  emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
			  gen_int_mode (0xbb49, HImode));
	  emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
			  fnaddr);
	  offset += 10;
	}

      /* Load static chain using movabs to r10.  */
      emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
		      gen_int_mode (0xba49, HImode));
      emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
		      cxt);
      offset += 10;

      /* Jump to r11.  */
      emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
		      gen_int_mode (0xff49, HImode));
      emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset + 2)),
		      gen_int_mode (0xe3, QImode));
      offset += 3;

      gcc_assert (offset <= TRAMPOLINE_SIZE);
    }

#ifdef ENABLE_EXECUTE_STACK
  emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
		     LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
#endif
}
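/* For reference (an illustrative decoding of the byte values stored
   above, not additional emitted code): the 32-bit trampoline is the
   10-byte sequence

       b9 <cxt:4>          movl  $CXT, %ecx
       e9 <disp:4>         jmp   FNADDR   (rel32 from the end of the insn)

   while the 64-bit variant loads FNADDR into %r11 (41 bb imm32, or
   49 bb imm64 for the movabs form), loads CXT into %r10 (49 ba imm64)
   and finishes with 49 ff e3, i.e. jmp *%r11.  The HImode stores write
   little-endian byte pairs, so e.g. 0xbb41 appears in memory as
   41 bb.  */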
/* Codes for all the SSE/MMX builtins.  */
enum ix86_builtins
{
15687 IX86_BUILTIN_ADDPS,
15688 IX86_BUILTIN_ADDSS,
15689 IX86_BUILTIN_DIVPS,
15690 IX86_BUILTIN_DIVSS,
15691 IX86_BUILTIN_MULPS,
15692 IX86_BUILTIN_MULSS,
15693 IX86_BUILTIN_SUBPS,
15694 IX86_BUILTIN_SUBSS,
15696 IX86_BUILTIN_CMPEQPS,
15697 IX86_BUILTIN_CMPLTPS,
15698 IX86_BUILTIN_CMPLEPS,
15699 IX86_BUILTIN_CMPGTPS,
15700 IX86_BUILTIN_CMPGEPS,
15701 IX86_BUILTIN_CMPNEQPS,
15702 IX86_BUILTIN_CMPNLTPS,
15703 IX86_BUILTIN_CMPNLEPS,
15704 IX86_BUILTIN_CMPNGTPS,
15705 IX86_BUILTIN_CMPNGEPS,
15706 IX86_BUILTIN_CMPORDPS,
15707 IX86_BUILTIN_CMPUNORDPS,
15708 IX86_BUILTIN_CMPEQSS,
15709 IX86_BUILTIN_CMPLTSS,
15710 IX86_BUILTIN_CMPLESS,
15711 IX86_BUILTIN_CMPNEQSS,
15712 IX86_BUILTIN_CMPNLTSS,
15713 IX86_BUILTIN_CMPNLESS,
15714 IX86_BUILTIN_CMPNGTSS,
15715 IX86_BUILTIN_CMPNGESS,
15716 IX86_BUILTIN_CMPORDSS,
15717 IX86_BUILTIN_CMPUNORDSS,
15719 IX86_BUILTIN_COMIEQSS,
15720 IX86_BUILTIN_COMILTSS,
15721 IX86_BUILTIN_COMILESS,
15722 IX86_BUILTIN_COMIGTSS,
15723 IX86_BUILTIN_COMIGESS,
15724 IX86_BUILTIN_COMINEQSS,
15725 IX86_BUILTIN_UCOMIEQSS,
15726 IX86_BUILTIN_UCOMILTSS,
15727 IX86_BUILTIN_UCOMILESS,
15728 IX86_BUILTIN_UCOMIGTSS,
15729 IX86_BUILTIN_UCOMIGESS,
15730 IX86_BUILTIN_UCOMINEQSS,
15732 IX86_BUILTIN_CVTPI2PS,
15733 IX86_BUILTIN_CVTPS2PI,
15734 IX86_BUILTIN_CVTSI2SS,
15735 IX86_BUILTIN_CVTSI642SS,
15736 IX86_BUILTIN_CVTSS2SI,
15737 IX86_BUILTIN_CVTSS2SI64,
15738 IX86_BUILTIN_CVTTPS2PI,
15739 IX86_BUILTIN_CVTTSS2SI,
15740 IX86_BUILTIN_CVTTSS2SI64,
15742 IX86_BUILTIN_MAXPS,
15743 IX86_BUILTIN_MAXSS,
15744 IX86_BUILTIN_MINPS,
15745 IX86_BUILTIN_MINSS,
15747 IX86_BUILTIN_LOADUPS,
15748 IX86_BUILTIN_STOREUPS,
15749 IX86_BUILTIN_MOVSS,
15751 IX86_BUILTIN_MOVHLPS,
15752 IX86_BUILTIN_MOVLHPS,
15753 IX86_BUILTIN_LOADHPS,
15754 IX86_BUILTIN_LOADLPS,
15755 IX86_BUILTIN_STOREHPS,
15756 IX86_BUILTIN_STORELPS,
15758 IX86_BUILTIN_MASKMOVQ,
15759 IX86_BUILTIN_MOVMSKPS,
15760 IX86_BUILTIN_PMOVMSKB,
15762 IX86_BUILTIN_MOVNTPS,
15763 IX86_BUILTIN_MOVNTQ,
15765 IX86_BUILTIN_LOADDQU,
15766 IX86_BUILTIN_STOREDQU,
15768 IX86_BUILTIN_PACKSSWB,
15769 IX86_BUILTIN_PACKSSDW,
15770 IX86_BUILTIN_PACKUSWB,
15772 IX86_BUILTIN_PADDB,
15773 IX86_BUILTIN_PADDW,
15774 IX86_BUILTIN_PADDD,
15775 IX86_BUILTIN_PADDQ,
15776 IX86_BUILTIN_PADDSB,
15777 IX86_BUILTIN_PADDSW,
15778 IX86_BUILTIN_PADDUSB,
15779 IX86_BUILTIN_PADDUSW,
15780 IX86_BUILTIN_PSUBB,
15781 IX86_BUILTIN_PSUBW,
15782 IX86_BUILTIN_PSUBD,
15783 IX86_BUILTIN_PSUBQ,
15784 IX86_BUILTIN_PSUBSB,
15785 IX86_BUILTIN_PSUBSW,
15786 IX86_BUILTIN_PSUBUSB,
15787 IX86_BUILTIN_PSUBUSW,
15790 IX86_BUILTIN_PANDN,
15794 IX86_BUILTIN_PAVGB,
15795 IX86_BUILTIN_PAVGW,
15797 IX86_BUILTIN_PCMPEQB,
15798 IX86_BUILTIN_PCMPEQW,
15799 IX86_BUILTIN_PCMPEQD,
15800 IX86_BUILTIN_PCMPGTB,
15801 IX86_BUILTIN_PCMPGTW,
15802 IX86_BUILTIN_PCMPGTD,
15804 IX86_BUILTIN_PMADDWD,
15806 IX86_BUILTIN_PMAXSW,
15807 IX86_BUILTIN_PMAXUB,
15808 IX86_BUILTIN_PMINSW,
15809 IX86_BUILTIN_PMINUB,
15811 IX86_BUILTIN_PMULHUW,
15812 IX86_BUILTIN_PMULHW,
15813 IX86_BUILTIN_PMULLW,
15815 IX86_BUILTIN_PSADBW,
15816 IX86_BUILTIN_PSHUFW,
15818 IX86_BUILTIN_PSLLW,
15819 IX86_BUILTIN_PSLLD,
15820 IX86_BUILTIN_PSLLQ,
15821 IX86_BUILTIN_PSRAW,
15822 IX86_BUILTIN_PSRAD,
15823 IX86_BUILTIN_PSRLW,
15824 IX86_BUILTIN_PSRLD,
15825 IX86_BUILTIN_PSRLQ,
15826 IX86_BUILTIN_PSLLWI,
15827 IX86_BUILTIN_PSLLDI,
15828 IX86_BUILTIN_PSLLQI,
15829 IX86_BUILTIN_PSRAWI,
15830 IX86_BUILTIN_PSRADI,
15831 IX86_BUILTIN_PSRLWI,
15832 IX86_BUILTIN_PSRLDI,
15833 IX86_BUILTIN_PSRLQI,
15835 IX86_BUILTIN_PUNPCKHBW,
15836 IX86_BUILTIN_PUNPCKHWD,
15837 IX86_BUILTIN_PUNPCKHDQ,
15838 IX86_BUILTIN_PUNPCKLBW,
15839 IX86_BUILTIN_PUNPCKLWD,
15840 IX86_BUILTIN_PUNPCKLDQ,
15842 IX86_BUILTIN_SHUFPS,
15844 IX86_BUILTIN_RCPPS,
15845 IX86_BUILTIN_RCPSS,
15846 IX86_BUILTIN_RSQRTPS,
15847 IX86_BUILTIN_RSQRTSS,
15848 IX86_BUILTIN_SQRTPS,
15849 IX86_BUILTIN_SQRTSS,
15851 IX86_BUILTIN_UNPCKHPS,
15852 IX86_BUILTIN_UNPCKLPS,
15854 IX86_BUILTIN_ANDPS,
15855 IX86_BUILTIN_ANDNPS,
15857 IX86_BUILTIN_XORPS,
15860 IX86_BUILTIN_LDMXCSR,
15861 IX86_BUILTIN_STMXCSR,
15862 IX86_BUILTIN_SFENCE,
15864 /* 3DNow! Original */
15865 IX86_BUILTIN_FEMMS,
15866 IX86_BUILTIN_PAVGUSB,
15867 IX86_BUILTIN_PF2ID,
15868 IX86_BUILTIN_PFACC,
15869 IX86_BUILTIN_PFADD,
15870 IX86_BUILTIN_PFCMPEQ,
15871 IX86_BUILTIN_PFCMPGE,
15872 IX86_BUILTIN_PFCMPGT,
15873 IX86_BUILTIN_PFMAX,
15874 IX86_BUILTIN_PFMIN,
15875 IX86_BUILTIN_PFMUL,
15876 IX86_BUILTIN_PFRCP,
15877 IX86_BUILTIN_PFRCPIT1,
15878 IX86_BUILTIN_PFRCPIT2,
15879 IX86_BUILTIN_PFRSQIT1,
15880 IX86_BUILTIN_PFRSQRT,
15881 IX86_BUILTIN_PFSUB,
15882 IX86_BUILTIN_PFSUBR,
15883 IX86_BUILTIN_PI2FD,
15884 IX86_BUILTIN_PMULHRW,
15886 /* 3DNow! Athlon Extensions */
15887 IX86_BUILTIN_PF2IW,
15888 IX86_BUILTIN_PFNACC,
15889 IX86_BUILTIN_PFPNACC,
15890 IX86_BUILTIN_PI2FW,
15891 IX86_BUILTIN_PSWAPDSI,
15892 IX86_BUILTIN_PSWAPDSF,
15895 IX86_BUILTIN_ADDPD,
15896 IX86_BUILTIN_ADDSD,
15897 IX86_BUILTIN_DIVPD,
15898 IX86_BUILTIN_DIVSD,
15899 IX86_BUILTIN_MULPD,
15900 IX86_BUILTIN_MULSD,
15901 IX86_BUILTIN_SUBPD,
15902 IX86_BUILTIN_SUBSD,
15904 IX86_BUILTIN_CMPEQPD,
15905 IX86_BUILTIN_CMPLTPD,
15906 IX86_BUILTIN_CMPLEPD,
15907 IX86_BUILTIN_CMPGTPD,
15908 IX86_BUILTIN_CMPGEPD,
15909 IX86_BUILTIN_CMPNEQPD,
15910 IX86_BUILTIN_CMPNLTPD,
15911 IX86_BUILTIN_CMPNLEPD,
15912 IX86_BUILTIN_CMPNGTPD,
15913 IX86_BUILTIN_CMPNGEPD,
15914 IX86_BUILTIN_CMPORDPD,
15915 IX86_BUILTIN_CMPUNORDPD,
15916 IX86_BUILTIN_CMPEQSD,
15917 IX86_BUILTIN_CMPLTSD,
15918 IX86_BUILTIN_CMPLESD,
15919 IX86_BUILTIN_CMPNEQSD,
15920 IX86_BUILTIN_CMPNLTSD,
15921 IX86_BUILTIN_CMPNLESD,
15922 IX86_BUILTIN_CMPORDSD,
15923 IX86_BUILTIN_CMPUNORDSD,
15925 IX86_BUILTIN_COMIEQSD,
15926 IX86_BUILTIN_COMILTSD,
15927 IX86_BUILTIN_COMILESD,
15928 IX86_BUILTIN_COMIGTSD,
15929 IX86_BUILTIN_COMIGESD,
15930 IX86_BUILTIN_COMINEQSD,
15931 IX86_BUILTIN_UCOMIEQSD,
15932 IX86_BUILTIN_UCOMILTSD,
15933 IX86_BUILTIN_UCOMILESD,
15934 IX86_BUILTIN_UCOMIGTSD,
15935 IX86_BUILTIN_UCOMIGESD,
15936 IX86_BUILTIN_UCOMINEQSD,
15938 IX86_BUILTIN_MAXPD,
15939 IX86_BUILTIN_MAXSD,
15940 IX86_BUILTIN_MINPD,
15941 IX86_BUILTIN_MINSD,
15943 IX86_BUILTIN_ANDPD,
15944 IX86_BUILTIN_ANDNPD,
15946 IX86_BUILTIN_XORPD,
15948 IX86_BUILTIN_SQRTPD,
15949 IX86_BUILTIN_SQRTSD,
15951 IX86_BUILTIN_UNPCKHPD,
15952 IX86_BUILTIN_UNPCKLPD,
15954 IX86_BUILTIN_SHUFPD,
15956 IX86_BUILTIN_LOADUPD,
15957 IX86_BUILTIN_STOREUPD,
15958 IX86_BUILTIN_MOVSD,
15960 IX86_BUILTIN_LOADHPD,
15961 IX86_BUILTIN_LOADLPD,
15963 IX86_BUILTIN_CVTDQ2PD,
15964 IX86_BUILTIN_CVTDQ2PS,
15966 IX86_BUILTIN_CVTPD2DQ,
15967 IX86_BUILTIN_CVTPD2PI,
15968 IX86_BUILTIN_CVTPD2PS,
15969 IX86_BUILTIN_CVTTPD2DQ,
15970 IX86_BUILTIN_CVTTPD2PI,
15972 IX86_BUILTIN_CVTPI2PD,
15973 IX86_BUILTIN_CVTSI2SD,
15974 IX86_BUILTIN_CVTSI642SD,
15976 IX86_BUILTIN_CVTSD2SI,
15977 IX86_BUILTIN_CVTSD2SI64,
15978 IX86_BUILTIN_CVTSD2SS,
15979 IX86_BUILTIN_CVTSS2SD,
15980 IX86_BUILTIN_CVTTSD2SI,
15981 IX86_BUILTIN_CVTTSD2SI64,
15983 IX86_BUILTIN_CVTPS2DQ,
15984 IX86_BUILTIN_CVTPS2PD,
15985 IX86_BUILTIN_CVTTPS2DQ,
15987 IX86_BUILTIN_MOVNTI,
15988 IX86_BUILTIN_MOVNTPD,
15989 IX86_BUILTIN_MOVNTDQ,
15992 IX86_BUILTIN_MASKMOVDQU,
15993 IX86_BUILTIN_MOVMSKPD,
15994 IX86_BUILTIN_PMOVMSKB128,
15996 IX86_BUILTIN_PACKSSWB128,
15997 IX86_BUILTIN_PACKSSDW128,
15998 IX86_BUILTIN_PACKUSWB128,
16000 IX86_BUILTIN_PADDB128,
16001 IX86_BUILTIN_PADDW128,
16002 IX86_BUILTIN_PADDD128,
16003 IX86_BUILTIN_PADDQ128,
16004 IX86_BUILTIN_PADDSB128,
16005 IX86_BUILTIN_PADDSW128,
16006 IX86_BUILTIN_PADDUSB128,
16007 IX86_BUILTIN_PADDUSW128,
16008 IX86_BUILTIN_PSUBB128,
16009 IX86_BUILTIN_PSUBW128,
16010 IX86_BUILTIN_PSUBD128,
16011 IX86_BUILTIN_PSUBQ128,
16012 IX86_BUILTIN_PSUBSB128,
16013 IX86_BUILTIN_PSUBSW128,
16014 IX86_BUILTIN_PSUBUSB128,
16015 IX86_BUILTIN_PSUBUSW128,
16017 IX86_BUILTIN_PAND128,
16018 IX86_BUILTIN_PANDN128,
16019 IX86_BUILTIN_POR128,
16020 IX86_BUILTIN_PXOR128,
16022 IX86_BUILTIN_PAVGB128,
16023 IX86_BUILTIN_PAVGW128,
16025 IX86_BUILTIN_PCMPEQB128,
16026 IX86_BUILTIN_PCMPEQW128,
16027 IX86_BUILTIN_PCMPEQD128,
16028 IX86_BUILTIN_PCMPGTB128,
16029 IX86_BUILTIN_PCMPGTW128,
16030 IX86_BUILTIN_PCMPGTD128,
16032 IX86_BUILTIN_PMADDWD128,
16034 IX86_BUILTIN_PMAXSW128,
16035 IX86_BUILTIN_PMAXUB128,
16036 IX86_BUILTIN_PMINSW128,
16037 IX86_BUILTIN_PMINUB128,
16039 IX86_BUILTIN_PMULUDQ,
16040 IX86_BUILTIN_PMULUDQ128,
16041 IX86_BUILTIN_PMULHUW128,
16042 IX86_BUILTIN_PMULHW128,
16043 IX86_BUILTIN_PMULLW128,
16045 IX86_BUILTIN_PSADBW128,
16046 IX86_BUILTIN_PSHUFHW,
16047 IX86_BUILTIN_PSHUFLW,
16048 IX86_BUILTIN_PSHUFD,
16050 IX86_BUILTIN_PSLLDQI128,
16051 IX86_BUILTIN_PSLLWI128,
16052 IX86_BUILTIN_PSLLDI128,
16053 IX86_BUILTIN_PSLLQI128,
16054 IX86_BUILTIN_PSRAWI128,
16055 IX86_BUILTIN_PSRADI128,
16056 IX86_BUILTIN_PSRLDQI128,
16057 IX86_BUILTIN_PSRLWI128,
16058 IX86_BUILTIN_PSRLDI128,
16059 IX86_BUILTIN_PSRLQI128,
16061 IX86_BUILTIN_PSLLDQ128,
16062 IX86_BUILTIN_PSLLW128,
16063 IX86_BUILTIN_PSLLD128,
16064 IX86_BUILTIN_PSLLQ128,
16065 IX86_BUILTIN_PSRAW128,
16066 IX86_BUILTIN_PSRAD128,
16067 IX86_BUILTIN_PSRLW128,
16068 IX86_BUILTIN_PSRLD128,
16069 IX86_BUILTIN_PSRLQ128,
16071 IX86_BUILTIN_PUNPCKHBW128,
16072 IX86_BUILTIN_PUNPCKHWD128,
16073 IX86_BUILTIN_PUNPCKHDQ128,
16074 IX86_BUILTIN_PUNPCKHQDQ128,
16075 IX86_BUILTIN_PUNPCKLBW128,
16076 IX86_BUILTIN_PUNPCKLWD128,
16077 IX86_BUILTIN_PUNPCKLDQ128,
16078 IX86_BUILTIN_PUNPCKLQDQ128,
16080 IX86_BUILTIN_CLFLUSH,
16081 IX86_BUILTIN_MFENCE,
16082 IX86_BUILTIN_LFENCE,
16084 /* Prescott New Instructions. */
16085 IX86_BUILTIN_ADDSUBPS,
16086 IX86_BUILTIN_HADDPS,
16087 IX86_BUILTIN_HSUBPS,
16088 IX86_BUILTIN_MOVSHDUP,
16089 IX86_BUILTIN_MOVSLDUP,
16090 IX86_BUILTIN_ADDSUBPD,
16091 IX86_BUILTIN_HADDPD,
16092 IX86_BUILTIN_HSUBPD,
16093 IX86_BUILTIN_LDDQU,
16095 IX86_BUILTIN_MONITOR,
16096 IX86_BUILTIN_MWAIT,
16099 IX86_BUILTIN_PHADDW,
16100 IX86_BUILTIN_PHADDD,
16101 IX86_BUILTIN_PHADDSW,
16102 IX86_BUILTIN_PHSUBW,
16103 IX86_BUILTIN_PHSUBD,
16104 IX86_BUILTIN_PHSUBSW,
16105 IX86_BUILTIN_PMADDUBSW,
16106 IX86_BUILTIN_PMULHRSW,
16107 IX86_BUILTIN_PSHUFB,
16108 IX86_BUILTIN_PSIGNB,
16109 IX86_BUILTIN_PSIGNW,
16110 IX86_BUILTIN_PSIGND,
16111 IX86_BUILTIN_PALIGNR,
16112 IX86_BUILTIN_PABSB,
16113 IX86_BUILTIN_PABSW,
16114 IX86_BUILTIN_PABSD,
16116 IX86_BUILTIN_PHADDW128,
16117 IX86_BUILTIN_PHADDD128,
16118 IX86_BUILTIN_PHADDSW128,
16119 IX86_BUILTIN_PHSUBW128,
16120 IX86_BUILTIN_PHSUBD128,
16121 IX86_BUILTIN_PHSUBSW128,
16122 IX86_BUILTIN_PMADDUBSW128,
16123 IX86_BUILTIN_PMULHRSW128,
16124 IX86_BUILTIN_PSHUFB128,
16125 IX86_BUILTIN_PSIGNB128,
16126 IX86_BUILTIN_PSIGNW128,
16127 IX86_BUILTIN_PSIGND128,
16128 IX86_BUILTIN_PALIGNR128,
16129 IX86_BUILTIN_PABSB128,
16130 IX86_BUILTIN_PABSW128,
16131 IX86_BUILTIN_PABSD128,
16133 /* AMDFAM10 - SSE4A New Instructions. */
16134 IX86_BUILTIN_MOVNTSD,
16135 IX86_BUILTIN_MOVNTSS,
16136 IX86_BUILTIN_EXTRQI,
16137 IX86_BUILTIN_EXTRQ,
16138 IX86_BUILTIN_INSERTQI,
16139 IX86_BUILTIN_INSERTQ,
16141 IX86_BUILTIN_VEC_INIT_V2SI,
16142 IX86_BUILTIN_VEC_INIT_V4HI,
16143 IX86_BUILTIN_VEC_INIT_V8QI,
16144 IX86_BUILTIN_VEC_EXT_V2DF,
16145 IX86_BUILTIN_VEC_EXT_V2DI,
16146 IX86_BUILTIN_VEC_EXT_V4SF,
16147 IX86_BUILTIN_VEC_EXT_V4SI,
16148 IX86_BUILTIN_VEC_EXT_V8HI,
16149 IX86_BUILTIN_VEC_EXT_V2SI,
16150 IX86_BUILTIN_VEC_EXT_V4HI,
16151 IX86_BUILTIN_VEC_SET_V8HI,
  IX86_BUILTIN_VEC_SET_V4HI,

  IX86_BUILTIN_MAX
};
/* Table for the ix86 builtin decls.  */
static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];

/* Add an ix86 target builtin function with CODE, NAME and TYPE.  Do so
   only if the target_flags include one of MASK.  Stores the function decl
   in the ix86_builtins array.
   Returns the function decl, or NULL_TREE if the builtin was not added.  */

static inline tree
def_builtin (int mask, const char *name, tree type, enum ix86_builtins code)
{
  tree decl = NULL_TREE;

  if (mask & target_flags
      && (!(mask & MASK_64BIT) || TARGET_64BIT))
    {
      decl = add_builtin_function (name, type, code, BUILT_IN_MD,
				   NULL, NULL_TREE);
      ix86_builtins[(int) code] = decl;
    }

  return decl;
}
/* Like def_builtin, but also marks the function decl "const".  */

static inline tree
def_builtin_const (int mask, const char *name, tree type,
		   enum ix86_builtins code)
{
  tree decl = def_builtin (mask, name, type, code);
  if (decl)
    TREE_READONLY (decl) = 1;
  return decl;
}
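/* Usage sketch (the argument values are illustrative; the real calls
   are made from ix86_init_mmx_sse_builtins below):

     def_builtin_const (MASK_SSE, "__builtin_ia32_sqrtps",
			v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);

   registers the builtin and records its decl in
   ix86_builtins[IX86_BUILTIN_SQRTPS]; marking the decl TREE_READONLY
   lets the optimizers treat calls to it as "const".  When MASK is not
   enabled in target_flags, def_builtin returns NULL_TREE and nothing
   is marked.  */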
/* Bits for builtin_description.flag.  */

/* Set when we don't support the comparison natively, and should
   swap the comparison operands in order to support it.  */
#define BUILTIN_DESC_SWAP_OPERANDS 1
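/* For example, SSE has no native "compare greater" instruction, so the
   cmpgtps entry in bdesc_2arg below uses LT together with
   BUILTIN_DESC_SWAP_OPERANDS: a > b is expanded as b < a.  */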
struct builtin_description
{
  const unsigned int mask;
  const enum insn_code icode;
  const char *const name;
  const enum ix86_builtins code;
  const enum rtx_code comparison;
  const unsigned int flag;
};
static const struct builtin_description bdesc_comi[] =
{
16211 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
16212 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
16213 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
16214 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
16215 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
16216 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
16217 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
16218 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
16219 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
16220 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
16221 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
16222 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
16223 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
16224 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
16225 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
16226 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
16227 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
16228 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
16229 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
16230 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
16231 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
16232 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
16233 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
};
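/* Note that the comparison codes above are the unordered variants
   (UNEQ, UNLT, UNLE, LTGT) rather than EQ, LT, LE, NE: e.g. comieq
   yields true for unordered operands as well as equal ones, matching
   the flag settings of the comiss/comisd instructions when an input
   is a NaN, which plain EQ could not express.  */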
static const struct builtin_description bdesc_2arg[] =
{
  /* SSE */
16240 { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
16241 { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
16242 { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
16243 { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
16244 { MASK_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
16245 { MASK_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
16246 { MASK_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
16247 { MASK_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
16249 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
16250 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
16251 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
16252 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT,
16253 BUILTIN_DESC_SWAP_OPERANDS },
16254 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE,
16255 BUILTIN_DESC_SWAP_OPERANDS },
16256 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
16257 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, 0 },
16258 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, 0 },
16259 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, 0 },
16260 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE,
16261 BUILTIN_DESC_SWAP_OPERANDS },
16262 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT,
16263 BUILTIN_DESC_SWAP_OPERANDS },
16264 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, 0 },
16265 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
16266 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
16267 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
16268 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
16269 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, 0 },
16270 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, 0 },
16271 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, 0 },
16272 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE,
16273 BUILTIN_DESC_SWAP_OPERANDS },
16274 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT,
16275 BUILTIN_DESC_SWAP_OPERANDS },
16276 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
16278 { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
16279 { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
16280 { MASK_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
16281 { MASK_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
16283 { MASK_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
16284 { MASK_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
16285 { MASK_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
16286 { MASK_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
16288 { MASK_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
16289 { MASK_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
16290 { MASK_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
16291 { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
16292 { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
16295 { MASK_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
16296 { MASK_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
16297 { MASK_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
16298 { MASK_SSE2, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, 0, 0 },
16299 { MASK_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
16300 { MASK_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
16301 { MASK_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
16302 { MASK_SSE2, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, 0, 0 },
16304 { MASK_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
16305 { MASK_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
16306 { MASK_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
16307 { MASK_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
16308 { MASK_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
16309 { MASK_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
16310 { MASK_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
16311 { MASK_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
16313 { MASK_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
16314 { MASK_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
16315 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
16317 { MASK_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
16318 { MASK_MMX, CODE_FOR_mmx_nandv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
16319 { MASK_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
16320 { MASK_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
16322 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
16323 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
16325 { MASK_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
16326 { MASK_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
16327 { MASK_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
16328 { MASK_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
16329 { MASK_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
16330 { MASK_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
16332 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
16333 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
16334 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
16335 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
16337 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
16338 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
16339 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
16340 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
16341 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
16342 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
16345 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
16346 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
16347 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
16349 { MASK_SSE, CODE_FOR_sse_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
16350 { MASK_SSE, CODE_FOR_sse_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
16351 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 },
16353 { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
16354 { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
16355 { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
16356 { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
16357 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
16358 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
16360 { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
16361 { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
16362 { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
16363 { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
16364 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
16365 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
16367 { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
16368 { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
16369 { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
16370 { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
16372 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
16373 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
16376 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
16377 { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
16378 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
16379 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
16380 { MASK_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
16381 { MASK_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
16382 { MASK_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
16383 { MASK_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
16385 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
16386 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
16387 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
16388 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT,
16389 BUILTIN_DESC_SWAP_OPERANDS },
16390 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE,
16391 BUILTIN_DESC_SWAP_OPERANDS },
16392 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
16393 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, 0 },
16394 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, 0 },
16395 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, 0 },
16396 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE,
16397 BUILTIN_DESC_SWAP_OPERANDS },
16398 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT,
16399 BUILTIN_DESC_SWAP_OPERANDS },
16400 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, 0 },
16401 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
16402 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
16403 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
16404 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
16405 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, 0 },
16406 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, 0 },
16407 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, 0 },
16408 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, 0 },
16410 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
16411 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
16412 { MASK_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
16413 { MASK_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
16415 { MASK_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
16416 { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
16417 { MASK_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
16418 { MASK_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
16420 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
16421 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
16422 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
16425 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
16426 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
16427 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
16428 { MASK_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
16429 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
16430 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
16431 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
16432 { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
16434 { MASK_MMX, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
16435 { MASK_MMX, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
16436 { MASK_MMX, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
16437 { MASK_MMX, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
16438 { MASK_MMX, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
16439 { MASK_MMX, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
16440 { MASK_MMX, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
16441 { MASK_MMX, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
16443 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
16444 { MASK_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
16446 { MASK_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
16447 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
16448 { MASK_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
16449 { MASK_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
16451 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
16452 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
16454 { MASK_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
16455 { MASK_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
16456 { MASK_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
16457 { MASK_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
16458 { MASK_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
16459 { MASK_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
16461 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
16462 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
16463 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
16464 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
16466 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
16467 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
16468 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
16469 { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
16470 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
16471 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
16472 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
16473 { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },
16475 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
16476 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
16477 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
16479 { MASK_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
16480 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
16482 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, 0, IX86_BUILTIN_PMULUDQ, 0, 0 },
16483 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, 0, IX86_BUILTIN_PMULUDQ128, 0, 0 },
16485 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
16486 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
16487 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
16489 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
16490 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
16491 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
16493 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
16494 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
16496 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
16498 { MASK_SSE2, CODE_FOR_sse2_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
16499 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 },
16500 { MASK_SSE2, CODE_FOR_sse2_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
16501 { MASK_SSE2, CODE_FOR_sse2_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 },
16504 { MASK_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, 0, 0 },
16505 { MASK_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, 0, 0 },
16506 { MASK_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, 0, 0 },
16507 { MASK_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, 0, 0 },
16508 { MASK_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, 0, 0 },
16509 { MASK_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, 0, 0 },
16512 { MASK_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, 0, 0 },
16513 { MASK_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, 0, 0 },
16514 { MASK_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, 0, 0 },
16515 { MASK_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, 0, 0 },
16516 { MASK_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, 0, 0 },
16517 { MASK_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, 0, 0 },
16518 { MASK_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, 0, 0 },
16519 { MASK_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, 0, 0 },
16520 { MASK_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, 0, 0 },
16521 { MASK_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, 0, 0 },
16522 { MASK_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, 0, 0 },
16523 { MASK_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, 0, 0 },
16524 { MASK_SSSE3, CODE_FOR_ssse3_pmaddubswv8hi3, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, 0, 0 },
16525 { MASK_SSSE3, CODE_FOR_ssse3_pmaddubswv4hi3, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, 0, 0 },
16526 { MASK_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, 0, 0 },
16527 { MASK_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, 0, 0 },
16528 { MASK_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, 0, 0 },
16529 { MASK_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, 0, 0 },
16530 { MASK_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, 0, 0 },
16531 { MASK_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, 0, 0 },
16532 { MASK_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, 0, 0 },
16533 { MASK_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, 0, 0 },
16534 { MASK_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, 0, 0 },
  { MASK_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, 0, 0 }
};
static const struct builtin_description bdesc_1arg[] =
{
16540 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
16541 { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
16543 { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
16544 { MASK_SSE, CODE_FOR_sse_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
16545 { MASK_SSE, CODE_FOR_sse_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
16547 { MASK_SSE, CODE_FOR_sse_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
16548 { MASK_SSE, CODE_FOR_sse_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
16549 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 },
16550 { MASK_SSE, CODE_FOR_sse_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
16551 { MASK_SSE, CODE_FOR_sse_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
16552 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 },
16554 { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
16555 { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
16557 { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
16559 { MASK_SSE2, CODE_FOR_sse2_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
16560 { MASK_SSE2, CODE_FOR_sse2_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
16562 { MASK_SSE2, CODE_FOR_sse2_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
16563 { MASK_SSE2, CODE_FOR_sse2_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
16564 { MASK_SSE2, CODE_FOR_sse2_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
16565 { MASK_SSE2, CODE_FOR_sse2_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
16566 { MASK_SSE2, CODE_FOR_sse2_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
16568 { MASK_SSE2, CODE_FOR_sse2_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
16570 { MASK_SSE2, CODE_FOR_sse2_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
16571 { MASK_SSE2, CODE_FOR_sse2_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
16572 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 },
16573 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 },
16575 { MASK_SSE2, CODE_FOR_sse2_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
16576 { MASK_SSE2, CODE_FOR_sse2_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
16577 { MASK_SSE2, CODE_FOR_sse2_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },
16580 { MASK_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, 0, 0 },
16581 { MASK_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, 0, 0 },
16584 { MASK_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, 0, 0 },
16585 { MASK_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, 0, 0 },
16586 { MASK_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, 0, 0 },
16587 { MASK_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, 0, 0 },
16588 { MASK_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, 0, 0 },
  { MASK_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, 0, 0 }
};
/* Set up all the MMX/SSE builtins.  This is not called if TARGET_MMX
   is zero.  Otherwise, if TARGET_SSE is not set, only expand the MMX
   builtins.  */

static void
ix86_init_mmx_sse_builtins (void)
{
  const struct builtin_description *d;
  size_t i;
16601 tree V16QI_type_node = build_vector_type_for_mode (char_type_node, V16QImode);
16602 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
16603 tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
16604 tree V2DI_type_node
16605 = build_vector_type_for_mode (long_long_integer_type_node, V2DImode);
16606 tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
16607 tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
16608 tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
16609 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
16610 tree V8QI_type_node = build_vector_type_for_mode (char_type_node, V8QImode);
16611 tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);
16613 tree pchar_type_node = build_pointer_type (char_type_node);
16614 tree pcchar_type_node = build_pointer_type (
16615 build_type_variant (char_type_node, 1, 0));
16616 tree pfloat_type_node = build_pointer_type (float_type_node);
16617 tree pcfloat_type_node = build_pointer_type (
16618 build_type_variant (float_type_node, 1, 0));
16619 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
16620 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
16621 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
16624 tree int_ftype_v4sf_v4sf
16625 = build_function_type_list (integer_type_node,
16626 V4SF_type_node, V4SF_type_node, NULL_TREE);
16627 tree v4si_ftype_v4sf_v4sf
16628 = build_function_type_list (V4SI_type_node,
16629 V4SF_type_node, V4SF_type_node, NULL_TREE);
16630 /* MMX/SSE/integer conversions. */
16631 tree int_ftype_v4sf
16632 = build_function_type_list (integer_type_node,
16633 V4SF_type_node, NULL_TREE);
16634 tree int64_ftype_v4sf
16635 = build_function_type_list (long_long_integer_type_node,
16636 V4SF_type_node, NULL_TREE);
16637 tree int_ftype_v8qi
16638 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
16639 tree v4sf_ftype_v4sf_int
16640 = build_function_type_list (V4SF_type_node,
16641 V4SF_type_node, integer_type_node, NULL_TREE);
16642 tree v4sf_ftype_v4sf_int64
16643 = build_function_type_list (V4SF_type_node,
				V4SF_type_node, long_long_integer_type_node,
				NULL_TREE);
16646 tree v4sf_ftype_v4sf_v2si
16647 = build_function_type_list (V4SF_type_node,
16648 V4SF_type_node, V2SI_type_node, NULL_TREE);
16650 /* Miscellaneous. */
16651 tree v8qi_ftype_v4hi_v4hi
16652 = build_function_type_list (V8QI_type_node,
16653 V4HI_type_node, V4HI_type_node, NULL_TREE);
16654 tree v4hi_ftype_v2si_v2si
16655 = build_function_type_list (V4HI_type_node,
16656 V2SI_type_node, V2SI_type_node, NULL_TREE);
16657 tree v4sf_ftype_v4sf_v4sf_int
16658 = build_function_type_list (V4SF_type_node,
16659 V4SF_type_node, V4SF_type_node,
16660 integer_type_node, NULL_TREE);
16661 tree v2si_ftype_v4hi_v4hi
16662 = build_function_type_list (V2SI_type_node,
16663 V4HI_type_node, V4HI_type_node, NULL_TREE);
16664 tree v4hi_ftype_v4hi_int
16665 = build_function_type_list (V4HI_type_node,
16666 V4HI_type_node, integer_type_node, NULL_TREE);
16667 tree v4hi_ftype_v4hi_di
16668 = build_function_type_list (V4HI_type_node,
				V4HI_type_node, long_long_unsigned_type_node,
				NULL_TREE);
16671 tree v2si_ftype_v2si_di
16672 = build_function_type_list (V2SI_type_node,
				V2SI_type_node, long_long_unsigned_type_node,
				NULL_TREE);
16675 tree void_ftype_void
16676 = build_function_type (void_type_node, void_list_node);
16677 tree void_ftype_unsigned
16678 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
16679 tree void_ftype_unsigned_unsigned
16680 = build_function_type_list (void_type_node, unsigned_type_node,
16681 unsigned_type_node, NULL_TREE);
16682 tree void_ftype_pcvoid_unsigned_unsigned
16683 = build_function_type_list (void_type_node, const_ptr_type_node,
				unsigned_type_node, unsigned_type_node,
				NULL_TREE);
16686 tree unsigned_ftype_void
16687 = build_function_type (unsigned_type_node, void_list_node);
16688 tree v2si_ftype_v4sf
16689 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
16690 /* Loads/stores. */
16691 tree void_ftype_v8qi_v8qi_pchar
16692 = build_function_type_list (void_type_node,
16693 V8QI_type_node, V8QI_type_node,
16694 pchar_type_node, NULL_TREE);
16695 tree v4sf_ftype_pcfloat
16696 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
16697 /* @@@ the type is bogus */
16698 tree v4sf_ftype_v4sf_pv2si
16699 = build_function_type_list (V4SF_type_node,
16700 V4SF_type_node, pv2si_type_node, NULL_TREE);
16701 tree void_ftype_pv2si_v4sf
16702 = build_function_type_list (void_type_node,
16703 pv2si_type_node, V4SF_type_node, NULL_TREE);
16704 tree void_ftype_pfloat_v4sf
16705 = build_function_type_list (void_type_node,
16706 pfloat_type_node, V4SF_type_node, NULL_TREE);
16707 tree void_ftype_pdi_di
16708 = build_function_type_list (void_type_node,
				pdi_type_node, long_long_unsigned_type_node,
				NULL_TREE);
16711 tree void_ftype_pv2di_v2di
16712 = build_function_type_list (void_type_node,
16713 pv2di_type_node, V2DI_type_node, NULL_TREE);
16714 /* Normal vector unops. */
16715 tree v4sf_ftype_v4sf
16716 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
16717 tree v16qi_ftype_v16qi
16718 = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE);
16719 tree v8hi_ftype_v8hi
16720 = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE);
16721 tree v4si_ftype_v4si
16722 = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
16723 tree v8qi_ftype_v8qi
16724 = build_function_type_list (V8QI_type_node, V8QI_type_node, NULL_TREE);
16725 tree v4hi_ftype_v4hi
16726 = build_function_type_list (V4HI_type_node, V4HI_type_node, NULL_TREE);
16728 /* Normal vector binops. */
16729 tree v4sf_ftype_v4sf_v4sf
16730 = build_function_type_list (V4SF_type_node,
16731 V4SF_type_node, V4SF_type_node, NULL_TREE);
16732 tree v8qi_ftype_v8qi_v8qi
16733 = build_function_type_list (V8QI_type_node,
16734 V8QI_type_node, V8QI_type_node, NULL_TREE);
16735 tree v4hi_ftype_v4hi_v4hi
16736 = build_function_type_list (V4HI_type_node,
16737 V4HI_type_node, V4HI_type_node, NULL_TREE);
16738 tree v2si_ftype_v2si_v2si
16739 = build_function_type_list (V2SI_type_node,
16740 V2SI_type_node, V2SI_type_node, NULL_TREE);
16741 tree di_ftype_di_di
16742 = build_function_type_list (long_long_unsigned_type_node,
16743 long_long_unsigned_type_node,
16744 long_long_unsigned_type_node, NULL_TREE);
16746 tree di_ftype_di_di_int
16747 = build_function_type_list (long_long_unsigned_type_node,
16748 long_long_unsigned_type_node,
16749 long_long_unsigned_type_node,
16750 integer_type_node, NULL_TREE);
16752 tree v2si_ftype_v2sf
16753 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
16754 tree v2sf_ftype_v2si
16755 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
16756 tree v2si_ftype_v2si
16757 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
16758 tree v2sf_ftype_v2sf
16759 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
16760 tree v2sf_ftype_v2sf_v2sf
16761 = build_function_type_list (V2SF_type_node,
16762 V2SF_type_node, V2SF_type_node, NULL_TREE);
16763 tree v2si_ftype_v2sf_v2sf
16764 = build_function_type_list (V2SI_type_node,
16765 V2SF_type_node, V2SF_type_node, NULL_TREE);
16766 tree pint_type_node = build_pointer_type (integer_type_node);
16767 tree pdouble_type_node = build_pointer_type (double_type_node);
16768 tree pcdouble_type_node = build_pointer_type (
16769 build_type_variant (double_type_node, 1, 0));
16770 tree int_ftype_v2df_v2df
16771 = build_function_type_list (integer_type_node,
16772 V2DF_type_node, V2DF_type_node, NULL_TREE);
16774 tree void_ftype_pcvoid
16775 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
16776 tree v4sf_ftype_v4si
16777 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
16778 tree v4si_ftype_v4sf
16779 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
16780 tree v2df_ftype_v4si
16781 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
16782 tree v4si_ftype_v2df
16783 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
16784 tree v2si_ftype_v2df
16785 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
16786 tree v4sf_ftype_v2df
16787 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
16788 tree v2df_ftype_v2si
16789 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
16790 tree v2df_ftype_v4sf
16791 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
16792 tree int_ftype_v2df
16793 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
16794 tree int64_ftype_v2df
16795 = build_function_type_list (long_long_integer_type_node,
16796 V2DF_type_node, NULL_TREE);
16797 tree v2df_ftype_v2df_int
16798 = build_function_type_list (V2DF_type_node,
16799 V2DF_type_node, integer_type_node, NULL_TREE);
16800 tree v2df_ftype_v2df_int64
16801 = build_function_type_list (V2DF_type_node,
				V2DF_type_node, long_long_integer_type_node,
				NULL_TREE);
16804 tree v4sf_ftype_v4sf_v2df
16805 = build_function_type_list (V4SF_type_node,
16806 V4SF_type_node, V2DF_type_node, NULL_TREE);
16807 tree v2df_ftype_v2df_v4sf
16808 = build_function_type_list (V2DF_type_node,
16809 V2DF_type_node, V4SF_type_node, NULL_TREE);
16810 tree v2df_ftype_v2df_v2df_int
16811 = build_function_type_list (V2DF_type_node,
				V2DF_type_node, V2DF_type_node,
				integer_type_node, NULL_TREE);
16815 tree v2df_ftype_v2df_pcdouble
16816 = build_function_type_list (V2DF_type_node,
16817 V2DF_type_node, pcdouble_type_node, NULL_TREE);
16818 tree void_ftype_pdouble_v2df
16819 = build_function_type_list (void_type_node,
16820 pdouble_type_node, V2DF_type_node, NULL_TREE);
16821 tree void_ftype_pint_int
16822 = build_function_type_list (void_type_node,
16823 pint_type_node, integer_type_node, NULL_TREE);
16824 tree void_ftype_v16qi_v16qi_pchar
16825 = build_function_type_list (void_type_node,
16826 V16QI_type_node, V16QI_type_node,
16827 pchar_type_node, NULL_TREE);
16828 tree v2df_ftype_pcdouble
16829 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
16830 tree v2df_ftype_v2df_v2df
16831 = build_function_type_list (V2DF_type_node,
16832 V2DF_type_node, V2DF_type_node, NULL_TREE);
  tree v16qi_ftype_v16qi_v16qi
    = build_function_type_list (V16QI_type_node,
                                V16QI_type_node, V16QI_type_node, NULL_TREE);
  tree v8hi_ftype_v8hi_v8hi
    = build_function_type_list (V8HI_type_node,
                                V8HI_type_node, V8HI_type_node, NULL_TREE);
  tree v4si_ftype_v4si_v4si
    = build_function_type_list (V4SI_type_node,
                                V4SI_type_node, V4SI_type_node, NULL_TREE);
  tree v2di_ftype_v2di_v2di
    = build_function_type_list (V2DI_type_node,
                                V2DI_type_node, V2DI_type_node, NULL_TREE);
  tree v2di_ftype_v2df_v2df
    = build_function_type_list (V2DI_type_node,
                                V2DF_type_node, V2DF_type_node, NULL_TREE);
  tree v2df_ftype_v2df
    = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
  tree v2di_ftype_v2di_int
    = build_function_type_list (V2DI_type_node,
                                V2DI_type_node, integer_type_node, NULL_TREE);
  tree v2di_ftype_v2di_v2di_int
    = build_function_type_list (V2DI_type_node, V2DI_type_node,
                                V2DI_type_node, integer_type_node, NULL_TREE);
  tree v4si_ftype_v4si_int
    = build_function_type_list (V4SI_type_node,
                                V4SI_type_node, integer_type_node, NULL_TREE);
  tree v8hi_ftype_v8hi_int
    = build_function_type_list (V8HI_type_node,
                                V8HI_type_node, integer_type_node, NULL_TREE);
  tree v4si_ftype_v8hi_v8hi
    = build_function_type_list (V4SI_type_node,
                                V8HI_type_node, V8HI_type_node, NULL_TREE);
  tree di_ftype_v8qi_v8qi
    = build_function_type_list (long_long_unsigned_type_node,
                                V8QI_type_node, V8QI_type_node, NULL_TREE);
  tree di_ftype_v2si_v2si
    = build_function_type_list (long_long_unsigned_type_node,
                                V2SI_type_node, V2SI_type_node, NULL_TREE);
  tree v2di_ftype_v16qi_v16qi
    = build_function_type_list (V2DI_type_node,
                                V16QI_type_node, V16QI_type_node, NULL_TREE);
  tree v2di_ftype_v4si_v4si
    = build_function_type_list (V2DI_type_node,
                                V4SI_type_node, V4SI_type_node, NULL_TREE);
  tree int_ftype_v16qi
    = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
  tree v16qi_ftype_pcchar
    = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
  tree void_ftype_pchar_v16qi
    = build_function_type_list (void_type_node,
                                pchar_type_node, V16QI_type_node, NULL_TREE);

  tree v2di_ftype_v2di_unsigned_unsigned
    = build_function_type_list (V2DI_type_node, V2DI_type_node,
                                unsigned_type_node, unsigned_type_node,
                                NULL_TREE);
  tree v2di_ftype_v2di_v2di_unsigned_unsigned
    = build_function_type_list (V2DI_type_node, V2DI_type_node, V2DI_type_node,
                                unsigned_type_node, unsigned_type_node,
                                NULL_TREE);
  tree v2di_ftype_v2di_v16qi
    = build_function_type_list (V2DI_type_node, V2DI_type_node, V16QI_type_node,
                                NULL_TREE);

  tree float80_type;
  tree float128_type;
  tree ftype;

  /* The __float80 type.  */
  if (TYPE_MODE (long_double_type_node) == XFmode)
    (*lang_hooks.types.register_builtin_type) (long_double_type_node,
                                               "__float80");
  else
    {
      /* The __float80 type.  */
      float80_type = make_node (REAL_TYPE);
      TYPE_PRECISION (float80_type) = 80;
      layout_type (float80_type);
      (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
    }

  if (TARGET_64BIT)
    {
      float128_type = make_node (REAL_TYPE);
      TYPE_PRECISION (float128_type) = 128;
      layout_type (float128_type);
      (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
    }
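
  /* Illustrative note (not in the original source): once registered, the
     names are directly usable in user code, e.g.

         __float80 ext = 1.0L;    // 80-bit extended precision
         __float128 quad = 2.0;   // 128-bit type, 64-bit targets only

     both resolving to the REAL_TYPE nodes laid out above.  */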
  /* Add all builtins that are more or less simple operations on two
     operands.  */
  for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
    {
      /* Use one of the operands; the target can have a different mode for
         mask-generating compares.  */
      enum machine_mode mode;
      tree type;

      if (d->name == 0)
        continue;
      mode = insn_data[d->icode].operand[1].mode;

      switch (mode)
        {
        case V16QImode: type = v16qi_ftype_v16qi_v16qi; break;
        case V8HImode:  type = v8hi_ftype_v8hi_v8hi; break;
        case V4SImode:  type = v4si_ftype_v4si_v4si; break;
        case V2DImode:  type = v2di_ftype_v2di_v2di; break;
        case V2DFmode:  type = v2df_ftype_v2df_v2df; break;
        case V4SFmode:  type = v4sf_ftype_v4sf_v4sf; break;
        case V8QImode:  type = v8qi_ftype_v8qi_v8qi; break;
        case V4HImode:  type = v4hi_ftype_v4hi_v4hi; break;
        case V2SImode:  type = v2si_ftype_v2si_v2si; break;
        case DImode:    type = di_ftype_di_di; break;
        default:
          gcc_unreachable ();
        }

      /* Override for comparisons.  */
      if (d->icode == CODE_FOR_sse_maskcmpv4sf3
          || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3)
        type = v4si_ftype_v4sf_v4sf;

      if (d->icode == CODE_FOR_sse2_maskcmpv2df3
          || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
        type = v2di_ftype_v2df_v2df;

      def_builtin (d->mask, d->name, type, d->code);
    }
  /* Add all builtins that are more or less simple operations on 1 operand.  */
  for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
    {
      enum machine_mode mode;
      tree type;

      if (d->name == 0)
        continue;
      mode = insn_data[d->icode].operand[1].mode;

      switch (mode)
        {
        case V16QImode: type = v16qi_ftype_v16qi; break;
        case V8HImode:  type = v8hi_ftype_v8hi; break;
        case V4SImode:  type = v4si_ftype_v4si; break;
        case V2DFmode:  type = v2df_ftype_v2df; break;
        case V4SFmode:  type = v4sf_ftype_v4sf; break;
        case V8QImode:  type = v8qi_ftype_v8qi; break;
        case V4HImode:  type = v4hi_ftype_v4hi; break;
        case V2SImode:  type = v2si_ftype_v2si; break;
        default: gcc_unreachable ();
        }

      def_builtin (d->mask, d->name, type, d->code);
    }
  /* Add the remaining MMX insns with somewhat more complicated types.  */
  def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
  def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
  def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
  def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);

  def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
  def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
  def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);

  def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
  def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);

  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
  def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);

  /* comi/ucomi insns.  */
  for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
    if (d->mask == MASK_SSE2)
      def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
    else
      def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);

  def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
  def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
  def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);

  def_builtin (MASK_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
  def_builtin (MASK_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
  def_builtin_const (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
  def_builtin_const (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
  def_builtin_const (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
  def_builtin_const (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
  def_builtin_const (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
  def_builtin_const (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
  def_builtin_const (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
  def_builtin_const (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
  def_builtin_const (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);

  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);

  def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
  def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);

  def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
  def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
  def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
  def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);

  def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
  def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);

  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);

  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);

  def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
  def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
  def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
  def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
  def_builtin_const (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
  def_builtin_const (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);

  def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);

  /* Original 3DNow!  */
  def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);

  /* 3DNow! extension as used in the Athlon CPU.  */
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);

  /* SSE2 */
  def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);

  def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);

  def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADHPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADLPD);

  def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
  def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
  def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);

  def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
  def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
  def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
  def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);

  def_builtin_const (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
  def_builtin_const (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);

  def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);

  def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
  def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);

  def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
  def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
  def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
  def_builtin_const (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
  def_builtin_const (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);

  def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);

  def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
  def_builtin_const (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
  def_builtin_const (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
  def_builtin_const (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);

  def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
  def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
  def_builtin_const (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);

  def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
  def_builtin_const (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
  def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
  def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);

  def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
  def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
  def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);

  def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
  def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);

  def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq", di_ftype_v2si_v2si, IX86_BUILTIN_PMULUDQ);
  def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq128", v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULUDQ128);

  def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSLLW128);
  def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSLLD128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);

  def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSRLW128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSRLD128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);

  def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSRAW128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSRAD128);

  def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);

  /* Prescott New Instructions.  */
  def_builtin (MASK_SSE3, "__builtin_ia32_monitor",
               void_ftype_pcvoid_unsigned_unsigned,
               IX86_BUILTIN_MONITOR);
  def_builtin (MASK_SSE3, "__builtin_ia32_mwait",
               void_ftype_unsigned_unsigned,
               IX86_BUILTIN_MWAIT);
  def_builtin (MASK_SSE3, "__builtin_ia32_lddqu",
               v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);

  /* SSSE3.  */
  def_builtin (MASK_SSSE3, "__builtin_ia32_palignr128",
               v2di_ftype_v2di_v2di_int, IX86_BUILTIN_PALIGNR128);
  def_builtin (MASK_SSSE3, "__builtin_ia32_palignr", di_ftype_di_di_int,
               IX86_BUILTIN_PALIGNR);

  /* AMDFAM10 SSE4A New built-ins  */
  def_builtin (MASK_SSE4A, "__builtin_ia32_movntsd",
               void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTSD);
  def_builtin (MASK_SSE4A, "__builtin_ia32_movntss",
               void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTSS);
  def_builtin (MASK_SSE4A, "__builtin_ia32_extrqi",
               v2di_ftype_v2di_unsigned_unsigned, IX86_BUILTIN_EXTRQI);
  def_builtin (MASK_SSE4A, "__builtin_ia32_extrq",
               v2di_ftype_v2di_v16qi, IX86_BUILTIN_EXTRQ);
  def_builtin (MASK_SSE4A, "__builtin_ia32_insertqi",
               v2di_ftype_v2di_v2di_unsigned_unsigned, IX86_BUILTIN_INSERTQI);
  def_builtin (MASK_SSE4A, "__builtin_ia32_insertq",
               v2di_ftype_v2di_v2di, IX86_BUILTIN_INSERTQ);
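
  /* Illustrative note (not in the original source): these SSE4A builtins
     are normally reached through ammintrin.h wrappers, e.g. _mm_stream_sd
     maps onto __builtin_ia32_movntsd and _mm_inserti_si64 onto
     __builtin_ia32_insertqi, so user code rarely spells the
     __builtin_ia32_* names directly.  */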

  /* Access to the vec_init patterns.  */
  ftype = build_function_type_list (V2SI_type_node, integer_type_node,
                                    integer_type_node, NULL_TREE);
  def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v2si",
               ftype, IX86_BUILTIN_VEC_INIT_V2SI);

  ftype = build_function_type_list (V4HI_type_node, short_integer_type_node,
                                    short_integer_type_node,
                                    short_integer_type_node,
                                    short_integer_type_node, NULL_TREE);
  def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v4hi",
               ftype, IX86_BUILTIN_VEC_INIT_V4HI);

  ftype = build_function_type_list (V8QI_type_node, char_type_node,
                                    char_type_node, char_type_node,
                                    char_type_node, char_type_node,
                                    char_type_node, char_type_node,
                                    char_type_node, NULL_TREE);
  def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v8qi",
               ftype, IX86_BUILTIN_VEC_INIT_V8QI);

  /* Access to the vec_extract patterns.  */
  ftype = build_function_type_list (double_type_node, V2DF_type_node,
                                    integer_type_node, NULL_TREE);
  def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v2df",
               ftype, IX86_BUILTIN_VEC_EXT_V2DF);

  ftype = build_function_type_list (long_long_integer_type_node,
                                    V2DI_type_node, integer_type_node,
                                    NULL_TREE);
  def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v2di",
               ftype, IX86_BUILTIN_VEC_EXT_V2DI);

  ftype = build_function_type_list (float_type_node, V4SF_type_node,
                                    integer_type_node, NULL_TREE);
  def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v4sf",
               ftype, IX86_BUILTIN_VEC_EXT_V4SF);

  ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
                                    integer_type_node, NULL_TREE);
  def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v4si",
               ftype, IX86_BUILTIN_VEC_EXT_V4SI);

  ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
                                    integer_type_node, NULL_TREE);
  def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v8hi",
               ftype, IX86_BUILTIN_VEC_EXT_V8HI);

  ftype = build_function_type_list (intHI_type_node, V4HI_type_node,
                                    integer_type_node, NULL_TREE);
  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_ext_v4hi",
               ftype, IX86_BUILTIN_VEC_EXT_V4HI);

  ftype = build_function_type_list (intSI_type_node, V2SI_type_node,
                                    integer_type_node, NULL_TREE);
  def_builtin (MASK_MMX, "__builtin_ia32_vec_ext_v2si",
               ftype, IX86_BUILTIN_VEC_EXT_V2SI);

  /* Access to the vec_set patterns.  */
  ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
                                    intHI_type_node,
                                    integer_type_node, NULL_TREE);
  def_builtin (MASK_SSE, "__builtin_ia32_vec_set_v8hi",
               ftype, IX86_BUILTIN_VEC_SET_V8HI);

  ftype = build_function_type_list (V4HI_type_node, V4HI_type_node,
                                    intHI_type_node,
                                    integer_type_node, NULL_TREE);
  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_set_v4hi",
               ftype, IX86_BUILTIN_VEC_SET_V4HI);
}
static void
ix86_init_builtins (void)
{
  if (TARGET_MMX)
    ix86_init_mmx_sse_builtins ();
}

/* Errors in the source file can cause expand_expr to return const0_rtx
   where we expect a vector.  To avoid crashing, use one of the vector
   clear instructions.  */
static rtx
safe_vector_operand (rtx x, enum machine_mode mode)
{
  if (x == const0_rtx)
    x = CONST0_RTX (mode);
  return x;
}
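
/* Illustrative note (not in the original source): CONST0_RTX (mode) is the
   canonical zero of MODE, so for a V4SImode operand the substitution above
   produces an all-zero vector that downstream expanders can consume,
   instead of the scalar const0_rtx produced for erroneous source.  */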

/* Subroutine of ix86_expand_builtin to take care of binop insns.  */

static rtx
ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
{
  rtx pat, xops[3];
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[icode].operand[2].mode;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  if (optimize || !target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  if (GET_MODE (op1) == SImode && mode1 == TImode)
    {
      rtx x = gen_reg_rtx (V4SImode);
      emit_insn (gen_sse2_loadd (x, op1));
      op1 = gen_lowpart (TImode, x);
    }

  /* The insn must want input operands in the same modes as the
     result.  */
  gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
              && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode));

  if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  /* ??? Using ix86_fixup_binary_operands is problematic when
     we've got mismatched modes.  Fake it.  */

  xops[0] = target;
  xops[1] = op0;
  xops[2] = op1;

  if (tmode == mode0 && tmode == mode1)
    {
      target = ix86_fixup_binary_operands (UNKNOWN, tmode, xops);
      op0 = xops[1];
      op1 = xops[2];
    }
  else if (optimize || !ix86_binary_operator_ok (UNKNOWN, tmode, xops))
    {
      op0 = force_reg (mode0, op0);
      op1 = force_reg (mode1, op1);
      target = gen_reg_rtx (tmode);
    }

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}

/* Subroutine of ix86_expand_builtin to take care of stores.  */

static rtx
ix86_expand_store_builtin (enum insn_code icode, tree exp)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  enum machine_mode mode0 = insn_data[icode].operand[0].mode;
  enum machine_mode mode1 = insn_data[icode].operand[1].mode;

  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
  op1 = copy_to_mode_reg (mode1, op1);

  pat = GEN_FCN (icode) (op0, op1);
  if (pat)
    emit_insn (pat);
  return 0;
}

/* Subroutine of ix86_expand_builtin to take care of unop insns.  */

static rtx
ix86_expand_unop_builtin (enum insn_code icode, tree exp,
                          rtx target, int do_load)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  rtx op0 = expand_normal (arg0);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;

  if (optimize || !target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);
  if (do_load)
    op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
  else
    {
      if (VECTOR_MODE_P (mode0))
        op0 = safe_vector_operand (op0, mode0);

      if ((optimize && !register_operand (op0, mode0))
          || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
        op0 = copy_to_mode_reg (mode0, op0);
    }

  pat = GEN_FCN (icode) (target, op0);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}

/* Subroutine of ix86_expand_builtin to take care of three special unop insns:
   sqrtss, rsqrtss, rcpss.  */

static rtx
ix86_expand_unop1_builtin (enum insn_code icode, tree exp, rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  rtx op1, op0 = expand_normal (arg0);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;

  if (optimize || !target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);

  if ((optimize && !register_operand (op0, mode0))
      || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);

  op1 = op0;
  if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
    op1 = copy_to_mode_reg (mode0, op1);

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
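
/* Illustrative sketch (not in the original source): for
   __builtin_ia32_sqrtss the vm pattern computes roughly

       target = vec_merge (sqrt (op0), op1, 1)

   element 0 is operated on while op1 supplies the untouched upper
   elements, which is why op1 is seeded from op0 above.  */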

/* Subroutine of ix86_expand_builtin to take care of comparison insns.  */

static rtx
ix86_expand_sse_compare (const struct builtin_description *d, tree exp,
                         rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  rtx op2;
  enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
  enum rtx_code comparison = d->comparison;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  /* Swap operands if we have a comparison that isn't available in
     hardware.  */
  if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
    {
      rtx tmp = gen_reg_rtx (mode1);
      emit_move_insn (tmp, op1);
      op1 = op0;
      op0 = tmp;
    }

  if (optimize || !target
      || GET_MODE (target) != tmode
      || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  if ((optimize && !register_operand (op0, mode0))
      || ! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if ((optimize && !register_operand (op1, mode1))
      || ! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
  pat = GEN_FCN (d->icode) (target, op0, op1, op2);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}

/* Subroutine of ix86_expand_builtin to take care of comi insns.  */

static rtx
ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
                      rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  rtx op2;
  enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
  enum rtx_code comparison = d->comparison;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  /* Swap operands if we have a comparison that isn't available in
     hardware.  */
  if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
    {
      rtx tmp = op1;
      op1 = op0;
      op0 = tmp;
    }

  target = gen_reg_rtx (SImode);
  emit_move_insn (target, const0_rtx);
  target = gen_rtx_SUBREG (QImode, target, 0);

  if ((optimize && !register_operand (op0, mode0))
      || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if ((optimize && !register_operand (op1, mode1))
      || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
  pat = GEN_FCN (d->icode) (op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  emit_insn (gen_rtx_SET (VOIDmode,
                          gen_rtx_STRICT_LOW_PART (VOIDmode, target),
                          gen_rtx_fmt_ee (comparison, QImode,
                                          SET_DEST (pat),
                                          const0_rtx)));

  return SUBREG_REG (target);
}

/* Return the integer constant in ARG.  Constrain it to be in the range
   of the subparts of VEC_TYPE; issue an error if not.  */

static unsigned HOST_WIDE_INT
get_element_number (tree vec_type, tree arg)
{
  unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;

  if (!host_integerp (arg, 1)
      || (elt = tree_low_cst (arg, 1), elt > max))
    {
      error ("selector must be an integer constant in the range 0..%wi", max);
      return 0;
    }

  return elt;
}
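
/* Illustrative example (not in the original source): for a V4SF vector
   TYPE_VECTOR_SUBPARTS is 4, so a call such as
   __builtin_ia32_vec_ext_v4sf (v, 5) is diagnosed here with
   "selector must be an integer constant in the range 0..3", and 0 is
   returned as a safe fallback.  */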

/* A subroutine of ix86_expand_builtin.  These builtins are a wrapper around
   ix86_expand_vector_init.  We DO have language-level syntax for this, in
   the form of (type){ init-list }.  Except that since we can't place emms
   instructions from inside the compiler, we can't allow the use of MMX
   registers unless the user explicitly asks for it.  So we do *not* define
   vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md.  Instead
   we have builtins invoked by mmintrin.h that give us license to emit
   these sorts of instructions.  */

static rtx
ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
{
  enum machine_mode tmode = TYPE_MODE (type);
  enum machine_mode inner_mode = GET_MODE_INNER (tmode);
  int i, n_elt = GET_MODE_NUNITS (tmode);
  rtvec v = rtvec_alloc (n_elt);

  gcc_assert (VECTOR_MODE_P (tmode));
  gcc_assert (call_expr_nargs (exp) == n_elt);

  for (i = 0; i < n_elt; ++i)
    {
      rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
      RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
    }

  if (!target || !register_operand (target, tmode))
    target = gen_reg_rtx (tmode);

  ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
  return target;
}
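
/* Illustrative note (not in the original source): mmintrin.h reaches this
   through wrappers such as _mm_set_pi16, which calls
   __builtin_ia32_vec_init_v4hi; the arguments become a V4HImode
   gen_rtx_PARALLEL handed to ix86_expand_vector_init above.  */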

/* A subroutine of ix86_expand_builtin.  These builtins are a wrapper around
   ix86_expand_vector_extract.  They would be redundant (for non-MMX) if we
   had a language-level syntax for referencing vector elements.  */

static rtx
ix86_expand_vec_ext_builtin (tree exp, rtx target)
{
  enum machine_mode tmode, mode0;
  tree arg0, arg1;
  int elt;
  rtx op0;

  arg0 = CALL_EXPR_ARG (exp, 0);
  arg1 = CALL_EXPR_ARG (exp, 1);

  op0 = expand_normal (arg0);
  elt = get_element_number (TREE_TYPE (arg0), arg1);

  tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
  mode0 = TYPE_MODE (TREE_TYPE (arg0));
  gcc_assert (VECTOR_MODE_P (mode0));

  op0 = force_reg (mode0, op0);

  if (optimize || !target || !register_operand (target, tmode))
    target = gen_reg_rtx (tmode);

  ix86_expand_vector_extract (true, target, op0, elt);

  return target;
}
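
/* Illustrative note (not in the original source): wrappers in the style of
   _mm_extract_pi16 come through here; the constant selector has already
   been range-checked by get_element_number, so the expander below can
   assume it is valid.  */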

/* A subroutine of ix86_expand_builtin.  These builtins are a wrapper around
   ix86_expand_vector_set.  They would be redundant (for non-MMX) if we had
   a language-level syntax for referencing vector elements.  */

static rtx
ix86_expand_vec_set_builtin (tree exp)
{
  enum machine_mode tmode, mode1;
  tree arg0, arg1, arg2;
  int elt;
  rtx op0, op1;

  arg0 = CALL_EXPR_ARG (exp, 0);
  arg1 = CALL_EXPR_ARG (exp, 1);
  arg2 = CALL_EXPR_ARG (exp, 2);

  tmode = TYPE_MODE (TREE_TYPE (arg0));
  mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
  gcc_assert (VECTOR_MODE_P (tmode));

  op0 = expand_expr (arg0, NULL_RTX, tmode, 0);
  op1 = expand_expr (arg1, NULL_RTX, mode1, 0);
  elt = get_element_number (TREE_TYPE (arg0), arg2);

  if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
    op1 = convert_modes (mode1, GET_MODE (op1), op1, true);

  op0 = force_reg (tmode, op0);
  op1 = force_reg (mode1, op1);

  ix86_expand_vector_set (true, op0, op1, elt);

  return op0;
}
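
/* Illustrative note (not in the original source): wrappers in the style of
   _mm_insert_pi16 come through here; note that the builtin returns the
   updated vector (op0) rather than storing through a pointer.  */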

/* Expand an expression EXP that calls a built-in function,
   with result going to TARGET if that's convenient
   (and in mode MODE if that's convenient).
   SUBTARGET may be used as the target for computing one of EXP's operands.
   IGNORE is nonzero if the value is to be ignored.  */

static rtx
ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
                     enum machine_mode mode ATTRIBUTE_UNUSED,
                     int ignore ATTRIBUTE_UNUSED)
{
  const struct builtin_description *d;
  size_t i;
  enum insn_code icode;
  tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
  tree arg0, arg1, arg2, arg3;
  rtx op0, op1, op2, op3, pat;
  enum machine_mode tmode, mode0, mode1, mode2, mode3, mode4;
  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);

  switch (fcode)
    {
    case IX86_BUILTIN_EMMS:
      emit_insn (gen_mmx_emms ());
      return 0;

    case IX86_BUILTIN_SFENCE:
      emit_insn (gen_sse_sfence ());
      return 0;

    case IX86_BUILTIN_MASKMOVQ:
    case IX86_BUILTIN_MASKMOVDQU:
      icode = (fcode == IX86_BUILTIN_MASKMOVQ
               ? CODE_FOR_mmx_maskmovq
               : CODE_FOR_sse2_maskmovdqu);
      /* Note the arg order is different from the operand order.  */
      arg1 = CALL_EXPR_ARG (exp, 0);
      arg2 = CALL_EXPR_ARG (exp, 1);
      arg0 = CALL_EXPR_ARG (exp, 2);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      op2 = expand_normal (arg2);
      mode0 = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[2].mode;

      op0 = force_reg (Pmode, op0);
      op0 = gen_rtx_MEM (mode1, op0);

      if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
        op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
        op1 = copy_to_mode_reg (mode1, op1);
      if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
        op2 = copy_to_mode_reg (mode2, op2);
      pat = GEN_FCN (icode) (op0, op1, op2);
      if (! pat)
        return 0;
      emit_insn (pat);
      return 0;

    case IX86_BUILTIN_SQRTSS:
      return ix86_expand_unop1_builtin (CODE_FOR_sse_vmsqrtv4sf2, exp, target);
    case IX86_BUILTIN_RSQRTSS:
      return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrsqrtv4sf2, exp, target);
    case IX86_BUILTIN_RCPSS:
      return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrcpv4sf2, exp, target);

    case IX86_BUILTIN_LOADUPS:
      return ix86_expand_unop_builtin (CODE_FOR_sse_movups, exp, target, 1);

    case IX86_BUILTIN_STOREUPS:
      return ix86_expand_store_builtin (CODE_FOR_sse_movups, exp);

    case IX86_BUILTIN_LOADHPS:
    case IX86_BUILTIN_LOADLPS:
    case IX86_BUILTIN_LOADHPD:
    case IX86_BUILTIN_LOADLPD:
      icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_loadhps
               : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_loadlps
               : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_loadhpd
               : CODE_FOR_sse2_loadlpd);
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;

      op0 = force_reg (mode0, op0);
      op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
      if (optimize || target == 0
          || GET_MODE (target) != tmode
          || !register_operand (target, tmode))
        target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
      if (! pat)
        return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_STOREHPS:
    case IX86_BUILTIN_STORELPS:
      icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_storehps
               : CODE_FOR_sse_storelps);
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      mode0 = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;

      op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
      op1 = force_reg (mode1, op1);

      pat = GEN_FCN (icode) (op0, op1);
      if (! pat)
        return const0_rtx;
      emit_insn (pat);
      return const0_rtx;

    case IX86_BUILTIN_MOVNTPS:
      return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, exp);
    case IX86_BUILTIN_MOVNTQ:
      return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, exp);

    case IX86_BUILTIN_LDMXCSR:
      op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
      target = assign_386_stack_local (SImode, SLOT_TEMP);
      emit_move_insn (target, op0);
      emit_insn (gen_sse_ldmxcsr (target));
      return 0;

    case IX86_BUILTIN_STMXCSR:
      target = assign_386_stack_local (SImode, SLOT_TEMP);
      emit_insn (gen_sse_stmxcsr (target));
      return copy_to_mode_reg (SImode, target);

    case IX86_BUILTIN_SHUFPS:
    case IX86_BUILTIN_SHUFPD:
      icode = (fcode == IX86_BUILTIN_SHUFPS
               ? CODE_FOR_sse_shufps
               : CODE_FOR_sse2_shufpd);
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      arg2 = CALL_EXPR_ARG (exp, 2);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      op2 = expand_normal (arg2);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;
      mode2 = insn_data[icode].operand[3].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
        op0 = copy_to_mode_reg (mode0, op0);
      if ((optimize && !register_operand (op1, mode1))
          || !(*insn_data[icode].operand[2].predicate) (op1, mode1))
        op1 = copy_to_mode_reg (mode1, op1);
      if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
        {
          /* @@@ better error message */
          error ("mask must be an immediate");
          return gen_reg_rtx (tmode);
        }
      if (optimize || target == 0
          || GET_MODE (target) != tmode
          || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
        target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1, op2);
      if (! pat)
        return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_PSHUFW:
    case IX86_BUILTIN_PSHUFD:
    case IX86_BUILTIN_PSHUFHW:
    case IX86_BUILTIN_PSHUFLW:
      icode = (  fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
               : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
               : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
               : CODE_FOR_mmx_pshufw);
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      tmode = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
        op0 = copy_to_mode_reg (mode1, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
        {
          /* @@@ better error message */
          error ("mask must be an immediate");
          return const0_rtx;
        }
      if (target == 0
          || GET_MODE (target) != tmode
          || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
        target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
      if (! pat)
        return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_PSLLWI128:
      icode = CODE_FOR_ashlv8hi3;
      goto do_pshifti;
    case IX86_BUILTIN_PSLLDI128:
      icode = CODE_FOR_ashlv4si3;
      goto do_pshifti;
    case IX86_BUILTIN_PSLLQI128:
      icode = CODE_FOR_ashlv2di3;
      goto do_pshifti;
    case IX86_BUILTIN_PSRAWI128:
      icode = CODE_FOR_ashrv8hi3;
      goto do_pshifti;
    case IX86_BUILTIN_PSRADI128:
      icode = CODE_FOR_ashrv4si3;
      goto do_pshifti;
    case IX86_BUILTIN_PSRLWI128:
      icode = CODE_FOR_lshrv8hi3;
      goto do_pshifti;
    case IX86_BUILTIN_PSRLDI128:
      icode = CODE_FOR_lshrv4si3;
      goto do_pshifti;
    case IX86_BUILTIN_PSRLQI128:
      icode = CODE_FOR_lshrv2di3;
      goto do_pshifti;
    do_pshifti:
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);

      if (!CONST_INT_P (op1))
        {
          error ("shift must be an immediate");
          return const0_rtx;
        }
      if (INTVAL (op1) < 0 || INTVAL (op1) > 255)
        op1 = GEN_INT (255);

      tmode = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
        op0 = copy_to_reg (op0);

      target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
      if (!pat)
        return 0;
      emit_insn (pat);
      return target;
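
      /* Illustrative note (not in the original source): the hardware does
         not reduce the count modulo the element width; e.g.
         __builtin_ia32_psllwi128 (x, 16) already produces all-zero
         elements, so clamping any out-of-range immediate to 255 above does
         not change the result.  */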

    case IX86_BUILTIN_PSLLW128:
      icode = CODE_FOR_ashlv8hi3;
      goto do_pshift;
    case IX86_BUILTIN_PSLLD128:
      icode = CODE_FOR_ashlv4si3;
      goto do_pshift;
    case IX86_BUILTIN_PSLLQ128:
      icode = CODE_FOR_ashlv2di3;
      goto do_pshift;
    case IX86_BUILTIN_PSRAW128:
      icode = CODE_FOR_ashrv8hi3;
      goto do_pshift;
    case IX86_BUILTIN_PSRAD128:
      icode = CODE_FOR_ashrv4si3;
      goto do_pshift;
    case IX86_BUILTIN_PSRLW128:
      icode = CODE_FOR_lshrv8hi3;
      goto do_pshift;
    case IX86_BUILTIN_PSRLD128:
      icode = CODE_FOR_lshrv4si3;
      goto do_pshift;
    case IX86_BUILTIN_PSRLQ128:
      icode = CODE_FOR_lshrv2di3;
      goto do_pshift;
    do_pshift:
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);

      tmode = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
        op0 = copy_to_reg (op0);

      op1 = simplify_gen_subreg (TImode, op1, GET_MODE (op1), 0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, TImode))
        op1 = copy_to_reg (op1);

      target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
      if (!pat)
        return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_PSLLDQI128:
    case IX86_BUILTIN_PSRLDQI128:
      icode = (fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
               : CODE_FOR_sse2_lshrti3);
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      tmode = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
        {
          op0 = copy_to_reg (op0);
          op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
        }
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
        {
          error ("shift must be an immediate");
          return const0_rtx;
        }
      target = gen_reg_rtx (V2DImode);
      pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0),
                             op0, op1);
      if (! pat)
        return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_FEMMS:
      emit_insn (gen_mmx_femms ());
      return NULL_RTX;

    case IX86_BUILTIN_PAVGUSB:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_uavgv8qi3, exp, target);

    case IX86_BUILTIN_PF2ID:
      return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2id, exp, target, 0);

    case IX86_BUILTIN_PFACC:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_haddv2sf3, exp, target);

    case IX86_BUILTIN_PFADD:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_addv2sf3, exp, target);

    case IX86_BUILTIN_PFCMPEQ:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_eqv2sf3, exp, target);

    case IX86_BUILTIN_PFCMPGE:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_gev2sf3, exp, target);

    case IX86_BUILTIN_PFCMPGT:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_gtv2sf3, exp, target);

    case IX86_BUILTIN_PFMAX:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_smaxv2sf3, exp, target);

    case IX86_BUILTIN_PFMIN:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_sminv2sf3, exp, target);

    case IX86_BUILTIN_PFMUL:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_mulv2sf3, exp, target);

    case IX86_BUILTIN_PFRCP:
      return ix86_expand_unop_builtin (CODE_FOR_mmx_rcpv2sf2, exp, target, 0);

    case IX86_BUILTIN_PFRCPIT1:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit1v2sf3, exp, target);

    case IX86_BUILTIN_PFRCPIT2:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit2v2sf3, exp, target);

    case IX86_BUILTIN_PFRSQIT1:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_rsqit1v2sf3, exp, target);

    case IX86_BUILTIN_PFRSQRT:
      return ix86_expand_unop_builtin (CODE_FOR_mmx_rsqrtv2sf2, exp, target, 0);

    case IX86_BUILTIN_PFSUB:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_subv2sf3, exp, target);

    case IX86_BUILTIN_PFSUBR:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_subrv2sf3, exp, target);

    case IX86_BUILTIN_PI2FD:
      return ix86_expand_unop_builtin (CODE_FOR_mmx_floatv2si2, exp, target, 0);

    case IX86_BUILTIN_PMULHRW:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_pmulhrwv4hi3, exp, target);

    case IX86_BUILTIN_PF2IW:
      return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2iw, exp, target, 0);

    case IX86_BUILTIN_PFNACC:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_hsubv2sf3, exp, target);

    case IX86_BUILTIN_PFPNACC:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_addsubv2sf3, exp, target);

    case IX86_BUILTIN_PI2FW:
      return ix86_expand_unop_builtin (CODE_FOR_mmx_pi2fw, exp, target, 0);

    case IX86_BUILTIN_PSWAPDSI:
      return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2si2, exp, target, 0);

    case IX86_BUILTIN_PSWAPDSF:
      return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2sf2, exp, target, 0);

    case IX86_BUILTIN_SQRTSD:
      return ix86_expand_unop1_builtin (CODE_FOR_sse2_vmsqrtv2df2, exp, target);
    case IX86_BUILTIN_LOADUPD:
      return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, exp, target, 1);
    case IX86_BUILTIN_STOREUPD:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, exp);

    case IX86_BUILTIN_MFENCE:
      emit_insn (gen_sse2_mfence ());
      return 0;
    case IX86_BUILTIN_LFENCE:
      emit_insn (gen_sse2_lfence ());
      return 0;

    case IX86_BUILTIN_CLFLUSH:
      arg0 = CALL_EXPR_ARG (exp, 0);
      op0 = expand_normal (arg0);
      icode = CODE_FOR_sse2_clflush;
      if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
        op0 = copy_to_mode_reg (Pmode, op0);

      emit_insn (gen_sse2_clflush (op0));
      return 0;

    case IX86_BUILTIN_MOVNTPD:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, exp);
    case IX86_BUILTIN_MOVNTDQ:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, exp);
    case IX86_BUILTIN_MOVNTI:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, exp);

    case IX86_BUILTIN_LOADDQU:
      return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, exp, target, 1);
    case IX86_BUILTIN_STOREDQU:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, exp);

    case IX86_BUILTIN_MONITOR:
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      arg2 = CALL_EXPR_ARG (exp, 2);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      op2 = expand_normal (arg2);
      if (!REG_P (op0))
        op0 = copy_to_mode_reg (Pmode, op0);
      if (!REG_P (op1))
        op1 = copy_to_mode_reg (SImode, op1);
      if (!REG_P (op2))
        op2 = copy_to_mode_reg (SImode, op2);
      if (!TARGET_64BIT)
        emit_insn (gen_sse3_monitor (op0, op1, op2));
      else
        emit_insn (gen_sse3_monitor64 (op0, op1, op2));
      return 0;

    case IX86_BUILTIN_MWAIT:
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      if (!REG_P (op0))
        op0 = copy_to_mode_reg (SImode, op0);
      if (!REG_P (op1))
        op1 = copy_to_mode_reg (SImode, op1);
      emit_insn (gen_sse3_mwait (op0, op1));
      return 0;

    case IX86_BUILTIN_LDDQU:
      return ix86_expand_unop_builtin (CODE_FOR_sse3_lddqu, exp,
                                       target, 1);

    case IX86_BUILTIN_PALIGNR:
    case IX86_BUILTIN_PALIGNR128:
      if (fcode == IX86_BUILTIN_PALIGNR)
        {
          icode = CODE_FOR_ssse3_palignrdi;
          mode = DImode;
        }
      else
        {
          icode = CODE_FOR_ssse3_palignrti;
          mode = V2DImode;
        }
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      arg2 = CALL_EXPR_ARG (exp, 2);
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[2].mode;
      mode3 = insn_data[icode].operand[3].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
        {
          op0 = copy_to_reg (op0);
          op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
        }
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
        {
          op1 = copy_to_reg (op1);
          op1 = simplify_gen_subreg (mode2, op1, GET_MODE (op1), 0);
        }
      if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
        {
          error ("shift must be an immediate");
          return const0_rtx;
        }
      target = gen_reg_rtx (mode);
      pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, mode, 0),
                             op0, op1, op2);
      if (! pat)
        return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_MOVNTSD:
      return ix86_expand_store_builtin (CODE_FOR_sse4a_vmmovntv2df, exp);

    case IX86_BUILTIN_MOVNTSS:
      return ix86_expand_store_builtin (CODE_FOR_sse4a_vmmovntv4sf, exp);

    case IX86_BUILTIN_INSERTQ:
    case IX86_BUILTIN_EXTRQ:
      icode = (fcode == IX86_BUILTIN_EXTRQ
               ? CODE_FOR_sse4a_extrq
               : CODE_FOR_sse4a_insertq);
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      tmode = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[2].mode;
      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
        op0 = copy_to_mode_reg (mode1, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
        op1 = copy_to_mode_reg (mode2, op1);
      if (optimize || target == 0
          || GET_MODE (target) != tmode
          || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
        target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
      if (! pat)
        return NULL_RTX;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_EXTRQI:
      icode = CODE_FOR_sse4a_extrqi;
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      arg2 = CALL_EXPR_ARG (exp, 2);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      op2 = expand_normal (arg2);
      tmode = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[2].mode;
      mode3 = insn_data[icode].operand[3].mode;
      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
        op0 = copy_to_mode_reg (mode1, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
        {
          error ("index mask must be an immediate");
          return gen_reg_rtx (tmode);
        }
      if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
        {
          error ("length mask must be an immediate");
          return gen_reg_rtx (tmode);
        }
      if (optimize || target == 0
          || GET_MODE (target) != tmode
          || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
        target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1, op2);
      if (! pat)
        return NULL_RTX;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_INSERTQI:
      icode = CODE_FOR_sse4a_insertqi;
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      arg2 = CALL_EXPR_ARG (exp, 2);
      arg3 = CALL_EXPR_ARG (exp, 3);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      op2 = expand_normal (arg2);
      op3 = expand_normal (arg3);
      tmode = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[2].mode;
      mode3 = insn_data[icode].operand[3].mode;
      mode4 = insn_data[icode].operand[4].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
        op0 = copy_to_mode_reg (mode1, op0);

      if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
        op1 = copy_to_mode_reg (mode2, op1);

      if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
        {
          error ("index mask must be an immediate");
          return gen_reg_rtx (tmode);
        }
      if (! (*insn_data[icode].operand[4].predicate) (op3, mode4))
        {
          error ("length mask must be an immediate");
          return gen_reg_rtx (tmode);
        }
      if (optimize || target == 0
          || GET_MODE (target) != tmode
          || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
        target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1, op2, op3);
      if (! pat)
        return NULL_RTX;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_VEC_INIT_V2SI:
    case IX86_BUILTIN_VEC_INIT_V4HI:
    case IX86_BUILTIN_VEC_INIT_V8QI:
      return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);

    case IX86_BUILTIN_VEC_EXT_V2DF:
    case IX86_BUILTIN_VEC_EXT_V2DI:
    case IX86_BUILTIN_VEC_EXT_V4SF:
    case IX86_BUILTIN_VEC_EXT_V4SI:
    case IX86_BUILTIN_VEC_EXT_V8HI:
    case IX86_BUILTIN_VEC_EXT_V2SI:
    case IX86_BUILTIN_VEC_EXT_V4HI:
      return ix86_expand_vec_ext_builtin (exp, target);

    case IX86_BUILTIN_VEC_SET_V8HI:
    case IX86_BUILTIN_VEC_SET_V4HI:
      return ix86_expand_vec_set_builtin (exp);

    default:
      break;
    }

  for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
    if (d->code == fcode)
      {
        /* Compares are treated specially.  */
        if (d->icode == CODE_FOR_sse_maskcmpv4sf3
            || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3
            || d->icode == CODE_FOR_sse2_maskcmpv2df3
            || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
          return ix86_expand_sse_compare (d, exp, target);

        return ix86_expand_binop_builtin (d->icode, exp, target);
      }

  for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
    if (d->code == fcode)
      return ix86_expand_unop_builtin (d->icode, exp, target, 0);

  for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
    if (d->code == fcode)
      return ix86_expand_sse_comi (d, exp, target);

  gcc_unreachable ();
}

/* Returns a function decl for a vectorized version of the builtin function
   with builtin function code FN and the result vector type TYPE, or NULL_TREE
   if it is not available.  */

static tree
ix86_builtin_vectorized_function (enum built_in_function fn, tree type_out,
                                  tree type_in)
{
  enum machine_mode in_mode, out_mode;
  int in_n, out_n;

  if (TREE_CODE (type_out) != VECTOR_TYPE
      || TREE_CODE (type_in) != VECTOR_TYPE)
    return NULL_TREE;

  out_mode = TYPE_MODE (TREE_TYPE (type_out));
  out_n = TYPE_VECTOR_SUBPARTS (type_out);
  in_mode = TYPE_MODE (TREE_TYPE (type_in));
  in_n = TYPE_VECTOR_SUBPARTS (type_in);

  switch (fn)
    {
    case BUILT_IN_SQRT:
      if (out_mode == DFmode && out_n == 2
          && in_mode == DFmode && in_n == 2)
        return ix86_builtins[IX86_BUILTIN_SQRTPD];
      break;

    case BUILT_IN_SQRTF:
      if (out_mode == SFmode && out_n == 4
          && in_mode == SFmode && in_n == 4)
        return ix86_builtins[IX86_BUILTIN_SQRTPS];
      break;

    case BUILT_IN_LRINTF:
      if (out_mode == SImode && out_n == 4
          && in_mode == SFmode && in_n == 4)
        return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
      break;

    default:
      ;
    }

  return NULL_TREE;
}
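
/* Illustrative example (not in the original source): given

       for (i = 0; i < n; i++) out[i] = sqrtf (in[i]);

   the vectorizer queries this hook with V4SFmode types and receives the
   IX86_BUILTIN_SQRTPS decl, folding four sqrtf calls into one sqrtps.  */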

/* Returns a decl of a function that implements conversion of the
   input vector of type TYPE, or NULL_TREE if it is not available.  */

static tree
ix86_builtin_conversion (enum tree_code code, tree type)
{
  if (TREE_CODE (type) != VECTOR_TYPE)
    return NULL_TREE;

  switch (code)
    {
    case FLOAT_EXPR:
      switch (TYPE_MODE (type))
        {
        case V4SImode:
          return ix86_builtins[IX86_BUILTIN_CVTDQ2PS];
        default:
          return NULL_TREE;
        }

    case FIX_TRUNC_EXPR:
      switch (TYPE_MODE (type))
        {
        case V4SImode:
          return ix86_builtins[IX86_BUILTIN_CVTTPS2DQ];
        default:
          return NULL_TREE;
        }

    default:
      return NULL_TREE;
    }
}
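
/* Illustrative example (not in the original source): when vectorizing

       for (i = 0; i < n; i++) f[i] = (float) s[i];

   the vectorizer asks for FLOAT_EXPR on a V4SImode vector and receives the
   cvtdq2ps builtin; the FIX_TRUNC_EXPR direction maps to cvttps2dq.  */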
18484 /* Store OPERAND to the memory after reload is completed. This means
18485 that we can't easily use assign_stack_local. */
18487 ix86_force_to_memory (enum machine_mode mode, rtx operand)
18491 gcc_assert (reload_completed);
18492 if (TARGET_RED_ZONE)
18494 result = gen_rtx_MEM (mode,
18495 gen_rtx_PLUS (Pmode,
18497 GEN_INT (-RED_ZONE_SIZE)));
18498 emit_move_insn (result, operand);
18500 else if (TARGET_64BIT)
18506 operand = gen_lowpart (DImode, operand);
18510 gen_rtx_SET (VOIDmode,
18511 gen_rtx_MEM (DImode,
18512 gen_rtx_PRE_DEC (DImode,
18513 stack_pointer_rtx)),
18517 gcc_unreachable ();
18519 result = gen_rtx_MEM (mode, stack_pointer_rtx);
18528 split_di (&operand, 1, operands, operands + 1);
18530 gen_rtx_SET (VOIDmode,
18531 gen_rtx_MEM (SImode,
18532 gen_rtx_PRE_DEC (Pmode,
18533 stack_pointer_rtx)),
18536 gen_rtx_SET (VOIDmode,
18537 gen_rtx_MEM (SImode,
18538 gen_rtx_PRE_DEC (Pmode,
18539 stack_pointer_rtx)),
18544 /* Store HImode values as SImode.  */
18545 operand = gen_lowpart (SImode, operand);
18549 gen_rtx_SET (VOIDmode,
18550 gen_rtx_MEM (GET_MODE (operand),
18551 gen_rtx_PRE_DEC (SImode,
18552 stack_pointer_rtx)),
18556 gcc_unreachable ();
18558 result = gen_rtx_MEM (mode, stack_pointer_rtx);
18563 /* Free the operand from memory.  */
18565 ix86_free_from_memory (enum machine_mode mode)
18567 if (!TARGET_RED_ZONE)
18571 if (mode == DImode || TARGET_64BIT)
18575 /* Use LEA to deallocate stack space.  In peephole2 it will be converted
18576 to a pop or add instruction if registers are available.  */
18577 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
18578 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
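/* Editor's illustrative sketch, not part of GCC: the two helpers above
   are meant to be used as a pair by post-reload splitters.  A
   hypothetical caller would look like

       rtx mem = ix86_force_to_memory (DImode, operands[1]);
       ... emit insns that read MEM instead of a register ...
       ix86_free_from_memory (DImode);

   ix86_force_to_memory either reuses the red zone or pushes the value
   onto the stack, and ix86_free_from_memory releases the temporary
   stack space again.  */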
18583 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
18584 QImode must go into class Q_REGS.
18585 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
18586 movdf to do mem-to-mem moves through integer regs. */
18588 ix86_preferred_reload_class (rtx x, enum reg_class class)
18590 enum machine_mode mode = GET_MODE (x);
18592 /* We're only allowed to return a subclass of CLASS. Many of the
18593 following checks fail for NO_REGS, so eliminate that early. */
18594 if (class == NO_REGS)
18597 /* All classes can load zeros. */
18598 if (x == CONST0_RTX (mode))
18601 /* Force constants into memory if we are loading a (nonzero) constant into
18602 an MMX or SSE register. This is because there are no MMX/SSE instructions
18603 to load from a constant. */
18605 && (MAYBE_MMX_CLASS_P (class) || MAYBE_SSE_CLASS_P (class)))
18608 /* Prefer SSE regs only if we can use them for math.  */
18609 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
18610 return SSE_CLASS_P (class) ? class : NO_REGS;
18612 /* Floating-point constants need more complex checks. */
18613 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
18615 /* General regs can load everything. */
18616 if (reg_class_subset_p (class, GENERAL_REGS))
18619 /* Floats can load 0 and 1 plus some others. Note that we eliminated
18620 zero above. We only want to wind up preferring 80387 registers if
18621 we plan on doing computation with them. */
18623 && standard_80387_constant_p (x))
18625 /* Limit class to non-sse. */
18626 if (class == FLOAT_SSE_REGS)
18628 if (class == FP_TOP_SSE_REGS)
18630 if (class == FP_SECOND_SSE_REGS)
18631 return FP_SECOND_REG;
18632 if (class == FLOAT_INT_REGS || class == FLOAT_REGS)
18639 /* Generally when we see PLUS here, it's the function invariant
18640 (plus soft-fp const_int), which can only be computed into general regs.  */
18642 if (GET_CODE (x) == PLUS)
18643 return reg_class_subset_p (class, GENERAL_REGS) ? class : NO_REGS;
18645 /* QImode constants are easy to load, but non-constant QImode data
18646 must go into Q_REGS. */
18647 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
18649 if (reg_class_subset_p (class, Q_REGS))
18651 if (reg_class_subset_p (Q_REGS, class))
18659 /* Discourage putting floating-point values in SSE registers unless
18660 SSE math is being used, and likewise for the 387 registers. */
18662 ix86_preferred_output_reload_class (rtx x, enum reg_class class)
18664 enum machine_mode mode = GET_MODE (x);
18666 /* Restrict the output reload class to the register bank that we are doing
18667 math on.  Rather than return a class that is not a subset of CLASS, reject
18668 this alternative entirely: if reload cannot do this, it will still use its choice.  */
18670 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
18671 return MAYBE_SSE_CLASS_P (class) ? SSE_REGS : NO_REGS;
18673 if (TARGET_80387 && SCALAR_FLOAT_MODE_P (mode))
18675 if (class == FP_TOP_SSE_REGS)
18677 else if (class == FP_SECOND_SSE_REGS)
18678 return FP_SECOND_REG;
18680 return FLOAT_CLASS_P (class) ? class : NO_REGS;
18686 /* If we are copying between general and FP registers, we need a memory
18687 location. The same is true for SSE and MMX registers.
18689 The macro can't work reliably when one of the CLASSES is a class containing
18690 registers from multiple units (SSE, MMX, integer).  We avoid this by never
18691 combining those units in single alternative in the machine description.
18692 Ensure that this constraint holds to avoid unexpected surprises.
18694 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
18695 enforce these sanity checks. */
18698 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
18699 enum machine_mode mode, int strict)
18701 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
18702 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
18703 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
18704 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
18705 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
18706 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
18708 gcc_assert (!strict);
18712 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
18715 /* ??? This is a lie.  We do have moves between mmx/general and between
18716 mmx/sse2.  But by saying we need secondary memory we discourage the
18717 register allocator from using the mmx registers unless needed.  */
18718 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
18721 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
18723 /* SSE1 doesn't have any direct moves from other classes. */
18727 /* If the target says that inter-unit moves are more expensive
18728 than moving through memory, then don't generate them. */
18729 if (!TARGET_INTER_UNIT_MOVES)
18732 /* Between SSE and general, we have moves no larger than word size. */
18733 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
18740 /* Return true if the registers in CLASS cannot represent the change from
18741 modes FROM to TO. */
18744 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
18745 enum reg_class class)
18750 /* x87 registers can't do subreg at all, as all values are reformatted
18751 to extended precision. */
18752 if (MAYBE_FLOAT_CLASS_P (class))
18755 if (MAYBE_SSE_CLASS_P (class) || MAYBE_MMX_CLASS_P (class))
18757 /* Vector registers do not support QI or HImode loads. If we don't
18758 disallow a change to these modes, reload will assume it's ok to
18759 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
18760 the vec_dupv4hi pattern. */
18761 if (GET_MODE_SIZE (from) < 4)
18764 /* Vector registers do not support subreg with nonzero offsets, which
18765 are otherwise valid for integer registers. Since we can't see
18766 whether we have a nonzero offset from here, prohibit all
18767 nonparadoxical subregs changing size. */
18768 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
18775 /* Return the cost of moving data from a register in class CLASS1 to
18776 one in class CLASS2.
18778 It is not required that the cost always equal 2 when FROM is the same as TO;
18779 on some machines it is expensive to move between registers if they are not
18780 general registers. */
18783 ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
18784 enum reg_class class2)
18786 /* In case we require secondary memory, compute the cost of the store
18787 followed by the load.  To avoid bad register allocation choices, this
18788 needs to be *at least* as high as the symmetric MEMORY_MOVE_COST.  */
18790 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
18794 cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
18795 MEMORY_MOVE_COST (mode, class1, 1));
18796 cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
18797 MEMORY_MOVE_COST (mode, class2, 1));
18799 /* When copying from a general purpose register we may emit multiple
18800 stores followed by a single load, causing a memory size mismatch stall.
18801 Count this as an arbitrarily high cost of 20.  */
18802 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
18805 /* In the case of FP/MMX moves, the registers actually overlap, and we
18806 have to switch modes in order to treat them differently. */
18807 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
18808 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
18814 /* Moves between the SSE/MMX and integer units are expensive.  */
18815 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
18816 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
18817 return ix86_cost->mmxsse_to_integer;
18818 if (MAYBE_FLOAT_CLASS_P (class1))
18819 return ix86_cost->fp_move;
18820 if (MAYBE_SSE_CLASS_P (class1))
18821 return ix86_cost->sse_move;
18822 if (MAYBE_MMX_CLASS_P (class1))
18823 return ix86_cost->mmx_move;
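/* Editor's illustrative sketch, not part of GCC: the cost shape computed
   above when a move has to bounce through memory, restated on plain
   integers.  The parameters stand in for MEMORY_MOVE_COST values and
   CLASS_MAX_NREGS counts; the helper name is hypothetical.  */
static int
sketch_secondary_memory_cost (int store_cost, int load_cost,
			      int src_nregs, int dst_nregs)
{
  int cost = store_cost + load_cost;

  /* Storing a value as several pieces and reloading it as one piece
     causes a memory size mismatch stall; price that prohibitively.  */
  if (src_nregs > dst_nregs)
    cost += 20;
  return cost;
}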
18827 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
18830 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
18832 /* The flags registers, and only the flags registers, can hold CCmode values.  */
18833 if (CC_REGNO_P (regno))
18834 return GET_MODE_CLASS (mode) == MODE_CC;
18835 if (GET_MODE_CLASS (mode) == MODE_CC
18836 || GET_MODE_CLASS (mode) == MODE_RANDOM
18837 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
18839 if (FP_REGNO_P (regno))
18840 return VALID_FP_MODE_P (mode);
18841 if (SSE_REGNO_P (regno))
18843 /* We implement the move patterns for all vector modes into and
18844 out of SSE registers, even when no operation instructions are available.  */
18846 return (VALID_SSE_REG_MODE (mode)
18847 || VALID_SSE2_REG_MODE (mode)
18848 || VALID_MMX_REG_MODE (mode)
18849 || VALID_MMX_REG_MODE_3DNOW (mode));
18851 if (MMX_REGNO_P (regno))
18853 /* We implement the move patterns for 3DNOW modes even in MMX mode,
18854 so if the register is available at all, then we can move data of
18855 the given mode into or out of it. */
18856 return (VALID_MMX_REG_MODE (mode)
18857 || VALID_MMX_REG_MODE_3DNOW (mode));
18860 if (mode == QImode)
18862 /* Take care with QImode values - they can live in non-QI regs,
18863 but then they cause partial register stalls.  */
18864 if (regno < 4 || TARGET_64BIT)
18866 if (!TARGET_PARTIAL_REG_STALL)
18868 return reload_in_progress || reload_completed;
18870 /* We handle both integers and floats in the general purpose registers.  */
18871 else if (VALID_INT_MODE_P (mode))
18873 else if (VALID_FP_MODE_P (mode))
18875 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
18876 on to use that value in smaller contexts, this can easily force a
18877 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
18878 supporting DImode, allow it. */
18879 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
18885 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
18886 tieable integer mode. */
18889 ix86_tieable_integer_mode_p (enum machine_mode mode)
18898 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
18901 return TARGET_64BIT;
18908 /* Return true if MODE1 is accessible in a register that can hold MODE2
18909 without copying. That is, all register classes that can hold MODE2
18910 can also hold MODE1. */
18913 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
18915 if (mode1 == mode2)
18918 if (ix86_tieable_integer_mode_p (mode1)
18919 && ix86_tieable_integer_mode_p (mode2))
18922 /* MODE2 being XFmode implies fp stack or general regs, which means we
18923 can tie any smaller floating point modes to it. Note that we do not
18924 tie this with TFmode. */
18925 if (mode2 == XFmode)
18926 return mode1 == SFmode || mode1 == DFmode;
18928 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
18929 that we can tie it with SFmode. */
18930 if (mode2 == DFmode)
18931 return mode1 == SFmode;
18933 /* If MODE2 is only appropriate for an SSE register, then tie with
18934 any other mode acceptable to SSE registers. */
18935 if (GET_MODE_SIZE (mode2) == 16
18936 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
18937 return (GET_MODE_SIZE (mode1) == 16
18938 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
18940 /* If MODE2 is appropriate for an MMX register, then tie
18941 with any other mode acceptable to MMX registers. */
18942 if (GET_MODE_SIZE (mode2) == 8
18943 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
18944 return (GET_MODE_SIZE (mode1) == 8
18945 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
18950 /* Return the cost of moving data of mode M between a
18951 register and memory. A value of 2 is the default; this cost is
18952 relative to those in `REGISTER_MOVE_COST'.
18954 If moving between registers and memory is more expensive than
18955 between two registers, you should define this macro to express the
18958 relative cost.  We also model the increased cost of moving QImode registers in non-Q_REGS classes.  */
18962 ix86_memory_move_cost (enum machine_mode mode, enum reg_class class, int in)
18964 if (FLOAT_CLASS_P (class))
18981 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
18983 if (SSE_CLASS_P (class))
18986 switch (GET_MODE_SIZE (mode))
19000 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
19002 if (MMX_CLASS_P (class))
19005 switch (GET_MODE_SIZE (mode))
19016 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
19018 switch (GET_MODE_SIZE (mode))
19022 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
19023 : ix86_cost->movzbl_load);
19025 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
19026 : ix86_cost->int_store[0] + 4);
19029 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
19031 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
19032 if (mode == TFmode)
19034 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
19035 * (((int) GET_MODE_SIZE (mode)
19036 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
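/* Editor's illustrative sketch, not part of GCC: the multi-word fallback
   above in plain C - one per-word move, with the size rounded up to
   whole words.  The helper name is hypothetical.  */
static int
sketch_multiword_move_cost (int per_word_cost, int mode_size, int word_size)
{
  int n_words = (mode_size + word_size - 1) / word_size;  /* ceiling */
  return per_word_cost * n_words;
}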
19040 /* Compute a (partial) cost for rtx X. Return true if the complete
19041 cost has been computed, and false if subexpressions should be
19042 scanned. In either case, *TOTAL contains the cost result. */
19045 ix86_rtx_costs (rtx x, int code, int outer_code, int *total)
19047 enum machine_mode mode = GET_MODE (x);
19055 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
19057 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
19059 else if (flag_pic && SYMBOLIC_CONST (x)
19061 || (GET_CODE (x) != LABEL_REF
19062 && (GET_CODE (x) != SYMBOL_REF
19063 || !SYMBOL_REF_LOCAL_P (x)))))
19070 if (mode == VOIDmode)
19073 switch (standard_80387_constant_p (x))
19078 default: /* Other constants */
19083 /* Start with (MEM (SYMBOL_REF)), since that's where
19084 it'll probably end up. Add a penalty for size. */
19085 *total = (COSTS_N_INSNS (1)
19086 + (flag_pic != 0 && !TARGET_64BIT)
19087 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
19093 /* The zero extension is often completely free on x86_64, so make
19094 it as cheap as possible.  */
19095 if (TARGET_64BIT && mode == DImode
19096 && GET_MODE (XEXP (x, 0)) == SImode)
19098 else if (TARGET_ZERO_EXTEND_WITH_AND)
19099 *total = ix86_cost->add;
19101 *total = ix86_cost->movzx;
19105 *total = ix86_cost->movsx;
19109 if (CONST_INT_P (XEXP (x, 1))
19110 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
19112 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
19115 *total = ix86_cost->add;
19118 if ((value == 2 || value == 3)
19119 && ix86_cost->lea <= ix86_cost->shift_const)
19121 *total = ix86_cost->lea;
19131 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
19133 if (CONST_INT_P (XEXP (x, 1)))
19135 if (INTVAL (XEXP (x, 1)) > 32)
19136 *total = ix86_cost->shift_const + COSTS_N_INSNS (2);
19138 *total = ix86_cost->shift_const * 2;
19142 if (GET_CODE (XEXP (x, 1)) == AND)
19143 *total = ix86_cost->shift_var * 2;
19145 *total = ix86_cost->shift_var * 6 + COSTS_N_INSNS (2);
19150 if (CONST_INT_P (XEXP (x, 1)))
19151 *total = ix86_cost->shift_const;
19153 *total = ix86_cost->shift_var;
19158 if (FLOAT_MODE_P (mode))
19160 *total = ix86_cost->fmul;
19165 rtx op0 = XEXP (x, 0);
19166 rtx op1 = XEXP (x, 1);
19168 if (CONST_INT_P (XEXP (x, 1)))
19170 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
19171 for (nbits = 0; value != 0; value &= value - 1)
19175 /* This is arbitrary. */
19178 /* Compute costs correctly for widening multiplication. */
19179 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
19180 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
19181 == GET_MODE_SIZE (mode))
19183 int is_mulwiden = 0;
19184 enum machine_mode inner_mode = GET_MODE (op0);
19186 if (GET_CODE (op0) == GET_CODE (op1))
19187 is_mulwiden = 1, op1 = XEXP (op1, 0);
19188 else if (CONST_INT_P (op1))
19190 if (GET_CODE (op0) == SIGN_EXTEND)
19191 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
19194 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
19198 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
19201 *total = (ix86_cost->mult_init[MODE_INDEX (mode)]
19202 + nbits * ix86_cost->mult_bit
19203 + rtx_cost (op0, outer_code) + rtx_cost (op1, outer_code));
19212 if (FLOAT_MODE_P (mode))
19213 *total = ix86_cost->fdiv;
19215 *total = ix86_cost->divide[MODE_INDEX (mode)];
19219 if (FLOAT_MODE_P (mode))
19220 *total = ix86_cost->fadd;
19221 else if (GET_MODE_CLASS (mode) == MODE_INT
19222 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
19224 if (GET_CODE (XEXP (x, 0)) == PLUS
19225 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
19226 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
19227 && CONSTANT_P (XEXP (x, 1)))
19229 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
19230 if (val == 2 || val == 4 || val == 8)
19232 *total = ix86_cost->lea;
19233 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
19234 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
19236 *total += rtx_cost (XEXP (x, 1), outer_code);
19240 else if (GET_CODE (XEXP (x, 0)) == MULT
19241 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
19243 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
19244 if (val == 2 || val == 4 || val == 8)
19246 *total = ix86_cost->lea;
19247 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
19248 *total += rtx_cost (XEXP (x, 1), outer_code);
19252 else if (GET_CODE (XEXP (x, 0)) == PLUS)
19254 *total = ix86_cost->lea;
19255 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
19256 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
19257 *total += rtx_cost (XEXP (x, 1), outer_code);
19264 if (FLOAT_MODE_P (mode))
19266 *total = ix86_cost->fadd;
19274 if (!TARGET_64BIT && mode == DImode)
19276 *total = (ix86_cost->add * 2
19277 + (rtx_cost (XEXP (x, 0), outer_code)
19278 << (GET_MODE (XEXP (x, 0)) != DImode))
19279 + (rtx_cost (XEXP (x, 1), outer_code)
19280 << (GET_MODE (XEXP (x, 1)) != DImode)));
19286 if (FLOAT_MODE_P (mode))
19288 *total = ix86_cost->fchs;
19294 if (!TARGET_64BIT && mode == DImode)
19295 *total = ix86_cost->add * 2;
19297 *total = ix86_cost->add;
19301 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
19302 && XEXP (XEXP (x, 0), 1) == const1_rtx
19303 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
19304 && XEXP (x, 1) == const0_rtx)
19306 /* This kind of construct is implemented using test[bwl].
19307 Treat it as if we had an AND. */
19308 *total = (ix86_cost->add
19309 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code)
19310 + rtx_cost (const1_rtx, outer_code));
19316 if (!TARGET_SSE_MATH
19318 || (mode == DFmode && !TARGET_SSE2))
19323 if (FLOAT_MODE_P (mode))
19324 *total = ix86_cost->fabs;
19328 if (FLOAT_MODE_P (mode))
19329 *total = ix86_cost->fsqrt;
19333 if (XINT (x, 1) == UNSPEC_TP)
19344 static int current_machopic_label_num;
19346 /* Given a symbol name and its associated stub, write out the
19347 definition of the stub. */
19350 machopic_output_stub (FILE *file, const char *symb, const char *stub)
19352 unsigned int length;
19353 char *binder_name, *symbol_name, lazy_ptr_name[32];
19354 int label = ++current_machopic_label_num;
19356 /* For 64-bit we shouldn't get here. */
19357 gcc_assert (!TARGET_64BIT);
19359 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
19360 symb = (*targetm.strip_name_encoding) (symb);
19362 length = strlen (stub);
19363 binder_name = alloca (length + 32);
19364 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
19366 length = strlen (symb);
19367 symbol_name = alloca (length + 32);
19368 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
19370 sprintf (lazy_ptr_name, "L%d$lz", label);
19373 switch_to_section (darwin_sections[machopic_picsymbol_stub_section]);
19375 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
19377 fprintf (file, "%s:\n", stub);
19378 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
19382 fprintf (file, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label, label);
19383 fprintf (file, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
19384 fprintf (file, "\tjmp\t*%%edx\n");
19387 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
19389 fprintf (file, "%s:\n", binder_name);
19393 fprintf (file, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
19394 fprintf (file, "\tpushl\t%%eax\n");
19397 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
19399 fprintf (file, "\tjmp\tdyld_stub_binding_helper\n");
19401 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
19402 fprintf (file, "%s:\n", lazy_ptr_name);
19403 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
19404 fprintf (file, "\t.long %s\n", binder_name);
19408 darwin_x86_file_end (void)
19410 darwin_file_end ();
19413 #endif /* TARGET_MACHO */
19415 /* Order the registers for the register allocator.  */
19418 x86_order_regs_for_local_alloc (void)
19423 /* First allocate the local general purpose registers. */
19424 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
19425 if (GENERAL_REGNO_P (i) && call_used_regs[i])
19426 reg_alloc_order [pos++] = i;
19428 /* Global general purpose registers. */
19429 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
19430 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
19431 reg_alloc_order [pos++] = i;
19433 /* x87 registers come first in case we are doing FP math using them.  */
19435 if (!TARGET_SSE_MATH)
19436 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
19437 reg_alloc_order [pos++] = i;
19439 /* SSE registers. */
19440 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
19441 reg_alloc_order [pos++] = i;
19442 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
19443 reg_alloc_order [pos++] = i;
19445 /* x87 registers. */
19446 if (TARGET_SSE_MATH)
19447 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
19448 reg_alloc_order [pos++] = i;
19450 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
19451 reg_alloc_order [pos++] = i;
19453 /* Initialize the rest of the array, as we do not allocate some registers at all.  */
19455 while (pos < FIRST_PSEUDO_REGISTER)
19456 reg_alloc_order [pos++] = 0;
19459 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
19460 struct attribute_spec.handler. */
19462 ix86_handle_struct_attribute (tree *node, tree name,
19463 tree args ATTRIBUTE_UNUSED,
19464 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
19467 if (DECL_P (*node))
19469 if (TREE_CODE (*node) == TYPE_DECL)
19470 type = &TREE_TYPE (*node);
19475 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
19476 || TREE_CODE (*type) == UNION_TYPE)))
19478 warning (OPT_Wattributes, "%qs attribute ignored",
19479 IDENTIFIER_POINTER (name));
19480 *no_add_attrs = true;
19483 else if ((is_attribute_p ("ms_struct", name)
19484 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
19485 || ((is_attribute_p ("gcc_struct", name)
19486 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
19488 warning (OPT_Wattributes, "%qs incompatible attribute ignored",
19489 IDENTIFIER_POINTER (name));
19490 *no_add_attrs = true;
19497 ix86_ms_bitfield_layout_p (tree record_type)
19499 return (TARGET_MS_BITFIELD_LAYOUT &&
19500 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
19501 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
19504 /* Returns an expression indicating where the this parameter is
19505 located on entry to the FUNCTION. */
19508 x86_this_parameter (tree function)
19510 tree type = TREE_TYPE (function);
19514 int n = aggregate_value_p (TREE_TYPE (type), type) != 0;
19515 return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
19518 if (ix86_function_regparm (type, function) > 0)
19522 parm = TYPE_ARG_TYPES (type);
19523 /* Figure out whether or not the function has a variable number of arguments.  */
19525 for (; parm; parm = TREE_CHAIN (parm))
19526 if (TREE_VALUE (parm) == void_type_node)
19528 /* If not, the this parameter is in the first argument. */
19532 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
19534 return gen_rtx_REG (SImode, regno);
19538 if (aggregate_value_p (TREE_TYPE (type), type))
19539 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
19541 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
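/* Editor's note, summarizing the cases above: on 64-bit the this
   pointer arrives in the first or second integer argument register,
   depending on whether a hidden aggregate-return pointer precedes it;
   with regparm or fastcall it can arrive in a register on 32-bit too;
   otherwise it is the first stack argument, at 4(%esp), or at 8(%esp)
   when an aggregate-return pointer is pushed first.  */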
19544 /* Determine whether x86_output_mi_thunk can succeed. */
19547 x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED,
19548 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
19549 HOST_WIDE_INT vcall_offset, tree function)
19551 /* 64-bit can handle anything. */
19555 /* For 32-bit, everything's fine if we have one free register. */
19556 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
19559 /* Need a free register for vcall_offset. */
19563 /* Need a free register for GOT references. */
19564 if (flag_pic && !(*targetm.binds_local_p) (function))
19567 /* Otherwise ok. */
19571 /* Output the assembler code for a thunk function. THUNK_DECL is the
19572 declaration for the thunk function itself, FUNCTION is the decl for
19573 the target function. DELTA is an immediate constant offset to be
19574 added to THIS. If VCALL_OFFSET is nonzero, the word at
19575 *(*this + vcall_offset) should be added to THIS. */
19578 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
19579 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
19580 HOST_WIDE_INT vcall_offset, tree function)
19583 rtx this = x86_this_parameter (function);
19586 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
19587 pull it in now and let DELTA benefit. */
19590 else if (vcall_offset)
19592 /* Put the this parameter into %eax. */
19594 xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
19595 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
19598 this_reg = NULL_RTX;
19600 /* Adjust the this parameter by a fixed constant. */
19603 xops[0] = GEN_INT (delta);
19604 xops[1] = this_reg ? this_reg : this;
19607 if (!x86_64_general_operand (xops[0], DImode))
19609 tmp = gen_rtx_REG (DImode, R10_REG);
19611 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
19615 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
19618 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
19621 /* Adjust the this parameter by a value stored in the vtable. */
19625 tmp = gen_rtx_REG (DImode, R10_REG);
19628 int tmp_regno = 2 /* ECX */;
19629 if (lookup_attribute ("fastcall",
19630 TYPE_ATTRIBUTES (TREE_TYPE (function))))
19631 tmp_regno = 0 /* EAX */;
19632 tmp = gen_rtx_REG (SImode, tmp_regno);
19635 xops[0] = gen_rtx_MEM (Pmode, this_reg);
19638 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
19640 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
19642 /* Adjust the this parameter. */
19643 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
19644 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
19646 rtx tmp2 = gen_rtx_REG (DImode, R11_REG);
19647 xops[0] = GEN_INT (vcall_offset);
19649 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
19650 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
19652 xops[1] = this_reg;
19654 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
19656 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
19659 /* If necessary, drop THIS back to its stack slot. */
19660 if (this_reg && this_reg != this)
19662 xops[0] = this_reg;
19664 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
19667 xops[0] = XEXP (DECL_RTL (function), 0);
19670 if (!flag_pic || (*targetm.binds_local_p) (function))
19671 output_asm_insn ("jmp\t%P0", xops);
19674 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
19675 tmp = gen_rtx_CONST (Pmode, tmp);
19676 tmp = gen_rtx_MEM (QImode, tmp);
19678 output_asm_insn ("jmp\t%A0", xops);
19683 if (!flag_pic || (*targetm.binds_local_p) (function))
19684 output_asm_insn ("jmp\t%P0", xops);
19689 rtx sym_ref = XEXP (DECL_RTL (function), 0);
19690 tmp = (gen_rtx_SYMBOL_REF
19692 machopic_indirection_name (sym_ref, /*stub_p=*/true)));
19693 tmp = gen_rtx_MEM (QImode, tmp);
19695 output_asm_insn ("jmp\t%0", xops);
19698 #endif /* TARGET_MACHO */
19700 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
19701 output_set_got (tmp, NULL_RTX);
19704 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
19705 output_asm_insn ("jmp\t{*}%1", xops);
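/* Editor's note, not from the GCC sources: in C-like pseudocode the
   thunk emitted above computes

       this += delta;
       if (vcall_offset)
	 this += *(ptrdiff_t *)(*(char **)this + vcall_offset);
       goto function;

   i.e. a fixed adjustment of the this pointer, an optional adjustment
   fetched from the vtable, then a tail jump to the real method.  */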
19711 x86_file_start (void)
19713 default_file_start ();
19715 darwin_file_start ();
19717 if (X86_FILE_START_VERSION_DIRECTIVE)
19718 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
19719 if (X86_FILE_START_FLTUSED)
19720 fputs ("\t.global\t__fltused\n", asm_out_file);
19721 if (ix86_asm_dialect == ASM_INTEL)
19722 fputs ("\t.intel_syntax\n", asm_out_file);
19726 x86_field_alignment (tree field, int computed)
19728 enum machine_mode mode;
19729 tree type = TREE_TYPE (field);
19731 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
19733 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
19734 ? get_inner_array_type (type) : type);
19735 if (mode == DFmode || mode == DCmode
19736 || GET_MODE_CLASS (mode) == MODE_INT
19737 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
19738 return MIN (32, computed);
19742 /* Output assembler code to FILE to increment profiler label # LABELNO
19743 for profiling a function entry. */
19745 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
19750 #ifndef NO_PROFILE_COUNTERS
19751 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
19753 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
19757 #ifndef NO_PROFILE_COUNTERS
19758 fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno);
19760 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
19764 #ifndef NO_PROFILE_COUNTERS
19765 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
19766 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
19768 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
19772 #ifndef NO_PROFILE_COUNTERS
19773 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
19774 PROFILE_COUNT_REGISTER);
19776 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
19780 /* We don't have exact information about the insn sizes, but we may assume
19781 quite safely that we are informed about all 1 byte insns and memory
19782 address sizes.  This is enough to eliminate unnecessary padding in most cases.  */
19786 min_insn_size (rtx insn)
19790 if (!INSN_P (insn) || !active_insn_p (insn))
19793 /* Discard alignments we've emitted, and jump instructions.  */
19794 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
19795 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
19798 && (GET_CODE (PATTERN (insn)) == ADDR_VEC
19799 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
19802 /* Important case - calls are always 5 bytes.
19803 It is common to have many calls in a row.  */
19805 && symbolic_reference_mentioned_p (PATTERN (insn))
19806 && !SIBLING_CALL_P (insn))
19808 if (get_attr_length (insn) <= 1)
19811 /* For normal instructions we may rely on the sizes of addresses
19812 and the presence of a symbol to require 4 bytes of encoding.
19813 This is not the case for jumps, where references are PC relative.  */
19814 if (!JUMP_P (insn))
19816 l = get_attr_length_address (insn);
19817 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
19826 /* The AMD K8 core mispredicts jumps when there are more than 3 jumps in a 16 byte window.  */
19830 ix86_avoid_jump_misspredicts (void)
19832 rtx insn, start = get_insns ();
19833 int nbytes = 0, njumps = 0;
19836 /* Look for all minimal intervals of instructions containing 4 jumps.
19837 The intervals are bounded by START and INSN.  NBYTES is the total
19838 size of the instructions in the interval, including INSN and not including
19839 START.  When NBYTES is smaller than 16, it is possible
19840 that the end of START and INSN end up in the same 16-byte page.
19842 The smallest offset in the page at which INSN can start is the case where START
19843 ends at offset 0.  The offset of INSN is then NBYTES - sizeof (INSN).
19844 We add a p2align to the 16-byte window with maxskip 17 - NBYTES + sizeof (INSN).  */
19846 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
19849 nbytes += min_insn_size (insn);
19851 fprintf (dump_file, "Insn %i estimated to %i bytes\n",
19852 INSN_UID (insn), min_insn_size (insn));
19854 && GET_CODE (PATTERN (insn)) != ADDR_VEC
19855 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
19863 start = NEXT_INSN (start);
19864 if ((JUMP_P (start)
19865 && GET_CODE (PATTERN (start)) != ADDR_VEC
19866 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
19868 njumps--, isjump = 1;
19871 nbytes -= min_insn_size (start);
19873 gcc_assert (njumps >= 0);
19875 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
19876 INSN_UID (start), INSN_UID (insn), nbytes);
19878 if (njumps == 3 && isjump && nbytes < 16)
19880 int padsize = 15 - nbytes + min_insn_size (insn);
19883 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
19884 INSN_UID (insn), padsize);
19885 emit_insn_before (gen_align (GEN_INT (padsize)), insn);
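/* Editor's illustrative sketch, not part of GCC: the sliding-window scan
   above, restated over plain arrays.  size[i] is the estimated byte size
   of insn i and jump[i] is nonzero for jumps; the return value counts the
   positions where the real pass would emit padding.  Hypothetical helper,
   for exposition only.  */
static int
sketch_count_paddings (const int *size, const int *jump, int n)
{
  int start = 0, nbytes = 0, njumps = 0, hits = 0, i;

  for (i = 0; i < n; i++)
    {
      int removed_jump = 0;

      nbytes += size[i];
      if (!jump[i])
	continue;
      njumps++;

      /* Shrink the window from the left until at most three jumps
	 (counting the current one) remain inside it.  */
      while (njumps > 3)
	{
	  nbytes -= size[start];
	  if (jump[start])
	    njumps--, removed_jump = 1;
	  start++;
	}

      /* A fourth jump was pushed out and the remaining three fit in
	 fewer than 16 bytes: all four could share one 16-byte fetch
	 window, so the real pass pads before insn i.  */
      if (removed_jump && njumps == 3 && nbytes < 16)
	hits++;
    }
  return hits;
}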
19890 /* AMD Athlon works faster
19891 when RET is not the destination of a conditional jump or directly preceded
19892 by another jump instruction.  We avoid the penalty by inserting a NOP just
19893 before the RET instructions in such cases.  */
19895 ix86_pad_returns (void)
19900 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
19902 basic_block bb = e->src;
19903 rtx ret = BB_END (bb);
19905 bool replace = false;
19907 if (!JUMP_P (ret) || GET_CODE (PATTERN (ret)) != RETURN
19908 || !maybe_hot_bb_p (bb))
19910 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
19911 if (active_insn_p (prev) || LABEL_P (prev))
19913 if (prev && LABEL_P (prev))
19918 FOR_EACH_EDGE (e, ei, bb->preds)
19919 if (EDGE_FREQUENCY (e) && e->src->index >= 0
19920 && !(e->flags & EDGE_FALLTHRU))
19925 prev = prev_active_insn (ret);
19927 && ((JUMP_P (prev) && any_condjump_p (prev))
19930 /* Empty functions get a branch mispredict even when the jump destination
19931 is not visible to us.  */
19932 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
19937 emit_insn_before (gen_return_internal_long (), ret);
19943 /* Implement machine specific optimizations.  We implement padding of returns
19944 for K8 CPUs and a pass to avoid 4 jumps in a single 16 byte window.  */
19948 if (TARGET_PAD_RETURNS && optimize && !optimize_size)
19949 ix86_pad_returns ();
19950 if (TARGET_FOUR_JUMP_LIMIT && optimize && !optimize_size)
19951 ix86_avoid_jump_misspredicts ();
19954 /* Return nonzero when a QImode register that must be represented via a REX prefix is used.  */
19957 x86_extended_QIreg_mentioned_p (rtx insn)
19960 extract_insn_cached (insn);
19961 for (i = 0; i < recog_data.n_operands; i++)
19962 if (REG_P (recog_data.operand[i])
19963 && REGNO (recog_data.operand[i]) >= 4)
19968 /* Return nonzero when P points to a register encoded via a REX prefix.
19969 Called via for_each_rtx.  */
19971 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
19973 unsigned int regno;
19976 regno = REGNO (*p);
19977 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
19980 /* Return true when INSN mentions a register that must be encoded using a REX prefix.  */
19983 x86_extended_reg_mentioned_p (rtx insn)
19985 return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
19988 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
19989 optabs would emit if we didn't have TFmode patterns. */
19992 x86_emit_floatuns (rtx operands[2])
19994 rtx neglab, donelab, i0, i1, f0, in, out;
19995 enum machine_mode mode, inmode;
19997 inmode = GET_MODE (operands[1]);
19998 gcc_assert (inmode == SImode || inmode == DImode);
20001 in = force_reg (inmode, operands[1]);
20002 mode = GET_MODE (out);
20003 neglab = gen_label_rtx ();
20004 donelab = gen_label_rtx ();
20005 f0 = gen_reg_rtx (mode);
20007 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
20009 expand_float (out, in, 0);
20011 emit_jump_insn (gen_jump (donelab));
20014 emit_label (neglab);
20016 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
20018 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
20020 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
20022 expand_float (f0, i0, 0);
20024 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
20026 emit_label (donelab);
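/* Editor's illustrative sketch, not part of GCC: the conversion emitted
   above, written as plain C for a 64-bit input.  When the sign bit is
   set, the value is halved with the shifted-out bit OR-ed back in as a
   sticky bit (so the final rounding is still correct), converted as a
   signed number, and then doubled.  */
static double
sketch_unsigned_to_double (unsigned long long u)
{
  unsigned long long half;

  if ((long long) u >= 0)
    return (double) (long long) u;	/* The signed convert suffices.  */

  half = (u >> 1) | (u & 1);		/* Halve, keeping the low bit sticky.  */
  return 2.0 * (double) (long long) half;
}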
20029 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
20030 with all elements equal to VAR. Return true if successful. */
20033 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
20034 rtx target, rtx val)
20036 enum machine_mode smode, wsmode, wvmode;
20051 val = force_reg (GET_MODE_INNER (mode), val);
20052 x = gen_rtx_VEC_DUPLICATE (mode, val);
20053 emit_insn (gen_rtx_SET (VOIDmode, target, x));
20059 if (TARGET_SSE || TARGET_3DNOW_A)
20061 val = gen_lowpart (SImode, val);
20062 x = gen_rtx_TRUNCATE (HImode, val);
20063 x = gen_rtx_VEC_DUPLICATE (mode, x);
20064 emit_insn (gen_rtx_SET (VOIDmode, target, x));
20086 /* Extend HImode to SImode using a paradoxical SUBREG. */
20087 tmp1 = gen_reg_rtx (SImode);
20088 emit_move_insn (tmp1, gen_lowpart (SImode, val));
20089 /* Insert the SImode value as low element of V4SImode vector. */
20090 tmp2 = gen_reg_rtx (V4SImode);
20091 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
20092 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
20093 CONST0_RTX (V4SImode),
20095 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
20096 /* Cast the V4SImode vector back to a V8HImode vector. */
20097 tmp1 = gen_reg_rtx (V8HImode);
20098 emit_move_insn (tmp1, gen_lowpart (V8HImode, tmp2));
20099 /* Duplicate the low short through the whole low SImode word. */
20100 emit_insn (gen_sse2_punpcklwd (tmp1, tmp1, tmp1));
20101 /* Cast the V8HImode vector back to a V4SImode vector. */
20102 tmp2 = gen_reg_rtx (V4SImode);
20103 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
20104 /* Replicate the low element of the V4SImode vector. */
20105 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
20106 /* Cast the V4SImode vector back to V8HImode, and store in target.  */
20107 emit_move_insn (target, gen_lowpart (V8HImode, tmp2));
20118 /* Extend QImode to SImode using a paradoxical SUBREG. */
20119 tmp1 = gen_reg_rtx (SImode);
20120 emit_move_insn (tmp1, gen_lowpart (SImode, val));
20121 /* Insert the SImode value as low element of V4SImode vector. */
20122 tmp2 = gen_reg_rtx (V4SImode);
20123 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
20124 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
20125 CONST0_RTX (V4SImode),
20127 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
20128 /* Cast the V4SImode vector back to a V16QImode vector. */
20129 tmp1 = gen_reg_rtx (V16QImode);
20130 emit_move_insn (tmp1, gen_lowpart (V16QImode, tmp2));
20131 /* Duplicate the low byte through the whole low SImode word. */
20132 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
20133 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
20134 /* Cast the V16QImode vector back to a V4SImode vector. */
20135 tmp2 = gen_reg_rtx (V4SImode);
20136 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
20137 /* Replicate the low element of the V4SImode vector. */
20138 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
20139 /* Cast the V4SImode vector back to V16QImode, and store in target.  */
20140 emit_move_insn (target, gen_lowpart (V16QImode, tmp2));
20148 /* Replicate the value once into the next wider mode and recurse. */
20149 val = convert_modes (wsmode, smode, val, true);
20150 x = expand_simple_binop (wsmode, ASHIFT, val,
20151 GEN_INT (GET_MODE_BITSIZE (smode)),
20152 NULL_RTX, 1, OPTAB_LIB_WIDEN);
20153 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
20155 x = gen_reg_rtx (wvmode);
20156 if (!ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val))
20157 gcc_unreachable ();
20158 emit_move_insn (target, gen_lowpart (mode, x));
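/* Editor's illustrative sketch, not part of GCC: the shift-and-IOR
   widening step above, on plain integers.  Each round doubles the
   width holding copies of the original value; the vector code does the
   same, then broadcasts the final word.  */
static unsigned int
sketch_replicate_byte (unsigned char v)
{
  unsigned int x = v;

  x |= x << 8;		/* one copy -> two copies (QI pair in HI) */
  x |= x << 16;		/* two copies -> four copies (HI pair in SI) */
  return x;
}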
20166 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
20167 whose ONE_VAR element is VAR, and other elements are zero.  Return true if successful.  */
20171 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
20172 rtx target, rtx var, int one_var)
20174 enum machine_mode vsimode;
20190 var = force_reg (GET_MODE_INNER (mode), var);
20191 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
20192 emit_insn (gen_rtx_SET (VOIDmode, target, x));
20197 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
20198 new_target = gen_reg_rtx (mode);
20200 new_target = target;
20201 var = force_reg (GET_MODE_INNER (mode), var);
20202 x = gen_rtx_VEC_DUPLICATE (mode, var);
20203 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
20204 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
20207 /* We need to shuffle the value to the correct position, so
20208 create a new pseudo to store the intermediate result. */
20210 /* With SSE2, we can use the integer shuffle insns. */
20211 if (mode != V4SFmode && TARGET_SSE2)
20213 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
20215 GEN_INT (one_var == 1 ? 0 : 1),
20216 GEN_INT (one_var == 2 ? 0 : 1),
20217 GEN_INT (one_var == 3 ? 0 : 1)));
20218 if (target != new_target)
20219 emit_move_insn (target, new_target);
20223 /* Otherwise convert the intermediate result to V4SFmode and
20224 use the SSE1 shuffle instructions. */
20225 if (mode != V4SFmode)
20227 tmp = gen_reg_rtx (V4SFmode);
20228 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
20233 emit_insn (gen_sse_shufps_1 (tmp, tmp, tmp,
20235 GEN_INT (one_var == 1 ? 0 : 1),
20236 GEN_INT (one_var == 2 ? 0+4 : 1+4),
20237 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
20239 if (mode != V4SFmode)
20240 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
20241 else if (tmp != target)
20242 emit_move_insn (target, tmp);
20244 else if (target != new_target)
20245 emit_move_insn (target, new_target);
20250 vsimode = V4SImode;
20256 vsimode = V2SImode;
20262 /* Zero extend the variable element to SImode and recurse. */
20263 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
20265 x = gen_reg_rtx (vsimode);
20266 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
20268 gcc_unreachable ();
20270 emit_move_insn (target, gen_lowpart (mode, x));
20278 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
20279 consisting of the values in VALS. It is known that all elements
20280 except ONE_VAR are constants. Return true if successful. */
20283 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
20284 rtx target, rtx vals, int one_var)
20286 rtx var = XVECEXP (vals, 0, one_var);
20287 enum machine_mode wmode;
20290 const_vec = copy_rtx (vals);
20291 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
20292 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
20300 /* For the two element vectors, it's just as easy to use
20301 the general case. */
20317 /* There's no way to set one QImode entry easily. Combine
20318 the variable value with its adjacent constant value, and
20319 promote to an HImode set. */
20320 x = XVECEXP (vals, 0, one_var ^ 1);
20323 var = convert_modes (HImode, QImode, var, true);
20324 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
20325 NULL_RTX, 1, OPTAB_LIB_WIDEN);
20326 x = GEN_INT (INTVAL (x) & 0xff);
20330 var = convert_modes (HImode, QImode, var, true);
20331 x = gen_int_mode (INTVAL (x) << 8, HImode);
20333 if (x != const0_rtx)
20334 var = expand_simple_binop (HImode, IOR, var, x, var,
20335 1, OPTAB_LIB_WIDEN);
20337 x = gen_reg_rtx (wmode);
20338 emit_move_insn (x, gen_lowpart (wmode, const_vec));
20339 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
20341 emit_move_insn (target, gen_lowpart (mode, x));
20348 emit_move_insn (target, const_vec);
20349 ix86_expand_vector_set (mmx_ok, target, var, one_var);
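/* Editor's illustrative sketch, not part of GCC: the QImode pairing used
   above, on plain integers and assuming little-endian element order.
   The variable byte and its constant neighbour are fused into one
   HImode value, which can then be inserted with a single element set.
   The helper name is hypothetical.  */
static unsigned short
sketch_combine_qi_pair (unsigned char var, unsigned char cst, int var_is_odd)
{
  if (var_is_odd)	/* The variable element has the odd index.  */
    return (unsigned short) ((var << 8) | cst);
  return (unsigned short) ((cst << 8) | var);
}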
20353 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
20354 all values variable, and none identical. */
20357 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
20358 rtx target, rtx vals)
20360 enum machine_mode half_mode = GET_MODE_INNER (mode);
20361 rtx op0 = NULL, op1 = NULL;
20362 bool use_vec_concat = false;
20368 if (!mmx_ok && !TARGET_SSE)
20374 /* For the two element vectors, we always implement VEC_CONCAT. */
20375 op0 = XVECEXP (vals, 0, 0);
20376 op1 = XVECEXP (vals, 0, 1);
20377 use_vec_concat = true;
20381 half_mode = V2SFmode;
20384 half_mode = V2SImode;
20390 /* For V4SF and V4SI, we implement a concat of two V2 vectors.
20391 Recurse to load the two halves. */
20393 op0 = gen_reg_rtx (half_mode);
20394 v = gen_rtvec (2, XVECEXP (vals, 0, 0), XVECEXP (vals, 0, 1));
20395 ix86_expand_vector_init (false, op0, gen_rtx_PARALLEL (half_mode, v));
20397 op1 = gen_reg_rtx (half_mode);
20398 v = gen_rtvec (2, XVECEXP (vals, 0, 2), XVECEXP (vals, 0, 3));
20399 ix86_expand_vector_init (false, op1, gen_rtx_PARALLEL (half_mode, v));
20401 use_vec_concat = true;
20412 gcc_unreachable ();
20415 if (use_vec_concat)
20417 if (!register_operand (op0, half_mode))
20418 op0 = force_reg (half_mode, op0);
20419 if (!register_operand (op1, half_mode))
20420 op1 = force_reg (half_mode, op1);
20422 emit_insn (gen_rtx_SET (VOIDmode, target,
20423 gen_rtx_VEC_CONCAT (mode, op0, op1)));
20427 int i, j, n_elts, n_words, n_elt_per_word;
20428 enum machine_mode inner_mode;
20429 rtx words[4], shift;
20431 inner_mode = GET_MODE_INNER (mode);
20432 n_elts = GET_MODE_NUNITS (mode);
20433 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
20434 n_elt_per_word = n_elts / n_words;
20435 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
20437 for (i = 0; i < n_words; ++i)
20439 rtx word = NULL_RTX;
20441 for (j = 0; j < n_elt_per_word; ++j)
20443 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
20444 elt = convert_modes (word_mode, inner_mode, elt, true);
20450 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
20451 word, 1, OPTAB_LIB_WIDEN);
20452 word = expand_simple_binop (word_mode, IOR, word, elt,
20453 word, 1, OPTAB_LIB_WIDEN);
20461 emit_move_insn (target, gen_lowpart (mode, words[0]));
20462 else if (n_words == 2)
20464 rtx tmp = gen_reg_rtx (mode);
20465 emit_insn (gen_rtx_CLOBBER (VOIDmode, tmp));
20466 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
20467 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
20468 emit_move_insn (target, tmp);
20470 else if (n_words == 4)
20472 rtx tmp = gen_reg_rtx (V4SImode);
20473 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
20474 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
20475 emit_move_insn (target, gen_lowpart (mode, tmp));
20478 gcc_unreachable ();
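/* Editor's illustrative sketch, not part of GCC: the word-packing loop
   above, on plain integers.  Elements are folded in from the most
   significant end, so element 0 lands in the low bits, matching the
   little-endian vector layout.  The helper name is hypothetical.  */
static unsigned int
sketch_pack_elements (const unsigned char *elt, int n_per_word, int elt_bits)
{
  unsigned int word = 0;
  int j;

  for (j = n_per_word - 1; j >= 0; j--)
    word = (word << elt_bits) | elt[j];
  return word;
}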
20482 /* Initialize vector TARGET via VALS. Suppress the use of MMX
20483 instructions unless MMX_OK is true. */
20486 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
20488 enum machine_mode mode = GET_MODE (target);
20489 enum machine_mode inner_mode = GET_MODE_INNER (mode);
20490 int n_elts = GET_MODE_NUNITS (mode);
20491 int n_var = 0, one_var = -1;
20492 bool all_same = true, all_const_zero = true;
20496 for (i = 0; i < n_elts; ++i)
20498 x = XVECEXP (vals, 0, i);
20499 if (!CONSTANT_P (x))
20500 n_var++, one_var = i;
20501 else if (x != CONST0_RTX (inner_mode))
20502 all_const_zero = false;
20503 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
20507 /* Constants are best loaded from the constant pool. */
20510 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
20514 /* If all values are identical, broadcast the value. */
20516 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
20517 XVECEXP (vals, 0, 0)))
20520 /* Values where only one field is non-constant are best loaded from
20521 the pool and overwritten via move later. */
20525 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
20526 XVECEXP (vals, 0, one_var),
20530 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
20534 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
20538 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
20540 enum machine_mode mode = GET_MODE (target);
20541 enum machine_mode inner_mode = GET_MODE_INNER (mode);
20542 bool use_vec_merge = false;
20551 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
20552 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
20554 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
20556 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
20557 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
20567 /* For the two element vectors, we implement a VEC_CONCAT with
20568 the extraction of the other element. */
20570 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
20571 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
20574 op0 = val, op1 = tmp;
20576 op0 = tmp, op1 = val;
20578 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
20579 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
20587 use_vec_merge = true;
20591 /* tmp = target = A B C D */
20592 tmp = copy_to_reg (target);
20593 /* target = A A B B */
20594 emit_insn (gen_sse_unpcklps (target, target, target));
20595 /* target = X A B B */
20596 ix86_expand_vector_set (false, target, val, 0);
20597 /* target = A X C D */
20598 emit_insn (gen_sse_shufps_1 (target, target, tmp,
20599 GEN_INT (1), GEN_INT (0),
20600 GEN_INT (2+4), GEN_INT (3+4)));
20604 /* tmp = target = A B C D */
20605 tmp = copy_to_reg (target);
20606 /* tmp = X B C D */
20607 ix86_expand_vector_set (false, tmp, val, 0);
20608 /* target = A B X D */
20609 emit_insn (gen_sse_shufps_1 (target, target, tmp,
20610 GEN_INT (0), GEN_INT (1),
20611 GEN_INT (0+4), GEN_INT (3+4)));
20615 /* tmp = target = A B C D */
20616 tmp = copy_to_reg (target);
20617 /* tmp = X B C D */
20618 ix86_expand_vector_set (false, tmp, val, 0);
20619 /* target = A B C X */
20620 emit_insn (gen_sse_shufps_1 (target, target, tmp,
20621 GEN_INT (0), GEN_INT (1),
20622 GEN_INT (2+4), GEN_INT (0+4)));
20626 gcc_unreachable ();
20631 /* Element 0 handled by vec_merge below. */
20634 use_vec_merge = true;
20640 /* With SSE2, use integer shuffles to swap element 0 and ELT,
20641 store into element 0, then shuffle them back. */
20645 order[0] = GEN_INT (elt);
20646 order[1] = const1_rtx;
20647 order[2] = const2_rtx;
20648 order[3] = GEN_INT (3);
20649 order[elt] = const0_rtx;
20651 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
20652 order[1], order[2], order[3]));
20654 ix86_expand_vector_set (false, target, val, 0);
20656 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
20657 order[1], order[2], order[3]));
20661 /* For SSE1, we have to reuse the V4SF code. */
20662 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
20663 gen_lowpart (SFmode, val), elt);
20668 use_vec_merge = TARGET_SSE2;
20671 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
20682 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
20683 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
20684 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
20688 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
20690 emit_move_insn (mem, target);
20692 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
20693 emit_move_insn (tmp, val);
20695 emit_move_insn (target, mem);
20700 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
20702 enum machine_mode mode = GET_MODE (vec);
20703 enum machine_mode inner_mode = GET_MODE_INNER (mode);
20704 bool use_vec_extr = false;
20717 use_vec_extr = true;
20729 tmp = gen_reg_rtx (mode);
20730 emit_insn (gen_sse_shufps_1 (tmp, vec, vec,
20731 GEN_INT (elt), GEN_INT (elt),
20732 GEN_INT (elt+4), GEN_INT (elt+4)));
20736 tmp = gen_reg_rtx (mode);
20737 emit_insn (gen_sse_unpckhps (tmp, vec, vec));
20741 gcc_unreachable ();
20744 use_vec_extr = true;
20759 tmp = gen_reg_rtx (mode);
20760 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
20761 GEN_INT (elt), GEN_INT (elt),
20762 GEN_INT (elt), GEN_INT (elt)));
20766 tmp = gen_reg_rtx (mode);
20767 emit_insn (gen_sse2_punpckhdq (tmp, vec, vec));
20771 gcc_unreachable ();
20774 use_vec_extr = true;
20779 /* For SSE1, we have to reuse the V4SF code. */
20780 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
20781 gen_lowpart (V4SFmode, vec), elt);
20787 use_vec_extr = TARGET_SSE2;
20790 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
20795 /* ??? Could extract the appropriate HImode element and shift. */
20802 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
20803 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
20805 /* Let the rtl optimizers know about the zero extension performed. */
20806 if (inner_mode == HImode)
20808 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
20809 target = gen_lowpart (SImode, target);
20812 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
20816 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
20818 emit_move_insn (mem, vec);
20820 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
20821 emit_move_insn (target, tmp);
20825 /* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
20826 pattern to reduce; DEST is the destination; IN is the input vector. */
20829 ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
20831 rtx tmp1, tmp2, tmp3;
20833 tmp1 = gen_reg_rtx (V4SFmode);
20834 tmp2 = gen_reg_rtx (V4SFmode);
20835 tmp3 = gen_reg_rtx (V4SFmode);
20837 emit_insn (gen_sse_movhlps (tmp1, in, in));
20838 emit_insn (fn (tmp2, tmp1, in));
20840 emit_insn (gen_sse_shufps_1 (tmp3, tmp2, tmp2,
20841 GEN_INT (1), GEN_INT (1),
20842 GEN_INT (1+4), GEN_INT (1+4)));
20843 emit_insn (fn (dest, tmp2, tmp3));
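/* Editor's illustrative sketch, not part of GCC: the lane algebra of the
   reduction above.  movhlps folds the high pair onto the low pair, the
   shuffle then folds lane 1 onto lane 0, so lane 0 of DEST ends up as
   ((v2 op v0) op (v3 op v1)).  */
static float
sketch_reduce_v4sf (float (*op) (float, float), const float v[4])
{
  float lo = op (v[2], v[0]);	/* after movhlps + op, lane 0 */
  float hi = op (v[3], v[1]);	/* after movhlps + op, lane 1 */
  return op (lo, hi);		/* after shufps + op */
}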
20846 /* Target hook for scalar_mode_supported_p. */
20848 ix86_scalar_mode_supported_p (enum machine_mode mode)
20850 if (DECIMAL_FLOAT_MODE_P (mode))
20853 return default_scalar_mode_supported_p (mode);
20856 /* Implements target hook vector_mode_supported_p. */
20858 ix86_vector_mode_supported_p (enum machine_mode mode)
20860 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
20862 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
20864 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
20866 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
20871 /* Worker function for TARGET_MD_ASM_CLOBBERS.
20873 We do this in the new i386 backend to maintain source compatibility
20874 with the old cc0-based compiler. */
20877 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
20878 tree inputs ATTRIBUTE_UNUSED,
20881 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
20883 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
20888 /* Implements the target vector hook targetm.asm.encode_section_info.  This
20889 is not used by NetWare.  */
20891 static void ATTRIBUTE_UNUSED
20892 ix86_encode_section_info (tree decl, rtx rtl, int first)
20894 default_encode_section_info (decl, rtl, first);
20896 if (TREE_CODE (decl) == VAR_DECL
20897 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
20898 && ix86_in_large_data_p (decl))
20899 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
20902 /* Worker function for REVERSE_CONDITION. */
20905 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
20907 return (mode != CCFPmode && mode != CCFPUmode
20908 ? reverse_condition (code)
20909 : reverse_condition_maybe_unordered (code));
20912 /* Output code to perform an x87 FP register move, from OPERANDS[1] to OPERANDS[0].  */
20916 output_387_reg_move (rtx insn, rtx *operands)
20918 if (REG_P (operands[1])
20919 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
20921 if (REGNO (operands[0]) == FIRST_STACK_REG)
20922 return output_387_ffreep (operands, 0);
20923 return "fstp\t%y0";
20925 if (STACK_TOP_P (operands[0]))
20926 return "fld%z1\t%y1";
20930 /* Output code to perform a conditional jump to LABEL, if the C2 flag in
20931 the FP status register is set.  */
20934 ix86_emit_fp_unordered_jump (rtx label)
20936 rtx reg = gen_reg_rtx (HImode);
20939 emit_insn (gen_x86_fnstsw_1 (reg));
20941 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_size))
20943 emit_insn (gen_x86_sahf_1 (reg));
20945 temp = gen_rtx_REG (CCmode, FLAGS_REG);
20946 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
20950 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
20952 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
20953 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
20956 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
20957 gen_rtx_LABEL_REF (VOIDmode, label),
20959 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
20961 emit_jump_insn (temp);
20962 predict_jump (REG_BR_PROB_BASE * 10 / 100);
/* Output code to perform a log1p XFmode calculation.  */

void ix86_emit_i387_log1p (rtx op0, rtx op1)
{
  rtx label1 = gen_label_rtx ();
  rtx label2 = gen_label_rtx ();

  rtx tmp = gen_reg_rtx (XFmode);
  rtx tmp2 = gen_reg_rtx (XFmode);

  emit_insn (gen_absxf2 (tmp, op1));
  emit_insn (gen_cmpxf (tmp,
    CONST_DOUBLE_FROM_REAL_VALUE (
       REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
       XFmode)));
  emit_jump_insn (gen_bge (label1));

  emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
  emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
  emit_jump (label2);

  emit_label (label1);
  emit_move_insn (tmp, CONST1_RTX (XFmode));
  emit_insn (gen_addxf3 (tmp, op1, tmp));
  emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
  emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));

  emit_label (label2);
}
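/* The magic constant above is 1 - sqrt(2)/2 ~= 0.29289: the x87
   fyl2xp1 instruction is only defined for |x| below that bound, and
   within it computing log2(1 + x) directly avoids the cancellation
   that forming 1 + x explicitly suffers for tiny x.  A rough scalar
   model, where fyl2xp1/fyl2x stand in for the insns (illustrative
   only, not compiled):

	if (fabsl (x) < 1 - sqrt (2) / 2)
	  op0 = fyl2xp1 (x, ln2);	// ln2 * log2 (x + 1)
	else
	  op0 = fyl2x (1 + x, ln2);	// ln2 * log2 (1 + x)
 */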
/* Solaris implementation of TARGET_ASM_NAMED_SECTION.  */

static void ATTRIBUTE_UNUSED
i386_solaris_elf_named_section (const char *name, unsigned int flags,
                                tree decl)
{
  /* With Binutils 2.15, the "@unwind" marker must be specified on
     every occurrence of the ".eh_frame" section, not just the first
     one.  */
  if (TARGET_64BIT
      && strcmp (name, ".eh_frame") == 0)
    {
      fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
               flags & SECTION_WRITE ? "aw" : "a");
      return;
    }
  default_elf_asm_named_section (name, flags, decl);
}
/* Return the mangling of TYPE if it is an extended fundamental type.  */

static const char *
ix86_mangle_fundamental_type (tree type)
{
  switch (TYPE_MODE (type))
    {
    case TFmode:
      /* __float128 is "g".  */
      return "g";
    case XFmode:
      /* "long double" or __float80 is "e".  */
      return "e";
    default:
      return NULL;
    }
}
/* For 32-bit code we can save PIC register setup by using the hidden
   __stack_chk_fail_local function instead of calling
   __stack_chk_fail directly.  64-bit code doesn't need to set up any PIC
   register, so it is better to call __stack_chk_fail directly.  */

static tree
ix86_stack_protect_fail (void)
{
  return TARGET_64BIT
         ? default_external_stack_protect_fail ()
         : default_hidden_stack_protect_fail ();
}
/* Select a format to encode pointers in exception handling data.  CODE
   is 0 for data, 1 for code labels, 2 for function pointers.  GLOBAL is
   true if the symbol may be affected by dynamic relocations.

   ??? All x86 object file formats are capable of representing this.
   After all, the relocation needed is the same as for the call insn.
   Whether or not a particular assembler allows us to enter such, I
   guess we'll have to see.  */
int
asm_preferred_eh_data_format (int code, int global)
{
  if (flag_pic)
    {
      int type = DW_EH_PE_sdata8;
      if (!TARGET_64BIT
          || ix86_cmodel == CM_SMALL_PIC
          || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
        type = DW_EH_PE_sdata4;
      return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
    }
  if (ix86_cmodel == CM_SMALL
      || (ix86_cmodel == CM_MEDIUM && code))
    return DW_EH_PE_udata4;
  return DW_EH_PE_absptr;
}
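/* For instance, 32-bit PIC code gets DW_EH_PE_pcrel | DW_EH_PE_sdata4
   (0x1b) for code references, plus DW_EH_PE_indirect for global data
   references -- the familiar 0x9b byte seen in .eh_frame augmentation
   data.  (Values per the standard DW_EH_PE_* encoding constants.)  */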
/* Expand copysign from SIGN to the positive value ABS_VALUE
   storing in RESULT.  If MASK is non-null, it shall be a mask to mask out
   the sign-bit.  */
static void
ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
{
  enum machine_mode mode = GET_MODE (sign);
  rtx sgn = gen_reg_rtx (mode);
  if (mask == NULL_RTX)
    {
      mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), false);
      if (!VECTOR_MODE_P (mode))
        {
          /* We need to generate a scalar mode mask in this case.  */
          rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
          tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
          mask = gen_reg_rtx (mode);
          emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
        }
    }
  else
    mask = gen_rtx_NOT (mode, mask);
  emit_insn (gen_rtx_SET (VOIDmode, sgn,
                          gen_rtx_AND (mode, mask, sign)));
  emit_insn (gen_rtx_SET (VOIDmode, result,
                          gen_rtx_IOR (mode, abs_value, sgn)));
}
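/* Illustrative scalar model of the mask dance above (not compiled):
   with the DFmode sign-bit mask M = 0x8000000000000000,

	result = abs_value | (sign & M);

   A caller that has just computed fabs already holds ~M (see
   ix86_expand_sse_fabs below), passes it in, and the NOT + AND pair
   then folds into a single andnps/andnpd.  */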
/* Expand fabs (OP0) and return a new rtx that holds the result.  The
   mask for masking out the sign-bit is stored in *SMASK, if that is
   non-null.  */
static rtx
ix86_expand_sse_fabs (rtx op0, rtx *smask)
{
  enum machine_mode mode = GET_MODE (op0);
  rtx xa, mask;

  xa = gen_reg_rtx (mode);
  mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), true);
  if (!VECTOR_MODE_P (mode))
    {
      /* We need to generate a scalar mode mask in this case.  */
      rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
      tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
      mask = gen_reg_rtx (mode);
      emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
    }
  emit_insn (gen_rtx_SET (VOIDmode, xa,
                          gen_rtx_AND (mode, op0, mask)));

  if (smask)
    *smask = mask;

  return xa;
}
/* Expands a comparison of OP0 with OP1 using comparison code CODE,
   swapping the operands if SWAP_OPERANDS is true.  The expanded
   code is a forward jump to a newly created label in case the
   comparison is true.  The generated label rtx is returned.  */
static rtx
ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
                                  bool swap_operands)
{
  rtx label, tmp;

  if (swap_operands)
    {
      tmp = op0;
      op0 = op1;
      op1 = tmp;
    }

  label = gen_label_rtx ();
  tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
  emit_insn (gen_rtx_SET (VOIDmode, tmp,
                          gen_rtx_COMPARE (CCFPUmode, op0, op1)));
  tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
  tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
                              gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
  tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
  JUMP_LABEL (tmp) = label;

  return label;
}
/* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
   using comparison code CODE.  Operands are swapped for the comparison if
   SWAP_OPERANDS is true.  Returns a rtx for the generated mask.  */
static rtx
ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
                              bool swap_operands)
{
  enum machine_mode mode = GET_MODE (op0);
  rtx mask = gen_reg_rtx (mode);

  if (swap_operands)
    {
      rtx tmp = op0;
      op0 = op1;
      op1 = tmp;
    }

  if (mode == DFmode)
    emit_insn (gen_sse2_maskcmpdf3 (mask, op0, op1,
                                    gen_rtx_fmt_ee (code, mode, op0, op1)));
  else
    emit_insn (gen_sse_maskcmpsf3 (mask, op0, op1,
                                   gen_rtx_fmt_ee (code, mode, op0, op1)));

  return mask;
}
/* Generate and return a rtx of mode MODE for 2**n where n is the number
   of bits of the mantissa of MODE, which must be one of DFmode or SFmode.  */
static rtx
ix86_gen_TWO52 (enum machine_mode mode)
{
  REAL_VALUE_TYPE TWO52r;
  rtx TWO52;

  real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
  TWO52 = const_double_from_real_value (TWO52r, mode);
  TWO52 = force_reg (mode, TWO52);

  return TWO52;
}
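/* The constant feeds the classic add-subtract rounding trick, sketched
   here in plain C for DFmode (illustrative only, not compiled):

	double TWO52 = 0x1p52;		// 2**52; ulp (TWO52) == 1.0
	double t = xa + TWO52 - TWO52;	// xa rounded to an integer

   Adding 2**52 to 0 <= xa < 2**52 pushes the fraction bits out of the
   significand, so subtracting it again leaves XA rounded to an integer
   in the current rounding mode.  For SFmode the same holds with 2**23.  */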
/* Expand SSE sequence for computing lround from OP1 storing
   into OP0.  */
void
ix86_expand_lround (rtx op0, rtx op1)
{
  /* C code for the stuff we're doing below:
	tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
	op0 = (long) tmp;
   */
  enum machine_mode mode = GET_MODE (op1);
  const struct real_format *fmt;
  REAL_VALUE_TYPE pred_half, half_minus_pred_half;
  rtx adj;

  /* load nextafter (0.5, 0.0) */
  fmt = REAL_MODE_FORMAT (mode);
  real_2expN (&half_minus_pred_half, -(fmt->p) - 1);
  REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);

  /* adj = copysign (0.5, op1) */
  adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
  ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);

  /* adj = op1 + adj */
  adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);

  /* op0 = (imode)adj */
  expand_fix (op0, adj, 0);
}
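/* Note the adjustment is nextafter (0.5, 0.0) = 0.5 - 2**(-p-1), the
   largest value below 0.5, not 0.5 itself: adding exactly 0.5 could
   round a value just under a halfway point up to it, making the final
   truncation round the wrong way.  Scalar model (illustrative only):

	double adj = copysign (nextafter (0.5, 0.0), op1);
	long res = (long) (op1 + adj);	// truncating conversion
 */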
/* Expand SSE2 sequence for computing lfloor or lceil (depending on
   DO_FLOOR) from OP1 storing into OP0.  */
void
ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
{
  /* C code for the stuff we're doing below (for do_floor):
	xi = (long)op1;
	xi -= (double)xi > op1 ? 1 : 0;
	return xi;
   */
  enum machine_mode fmode = GET_MODE (op1);
  enum machine_mode imode = GET_MODE (op0);
  rtx ireg, freg, label, tmp;

  /* reg = (long)op1 */
  ireg = gen_reg_rtx (imode);
  expand_fix (ireg, op1, 0);

  /* freg = (double)reg */
  freg = gen_reg_rtx (fmode);
  expand_float (freg, ireg, 0);

  /* ireg = (freg > op1) ? ireg - 1 : ireg */
  label = ix86_expand_sse_compare_and_jump (UNLE,
                                            freg, op1, !do_floor);
  tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
                             ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
  emit_move_insn (ireg, tmp);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (op0, ireg);
}
/* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
   result in OPERAND0.  */
void
ix86_expand_rint (rtx operand0, rtx operand1)
{
  /* C code for the stuff we're doing below:
	xa = fabs (operand1);
	if (!isless (xa, 2**52))
	  return operand1;
	xa = xa + 2**52 - 2**52;
	return copysign (xa, operand1);
   */
  enum machine_mode mode = GET_MODE (operand0);
  rtx res, xa, label, TWO52, mask;

  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  /* xa = abs (operand1) */
  xa = ix86_expand_sse_fabs (res, &mask);

  /* if (!isless (xa, TWO52)) goto label; */
  TWO52 = ix86_gen_TWO52 (mode);
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
  xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);

  ix86_sse_copysign_to_positive (res, xa, res, mask);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
}
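/* No compensation step is needed here: unlike the floor/ceil/trunc
   expanders below, rint is specified to round in the current dynamic
   rounding mode, which is exactly what the TWO52 add/subtract does.  */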
/* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
   into OPERAND0.  */
void
ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
{
  /* C code for the stuff we expand below.
	double xa = fabs (x), x2;
	if (!isless (xa, TWO52))
	  return x;
	xa = xa + TWO52 - TWO52;
	x2 = copysign (xa, x);
     Compensate.  Floor:
	if (x2 > x)
	  x2 -= 1;
     Compensate.  Ceil:
	if (x2 < x)
	  x2 -= -1;
	return x2;
   */
  enum machine_mode mode = GET_MODE (operand0);
  rtx xa, TWO52, tmp, label, one, res, mask;

  TWO52 = ix86_gen_TWO52 (mode);

  /* Temporary for holding the result, initialized to the input
     operand to ease control flow.  */
  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  /* xa = abs (operand1) */
  xa = ix86_expand_sse_fabs (res, &mask);

  /* if (!isless (xa, TWO52)) goto label; */
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  /* xa = xa + TWO52 - TWO52; */
  xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
  xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);

  /* xa = copysign (xa, operand1) */
  ix86_sse_copysign_to_positive (xa, xa, res, mask);

  /* generate 1.0 or -1.0 */
  one = force_reg (mode,
                   const_double_from_real_value (do_floor
                                                 ? dconst1 : dconstm1, mode));

  /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
  tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
  emit_insn (gen_rtx_SET (VOIDmode, tmp,
                          gen_rtx_AND (mode, one, tmp)));
  /* We always need to subtract here to preserve signed zero.  */
  tmp = expand_simple_binop (mode, MINUS,
                             xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
  emit_move_insn (res, tmp);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
}
/* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
   into OPERAND0.  */
void
ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
{
  /* C code for the stuff we expand below.
	double xa = fabs (x), x2;
	if (!isless (xa, TWO52))
	  return x;
	x2 = (double)(long)x;
     Compensate.  Floor:
	if (x2 > x)
	  x2 -= 1;
     Compensate.  Ceil:
	if (x2 < x)
	  x2 += 1;
	if (HONOR_SIGNED_ZEROS (mode))
	  return copysign (x2, x);
	return x2;
   */
  enum machine_mode mode = GET_MODE (operand0);
  rtx xa, xi, TWO52, tmp, label, one, res, mask;

  TWO52 = ix86_gen_TWO52 (mode);

  /* Temporary for holding the result, initialized to the input
     operand to ease control flow.  */
  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  /* xa = abs (operand1) */
  xa = ix86_expand_sse_fabs (res, &mask);

  /* if (!isless (xa, TWO52)) goto label; */
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  /* xa = (double)(long)x */
  xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
  expand_fix (xi, res, 0);
  expand_float (xa, xi, 0);

  /* generate 1.0 */
  one = force_reg (mode, const_double_from_real_value (dconst1, mode));

  /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
  tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
  emit_insn (gen_rtx_SET (VOIDmode, tmp,
                          gen_rtx_AND (mode, one, tmp)));
  tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
                             xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
  emit_move_insn (res, tmp);

  if (HONOR_SIGNED_ZEROS (mode))
    ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
}
/* Expand SSE sequence for computing round from OPERAND1 storing
   into OPERAND0.  Sequence that works without relying on DImode truncation
   via cvttsd2siq that is only available on 64bit targets.  */
void
ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
{
  /* C code for the stuff we expand below.
	double xa = fabs (x), xa2, x2;
	if (!isless (xa, TWO52))
	  return x;
     Using the absolute value and copying back sign makes
     -0.0 -> -0.0 correct.
	xa2 = xa + TWO52 - TWO52;
     Compensate.
	dxa = xa2 - xa;
	if (dxa <= -0.5)
	  xa2 += 1;
	else if (dxa > 0.5)
	  xa2 -= 1;
	x2 = copysign (xa2, x);
	return x2;
   */
  enum machine_mode mode = GET_MODE (operand0);
  rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;

  TWO52 = ix86_gen_TWO52 (mode);

  /* Temporary for holding the result, initialized to the input
     operand to ease control flow.  */
  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  /* xa = abs (operand1) */
  xa = ix86_expand_sse_fabs (res, &mask);

  /* if (!isless (xa, TWO52)) goto label; */
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  /* xa2 = xa + TWO52 - TWO52; */
  xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
  xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);

  /* dxa = xa2 - xa; */
  dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);

  /* generate 0.5, 1.0 and -0.5 */
  half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
  one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
  mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
                               0, OPTAB_DIRECT);

  /* Compensate.  */
  /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
  tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
  emit_insn (gen_rtx_SET (VOIDmode, tmp,
                          gen_rtx_AND (mode, one, tmp)));
  xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
  /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
  tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
  emit_insn (gen_rtx_SET (VOIDmode, tmp,
                          gen_rtx_AND (mode, one, tmp)));
  xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);

  /* res = copysign (xa2, operand1) */
  ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
}
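/* The compensation works on dxa = xa2 - xa, which lies in [-0.5, 0.5]
   after the nearest-even rounding done by the TWO52 trick: a tie that
   was rounded down (dxa == -0.5) is bumped up so the result rounds
   half away from zero, and the UNGT test guards the opposite
   direction.  Doing all of this on |x| and copying the sign back at
   the end is what keeps -0.0 and, e.g., -0.3 mapping to -0.0.  */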
/* Expand SSE sequence for computing trunc from OPERAND1 storing
   into OPERAND0.  */
void
ix86_expand_trunc (rtx operand0, rtx operand1)
{
  /* C code for SSE variant we expand below.
	double xa = fabs (x), x2;
	if (!isless (xa, TWO52))
	  return x;
	x2 = (double)(long)x;
	if (HONOR_SIGNED_ZEROS (mode))
	  return copysign (x2, x);
	return x2;
   */
  enum machine_mode mode = GET_MODE (operand0);
  rtx xa, xi, TWO52, label, res, mask;

  TWO52 = ix86_gen_TWO52 (mode);

  /* Temporary for holding the result, initialized to the input
     operand to ease control flow.  */
  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  /* xa = abs (operand1) */
  xa = ix86_expand_sse_fabs (res, &mask);

  /* if (!isless (xa, TWO52)) goto label; */
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  /* x = (double)(long)x */
  xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
  expand_fix (xi, res, 0);
  expand_float (res, xi, 0);

  if (HONOR_SIGNED_ZEROS (mode))
    ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
}
/* Expand SSE sequence for computing trunc from OPERAND1 storing
   into OPERAND0, without relying on 64-bit DImode truncation.  */
void
ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
{
  enum machine_mode mode = GET_MODE (operand0);
  rtx xa, mask, TWO52, label, one, res, smask, tmp;

  /* C code for SSE variant we expand below.
	double xa = fabs (x), x2;
	if (!isless (xa, TWO52))
	  return x;
	xa2 = xa + TWO52 - TWO52;
     Compensate:
	if (xa2 > xa)
	  xa2 -= 1.0;
	x2 = copysign (xa2, x);
	return x2;
   */
  TWO52 = ix86_gen_TWO52 (mode);

  /* Temporary for holding the result, initialized to the input
     operand to ease control flow.  */
  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  /* xa = abs (operand1) */
  xa = ix86_expand_sse_fabs (res, &smask);

  /* if (!isless (xa, TWO52)) goto label; */
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  /* res = xa + TWO52 - TWO52; */
  tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
  tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
  emit_move_insn (res, tmp);

  /* generate 1.0 */
  one = force_reg (mode, const_double_from_real_value (dconst1, mode));

  /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
  mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
  emit_insn (gen_rtx_SET (VOIDmode, mask,
                          gen_rtx_AND (mode, mask, one)));
  tmp = expand_simple_binop (mode, MINUS,
                             res, mask, NULL_RTX, 0, OPTAB_DIRECT);
  emit_move_insn (res, tmp);

  /* res = copysign (res, operand1) */
  ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
}
/* Expand SSE sequence for computing round from OPERAND1 storing
   into OPERAND0.  */
void
ix86_expand_round (rtx operand0, rtx operand1)
{
  /* C code for the stuff we're doing below:
	double xa = fabs (x);
	if (!isless (xa, TWO52))
	  return x;
	xa = (double)(long)(xa + nextafter (0.5, 0.0));
	return copysign (xa, x);
   */
  enum machine_mode mode = GET_MODE (operand0);
  rtx res, TWO52, xa, label, xi, half, mask;
  const struct real_format *fmt;
  REAL_VALUE_TYPE pred_half, half_minus_pred_half;

  /* Temporary for holding the result, initialized to the input
     operand to ease control flow.  */
  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  TWO52 = ix86_gen_TWO52 (mode);
  xa = ix86_expand_sse_fabs (res, &mask);
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  /* load nextafter (0.5, 0.0) */
  fmt = REAL_MODE_FORMAT (mode);
  real_2expN (&half_minus_pred_half, -(fmt->p) - 1);
  REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);

  /* xa = xa + 0.5 */
  half = force_reg (mode, const_double_from_real_value (pred_half, mode));
  xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);

  /* xa = (double)(int64_t)xa */
  xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
  expand_fix (xi, xa, 0);
  expand_float (xa, xi, 0);

  /* res = copysign (xa, operand1) */
  ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
}
/* Table of valid machine attributes.  */
static const struct attribute_spec ix86_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
  /* Stdcall attribute says callee is responsible for popping arguments
     if they are not variable.  */
  { "stdcall",   0, 0, false, true,  true,  ix86_handle_cconv_attribute },
  /* Fastcall attribute says callee is responsible for popping arguments
     if they are not variable.  */
  { "fastcall",  0, 0, false, true,  true,  ix86_handle_cconv_attribute },
  /* Cdecl attribute says the callee is a normal C declaration.  */
  { "cdecl",     0, 0, false, true,  true,  ix86_handle_cconv_attribute },
  /* Regparm attribute specifies how many integer arguments are to be
     passed in registers.  */
  { "regparm",   1, 1, false, true,  true,  ix86_handle_cconv_attribute },
  /* Sseregparm attribute says we are using x86_64 calling conventions
     for FP arguments.  */
  { "sseregparm", 0, 0, false, true, true,  ix86_handle_cconv_attribute },
  /* force_align_arg_pointer says this function realigns the stack at entry.  */
  { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
    false, true,  true, ix86_handle_cconv_attribute },
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
  { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
  { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
  { "shared",    0, 0, true,  false, false, ix86_handle_shared_attribute },
#endif
  { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
  { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
#ifdef SUBTARGET_ATTRIBUTE_TABLE
  SUBTARGET_ATTRIBUTE_TABLE,
#endif
  { NULL,        0, 0, false, false, false, NULL }
};
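/* For reference, these are the attributes user code attaches to
   declarations; a few illustrative (hypothetical) examples:

	int __attribute__ ((regparm (3))) f (int a, int b, int c);
	void __attribute__ ((fastcall)) g (int x);
	struct __attribute__ ((ms_struct)) s { int m : 3; };
 */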
/* Initialize the GCC target structure.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
#  undef TARGET_MERGE_DECL_ATTRIBUTES
#  define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
#endif
#undef TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS ix86_init_builtins
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN ix86_expand_builtin

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION ix86_builtin_vectorized_function
#undef TARGET_VECTORIZE_BUILTIN_CONVERSION
#define TARGET_VECTORIZE_BUILTIN_CONVERSION ix86_builtin_conversion

#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue

#undef TARGET_ENCODE_SECTION_INFO
#ifndef SUBTARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
#else
#define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
#endif
#undef TARGET_ASM_OPEN_PAREN
#define TARGET_ASM_OPEN_PAREN ""
#undef TARGET_ASM_CLOSE_PAREN
#define TARGET_ASM_CLOSE_PAREN ""

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
#ifdef ASM_QUAD
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
#endif

#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
  ia32_multipass_dfa_lookahead

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall

#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif
#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
#define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_rtx_true

#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address

#undef TARGET_MS_BITFIELD_LAYOUT_P
#define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p

#if TARGET_MACHO
#undef TARGET_BINDS_LOCAL_P
#define TARGET_BINDS_LOCAL_P darwin_binds_local_p
#endif
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
#undef TARGET_BINDS_LOCAL_P
#define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
#endif
#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START x86_file_start

#undef TARGET_DEFAULT_TARGET_FLAGS
#define TARGET_DEFAULT_TARGET_FLAGS	\
  (TARGET_DEFAULT			\
   | TARGET_64BIT_DEFAULT		\
   | TARGET_SUBTARGET_DEFAULT		\
   | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)

#undef TARGET_HANDLE_OPTION
#define TARGET_HANDLE_OPTION ix86_handle_option
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS ix86_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST ix86_address_cost

#undef TARGET_FIXED_CONDITION_CODE_REGS
#define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
#undef TARGET_CC_MODES_COMPATIBLE
#define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list

#undef TARGET_MD_ASM_CLOBBERS
#define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers

#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
#undef TARGET_INTERNAL_ARG_POINTER
#define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
#undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
#define TARGET_DWARF_HANDLE_FRAME_UNSPEC ix86_dwarf_handle_frame_unspec

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p

#ifdef HAVE_AS_TLS
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
#endif

#ifdef SUBTARGET_INSERT_ATTRIBUTES
#undef TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
#endif

#undef TARGET_MANGLE_FUNDAMENTAL_TYPE
#define TARGET_MANGLE_FUNDAMENTAL_TYPE ix86_mangle_fundamental_type

#undef TARGET_STACK_PROTECT_FAIL
#define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE ix86_function_value

struct gcc_target targetm = TARGET_INITIALIZER;

#include "gt-i386.h"