/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
   2002, 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to
the Free Software Foundation, 51 Franklin Street, Fifth Floor,
Boston, MA 02110-1301, USA.  */
#include "coretypes.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "conditions.h"
#include "insn-codes.h"
#include "insn-attr.h"
#include "basic-block.h"
#include "target-def.h"
#include "langhooks.h"
#include "tree-gimple.h"
#include "tm-constrs.h"
#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif
/* Return index of given mode in mult and division cost tables.  */
#define MODE_INDEX(mode) \
  ((mode) == QImode ? 0 \
   : (mode) == HImode ? 1 \
   : (mode) == SImode ? 2 \
   : (mode) == DImode ? 3 \
   : 4)
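/* Usage sketch (illustrative, not part of the original sources): the cost
   tables below keep one entry per integer mode plus a trailing "other"
   slot, so a per-mode lookup is simply

     ix86_cost->mult_init[MODE_INDEX (SImode)]

   where mult_init is assumed to name the multiply-cost array member of
   struct processor_costs.  */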
/* Processor costs (relative to an add) */
/* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes.  */
#define COSTS_N_BYTES(N) ((N) * 2)
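/* Worked example (illustrative): with COSTS_N_INSNS (N) == (N)*4, a 2-byte
   add costs COSTS_N_BYTES (2) == 4 == COSTS_N_INSNS (1), i.e. when tuning
   for size one generic insn-cost unit corresponds to two bytes of code.  */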
#define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
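/* How to read the stringop descriptors below (explanatory note, not from
   the original file): each stringop_algs value is {alg, {{max, alg}, ...}}.
   The first member is the algorithm assumed when the block size is unknown
   at compile time; each {max, alg} pair then selects ALG for known sizes up
   to MAX bytes, with max == -1 terminating the list and covering every
   larger size.  So {libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}}
   reads "rep movsl up to 256 bytes, a library call otherwise", and
   DUMMY_STRINGOP_ALGS is a placeholder for the unused 64-bit variant of a
   32-bit-only tuning.  */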
static const
struct processor_costs size_cost = { /* costs for tuning for size */
  COSTS_N_BYTES (2), /* cost of an add instruction */
  COSTS_N_BYTES (3), /* cost of a lea instruction */
  COSTS_N_BYTES (2), /* variable shift costs */
  COSTS_N_BYTES (3), /* constant shift costs */
  {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
   COSTS_N_BYTES (3), /* HI */
   COSTS_N_BYTES (3), /* SI */
   COSTS_N_BYTES (3), /* DI */
   COSTS_N_BYTES (5)}, /* other */
  0, /* cost of multiply per each bit set */
  {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
   COSTS_N_BYTES (3), /* HI */
   COSTS_N_BYTES (3), /* SI */
   COSTS_N_BYTES (3), /* DI */
   COSTS_N_BYTES (5)}, /* other */
  COSTS_N_BYTES (3), /* cost of movsx */
  COSTS_N_BYTES (3), /* cost of movzx */
  2, /* cost for loading QImode using movzbl */
  {2, 2, 2}, /* cost of loading integer registers
                in QImode, HImode and SImode.
                Relative to reg-reg move (2).  */
  {2, 2, 2}, /* cost of storing integer registers */
  2, /* cost of reg,reg fld/fst */
  {2, 2, 2}, /* cost of loading fp registers
                in SFmode, DFmode and XFmode */
  {2, 2, 2}, /* cost of storing fp registers
                in SFmode, DFmode and XFmode */
  3, /* cost of moving MMX register */
  {3, 3}, /* cost of loading MMX registers
             in SImode and DImode */
  {3, 3}, /* cost of storing MMX registers
             in SImode and DImode */
  3, /* cost of moving SSE register */
  {3, 3, 3}, /* cost of loading SSE registers
                in SImode, DImode and TImode */
  {3, 3, 3}, /* cost of storing SSE registers
                in SImode, DImode and TImode */
  3, /* MMX or SSE register to integer */
  0, /* size of prefetch block */
  0, /* number of parallel prefetches */
  COSTS_N_BYTES (2), /* cost of FADD and FSUB insns.  */
  COSTS_N_BYTES (2), /* cost of FMUL instruction.  */
  COSTS_N_BYTES (2), /* cost of FDIV instruction.  */
  COSTS_N_BYTES (2), /* cost of FABS instruction.  */
  COSTS_N_BYTES (2), /* cost of FCHS instruction.  */
  COSTS_N_BYTES (2), /* cost of FSQRT instruction.  */
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}}
};
/* Processor costs (relative to an add) */
static const
struct processor_costs i386_cost = { /* 386 specific costs */
  COSTS_N_INSNS (1), /* cost of an add instruction */
  COSTS_N_INSNS (1), /* cost of a lea instruction */
  COSTS_N_INSNS (3), /* variable shift costs */
  COSTS_N_INSNS (2), /* constant shift costs */
  {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
   COSTS_N_INSNS (6), /* HI */
   COSTS_N_INSNS (6), /* SI */
   COSTS_N_INSNS (6), /* DI */
   COSTS_N_INSNS (6)}, /* other */
  COSTS_N_INSNS (1), /* cost of multiply per each bit set */
  {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
   COSTS_N_INSNS (23), /* HI */
   COSTS_N_INSNS (23), /* SI */
   COSTS_N_INSNS (23), /* DI */
   COSTS_N_INSNS (23)}, /* other */
  COSTS_N_INSNS (3), /* cost of movsx */
  COSTS_N_INSNS (2), /* cost of movzx */
  15, /* "large" insn */
  4, /* cost for loading QImode using movzbl */
  {2, 4, 2}, /* cost of loading integer registers
                in QImode, HImode and SImode.
                Relative to reg-reg move (2).  */
  {2, 4, 2}, /* cost of storing integer registers */
  2, /* cost of reg,reg fld/fst */
  {8, 8, 8}, /* cost of loading fp registers
                in SFmode, DFmode and XFmode */
  {8, 8, 8}, /* cost of storing fp registers
                in SFmode, DFmode and XFmode */
  2, /* cost of moving MMX register */
  {4, 8}, /* cost of loading MMX registers
             in SImode and DImode */
  {4, 8}, /* cost of storing MMX registers
             in SImode and DImode */
  2, /* cost of moving SSE register */
  {4, 8, 16}, /* cost of loading SSE registers
                 in SImode, DImode and TImode */
  {4, 8, 16}, /* cost of storing SSE registers
                 in SImode, DImode and TImode */
  3, /* MMX or SSE register to integer */
  0, /* size of prefetch block */
  0, /* number of parallel prefetches */
  COSTS_N_INSNS (23), /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (27), /* cost of FMUL instruction.  */
  COSTS_N_INSNS (88), /* cost of FDIV instruction.  */
  COSTS_N_INSNS (22), /* cost of FABS instruction.  */
  COSTS_N_INSNS (24), /* cost of FCHS instruction.  */
  COSTS_N_INSNS (122), /* cost of FSQRT instruction.  */
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS}
};
static const
struct processor_costs i486_cost = { /* 486 specific costs */
  COSTS_N_INSNS (1), /* cost of an add instruction */
  COSTS_N_INSNS (1), /* cost of a lea instruction */
  COSTS_N_INSNS (3), /* variable shift costs */
  COSTS_N_INSNS (2), /* constant shift costs */
  {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
   COSTS_N_INSNS (12), /* HI */
   COSTS_N_INSNS (12), /* SI */
   COSTS_N_INSNS (12), /* DI */
   COSTS_N_INSNS (12)}, /* other */
  1, /* cost of multiply per each bit set */
  {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
   COSTS_N_INSNS (40), /* HI */
   COSTS_N_INSNS (40), /* SI */
   COSTS_N_INSNS (40), /* DI */
   COSTS_N_INSNS (40)}, /* other */
  COSTS_N_INSNS (3), /* cost of movsx */
  COSTS_N_INSNS (2), /* cost of movzx */
  15, /* "large" insn */
  4, /* cost for loading QImode using movzbl */
  {2, 4, 2}, /* cost of loading integer registers
                in QImode, HImode and SImode.
                Relative to reg-reg move (2).  */
  {2, 4, 2}, /* cost of storing integer registers */
  2, /* cost of reg,reg fld/fst */
  {8, 8, 8}, /* cost of loading fp registers
                in SFmode, DFmode and XFmode */
  {8, 8, 8}, /* cost of storing fp registers
                in SFmode, DFmode and XFmode */
  2, /* cost of moving MMX register */
  {4, 8}, /* cost of loading MMX registers
             in SImode and DImode */
  {4, 8}, /* cost of storing MMX registers
             in SImode and DImode */
  2, /* cost of moving SSE register */
  {4, 8, 16}, /* cost of loading SSE registers
                 in SImode, DImode and TImode */
  {4, 8, 16}, /* cost of storing SSE registers
                 in SImode, DImode and TImode */
  3, /* MMX or SSE register to integer */
  0, /* size of prefetch block */
  0, /* number of parallel prefetches */
  COSTS_N_INSNS (8), /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (16), /* cost of FMUL instruction.  */
  COSTS_N_INSNS (73), /* cost of FDIV instruction.  */
  COSTS_N_INSNS (3), /* cost of FABS instruction.  */
  COSTS_N_INSNS (3), /* cost of FCHS instruction.  */
  COSTS_N_INSNS (83), /* cost of FSQRT instruction.  */
  {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS}
};
static const
struct processor_costs pentium_cost = {
  COSTS_N_INSNS (1), /* cost of an add instruction */
  COSTS_N_INSNS (1), /* cost of a lea instruction */
  COSTS_N_INSNS (4), /* variable shift costs */
  COSTS_N_INSNS (1), /* constant shift costs */
  {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
   COSTS_N_INSNS (11), /* HI */
   COSTS_N_INSNS (11), /* SI */
   COSTS_N_INSNS (11), /* DI */
   COSTS_N_INSNS (11)}, /* other */
  0, /* cost of multiply per each bit set */
  {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
   COSTS_N_INSNS (25), /* HI */
   COSTS_N_INSNS (25), /* SI */
   COSTS_N_INSNS (25), /* DI */
   COSTS_N_INSNS (25)}, /* other */
  COSTS_N_INSNS (3), /* cost of movsx */
  COSTS_N_INSNS (2), /* cost of movzx */
  8, /* "large" insn */
  6, /* cost for loading QImode using movzbl */
  {2, 4, 2}, /* cost of loading integer registers
                in QImode, HImode and SImode.
                Relative to reg-reg move (2).  */
  {2, 4, 2}, /* cost of storing integer registers */
  2, /* cost of reg,reg fld/fst */
  {2, 2, 6}, /* cost of loading fp registers
                in SFmode, DFmode and XFmode */
  {4, 4, 6}, /* cost of storing fp registers
                in SFmode, DFmode and XFmode */
  8, /* cost of moving MMX register */
  {8, 8}, /* cost of loading MMX registers
             in SImode and DImode */
  {8, 8}, /* cost of storing MMX registers
             in SImode and DImode */
  2, /* cost of moving SSE register */
  {4, 8, 16}, /* cost of loading SSE registers
                 in SImode, DImode and TImode */
  {4, 8, 16}, /* cost of storing SSE registers
                 in SImode, DImode and TImode */
  3, /* MMX or SSE register to integer */
  0, /* size of prefetch block */
  0, /* number of parallel prefetches */
  COSTS_N_INSNS (3), /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (3), /* cost of FMUL instruction.  */
  COSTS_N_INSNS (39), /* cost of FDIV instruction.  */
  COSTS_N_INSNS (1), /* cost of FABS instruction.  */
  COSTS_N_INSNS (1), /* cost of FCHS instruction.  */
  COSTS_N_INSNS (70), /* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS}
};
static const
struct processor_costs pentiumpro_cost = {
  COSTS_N_INSNS (1), /* cost of an add instruction */
  COSTS_N_INSNS (1), /* cost of a lea instruction */
  COSTS_N_INSNS (1), /* variable shift costs */
  COSTS_N_INSNS (1), /* constant shift costs */
  {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
   COSTS_N_INSNS (4), /* HI */
   COSTS_N_INSNS (4), /* SI */
   COSTS_N_INSNS (4), /* DI */
   COSTS_N_INSNS (4)}, /* other */
  0, /* cost of multiply per each bit set */
  {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
   COSTS_N_INSNS (17), /* HI */
   COSTS_N_INSNS (17), /* SI */
   COSTS_N_INSNS (17), /* DI */
   COSTS_N_INSNS (17)}, /* other */
  COSTS_N_INSNS (1), /* cost of movsx */
  COSTS_N_INSNS (1), /* cost of movzx */
  8, /* "large" insn */
  2, /* cost for loading QImode using movzbl */
  {4, 4, 4}, /* cost of loading integer registers
                in QImode, HImode and SImode.
                Relative to reg-reg move (2).  */
  {2, 2, 2}, /* cost of storing integer registers */
  2, /* cost of reg,reg fld/fst */
  {2, 2, 6}, /* cost of loading fp registers
                in SFmode, DFmode and XFmode */
  {4, 4, 6}, /* cost of storing fp registers
                in SFmode, DFmode and XFmode */
  2, /* cost of moving MMX register */
  {2, 2}, /* cost of loading MMX registers
             in SImode and DImode */
  {2, 2}, /* cost of storing MMX registers
             in SImode and DImode */
  2, /* cost of moving SSE register */
  {2, 2, 8}, /* cost of loading SSE registers
                in SImode, DImode and TImode */
  {2, 2, 8}, /* cost of storing SSE registers
                in SImode, DImode and TImode */
  3, /* MMX or SSE register to integer */
  32, /* size of prefetch block */
  6, /* number of parallel prefetches */
  COSTS_N_INSNS (3), /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (5), /* cost of FMUL instruction.  */
  COSTS_N_INSNS (56), /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2), /* cost of FABS instruction.  */
  COSTS_N_INSNS (2), /* cost of FCHS instruction.  */
  COSTS_N_INSNS (56), /* cost of FSQRT instruction.  */
  /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes
     (we ensure the alignment).  For small blocks the inline loop is still a
     noticeable win; for bigger blocks either rep movsl or rep movsb is the
     way to go.  Rep movsb apparently has more expensive startup time in the
     CPU, but after 4K the difference is down in the noise.  */
  {{rep_prefix_4_byte, {{128, loop}, {1024, unrolled_loop},
                        {8192, rep_prefix_4_byte}, {-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_4_byte, {{1024, unrolled_loop},
                        {8192, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS}
};
static const
struct processor_costs geode_cost = {
  COSTS_N_INSNS (1), /* cost of an add instruction */
  COSTS_N_INSNS (1), /* cost of a lea instruction */
  COSTS_N_INSNS (2), /* variable shift costs */
  COSTS_N_INSNS (1), /* constant shift costs */
  {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
   COSTS_N_INSNS (4), /* HI */
   COSTS_N_INSNS (7), /* SI */
   COSTS_N_INSNS (7), /* DI */
   COSTS_N_INSNS (7)}, /* other */
  0, /* cost of multiply per each bit set */
  {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
   COSTS_N_INSNS (23), /* HI */
   COSTS_N_INSNS (39), /* SI */
   COSTS_N_INSNS (39), /* DI */
   COSTS_N_INSNS (39)}, /* other */
  COSTS_N_INSNS (1), /* cost of movsx */
  COSTS_N_INSNS (1), /* cost of movzx */
  8, /* "large" insn */
  1, /* cost for loading QImode using movzbl */
  {1, 1, 1}, /* cost of loading integer registers
                in QImode, HImode and SImode.
                Relative to reg-reg move (2).  */
  {1, 1, 1}, /* cost of storing integer registers */
  1, /* cost of reg,reg fld/fst */
  {1, 1, 1}, /* cost of loading fp registers
                in SFmode, DFmode and XFmode */
  {4, 6, 6}, /* cost of storing fp registers
                in SFmode, DFmode and XFmode */

  1, /* cost of moving MMX register */
  {1, 1}, /* cost of loading MMX registers
             in SImode and DImode */
  {1, 1}, /* cost of storing MMX registers
             in SImode and DImode */
  1, /* cost of moving SSE register */
  {1, 1, 1}, /* cost of loading SSE registers
                in SImode, DImode and TImode */
  {1, 1, 1}, /* cost of storing SSE registers
                in SImode, DImode and TImode */
  1, /* MMX or SSE register to integer */
  32, /* size of prefetch block */
  1, /* number of parallel prefetches */
  COSTS_N_INSNS (6), /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (11), /* cost of FMUL instruction.  */
  COSTS_N_INSNS (47), /* cost of FDIV instruction.  */
  COSTS_N_INSNS (1), /* cost of FABS instruction.  */
  COSTS_N_INSNS (1), /* cost of FCHS instruction.  */
  COSTS_N_INSNS (54), /* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS}
};
static const
struct processor_costs k6_cost = {
  COSTS_N_INSNS (1), /* cost of an add instruction */
  COSTS_N_INSNS (2), /* cost of a lea instruction */
  COSTS_N_INSNS (1), /* variable shift costs */
  COSTS_N_INSNS (1), /* constant shift costs */
  {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
   COSTS_N_INSNS (3), /* HI */
   COSTS_N_INSNS (3), /* SI */
   COSTS_N_INSNS (3), /* DI */
   COSTS_N_INSNS (3)}, /* other */
  0, /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
   COSTS_N_INSNS (18), /* HI */
   COSTS_N_INSNS (18), /* SI */
   COSTS_N_INSNS (18), /* DI */
   COSTS_N_INSNS (18)}, /* other */
  COSTS_N_INSNS (2), /* cost of movsx */
  COSTS_N_INSNS (2), /* cost of movzx */
  8, /* "large" insn */
  3, /* cost for loading QImode using movzbl */
  {4, 5, 4}, /* cost of loading integer registers
                in QImode, HImode and SImode.
                Relative to reg-reg move (2).  */
  {2, 3, 2}, /* cost of storing integer registers */
  4, /* cost of reg,reg fld/fst */
  {6, 6, 6}, /* cost of loading fp registers
                in SFmode, DFmode and XFmode */
  {4, 4, 4}, /* cost of storing fp registers
                in SFmode, DFmode and XFmode */
  2, /* cost of moving MMX register */
  {2, 2}, /* cost of loading MMX registers
             in SImode and DImode */
  {2, 2}, /* cost of storing MMX registers
             in SImode and DImode */
  2, /* cost of moving SSE register */
  {2, 2, 8}, /* cost of loading SSE registers
                in SImode, DImode and TImode */
  {2, 2, 8}, /* cost of storing SSE registers
                in SImode, DImode and TImode */
  6, /* MMX or SSE register to integer */
  32, /* size of prefetch block */
  1, /* number of parallel prefetches */
  COSTS_N_INSNS (2), /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (2), /* cost of FMUL instruction.  */
  COSTS_N_INSNS (56), /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2), /* cost of FABS instruction.  */
  COSTS_N_INSNS (2), /* cost of FCHS instruction.  */
  COSTS_N_INSNS (56), /* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS}
};
static const
struct processor_costs athlon_cost = {
  COSTS_N_INSNS (1), /* cost of an add instruction */
  COSTS_N_INSNS (2), /* cost of a lea instruction */
  COSTS_N_INSNS (1), /* variable shift costs */
  COSTS_N_INSNS (1), /* constant shift costs */
  {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
   COSTS_N_INSNS (5), /* HI */
   COSTS_N_INSNS (5), /* SI */
   COSTS_N_INSNS (5), /* DI */
   COSTS_N_INSNS (5)}, /* other */
  0, /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26), /* HI */
   COSTS_N_INSNS (42), /* SI */
   COSTS_N_INSNS (74), /* DI */
   COSTS_N_INSNS (74)}, /* other */
  COSTS_N_INSNS (1), /* cost of movsx */
  COSTS_N_INSNS (1), /* cost of movzx */
  8, /* "large" insn */
  4, /* cost for loading QImode using movzbl */
  {3, 4, 3}, /* cost of loading integer registers
                in QImode, HImode and SImode.
                Relative to reg-reg move (2).  */
  {3, 4, 3}, /* cost of storing integer registers */
  4, /* cost of reg,reg fld/fst */
  {4, 4, 12}, /* cost of loading fp registers
                 in SFmode, DFmode and XFmode */
  {6, 6, 8}, /* cost of storing fp registers
                in SFmode, DFmode and XFmode */
  2, /* cost of moving MMX register */
  {4, 4}, /* cost of loading MMX registers
             in SImode and DImode */
  {4, 4}, /* cost of storing MMX registers
             in SImode and DImode */
  2, /* cost of moving SSE register */
  {4, 4, 6}, /* cost of loading SSE registers
                in SImode, DImode and TImode */
  {4, 4, 5}, /* cost of storing SSE registers
                in SImode, DImode and TImode */
  5, /* MMX or SSE register to integer */
  64, /* size of prefetch block */
  6, /* number of parallel prefetches */
  COSTS_N_INSNS (4), /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4), /* cost of FMUL instruction.  */
  COSTS_N_INSNS (24), /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2), /* cost of FABS instruction.  */
  COSTS_N_INSNS (2), /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35), /* cost of FSQRT instruction.  */
  /* For some reason, Athlon deals better with REP prefix (relative to loops)
     compared to K8.  Alignment becomes important after 8 bytes for memcpy and
     128 bytes for memset.  */
  {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS}
};
static const
struct processor_costs k8_cost = {
  COSTS_N_INSNS (1), /* cost of an add instruction */
  COSTS_N_INSNS (2), /* cost of a lea instruction */
  COSTS_N_INSNS (1), /* variable shift costs */
  COSTS_N_INSNS (1), /* constant shift costs */
  {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
   COSTS_N_INSNS (4), /* HI */
   COSTS_N_INSNS (3), /* SI */
   COSTS_N_INSNS (4), /* DI */
   COSTS_N_INSNS (5)}, /* other */
  0, /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26), /* HI */
   COSTS_N_INSNS (42), /* SI */
   COSTS_N_INSNS (74), /* DI */
   COSTS_N_INSNS (74)}, /* other */
  COSTS_N_INSNS (1), /* cost of movsx */
  COSTS_N_INSNS (1), /* cost of movzx */
  8, /* "large" insn */
  4, /* cost for loading QImode using movzbl */
  {3, 4, 3}, /* cost of loading integer registers
                in QImode, HImode and SImode.
                Relative to reg-reg move (2).  */
  {3, 4, 3}, /* cost of storing integer registers */
  4, /* cost of reg,reg fld/fst */
  {4, 4, 12}, /* cost of loading fp registers
                 in SFmode, DFmode and XFmode */
  {6, 6, 8}, /* cost of storing fp registers
                in SFmode, DFmode and XFmode */
  2, /* cost of moving MMX register */
  {3, 3}, /* cost of loading MMX registers
             in SImode and DImode */
  {4, 4}, /* cost of storing MMX registers
             in SImode and DImode */
  2, /* cost of moving SSE register */
  {4, 3, 6}, /* cost of loading SSE registers
                in SImode, DImode and TImode */
  {4, 4, 5}, /* cost of storing SSE registers
                in SImode, DImode and TImode */
  5, /* MMX or SSE register to integer */
  64, /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set the number of simultaneous
     prefetches to a large constant to reflect this (it probably is not a
     good idea not to limit the number of prefetches at all, as their
     execution also takes some time).  */
  100, /* number of parallel prefetches */
  COSTS_N_INSNS (4), /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4), /* cost of FMUL instruction.  */
  COSTS_N_INSNS (19), /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2), /* cost of FABS instruction.  */
  COSTS_N_INSNS (2), /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35), /* cost of FSQRT instruction.  */
  /* K8 has an optimized REP instruction for medium sized blocks, but for
     very small blocks it is better to use a loop.  For large blocks, a
     libcall can do nontemporal accesses and beat inline code considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
              {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}}
};
static const
struct processor_costs amdfam10_cost = {
  COSTS_N_INSNS (1), /* cost of an add instruction */
  COSTS_N_INSNS (2), /* cost of a lea instruction */
  COSTS_N_INSNS (1), /* variable shift costs */
  COSTS_N_INSNS (1), /* constant shift costs */
  {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
   COSTS_N_INSNS (4), /* HI */
   COSTS_N_INSNS (3), /* SI */
   COSTS_N_INSNS (4), /* DI */
   COSTS_N_INSNS (5)}, /* other */
  0, /* cost of multiply per each bit set */
  {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
   COSTS_N_INSNS (35), /* HI */
   COSTS_N_INSNS (51), /* SI */
   COSTS_N_INSNS (83), /* DI */
   COSTS_N_INSNS (83)}, /* other */
  COSTS_N_INSNS (1), /* cost of movsx */
  COSTS_N_INSNS (1), /* cost of movzx */
  8, /* "large" insn */
  4, /* cost for loading QImode using movzbl */
  {3, 4, 3}, /* cost of loading integer registers
                in QImode, HImode and SImode.
                Relative to reg-reg move (2).  */
  {3, 4, 3}, /* cost of storing integer registers */
  4, /* cost of reg,reg fld/fst */
  {4, 4, 12}, /* cost of loading fp registers
                 in SFmode, DFmode and XFmode */
  {6, 6, 8}, /* cost of storing fp registers
                in SFmode, DFmode and XFmode */
  2, /* cost of moving MMX register */
  {3, 3}, /* cost of loading MMX registers
             in SImode and DImode */
  {4, 4}, /* cost of storing MMX registers
             in SImode and DImode */
  2, /* cost of moving SSE register */
  {4, 4, 3}, /* cost of loading SSE registers
                in SImode, DImode and TImode */
  {4, 4, 5}, /* cost of storing SSE registers
                in SImode, DImode and TImode */
  3, /* MMX or SSE register to integer */
  /* On K8:
       MOVD reg64, xmmreg  Double  FSTORE 4
       MOVD reg32, xmmreg  Double  FSTORE 4
     On AMDFAM10:
       MOVD reg64, xmmreg  Double  FADD 3
       MOVD reg32, xmmreg  Double  FADD 3  */
  64, /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set the number of simultaneous
     prefetches to a large constant to reflect this (it probably is not a
     good idea not to limit the number of prefetches at all, as their
     execution also takes some time).  */
  100, /* number of parallel prefetches */
  COSTS_N_INSNS (4), /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4), /* cost of FMUL instruction.  */
  COSTS_N_INSNS (19), /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2), /* cost of FABS instruction.  */
  COSTS_N_INSNS (2), /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35), /* cost of FSQRT instruction.  */

  /* AMDFAM10 has an optimized REP instruction for medium sized blocks, but
     for very small blocks it is better to use a loop.  For large blocks, a
     libcall can do nontemporal accesses and beat inline code considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
              {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}}
};
static const
struct processor_costs pentium4_cost = {
  COSTS_N_INSNS (1), /* cost of an add instruction */
  COSTS_N_INSNS (3), /* cost of a lea instruction */
  COSTS_N_INSNS (4), /* variable shift costs */
  COSTS_N_INSNS (4), /* constant shift costs */
  {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
   COSTS_N_INSNS (15), /* HI */
   COSTS_N_INSNS (15), /* SI */
   COSTS_N_INSNS (15), /* DI */
   COSTS_N_INSNS (15)}, /* other */
  0, /* cost of multiply per each bit set */
  {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
   COSTS_N_INSNS (56), /* HI */
   COSTS_N_INSNS (56), /* SI */
   COSTS_N_INSNS (56), /* DI */
   COSTS_N_INSNS (56)}, /* other */
  COSTS_N_INSNS (1), /* cost of movsx */
  COSTS_N_INSNS (1), /* cost of movzx */
  16, /* "large" insn */
  2, /* cost for loading QImode using movzbl */
  {4, 5, 4}, /* cost of loading integer registers
                in QImode, HImode and SImode.
                Relative to reg-reg move (2).  */
  {2, 3, 2}, /* cost of storing integer registers */
  2, /* cost of reg,reg fld/fst */
  {2, 2, 6}, /* cost of loading fp registers
                in SFmode, DFmode and XFmode */
  {4, 4, 6}, /* cost of storing fp registers
                in SFmode, DFmode and XFmode */
  2, /* cost of moving MMX register */
  {2, 2}, /* cost of loading MMX registers
             in SImode and DImode */
  {2, 2}, /* cost of storing MMX registers
             in SImode and DImode */
  12, /* cost of moving SSE register */
  {12, 12, 12}, /* cost of loading SSE registers
                   in SImode, DImode and TImode */
  {2, 2, 8}, /* cost of storing SSE registers
                in SImode, DImode and TImode */
  10, /* MMX or SSE register to integer */
  64, /* size of prefetch block */
  6, /* number of parallel prefetches */
  COSTS_N_INSNS (5), /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (7), /* cost of FMUL instruction.  */
  COSTS_N_INSNS (43), /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2), /* cost of FABS instruction.  */
  COSTS_N_INSNS (2), /* cost of FCHS instruction.  */
  COSTS_N_INSNS (43), /* cost of FSQRT instruction.  */
  {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
              {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
};
static const
struct processor_costs nocona_cost = {
  COSTS_N_INSNS (1), /* cost of an add instruction */
  COSTS_N_INSNS (1), /* cost of a lea instruction */
  COSTS_N_INSNS (1), /* variable shift costs */
  COSTS_N_INSNS (1), /* constant shift costs */
  {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
   COSTS_N_INSNS (10), /* HI */
   COSTS_N_INSNS (10), /* SI */
   COSTS_N_INSNS (10), /* DI */
   COSTS_N_INSNS (10)}, /* other */
  0, /* cost of multiply per each bit set */
  {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
   COSTS_N_INSNS (66), /* HI */
   COSTS_N_INSNS (66), /* SI */
   COSTS_N_INSNS (66), /* DI */
   COSTS_N_INSNS (66)}, /* other */
  COSTS_N_INSNS (1), /* cost of movsx */
  COSTS_N_INSNS (1), /* cost of movzx */
  16, /* "large" insn */
  4, /* cost for loading QImode using movzbl */
  {4, 4, 4}, /* cost of loading integer registers
                in QImode, HImode and SImode.
                Relative to reg-reg move (2).  */
  {4, 4, 4}, /* cost of storing integer registers */
  3, /* cost of reg,reg fld/fst */
  {12, 12, 12}, /* cost of loading fp registers
                   in SFmode, DFmode and XFmode */
  {4, 4, 4}, /* cost of storing fp registers
                in SFmode, DFmode and XFmode */
  6, /* cost of moving MMX register */
  {12, 12}, /* cost of loading MMX registers
               in SImode and DImode */
  {12, 12}, /* cost of storing MMX registers
               in SImode and DImode */
  6, /* cost of moving SSE register */
  {12, 12, 12}, /* cost of loading SSE registers
                   in SImode, DImode and TImode */
  {12, 12, 12}, /* cost of storing SSE registers
                   in SImode, DImode and TImode */
  8, /* MMX or SSE register to integer */
  128, /* size of prefetch block */
  8, /* number of parallel prefetches */
  COSTS_N_INSNS (6), /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8), /* cost of FMUL instruction.  */
  COSTS_N_INSNS (40), /* cost of FDIV instruction.  */
  COSTS_N_INSNS (3), /* cost of FABS instruction.  */
  COSTS_N_INSNS (3), /* cost of FCHS instruction.  */
  COSTS_N_INSNS (44), /* cost of FSQRT instruction.  */
  {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
   {libcall, {{32, loop}, {20000, rep_prefix_8_byte},
              {100000, unrolled_loop}, {-1, libcall}}}},
  {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
              {-1, libcall}}},
   {libcall, {{24, loop}, {64, unrolled_loop},
              {8192, rep_prefix_8_byte}, {-1, libcall}}}}
};
static const
struct processor_costs core2_cost = {
  COSTS_N_INSNS (1), /* cost of an add instruction */
  COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
  COSTS_N_INSNS (1), /* variable shift costs */
  COSTS_N_INSNS (1), /* constant shift costs */
  {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
   COSTS_N_INSNS (3), /* HI */
   COSTS_N_INSNS (3), /* SI */
   COSTS_N_INSNS (3), /* DI */
   COSTS_N_INSNS (3)}, /* other */
  0, /* cost of multiply per each bit set */
  {COSTS_N_INSNS (22), /* cost of a divide/mod for QI */
   COSTS_N_INSNS (22), /* HI */
   COSTS_N_INSNS (22), /* SI */
   COSTS_N_INSNS (22), /* DI */
   COSTS_N_INSNS (22)}, /* other */
  COSTS_N_INSNS (1), /* cost of movsx */
  COSTS_N_INSNS (1), /* cost of movzx */
  8, /* "large" insn */
  2, /* cost for loading QImode using movzbl */
  {6, 6, 6}, /* cost of loading integer registers
                in QImode, HImode and SImode.
                Relative to reg-reg move (2).  */
  {4, 4, 4}, /* cost of storing integer registers */
  2, /* cost of reg,reg fld/fst */
  {6, 6, 6}, /* cost of loading fp registers
                in SFmode, DFmode and XFmode */
  {4, 4, 4}, /* cost of storing fp registers
                in SFmode, DFmode and XFmode */
  2, /* cost of moving MMX register */
  {6, 6}, /* cost of loading MMX registers
             in SImode and DImode */
  {4, 4}, /* cost of storing MMX registers
             in SImode and DImode */
  2, /* cost of moving SSE register */
  {6, 6, 6}, /* cost of loading SSE registers
                in SImode, DImode and TImode */
  {4, 4, 4}, /* cost of storing SSE registers
                in SImode, DImode and TImode */
  2, /* MMX or SSE register to integer */
  128, /* size of prefetch block */
  8, /* number of parallel prefetches */
  COSTS_N_INSNS (3), /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (5), /* cost of FMUL instruction.  */
  COSTS_N_INSNS (32), /* cost of FDIV instruction.  */
  COSTS_N_INSNS (1), /* cost of FABS instruction.  */
  COSTS_N_INSNS (1), /* cost of FCHS instruction.  */
  COSTS_N_INSNS (58), /* cost of FSQRT instruction.  */
  {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{32, loop}, {64, rep_prefix_4_byte},
              {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {15, unrolled_loop},
              {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{24, loop}, {32, unrolled_loop},
              {8192, rep_prefix_8_byte}, {-1, libcall}}}}
};
/* Generic64 should produce code tuned for Nocona and K8.  */
static const
struct processor_costs generic64_cost = {
  COSTS_N_INSNS (1), /* cost of an add instruction */
  /* On all chips taken into consideration, lea is 2 cycles or more.  With
     this cost, however, our current implementation of synth_mult results in
     the use of unnecessary temporary registers, causing regressions on
     several SPECfp benchmarks.  */
  COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
  COSTS_N_INSNS (1), /* variable shift costs */
  COSTS_N_INSNS (1), /* constant shift costs */
  {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
   COSTS_N_INSNS (4), /* HI */
   COSTS_N_INSNS (3), /* SI */
   COSTS_N_INSNS (4), /* DI */
   COSTS_N_INSNS (2)}, /* other */
  0, /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26), /* HI */
   COSTS_N_INSNS (42), /* SI */
   COSTS_N_INSNS (74), /* DI */
   COSTS_N_INSNS (74)}, /* other */
  COSTS_N_INSNS (1), /* cost of movsx */
  COSTS_N_INSNS (1), /* cost of movzx */
  8, /* "large" insn */
  4, /* cost for loading QImode using movzbl */
  {4, 4, 4}, /* cost of loading integer registers
                in QImode, HImode and SImode.
                Relative to reg-reg move (2).  */
  {4, 4, 4}, /* cost of storing integer registers */
  4, /* cost of reg,reg fld/fst */
  {12, 12, 12}, /* cost of loading fp registers
                   in SFmode, DFmode and XFmode */
  {6, 6, 8}, /* cost of storing fp registers
                in SFmode, DFmode and XFmode */
  2, /* cost of moving MMX register */
  {8, 8}, /* cost of loading MMX registers
             in SImode and DImode */
  {8, 8}, /* cost of storing MMX registers
             in SImode and DImode */
  2, /* cost of moving SSE register */
  {8, 8, 8}, /* cost of loading SSE registers
                in SImode, DImode and TImode */
  {8, 8, 8}, /* cost of storing SSE registers
                in SImode, DImode and TImode */
  5, /* MMX or SSE register to integer */
  64, /* size of prefetch block */
  6, /* number of parallel prefetches */
  /* Benchmarks show large regressions on the K8 sixtrack benchmark when this
     value is increased to the perhaps more appropriate value of 5.  */
  COSTS_N_INSNS (8), /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8), /* cost of FMUL instruction.  */
  COSTS_N_INSNS (20), /* cost of FDIV instruction.  */
  COSTS_N_INSNS (8), /* cost of FABS instruction.  */
  COSTS_N_INSNS (8), /* cost of FCHS instruction.  */
  COSTS_N_INSNS (40), /* cost of FSQRT instruction.  */
  {DUMMY_STRINGOP_ALGS,
   {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {DUMMY_STRINGOP_ALGS,
   {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}}
};
/* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona
   and K8.  */
static const
struct processor_costs generic32_cost = {
  COSTS_N_INSNS (1), /* cost of an add instruction */
  COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
  COSTS_N_INSNS (1), /* variable shift costs */
  COSTS_N_INSNS (1), /* constant shift costs */
  {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
   COSTS_N_INSNS (4), /* HI */
   COSTS_N_INSNS (3), /* SI */
   COSTS_N_INSNS (4), /* DI */
   COSTS_N_INSNS (2)}, /* other */
  0, /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26), /* HI */
   COSTS_N_INSNS (42), /* SI */
   COSTS_N_INSNS (74), /* DI */
   COSTS_N_INSNS (74)}, /* other */
  COSTS_N_INSNS (1), /* cost of movsx */
  COSTS_N_INSNS (1), /* cost of movzx */
  8, /* "large" insn */
  4, /* cost for loading QImode using movzbl */
  {4, 4, 4}, /* cost of loading integer registers
                in QImode, HImode and SImode.
                Relative to reg-reg move (2).  */
  {4, 4, 4}, /* cost of storing integer registers */
  4, /* cost of reg,reg fld/fst */
  {12, 12, 12}, /* cost of loading fp registers
                   in SFmode, DFmode and XFmode */
  {6, 6, 8}, /* cost of storing fp registers
                in SFmode, DFmode and XFmode */
  2, /* cost of moving MMX register */
  {8, 8}, /* cost of loading MMX registers
             in SImode and DImode */
  {8, 8}, /* cost of storing MMX registers
             in SImode and DImode */
  2, /* cost of moving SSE register */
  {8, 8, 8}, /* cost of loading SSE registers
                in SImode, DImode and TImode */
  {8, 8, 8}, /* cost of storing SSE registers
                in SImode, DImode and TImode */
  5, /* MMX or SSE register to integer */
  64, /* size of prefetch block */
  6, /* number of parallel prefetches */
  COSTS_N_INSNS (8), /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8), /* cost of FMUL instruction.  */
  COSTS_N_INSNS (20), /* cost of FDIV instruction.  */
  COSTS_N_INSNS (8), /* cost of FABS instruction.  */
  COSTS_N_INSNS (8), /* cost of FCHS instruction.  */
  COSTS_N_INSNS (40), /* cost of FSQRT instruction.  */
  {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
};
const struct processor_costs *ix86_cost = &pentium_cost;

/* Processor feature/optimization bitmasks.  */
#define m_386 (1<<PROCESSOR_I386)
#define m_486 (1<<PROCESSOR_I486)
#define m_PENT (1<<PROCESSOR_PENTIUM)
#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
#define m_PENT4 (1<<PROCESSOR_PENTIUM4)
#define m_NOCONA (1<<PROCESSOR_NOCONA)
#define m_CORE2 (1<<PROCESSOR_CORE2)

#define m_GEODE (1<<PROCESSOR_GEODE)
#define m_K6 (1<<PROCESSOR_K6)
#define m_K6_GEODE (m_K6 | m_GEODE)
#define m_K8 (1<<PROCESSOR_K8)
#define m_ATHLON (1<<PROCESSOR_ATHLON)
#define m_ATHLON_K8 (m_K8 | m_ATHLON)
#define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
#define m_ATHLON_K8_AMDFAM10 (m_K8 | m_ATHLON | m_AMDFAM10)

#define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
#define m_GENERIC64 (1<<PROCESSOR_GENERIC64)

/* Generic instruction choice should be common subset of supported CPUs
   (PPro/PENT4/NOCONA/CORE2/Athlon/K8).  */
#define m_GENERIC (m_GENERIC32 | m_GENERIC64)
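/* Illustration (not from the original file): these masks are tested against
   the mask of the CPU currently compiled for.  Assuming the conventional
   setup in override_options, a tuning check looks like

     ix86_tune_mask = 1 << ix86_tune;
     if (ix86_tune_features[X86_TUNE_USE_LEAVE] & ix86_tune_mask)
       ... prefer the leave instruction in epilogues ...
*/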
/* Feature tests against the various tunings.  */
unsigned int ix86_tune_features[X86_TUNE_LAST] = {
  /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
     negatively, so enabling for Generic64 seems like a good code size
     tradeoff.  We can't enable it for 32bit generic because it does not
     work well with PPro base chips.  */
  m_386 | m_K6_GEODE | m_ATHLON_K8_AMDFAM10 | m_CORE2 | m_GENERIC64,

  /* X86_TUNE_PUSH_MEMORY */
  m_386 | m_K6_GEODE | m_ATHLON_K8_AMDFAM10 | m_PENT4
  | m_NOCONA | m_CORE2 | m_GENERIC,

  /* X86_TUNE_ZERO_EXTEND_WITH_AND */

  /* X86_TUNE_USE_BIT_TEST */

  /* X86_TUNE_UNROLL_STRLEN */
  m_486 | m_PENT | m_PPRO | m_ATHLON_K8_AMDFAM10 | m_K6 | m_CORE2 | m_GENERIC,

  /* X86_TUNE_DEEP_BRANCH_PREDICTION */
  m_PPRO | m_K6_GEODE | m_ATHLON_K8_AMDFAM10 | m_PENT4
  | m_NOCONA | m_CORE2 | m_GENERIC,

  /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
     on simulation results.  But after P4 was made, no performance benefit
     was observed with branch hints.  It also increases the code size.
     As a result, icc never generates branch hints.  */

  /* X86_TUNE_DOUBLE_WITH_ADD */

  /* X86_TUNE_USE_SAHF */
  m_PPRO | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_PENT4
  | m_NOCONA | m_CORE2 | m_GENERIC,

  /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
     partial dependencies.  */
  m_ATHLON_K8_AMDFAM10 | m_PPRO | m_PENT4 | m_NOCONA
  | m_CORE2 | m_GENERIC | m_GEODE /* m_386 | m_K6 */,

  /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
     register stalls on the Generic32 compilation setting as well.  However,
     in the current implementation the partial register stalls are not
     eliminated very well - they can be introduced via subregs synthesized
     by combine and can happen in caller/callee saving sequences.  Because
     this option pays back little on PPro based chips and is in conflict
     with partial reg dependencies used by Athlon/P4 based chips, it is
     better to leave it off for generic32 for now.  */

  /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
  m_CORE2 | m_GENERIC,

  /* X86_TUNE_USE_HIMODE_FIOP */
  m_386 | m_486 | m_K6_GEODE,

  /* X86_TUNE_USE_SIMODE_FIOP */
  ~(m_PPRO | m_ATHLON_K8_AMDFAM10 | m_PENT | m_CORE2 | m_GENERIC),

  /* X86_TUNE_USE_MOV0 */

  /* X86_TUNE_USE_CLTD */
  ~(m_PENT | m_K6 | m_CORE2 | m_GENERIC),

  /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx.  */

  /* X86_TUNE_SPLIT_LONG_MOVES */

  /* X86_TUNE_READ_MODIFY_WRITE */

  /* X86_TUNE_READ_MODIFY */

  /* X86_TUNE_PROMOTE_QIMODE */
  m_K6_GEODE | m_PENT | m_386 | m_486 | m_ATHLON_K8_AMDFAM10 | m_CORE2
  | m_GENERIC /* | m_PENT4 ? */,

  /* X86_TUNE_FAST_PREFIX */
  ~(m_PENT | m_486 | m_386),

  /* X86_TUNE_SINGLE_STRINGOP */
  m_386 | m_PENT4 | m_NOCONA,

  /* X86_TUNE_QIMODE_MATH */

  /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
     register stalls.  Just like X86_TUNE_PARTIAL_REG_STALL this option
     might be considered for Generic32 if our scheme for avoiding partial
     stalls was more effective.  */

  /* X86_TUNE_PROMOTE_QI_REGS */

  /* X86_TUNE_PROMOTE_HI_REGS */

  /* X86_TUNE_ADD_ESP_4: Enable if add/sub is preferred over 1/2 push/pop.  */
  m_ATHLON_K8_AMDFAM10 | m_K6_GEODE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,

  /* X86_TUNE_ADD_ESP_8 */
  m_ATHLON_K8_AMDFAM10 | m_PPRO | m_K6_GEODE | m_386
  | m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,

  /* X86_TUNE_SUB_ESP_4 */
  m_ATHLON_K8_AMDFAM10 | m_PPRO | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,

  /* X86_TUNE_SUB_ESP_8 */
  m_ATHLON_K8_AMDFAM10 | m_PPRO | m_386 | m_486
  | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,

  /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
     for DFmode copies */
  ~(m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
    | m_GENERIC | m_GEODE),

  /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
  m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,

  /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
     conflict here in between PPro/Pentium4 based chips that treat 128bit
     SSE registers as single units versus K8 based chips that divide SSE
     registers into two 64bit halves.  This knob promotes all store
     destinations to be 128bit to allow register renaming on 128bit SSE
     units, but usually results in one extra microop on 64bit SSE units.
     Experimental results show that disabling this option on P4 brings over
     20% SPECfp regression, while enabling it on K8 brings roughly 2.4%
     regression that can be partly masked by careful scheduling of moves.  */
  m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC | m_AMDFAM10,

  /* X86_TUNE_SSE_UNALIGNED_MOVE_OPTIMAL */

  /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
     are resolved on SSE register parts instead of whole registers, so we may
     maintain just the lower part of scalar values in proper format, leaving
     the upper part undefined.  */

  /* X86_TUNE_SSE_TYPELESS_STORES */
  m_ATHLON_K8_AMDFAM10,

  /* X86_TUNE_SSE_LOAD0_BY_PXOR */
  m_PPRO | m_PENT4 | m_NOCONA,

  /* X86_TUNE_MEMORY_MISMATCH_STALL */
  m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,

  /* X86_TUNE_PROLOGUE_USING_MOVE */
  m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC,

  /* X86_TUNE_EPILOGUE_USING_MOVE */
  m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC,

  /* X86_TUNE_SHIFT1 */

  /* X86_TUNE_USE_FFREEP */
  m_ATHLON_K8_AMDFAM10,

  /* X86_TUNE_INTER_UNIT_MOVES */
  ~(m_ATHLON_K8_AMDFAM10 | m_GENERIC),

  /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
     than 4 branch instructions in the 16 byte window.  */
  m_PPRO | m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,

  /* X86_TUNE_SCHEDULE */
  m_PPRO | m_ATHLON_K8_AMDFAM10 | m_K6_GEODE | m_PENT | m_CORE2 | m_GENERIC,

  /* X86_TUNE_USE_BT */
  m_ATHLON_K8_AMDFAM10,

  /* X86_TUNE_USE_INCDEC */
  ~(m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC),

  /* X86_TUNE_PAD_RETURNS */
  m_ATHLON_K8_AMDFAM10 | m_CORE2 | m_GENERIC,

  /* X86_TUNE_EXT_80387_CONSTANTS */
  m_K6_GEODE | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC,

  /* X86_TUNE_SHORTEN_X87_SSE */

  /* X86_TUNE_AVOID_VECTOR_DECODE */

  /* X86_TUNE_SLOW_IMUL_IMM32_MEM (imul of 32-bit constant and memory is
     vector path on AMD machines) */
  m_K8 | m_GENERIC64 | m_AMDFAM10,

  /* X86_TUNE_SLOW_IMUL_IMM8 (imul of 8-bit constant is vector path on AMD
     machines) */
  m_K8 | m_GENERIC64 | m_AMDFAM10,

  /* X86_TUNE_MOVE_M1_VIA_OR (on pentiums, it is faster to load -1 via OR
     than a mov) */

  /* X86_TUNE_NOT_UNPAIRABLE (NOT is not pairable on Pentium, while XOR is,
     but one byte longer).  */

  /* X86_TUNE_NOT_VECTORMODE (On AMD K6, NOT is vector decoded with memory
     operand that cannot be represented using a modRM byte.  The XOR
     replacement is long decoded, so this split helps here as well).  */
};
/* Feature tests against the various architecture variations.  */
unsigned int ix86_arch_features[X86_ARCH_LAST] = {
  /* X86_ARCH_CMOVE */
  m_PPRO | m_GEODE | m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA,

  /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486.  */

  /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium.  */

  /* X86_ARCH_XADD: Exchange and add was added for 80486.  */

  /* X86_ARCH_BSWAP: Byteswap was added for 80486.  */
};
static const unsigned int x86_accumulate_outgoing_args
  = m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC;

static const unsigned int x86_arch_always_fancy_math_387
  = m_PENT | m_PPRO | m_ATHLON_K8_AMDFAM10 | m_PENT4
    | m_NOCONA | m_CORE2 | m_GENERIC;

static enum stringop_alg stringop_alg = no_stringop;

/* In case the average insn count for single function invocation is
   lower than this constant, emit fast (but longer) prologue and
   epilogue code.  */
#define FAST_PROLOGUE_INSN_COUNT 20
/* Names for 8 (low), 8 (high), and 16-bit registers, respectively.  */
static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
static const char *const hi_reg_name[] = HI_REGISTER_NAMES;

/* Array of the smallest class containing reg number REGNO, indexed by
   REGNO.  Used by REGNO_REG_CLASS in i386.h.  */
enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
{
  /* ax, dx, cx, bx */
  AREG, DREG, CREG, BREG,
  /* si, di, bp, sp */
  SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
  FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
  FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
  /* flags, fpsr, fpcr, frame */
  NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
};
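/* Illustrative lookup (not from the original file): REGNO_REG_CLASS (1)
   == DREG, since gcc regno 1 is %edx in the ordering above
   (ax, dx, cx, bx, ...).  */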
/* The "default" register map used in 32bit mode.  */

int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
  12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
  -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
  21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
  29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
  -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
  -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
};
static int const x86_64_int_parameter_registers[6] =
{
  5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
  FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
};

static int const x86_64_int_return_registers[4] =
{
  0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
};
/* The "default" register map used in 64bit mode.  */

int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
  33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
  -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
  17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
  41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
  8, 9, 10, 11, 12, 13, 14, 15, /* extended integer registers */
  25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
};
/* Define the register numbers to be used in Dwarf debugging information.
   The SVR4 reference port C compiler uses the following register numbers
   in its Dwarf output code:
        0 for %eax (gcc regno = 0)
        1 for %ecx (gcc regno = 2)
        2 for %edx (gcc regno = 1)
        3 for %ebx (gcc regno = 3)
        4 for %esp (gcc regno = 7)
        5 for %ebp (gcc regno = 6)
        6 for %esi (gcc regno = 4)
        7 for %edi (gcc regno = 5)
   The following three DWARF register numbers are never generated by
   the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
   believes these numbers have these meanings.
        8 for %eip (no gcc equivalent)
        9 for %eflags (gcc regno = 17)
        10 for %trapno (no gcc equivalent)
   It is not at all clear how we should number the FP stack registers
   for the x86 architecture.  If the version of SDB on x86/svr4 were
   a bit less brain dead with respect to floating-point then we would
   have a precedent to follow with respect to DWARF register numbers
   for x86 FP registers, but the SDB on x86/svr4 is so completely
   broken with respect to FP registers that it is hardly worth thinking
   of it as something to strive for compatibility with.
   The version of x86/svr4 SDB I have at the moment does (partially)
   seem to believe that DWARF register number 11 is associated with
   the x86 register %st(0), but that's about all.  Higher DWARF
   register numbers don't seem to be associated with anything in
   particular, and even for DWARF regno 11, SDB only seems to under-
   stand that it should say that a variable lives in %st(0) (when
   asked via an `=' command) if we said it was in DWARF regno 11,
   but SDB still prints garbage when asked for the value of the
   variable in question (via a `/' command).
   (Also note that the labels SDB prints for various FP stack regs
   when doing an `x' command are all wrong.)
   Note that these problems generally don't affect the native SVR4
   C compiler because it doesn't allow the use of -O with -g and
   because when it is *not* optimizing, it allocates a memory
   location for each floating-point variable, and the memory
   location is what gets described in the DWARF AT_location
   attribute for the variable in question.
   Regardless of the severe mental illness of the x86/svr4 SDB, we
   do something sensible here and we use the following DWARF
   register numbers.  Note that these are all stack-top-relative
   numbers.
        11 for %st(0) (gcc regno = 8)
        12 for %st(1) (gcc regno = 9)
        13 for %st(2) (gcc regno = 10)
        14 for %st(3) (gcc regno = 11)
        15 for %st(4) (gcc regno = 12)
        16 for %st(5) (gcc regno = 13)
        17 for %st(6) (gcc regno = 14)
        18 for %st(7) (gcc regno = 15)
*/
int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
  11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
  -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
  21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
  29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
  -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
  -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
};
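/* Illustrative lookup (not from the original file): gcc regno 2 is %ecx
   and svr4_dbx_register_map[2] == 1, matching the "1 for %ecx" DWARF
   numbering listed above; likewise regno 8, %st(0), maps to DWARF
   regno 11.  */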
/* Test and compare insns in i386.md store the information needed to
   generate branch and scc insns here.  */

rtx ix86_compare_op0 = NULL_RTX;
rtx ix86_compare_op1 = NULL_RTX;
rtx ix86_compare_emitted = NULL_RTX;
/* Size of the register save area.  */
#define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
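/* Worked example (assuming the usual 64-bit values REGPARM_MAX == 6,
   SSE_REGPARM_MAX == 8 and UNITS_PER_WORD == 8): the register save area
   is 6*8 + 8*16 = 176 bytes, matching the x86-64 psABI.  */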
/* Define the structure for the machine field in struct function.  */

struct stack_local_entry GTY(())
{
  unsigned short mode;
  unsigned short n;
  rtx rtl;
  struct stack_local_entry *next;
};
/* Structure describing stack frame layout.
   Stack grows downward:

   saved frame pointer if frame_pointer_needed
                                        <- HARD_FRAME_POINTER
   [saved regs]

   [va_arg registers]  (
                        > to_allocate   <- FRAME_POINTER
   [frame]             (
  */
struct ix86_frame
{
  HOST_WIDE_INT frame;
  int outgoing_arguments_size;

  HOST_WIDE_INT to_allocate;
  /* The offsets relative to ARG_POINTER.  */
  HOST_WIDE_INT frame_pointer_offset;
  HOST_WIDE_INT hard_frame_pointer_offset;
  HOST_WIDE_INT stack_pointer_offset;

  /* When save_regs_using_mov is set, emit prologue using
     move instead of push instructions.  */
  bool save_regs_using_mov;
};
/* Code model option.  */
enum cmodel ix86_cmodel;
enum asm_dialect ix86_asm_dialect = ASM_ATT;
enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;

/* Which unit we are generating floating point math for.  */
enum fpmath_unit ix86_fpmath;

/* Which CPU we are scheduling for.  */
enum processor_type ix86_tune;

/* Which instruction set architecture to use.  */
enum processor_type ix86_arch;

/* True if the SSE prefetch instruction is not a NOOP.  */
int x86_prefetch_sse;

/* ix86_regparm_string as a number */
static int ix86_regparm;

/* -mstackrealign option */
extern int ix86_force_align_arg_pointer;
static const char ix86_force_align_arg_pointer_string[] = "force_align_arg_pointer";

/* Preferred alignment for stack boundary in bits.  */
unsigned int ix86_preferred_stack_boundary;

/* Values 1-5: see jump.c */
int ix86_branch_cost;

/* Variables which are this size or smaller are put in the data/bss
   or ldata/lbss sections.  */

int ix86_section_threshold = 65536;

/* Prefix built by ASM_GENERATE_INTERNAL_LABEL.  */
char internal_label_prefix[16];
int internal_label_prefix_len;
/* Register class used for passing given 64bit part of the argument.
   These represent classes as documented by the psABI, with the exception
   of SSESF, SSEDF classes, that are basically SSE class, just gcc will
   use SF or DFmode move instead of DImode to avoid reformatting penalties.

   Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
   whenever possible (upper half does contain padding).  */
enum x86_64_reg_class
  {
    X86_64_NO_CLASS,
    X86_64_INTEGER_CLASS,
    X86_64_INTEGERSI_CLASS,
    X86_64_SSE_CLASS,
    X86_64_SSESF_CLASS,
    X86_64_SSEDF_CLASS,
    X86_64_SSEUP_CLASS,
    X86_64_X87_CLASS,
    X86_64_X87UP_CLASS,
    X86_64_COMPLEX_X87_CLASS,
    X86_64_MEMORY_CLASS
  };
static const char * const x86_64_reg_class_name[] =
{
  "no", "integer", "integerSI", "sse", "sseSF", "sseDF",
  "sseup", "x87", "x87up", "cplx87", "no"
};
#define MAX_CLASSES 4

/* Table of constants used by fldpi, fldln2, etc.  */
static REAL_VALUE_TYPE ext_80387_constants_table [5];
static bool ext_80387_constants_init = 0;
static struct machine_function * ix86_init_machine_status (void);
static rtx ix86_function_value (tree, tree, bool);
static int ix86_function_regparm (tree, tree);
static void ix86_compute_frame_layout (struct ix86_frame *);
static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
                                                 rtx, rtx);
1532 /* The svr4 ABI for the i386 says that records and unions are returned
1534 #ifndef DEFAULT_PCC_STRUCT_RETURN
1535 #define DEFAULT_PCC_STRUCT_RETURN 1
1538 /* Implement TARGET_HANDLE_OPTION. */
1541 ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
1548 target_flags &= ~MASK_3DNOW_A;
1549 target_flags_explicit |= MASK_3DNOW_A;
1556 target_flags &= ~(MASK_3DNOW | MASK_3DNOW_A);
1557 target_flags_explicit |= MASK_3DNOW | MASK_3DNOW_A;
1564 target_flags &= ~(MASK_SSE2 | MASK_SSE3 | MASK_SSE4A);
1565 target_flags_explicit |= MASK_SSE2 | MASK_SSE3 | MASK_SSE4A;
1572 target_flags &= ~(MASK_SSE3 | MASK_SSE4A);
1573 target_flags_explicit |= MASK_SSE3 | MASK_SSE4A;
1580 target_flags &= ~MASK_SSE4A;
1581 target_flags_explicit |= MASK_SSE4A;
1590 /* Sometimes certain combinations of command options do not make
1591 sense on a particular target machine. You can define a macro
1592 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1593 defined, is executed once just after all the command options have
1596 Don't use this macro to turn on various extra optimizations for
1597 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1600 override_options (void)
1603 int ix86_tune_defaulted = 0;
1604 unsigned int ix86_arch_mask, ix86_tune_mask;
1606 /* Comes from final.c -- no real reason to change it. */
1607 #define MAX_CODE_ALIGN 16
1611 const struct processor_costs *cost; /* Processor costs */
1612 const int target_enable; /* Target flags to enable. */
1613 const int target_disable; /* Target flags to disable. */
1614 const int align_loop; /* Default alignments. */
1615 const int align_loop_max_skip;
1616 const int align_jump;
1617 const int align_jump_max_skip;
1618 const int align_func;
1620 const processor_target_table[PROCESSOR_max] =
1622 {&i386_cost, 0, 0, 4, 3, 4, 3, 4},
1623 {&i486_cost, 0, 0, 16, 15, 16, 15, 16},
1624 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
1625 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
1626 {&geode_cost, 0, 0, 0, 0, 0, 0, 0},
1627 {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
1628 {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
1629 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
1630 {&k8_cost, 0, 0, 16, 7, 16, 7, 16},
1631 {&nocona_cost, 0, 0, 0, 0, 0, 0, 0},
1632 {&core2_cost, 0, 0, 16, 7, 16, 7, 16},
1633 {&generic32_cost, 0, 0, 16, 7, 16, 7, 16},
1634 {&generic64_cost, 0, 0, 16, 7, 16, 7, 16},
1635 {&amdfam10_cost, 0, 0, 32, 7, 32, 7, 32}
1638 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
1641 const char *const name; /* processor name or nickname. */
1642 const enum processor_type processor;
1643 const enum pta_flags
1649 PTA_PREFETCH_SSE = 1 << 4,
1651 PTA_3DNOW_A = 1 << 6,
1655 PTA_POPCNT = 1 << 10,
1657 PTA_SSE4A = 1 << 12,
1658 PTA_NO_SAHF = 1 << 13
1661 const processor_alias_table[] =
1663 {"i386", PROCESSOR_I386, 0},
1664 {"i486", PROCESSOR_I486, 0},
1665 {"i586", PROCESSOR_PENTIUM, 0},
1666 {"pentium", PROCESSOR_PENTIUM, 0},
1667 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
1668 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
1669 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1670 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1671 {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_PREFETCH_SSE | PTA_SSE},
1672 {"i686", PROCESSOR_PENTIUMPRO, 0},
1673 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
1674 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
1675 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1676 {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1677 {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE | PTA_SSE2},
1678 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1679 | PTA_MMX | PTA_PREFETCH_SSE},
1680 {"pentium4m", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1681 | PTA_MMX | PTA_PREFETCH_SSE},
1682 {"prescott", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3
1683 | PTA_MMX | PTA_PREFETCH_SSE},
1684 {"nocona", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_64BIT
1685 | PTA_MMX | PTA_PREFETCH_SSE
1686 | PTA_CX16 | PTA_NO_SAHF},
1687 {"core2", PROCESSOR_CORE2, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_SSSE3
1688 | PTA_64BIT | PTA_MMX
1689 | PTA_PREFETCH_SSE | PTA_CX16},
1690 {"geode", PROCESSOR_GEODE, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1692 {"k6", PROCESSOR_K6, PTA_MMX},
1693 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1694 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1695 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1697 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
1698 | PTA_3DNOW | PTA_3DNOW_A},
1699 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1700 | PTA_3DNOW_A | PTA_SSE},
1701 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1702 | PTA_3DNOW_A | PTA_SSE},
1703 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1704 | PTA_3DNOW_A | PTA_SSE},
1705 {"x86-64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_64BIT
1706 | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF},
1707 {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1708 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
1710 {"opteron", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1711 | PTA_64BIT | PTA_3DNOW_A | PTA_SSE
1712 | PTA_SSE2 | PTA_NO_SAHF},
1713 {"athlon64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1714 | PTA_64BIT | PTA_3DNOW_A | PTA_SSE
1715 | PTA_SSE2 | PTA_NO_SAHF},
1716 {"athlon-fx", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1717 | PTA_64BIT | PTA_3DNOW_A | PTA_SSE
1718 | PTA_SSE2 | PTA_NO_SAHF},
1719 {"amdfam10", PROCESSOR_AMDFAM10, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1720 | PTA_64BIT | PTA_3DNOW_A | PTA_SSE
1721 | PTA_SSE2 | PTA_SSE3 | PTA_POPCNT
1722 | PTA_ABM | PTA_SSE4A | PTA_CX16},
1723 {"generic32", PROCESSOR_GENERIC32, 0 /* flags are only used for -march switch. */ },
1724 {"generic64", PROCESSOR_GENERIC64, PTA_64BIT /* flags are only used for -march switch. */ },
1727 int const pta_size = ARRAY_SIZE (processor_alias_table);
1729 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1730 SUBTARGET_OVERRIDE_OPTIONS;
1733 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
1734 SUBSUBTARGET_OVERRIDE_OPTIONS;
1737 /* -fPIC is the default for x86_64. */
1738 if (TARGET_MACHO && TARGET_64BIT)
1741 /* Set the default values for switches whose default depends on TARGET_64BIT
1742 in case they weren't overwritten by command line options. */
1745 /* Mach-O doesn't support omitting the frame pointer for now. */
1746 if (flag_omit_frame_pointer == 2)
1747 flag_omit_frame_pointer = (TARGET_MACHO ? 0 : 1);
1748 if (flag_asynchronous_unwind_tables == 2)
1749 flag_asynchronous_unwind_tables = 1;
1750 if (flag_pcc_struct_return == 2)
1751 flag_pcc_struct_return = 0;
1755 if (flag_omit_frame_pointer == 2)
1756 flag_omit_frame_pointer = 0;
1757 if (flag_asynchronous_unwind_tables == 2)
1758 flag_asynchronous_unwind_tables = 0;
1759 if (flag_pcc_struct_return == 2)
1760 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
1763 /* Need to check -mtune=generic first. */
1764 if (ix86_tune_string)
1766 if (!strcmp (ix86_tune_string, "generic")
1767 || !strcmp (ix86_tune_string, "i686")
1768 /* As special support for cross compilers we read -mtune=native
1769 as -mtune=generic.  With native compilers we won't see
1770 -mtune=native, as it was changed by the driver. */
1771 || !strcmp (ix86_tune_string, "native"))
1774 ix86_tune_string = "generic64";
1776 ix86_tune_string = "generic32";
1778 else if (!strncmp (ix86_tune_string, "generic", 7))
1779 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
1783 if (ix86_arch_string)
1784 ix86_tune_string = ix86_arch_string;
1785 if (!ix86_tune_string)
1787 ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT];
1788 ix86_tune_defaulted = 1;
1791 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
1792 need to use a sensible tune option. */
1793 if (!strcmp (ix86_tune_string, "generic")
1794 || !strcmp (ix86_tune_string, "x86-64")
1795 || !strcmp (ix86_tune_string, "i686"))
1798 ix86_tune_string = "generic64";
1800 ix86_tune_string = "generic32";
1803 if (ix86_stringop_string)
1805 if (!strcmp (ix86_stringop_string, "rep_byte"))
1806 stringop_alg = rep_prefix_1_byte;
1807 else if (!strcmp (ix86_stringop_string, "libcall"))
1808 stringop_alg = libcall;
1809 else if (!strcmp (ix86_stringop_string, "rep_4byte"))
1810 stringop_alg = rep_prefix_4_byte;
1811 else if (!strcmp (ix86_stringop_string, "rep_8byte"))
1812 stringop_alg = rep_prefix_8_byte;
1813 else if (!strcmp (ix86_stringop_string, "byte_loop"))
1814 stringop_alg = loop_1_byte;
1815 else if (!strcmp (ix86_stringop_string, "loop"))
1816 stringop_alg = loop;
1817 else if (!strcmp (ix86_stringop_string, "unrolled_loop"))
1818 stringop_alg = unrolled_loop;
1820 error ("bad value (%s) for -mstringop-strategy= switch", ix86_stringop_string);
1822 if (!strcmp (ix86_tune_string, "x86-64"))
1823 warning (OPT_Wdeprecated, "-mtune=x86-64 is deprecated. Use -mtune=k8 or "
1824 "-mtune=generic instead as appropriate.");
1826 if (!ix86_arch_string)
1827 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
1828 if (!strcmp (ix86_arch_string, "generic"))
1829 error ("generic CPU can be used only for -mtune= switch");
1830 if (!strncmp (ix86_arch_string, "generic", 7))
1831 error ("bad value (%s) for -march= switch", ix86_arch_string);
1833 if (ix86_cmodel_string != 0)
1835 if (!strcmp (ix86_cmodel_string, "small"))
1836 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1837 else if (!strcmp (ix86_cmodel_string, "medium"))
1838 ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
1839 else if (!strcmp (ix86_cmodel_string, "large"))
1840 ix86_cmodel = flag_pic ? CM_LARGE_PIC : CM_LARGE;
1842 error ("code model %s does not support PIC mode", ix86_cmodel_string);
1843 else if (!strcmp (ix86_cmodel_string, "32"))
1844 ix86_cmodel = CM_32;
1845 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
1846 ix86_cmodel = CM_KERNEL;
1848 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
1852 ix86_cmodel = CM_32;
1854 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1856 if (ix86_asm_string != 0)
1859 && !strcmp (ix86_asm_string, "intel"))
1860 ix86_asm_dialect = ASM_INTEL;
1861 else if (!strcmp (ix86_asm_string, "att"))
1862 ix86_asm_dialect = ASM_ATT;
1864 error ("bad value (%s) for -masm= switch", ix86_asm_string);
1866 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
1867 error ("code model %qs not supported in the %s bit mode",
1868 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
1869 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
1870 sorry ("%i-bit mode not compiled in",
1871 (target_flags & MASK_64BIT) ? 64 : 32);
1873 for (i = 0; i < pta_size; i++)
1874 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
1876 ix86_arch = processor_alias_table[i].processor;
1877 /* Default cpu tuning to the architecture. */
1878 ix86_tune = ix86_arch;
1879 if (processor_alias_table[i].flags & PTA_MMX
1880 && !(target_flags_explicit & MASK_MMX))
1881 target_flags |= MASK_MMX;
1882 if (processor_alias_table[i].flags & PTA_3DNOW
1883 && !(target_flags_explicit & MASK_3DNOW))
1884 target_flags |= MASK_3DNOW;
1885 if (processor_alias_table[i].flags & PTA_3DNOW_A
1886 && !(target_flags_explicit & MASK_3DNOW_A))
1887 target_flags |= MASK_3DNOW_A;
1888 if (processor_alias_table[i].flags & PTA_SSE
1889 && !(target_flags_explicit & MASK_SSE))
1890 target_flags |= MASK_SSE;
1891 if (processor_alias_table[i].flags & PTA_SSE2
1892 && !(target_flags_explicit & MASK_SSE2))
1893 target_flags |= MASK_SSE2;
1894 if (processor_alias_table[i].flags & PTA_SSE3
1895 && !(target_flags_explicit & MASK_SSE3))
1896 target_flags |= MASK_SSE3;
1897 if (processor_alias_table[i].flags & PTA_SSSE3
1898 && !(target_flags_explicit & MASK_SSSE3))
1899 target_flags |= MASK_SSSE3;
1900 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1901 x86_prefetch_sse = true;
1902 if (processor_alias_table[i].flags & PTA_CX16)
1903 x86_cmpxchg16b = true;
1904 if (processor_alias_table[i].flags & PTA_POPCNT
1905 && !(target_flags_explicit & MASK_POPCNT))
1906 target_flags |= MASK_POPCNT;
1907 if (processor_alias_table[i].flags & PTA_ABM
1908 && !(target_flags_explicit & MASK_ABM))
1909 target_flags |= MASK_ABM;
1910 if (processor_alias_table[i].flags & PTA_SSE4A
1911 && !(target_flags_explicit & MASK_SSE4A))
1912 target_flags |= MASK_SSE4A;
1913 if (!(TARGET_64BIT && (processor_alias_table[i].flags & PTA_NO_SAHF)))
1915 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1916 error ("CPU you selected does not support x86-64 "
1922 error ("bad value (%s) for -march= switch", ix86_arch_string);
1924 ix86_arch_mask = 1u << ix86_arch;
1925 for (i = 0; i < X86_ARCH_LAST; ++i)
1926 ix86_arch_features[i] &= ix86_arch_mask;
1928 for (i = 0; i < pta_size; i++)
1929 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
1931 ix86_tune = processor_alias_table[i].processor;
1932 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1934 if (ix86_tune_defaulted)
1936 ix86_tune_string = "x86-64";
1937 for (i = 0; i < pta_size; i++)
1938 if (! strcmp (ix86_tune_string,
1939 processor_alias_table[i].name))
1941 ix86_tune = processor_alias_table[i].processor;
1944 error ("CPU you selected does not support x86-64 "
1947 /* Intel CPUs have always interpreted SSE prefetch instructions as
1948 NOPs; so, we can enable SSE prefetch instructions even when
1949 -mtune (rather than -march) points us to a processor that has them.
1950 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
1951 higher processors. */
1952 if (TARGET_CMOVE && (processor_alias_table[i].flags & PTA_PREFETCH_SSE))
1953 x86_prefetch_sse = true;
1957 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
1959 ix86_tune_mask = 1u << ix86_tune;
1960 for (i = 0; i < X86_TUNE_LAST; ++i)
1961 ix86_tune_features[i] &= ix86_tune_mask;
1964 ix86_cost = &size_cost;
1966 ix86_cost = processor_target_table[ix86_tune].cost;
1967 target_flags |= processor_target_table[ix86_tune].target_enable;
1968 target_flags &= ~processor_target_table[ix86_tune].target_disable;
1970 /* Arrange to set up i386_stack_locals for all functions. */
1971 init_machine_status = ix86_init_machine_status;
1973 /* Validate -mregparm= value. */
1974 if (ix86_regparm_string)
1976 i = atoi (ix86_regparm_string);
1977 if (i < 0 || i > REGPARM_MAX)
1978 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
1984 ix86_regparm = REGPARM_MAX;
1986 /* If the user has provided any of the -malign-* options,
1987 warn and use that value only if -falign-* is not set.
1988 Remove this code in GCC 3.2 or later. */
1989 if (ix86_align_loops_string)
1991 warning (0, "-malign-loops is obsolete, use -falign-loops");
1992 if (align_loops == 0)
1994 i = atoi (ix86_align_loops_string);
1995 if (i < 0 || i > MAX_CODE_ALIGN)
1996 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1998 align_loops = 1 << i;
2002 if (ix86_align_jumps_string)
2004 warning (0, "-malign-jumps is obsolete, use -falign-jumps");
2005 if (align_jumps == 0)
2007 i = atoi (ix86_align_jumps_string);
2008 if (i < 0 || i > MAX_CODE_ALIGN)
2009 error ("-malign-jumps=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2011 align_jumps = 1 << i;
2015 if (ix86_align_funcs_string)
2017 warning (0, "-malign-functions is obsolete, use -falign-functions");
2018 if (align_functions == 0)
2020 i = atoi (ix86_align_funcs_string);
2021 if (i < 0 || i > MAX_CODE_ALIGN)
2022 error ("-malign-functions=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2024 align_functions = 1 << i;
2028 /* Default align_* from the processor table. */
2029 if (align_loops == 0)
2031 align_loops = processor_target_table[ix86_tune].align_loop;
2032 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
2034 if (align_jumps == 0)
2036 align_jumps = processor_target_table[ix86_tune].align_jump;
2037 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
2039 if (align_functions == 0)
2041 align_functions = processor_target_table[ix86_tune].align_func;
2044 /* Validate -mbranch-cost= value, or provide default. */
2045 ix86_branch_cost = ix86_cost->branch_cost;
2046 if (ix86_branch_cost_string)
2048 i = atoi (ix86_branch_cost_string);
2050 error ("-mbranch-cost=%d is not between 0 and 5", i);
2052 ix86_branch_cost = i;
2054 if (ix86_section_threshold_string)
2056 i = atoi (ix86_section_threshold_string);
2058 error ("-mlarge-data-threshold=%d is negative", i);
2060 ix86_section_threshold = i;
2063 if (ix86_tls_dialect_string)
2065 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
2066 ix86_tls_dialect = TLS_DIALECT_GNU;
2067 else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
2068 ix86_tls_dialect = TLS_DIALECT_GNU2;
2069 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
2070 ix86_tls_dialect = TLS_DIALECT_SUN;
2072 error ("bad value (%s) for -mtls-dialect= switch",
2073 ix86_tls_dialect_string);
2076 /* Keep nonleaf frame pointers. */
2077 if (flag_omit_frame_pointer)
2078 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
2079 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
2080 flag_omit_frame_pointer = 1;
2082 /* If we're doing fast math, we don't care about comparison order
2083 wrt NaNs. This lets us use a shorter comparison sequence. */
2084 if (flag_finite_math_only)
2085 target_flags &= ~MASK_IEEE_FP;
2087 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
2088 since the insns won't need emulation. */
2089 if (x86_arch_always_fancy_math_387 & ix86_arch_mask)
2090 target_flags &= ~MASK_NO_FANCY_MATH_387;
2092 /* Likewise, if the target doesn't have a 387, or we've specified
2093 software floating point, don't use 387 inline intrinsics. */
2095 target_flags |= MASK_NO_FANCY_MATH_387;
2097 /* Turn on SSE3 builtins for -mssse3. */
2099 target_flags |= MASK_SSE3;
2101 /* Turn on SSE3 builtins for -msse4a. */
2103 target_flags |= MASK_SSE3;
2105 /* Turn on SSE2 builtins for -msse3. */
2107 target_flags |= MASK_SSE2;
2109 /* Turn on SSE builtins for -msse2. */
2111 target_flags |= MASK_SSE;
2113 /* Turn on MMX builtins for -msse. */
2116 target_flags |= MASK_MMX & ~target_flags_explicit;
2117 x86_prefetch_sse = true;
2120 /* Turn on MMX builtins for 3Dnow. */
2122 target_flags |= MASK_MMX;
2124 /* Turn on POPCNT builtins for -mabm. */
2126 target_flags |= MASK_POPCNT;
2130 if (TARGET_ALIGN_DOUBLE)
2131 error ("-malign-double makes no sense in the 64bit mode");
2133 error ("-mrtd calling convention not supported in the 64bit mode");
2135 /* Enable by default the SSE and MMX builtins. Do allow the user to
2136 explicitly disable any of these. In particular, disabling SSE and
2137 MMX for kernel code is extremely useful. */
2139 |= ((MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE)
2140 & ~target_flags_explicit);
2144 /* The i386 ABI does not specify a red zone.  It still makes sense to use it
2145 when the programmer takes care to keep the stack from being destroyed. */
2146 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
2147 target_flags |= MASK_NO_RED_ZONE;
2150 /* Validate -mpreferred-stack-boundary= value, or provide default.
2151 The default of 128 bits is for Pentium III's SSE __m128. We can't
2152 change it because of optimize_size. Otherwise, we can't mix object
2153 files compiled with -Os and -On. */
2154 ix86_preferred_stack_boundary = 128;
2155 if (ix86_preferred_stack_boundary_string)
2157 i = atoi (ix86_preferred_stack_boundary_string);
2158 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
2159 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
2160 TARGET_64BIT ? 4 : 2);
2162 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
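/* For example, -mpreferred-stack-boundary=4 yields (1 << 4) * 8 == 128
   bits, i.e. the same 16-byte alignment that the 128-bit default above
   guarantees for SSE __m128 values.  */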
2165 /* Accept -msseregparm only if at least SSE support is enabled. */
2166 if (TARGET_SSEREGPARM
2168 error ("-msseregparm used without SSE enabled");
2170 ix86_fpmath = TARGET_FPMATH_DEFAULT;
2171 if (ix86_fpmath_string != 0)
2173 if (! strcmp (ix86_fpmath_string, "387"))
2174 ix86_fpmath = FPMATH_387;
2175 else if (! strcmp (ix86_fpmath_string, "sse"))
2179 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2180 ix86_fpmath = FPMATH_387;
2183 ix86_fpmath = FPMATH_SSE;
2185 else if (! strcmp (ix86_fpmath_string, "387,sse")
2186 || ! strcmp (ix86_fpmath_string, "sse,387"))
2190 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2191 ix86_fpmath = FPMATH_387;
2193 else if (!TARGET_80387)
2195 warning (0, "387 instruction set disabled, using SSE arithmetics");
2196 ix86_fpmath = FPMATH_SSE;
2199 ix86_fpmath = FPMATH_SSE | FPMATH_387;
2202 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
2205 /* If the i387 is disabled, then do not return values in it. */
2207 target_flags &= ~MASK_FLOAT_RETURNS;
2209 if ((x86_accumulate_outgoing_args & ix86_tune_mask)
2210 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
2212 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
2214 /* ??? Unwind info is not correct around the CFG unless either a frame
2215 pointer is present or M_A_O_A is set. Fixing this requires rewriting
2216 unwind info generation to be aware of the CFG and propagating states
2218 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
2219 || flag_exceptions || flag_non_call_exceptions)
2220 && flag_omit_frame_pointer
2221 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
2223 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
2224 warning (0, "unwind tables currently require either a frame pointer "
2225 "or -maccumulate-outgoing-args for correctness");
2226 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
2229 /* For sane SSE instruction set generation we need the fcomi instruction.
2230 It is safe to enable all CMOVE instructions. */
2234 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
2237 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
2238 p = strchr (internal_label_prefix, 'X');
2239 internal_label_prefix_len = p - internal_label_prefix;
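/* Sketch of the effect (the exact string is target-dependent): with a
   typical ELF definition the buffer holds something like "*.LX0", so
   everything before the 'X' -- "*.L", length 3 -- is recorded as the
   prefix.  */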
2243 /* When a scheduling description is not available, disable the scheduler
2244 pass so it won't slow down compilation or make x87 code slower. */
2245 if (!TARGET_SCHEDULE)
2246 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
2248 if (!PARAM_SET_P (PARAM_SIMULTANEOUS_PREFETCHES))
2249 set_param_value ("simultaneous-prefetches",
2250 ix86_cost->simultaneous_prefetches);
2251 if (!PARAM_SET_P (PARAM_L1_CACHE_LINE_SIZE))
2252 set_param_value ("l1-cache-line-size", ix86_cost->prefetch_block);
2255 /* Return true if this goes in large data/bss. */
2258 ix86_in_large_data_p (tree exp)
2260 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
2263 /* Functions are never large data. */
2264 if (TREE_CODE (exp) == FUNCTION_DECL)
2267 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
2269 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
2270 if (strcmp (section, ".ldata") == 0
2271 || strcmp (section, ".lbss") == 0)
2277 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
2279 /* If this is an incomplete type with size 0, then we can't put it
2280 in data because it might be too big when completed. */
2281 if (!size || size > ix86_section_threshold)
2288 /* Switch to the appropriate section for output of DECL.
2289 DECL is either a `VAR_DECL' node or a constant of some sort.
2290 RELOC indicates whether forming the initial value of DECL requires
2291 link-time relocations. */
2293 static section * x86_64_elf_select_section (tree, int, unsigned HOST_WIDE_INT)
2297 x86_64_elf_select_section (tree decl, int reloc,
2298 unsigned HOST_WIDE_INT align)
2300 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2301 && ix86_in_large_data_p (decl))
2303 const char *sname = NULL;
2304 unsigned int flags = SECTION_WRITE;
2305 switch (categorize_decl_for_section (decl, reloc))
2310 case SECCAT_DATA_REL:
2311 sname = ".ldata.rel";
2313 case SECCAT_DATA_REL_LOCAL:
2314 sname = ".ldata.rel.local";
2316 case SECCAT_DATA_REL_RO:
2317 sname = ".ldata.rel.ro";
2319 case SECCAT_DATA_REL_RO_LOCAL:
2320 sname = ".ldata.rel.ro.local";
2324 flags |= SECTION_BSS;
2327 case SECCAT_RODATA_MERGE_STR:
2328 case SECCAT_RODATA_MERGE_STR_INIT:
2329 case SECCAT_RODATA_MERGE_CONST:
2333 case SECCAT_SRODATA:
2340 /* We don't split these for the medium model.  Place them into
2341 default sections and hope for the best. */
2346 /* We might get called with string constants, but get_named_section
2347 doesn't like them as they are not DECLs. Also, we need to set
2348 flags in that case. */
2350 return get_section (sname, flags, NULL);
2351 return get_named_section (decl, sname, reloc);
2354 return default_elf_select_section (decl, reloc, align);
2357 /* Build up a unique section name, expressed as a
2358 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
2359 RELOC indicates whether the initial value of EXP requires
2360 link-time relocations. */
2362 static void ATTRIBUTE_UNUSED
2363 x86_64_elf_unique_section (tree decl, int reloc)
2365 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2366 && ix86_in_large_data_p (decl))
2368 const char *prefix = NULL;
2369 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
2370 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
2372 switch (categorize_decl_for_section (decl, reloc))
2375 case SECCAT_DATA_REL:
2376 case SECCAT_DATA_REL_LOCAL:
2377 case SECCAT_DATA_REL_RO:
2378 case SECCAT_DATA_REL_RO_LOCAL:
2379 prefix = one_only ? ".gnu.linkonce.ld." : ".ldata.";
2382 prefix = one_only ? ".gnu.linkonce.lb." : ".lbss.";
2385 case SECCAT_RODATA_MERGE_STR:
2386 case SECCAT_RODATA_MERGE_STR_INIT:
2387 case SECCAT_RODATA_MERGE_CONST:
2388 prefix = one_only ? ".gnu.linkonce.lr." : ".lrodata.";
2390 case SECCAT_SRODATA:
2397 /* We don't split these for the medium model.  Place them into
2398 default sections and hope for the best. */
2406 plen = strlen (prefix);
2408 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
2409 name = targetm.strip_name_encoding (name);
2410 nlen = strlen (name);
2412 string = alloca (nlen + plen + 1);
2413 memcpy (string, prefix, plen);
2414 memcpy (string + plen, name, nlen + 1);
2416 DECL_SECTION_NAME (decl) = build_string (nlen + plen, string);
2420 default_unique_section (decl, reloc);
2423 #ifdef COMMON_ASM_OP
2424 /* This says how to output assembler code to declare an
2425 uninitialized external linkage data object.
2427 For medium model x86-64 we need to use the .largecomm directive for
2430 x86_elf_aligned_common (FILE *file,
2431 const char *name, unsigned HOST_WIDE_INT size,
2434 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2435 && size > (unsigned int)ix86_section_threshold)
2436 fprintf (file, ".largecomm\t");
2438 fprintf (file, "%s", COMMON_ASM_OP);
2439 assemble_name (file, name);
2440 fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
2441 size, align / BITS_PER_UNIT);
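/* Sketch of the resulting assembly (symbol name hypothetical): a 128 KB
   object with 32-byte (256-bit) alignment under -mcmodel=medium emits

     .largecomm	big_buf,131072,32

   while objects at or below ix86_section_threshold use the plain
   COMMON_ASM_OP form.  */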
2445 /* Utility function for targets to use in implementing
2446 ASM_OUTPUT_ALIGNED_BSS. */
2449 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
2450 const char *name, unsigned HOST_WIDE_INT size,
2453 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2454 && size > (unsigned int)ix86_section_threshold)
2455 switch_to_section (get_named_section (decl, ".lbss", 0));
2457 switch_to_section (bss_section);
2458 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
2459 #ifdef ASM_DECLARE_OBJECT_NAME
2460 last_assemble_variable_decl = decl;
2461 ASM_DECLARE_OBJECT_NAME (file, name, decl);
2463 /* The standard thing is just to output a label for the object. */
2464 ASM_OUTPUT_LABEL (file, name);
2465 #endif /* ASM_DECLARE_OBJECT_NAME */
2466 ASM_OUTPUT_SKIP (file, size ? size : 1);
2470 optimization_options (int level, int size ATTRIBUTE_UNUSED)
2472 /* For -O2 and beyond, turn off -fschedule-insns by default.  It tends to
2473 make the register shortage problem even worse. */
2474 #ifdef INSN_SCHEDULING
2476 flag_schedule_insns = 0;
2480 /* The Darwin libraries never set errno, so we might as well
2481 avoid calling them when that's the only reason we would. */
2482 flag_errno_math = 0;
2484 /* The default values of these switches depend on TARGET_64BIT,
2485 which is not known at this moment.  Mark these values with 2 and
2486 let the user override them.  In case there is no command line option
2487 specifying them, we will set the defaults in override_options. */
2489 flag_omit_frame_pointer = 2;
2490 flag_pcc_struct_return = 2;
2491 flag_asynchronous_unwind_tables = 2;
2492 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
2493 SUBTARGET_OPTIMIZATION_OPTIONS;
2497 /* Decide whether we can make a sibling call to a function. DECL is the
2498 declaration of the function being targeted by the call and EXP is the
2499 CALL_EXPR representing the call. */
2502 ix86_function_ok_for_sibcall (tree decl, tree exp)
2507 /* If we are generating position-independent code, we cannot sibcall
2508 optimize any indirect call, or a direct call to a global function,
2509 as the PLT requires %ebx be live. */
2510 if (!TARGET_64BIT && flag_pic && (!decl || !targetm.binds_local_p (decl)))
2517 func = TREE_TYPE (CALL_EXPR_FN (exp));
2518 if (POINTER_TYPE_P (func))
2519 func = TREE_TYPE (func);
2522 /* Check that the return value locations are the same.  For example,
2523 if we are returning floats on the 80387 register stack, we cannot
2524 make a sibcall from a function that doesn't return a float to a
2525 function that does or, conversely, from a function that does return
2526 a float to a function that doesn't; the necessary stack adjustment
2527 would not be executed. This is also the place we notice
2528 differences in the return value ABI. Note that it is ok for one
2529 of the functions to have void return type as long as the return
2530 value of the other is passed in a register. */
2531 a = ix86_function_value (TREE_TYPE (exp), func, false);
2532 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
2534 if (STACK_REG_P (a) || STACK_REG_P (b))
2536 if (!rtx_equal_p (a, b))
2539 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
2541 else if (!rtx_equal_p (a, b))
2544 /* If this call is indirect, we'll need to be able to use a call-clobbered
2545 register for the address of the target function. Make sure that all
2546 such registers are not used for passing parameters. */
2547 if (!decl && !TARGET_64BIT)
2551 /* We're looking at the CALL_EXPR; we need the type of the function. */
2552 type = CALL_EXPR_FN (exp); /* pointer expression */
2553 type = TREE_TYPE (type); /* pointer type */
2554 type = TREE_TYPE (type); /* function type */
2556 if (ix86_function_regparm (type, NULL) >= 3)
2558 /* ??? Need to count the actual number of registers to be used,
2559 not the possible number of registers. Fix later. */
2564 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
2565 /* Dllimport'd functions are also called indirectly. */
2566 if (decl && DECL_DLLIMPORT_P (decl)
2567 && ix86_function_regparm (TREE_TYPE (decl), NULL) >= 3)
2571 /* If we force-aligned the stack, then sibcalling would unalign the
2572 stack, which may break the called function. */
2573 if (cfun->machine->force_align_arg_pointer)
2576 /* Otherwise okay. That also includes certain types of indirect calls. */
2580 /* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm"
2581 calling convention attributes;
2582 arguments as in struct attribute_spec.handler. */
2585 ix86_handle_cconv_attribute (tree *node, tree name,
2587 int flags ATTRIBUTE_UNUSED,
2590 if (TREE_CODE (*node) != FUNCTION_TYPE
2591 && TREE_CODE (*node) != METHOD_TYPE
2592 && TREE_CODE (*node) != FIELD_DECL
2593 && TREE_CODE (*node) != TYPE_DECL)
2595 warning (OPT_Wattributes, "%qs attribute only applies to functions",
2596 IDENTIFIER_POINTER (name));
2597 *no_add_attrs = true;
2601 /* Can combine regparm with all attributes but fastcall. */
2602 if (is_attribute_p ("regparm", name))
2606 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2608 error ("fastcall and regparm attributes are not compatible");
2611 cst = TREE_VALUE (args);
2612 if (TREE_CODE (cst) != INTEGER_CST)
2614 warning (OPT_Wattributes,
2615 "%qs attribute requires an integer constant argument",
2616 IDENTIFIER_POINTER (name));
2617 *no_add_attrs = true;
2619 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
2621 warning (OPT_Wattributes, "argument to %qs attribute larger than %d",
2622 IDENTIFIER_POINTER (name), REGPARM_MAX);
2623 *no_add_attrs = true;
2627 && lookup_attribute (ix86_force_align_arg_pointer_string,
2628 TYPE_ATTRIBUTES (*node))
2629 && compare_tree_int (cst, REGPARM_MAX-1))
2631 error ("%s functions limited to %d register parameters",
2632 ix86_force_align_arg_pointer_string, REGPARM_MAX-1);
2640 warning (OPT_Wattributes, "%qs attribute ignored",
2641 IDENTIFIER_POINTER (name));
2642 *no_add_attrs = true;
2646 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
2647 if (is_attribute_p ("fastcall", name))
2649 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
2651 error ("fastcall and cdecl attributes are not compatible");
2653 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
2655 error ("fastcall and stdcall attributes are not compatible");
2657 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
2659 error ("fastcall and regparm attributes are not compatible");
2663 /* Can combine stdcall with fastcall (redundant), regparm and
2665 else if (is_attribute_p ("stdcall", name))
2667 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
2669 error ("stdcall and cdecl attributes are not compatible");
2671 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2673 error ("stdcall and fastcall attributes are not compatible");
2677 /* Can combine cdecl with regparm and sseregparm. */
2678 else if (is_attribute_p ("cdecl", name))
2680 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
2682 error ("stdcall and cdecl attributes are not compatible");
2684 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2686 error ("fastcall and cdecl attributes are not compatible");
2690 /* Can combine sseregparm with all attributes. */
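/* Example diagnostics (user code, hypothetical declarations): given

     void __attribute__((fastcall, stdcall))    f (int);  // rejected
     void __attribute__((fastcall, sseregparm)) g (int);  // accepted

   the first declaration hits the "fastcall and stdcall attributes are
   not compatible" error above, while sseregparm combines with any of
   the other calling convention attributes.  */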
2695 /* Return 0 if the attributes for two types are incompatible, 1 if they
2696 are compatible, and 2 if they are nearly compatible (which causes a
2697 warning to be generated). */
2700 ix86_comp_type_attributes (tree type1, tree type2)
2702 /* Check for mismatch of non-default calling convention. */
2703 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
2705 if (TREE_CODE (type1) != FUNCTION_TYPE)
2708 /* Check for mismatched fastcall/regparm types. */
2709 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
2710 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
2711 || (ix86_function_regparm (type1, NULL)
2712 != ix86_function_regparm (type2, NULL)))
2715 /* Check for mismatched sseregparm types. */
2716 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
2717 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
2720 /* Check for mismatched return types (cdecl vs stdcall). */
2721 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
2722 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
2728 /* Return the regparm value for a function with the indicated TYPE and DECL.
2729 DECL may be NULL when calling function indirectly
2730 or considering a libcall. */
2733 ix86_function_regparm (tree type, tree decl)
2736 int regparm = ix86_regparm;
2741 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
2743 return TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
2745 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
2748 /* Use register calling convention for local functions when possible. */
2749 if (decl && flag_unit_at_a_time && !profile_flag)
2751 struct cgraph_local_info *i = cgraph_local_info (decl);
2754 int local_regparm, globals = 0, regno;
2757 /* Make sure no regparm register is taken by a
2758 global register variable. */
2759 for (local_regparm = 0; local_regparm < 3; local_regparm++)
2760 if (global_regs[local_regparm])
2763 /* We can't use regparm(3) for nested functions, as these use the
2764 static chain pointer in the third argument. */
2765 if (local_regparm == 3
2766 && decl_function_context (decl)
2767 && !DECL_NO_STATIC_CHAIN (decl))
2770 /* If the function realigns its stack pointer, the prologue will
2771 clobber %ecx. If we've already generated code for the callee,
2772 the callee DECL_STRUCT_FUNCTION is gone, so we fall back to
2773 scanning the attributes for the self-realigning property. */
2774 f = DECL_STRUCT_FUNCTION (decl);
2775 if (local_regparm == 3
2776 && (f ? !!f->machine->force_align_arg_pointer
2777 : !!lookup_attribute (ix86_force_align_arg_pointer_string,
2778 TYPE_ATTRIBUTES (TREE_TYPE (decl)))))
2781 /* Each global register variable increases register pressure, so the
2782 more global register variables there are, the smaller the benefit of
2783 the regparm optimization, unless the user requested it explicitly. */
2784 for (regno = 0; regno < 6; regno++)
2785 if (global_regs[regno])
2788 = globals < local_regparm ? local_regparm - globals : 0;
2790 if (local_regparm > regparm)
2791 regparm = local_regparm;
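/* Example (user code): declaring

     int __attribute__((regparm(3))) add3 (int a, int b, int c);

   passes the arguments in %eax, %edx and %ecx instead of on the stack;
   the local-function logic above can pick the same convention
   automatically when those registers are free.  */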
2798 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
2799 DFmode (2) arguments in SSE registers for a function with the
2800 indicated TYPE and DECL. DECL may be NULL when calling function
2801 indirectly or considering a libcall. Otherwise return 0. */
2804 ix86_function_sseregparm (tree type, tree decl)
2806 gcc_assert (!TARGET_64BIT);
2808 /* Use SSE registers to pass SFmode and DFmode arguments if requested
2809 by the sseregparm attribute. */
2810 if (TARGET_SSEREGPARM
2811 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
2816 error ("calling %qD with attribute sseregparm without "
2817 "SSE/SSE2 enabled", decl);
2819 error ("calling %qT with attribute sseregparm without "
2820 "SSE/SSE2 enabled", type);
2827 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
2828 (and DFmode for SSE2) arguments in SSE registers. */
2829 if (decl && TARGET_SSE_MATH && flag_unit_at_a_time && !profile_flag)
2831 struct cgraph_local_info *i = cgraph_local_info (decl);
2833 return TARGET_SSE2 ? 2 : 1;
2839 /* Return true if EAX is live at the start of the function. Used by
2840 ix86_expand_prologue to determine if we need special help before
2841 calling allocate_stack_worker. */
2844 ix86_eax_live_at_start_p (void)
2846 /* Cheat. Don't bother working forward from ix86_function_regparm
2847 to the function type to whether an actual argument is located in
2848 eax. Instead just look at cfg info, which is still close enough
2849 to correct at this point. This gives false positives for broken
2850 functions that might use uninitialized data that happens to be
2851 allocated in eax, but who cares? */
2852 return REGNO_REG_SET_P (ENTRY_BLOCK_PTR->il.rtl->global_live_at_end, 0);
2855 /* Return true if TYPE has a variable argument list. */
2858 type_has_variadic_args_p (tree type)
2862 for (t = TYPE_ARG_TYPES (type); t; t = TREE_CHAIN (t))
2863 if (t == void_list_node)
2868 /* Value is the number of bytes of arguments automatically
2869 popped when returning from a subroutine call.
2870 FUNDECL is the declaration node of the function (as a tree),
2871 FUNTYPE is the data type of the function (as a tree),
2872 or for a library call it is an identifier node for the subroutine name.
2873 SIZE is the number of bytes of arguments passed on the stack.
2875 On the 80386, the RTD insn may be used to pop them if the number
2876 of args is fixed, but if the number is variable then the caller
2877 must pop them all. RTD can't be used for library calls now
2878 because the library is compiled with the Unix compiler.
2879 Use of RTD is a selectable option, since it is incompatible with
2880 standard Unix calling sequences. If the option is not selected,
2881 the caller must always pop the args.
2883 The attribute stdcall is equivalent to RTD on a per module basis. */
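/* Example (user code): for

     int __attribute__((stdcall)) f (int a, int b);

   the callee pops its 8 bytes of arguments, returning with `ret $8',
   whereas a cdecl function executes a plain `ret' and leaves the stack
   adjustment to the caller.  */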
2886 ix86_return_pops_args (tree fundecl, tree funtype, int size)
2890 /* None of the 64-bit ABIs pop arguments. */
2894 rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
2896 /* Cdecl functions override -mrtd, and never pop the stack. */
2897 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype)))
2899 /* Stdcall and fastcall functions will pop the stack if not
2901 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
2902 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
2905 if (rtd && ! type_has_variadic_args_p (funtype))
2909 /* Lose any fake structure return argument if it is passed on the stack. */
2910 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
2911 && !KEEP_AGGREGATE_RETURN_POINTER)
2913 int nregs = ix86_function_regparm (funtype, fundecl);
2915 return GET_MODE_SIZE (Pmode);
2921 /* Argument support functions. */
2923 /* Return true when a register may be used to pass function parameters. */
2925 ix86_function_arg_regno_p (int regno)
2932 return (regno < REGPARM_MAX
2933 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
2935 return (regno < REGPARM_MAX
2936 || (TARGET_MMX && MMX_REGNO_P (regno)
2937 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
2938 || (TARGET_SSE && SSE_REGNO_P (regno)
2939 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
2944 if (SSE_REGNO_P (regno) && TARGET_SSE)
2949 if (TARGET_SSE && SSE_REGNO_P (regno)
2950 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
2954 /* RAX is used as a hidden argument to va_arg functions. */
2958 for (i = 0; i < REGPARM_MAX; i++)
2959 if (regno == x86_64_int_parameter_registers[i])
2964 /* Return true if we do not know how to pass TYPE solely in registers. */
2967 ix86_must_pass_in_stack (enum machine_mode mode, tree type)
2969 if (must_pass_in_stack_var_size_or_pad (mode, type))
2972 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
2973 The layout_type routine is crafty and tries to trick us into passing
2974 currently unsupported vector types on the stack by using TImode. */
2975 return (!TARGET_64BIT && mode == TImode
2976 && type && TREE_CODE (type) != VECTOR_TYPE);
2979 /* Initialize a variable CUM of type CUMULATIVE_ARGS
2980 for a call to a function whose data type is FNTYPE.
2981 For a library call, FNTYPE is 0. */
2984 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
2985 tree fntype, /* tree ptr for function decl */
2986 rtx libname, /* SYMBOL_REF of library name or 0 */
2989 memset (cum, 0, sizeof (*cum));
2991 /* Set up the number of registers to use for passing arguments. */
2992 cum->nregs = ix86_regparm;
2994 cum->sse_nregs = SSE_REGPARM_MAX;
2996 cum->mmx_nregs = MMX_REGPARM_MAX;
2997 cum->warn_sse = true;
2998 cum->warn_mmx = true;
2999 cum->maybe_vaarg = (fntype ? type_has_variadic_args_p (fntype) : !libname);
3003 /* If there are variable arguments, then we won't pass anything
3004 in registers in 32-bit mode. */
3005 if (cum->maybe_vaarg)
3015 /* Use ecx and edx registers if function has fastcall attribute,
3016 else look for regparm information. */
3019 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
3025 cum->nregs = ix86_function_regparm (fntype, fndecl);
3028 /* Set up the number of SSE registers used for passing SFmode
3029 and DFmode arguments. Warn for mismatching ABI. */
3030 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl);
3034 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
3035 But in the case of vector types, it is some vector mode.
3037 When we have only some of our vector isa extensions enabled, then there
3038 are some modes for which vector_mode_supported_p is false. For these
3039 modes, the generic vector support in gcc will choose some non-vector mode
3040 in order to implement the type. By computing the natural mode, we'll
3041 select the proper ABI location for the operand and not depend on whatever
3042 the middle-end decides to do with these vector types. */
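/* Example (user code): for

     typedef int v4si __attribute__ ((vector_size (16)));

   TYPE_MODE may be a non-vector mode (often TImode) when SSE is
   disabled, but type_natural_mode still finds V4SImode -- four SImode
   units, 16 bytes -- so the ABI slot assignment does not depend on
   which ISA extensions happen to be enabled.  */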
3044 static enum machine_mode
3045 type_natural_mode (tree type)
3047 enum machine_mode mode = TYPE_MODE (type);
3049 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
3051 HOST_WIDE_INT size = int_size_in_bytes (type);
3052 if ((size == 8 || size == 16)
3053 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
3054 && TYPE_VECTOR_SUBPARTS (type) > 1)
3056 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
3058 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
3059 mode = MIN_MODE_VECTOR_FLOAT;
3061 mode = MIN_MODE_VECTOR_INT;
3063 /* Get the mode which has this inner mode and number of units. */
3064 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
3065 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
3066 && GET_MODE_INNER (mode) == innermode)
3076 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
3077 this may not agree with the mode that the type system has chosen for the
3078 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
3079 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
3082 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
3087 if (orig_mode != BLKmode)
3088 tmp = gen_rtx_REG (orig_mode, regno);
3091 tmp = gen_rtx_REG (mode, regno);
3092 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
3093 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
3099 /* x86-64 register passing implementation.  See the x86-64 psABI for
3100 details.  The goal of this code is to classify each eightbyte of the
3101 incoming argument by register class and assign registers accordingly. */
3103 /* Return the union class of CLASS1 and CLASS2.
3104 See the x86-64 PS ABI for details. */
3106 static enum x86_64_reg_class
3107 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
3109 /* Rule #1: If both classes are equal, this is the resulting class. */
3110 if (class1 == class2)
3113 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
3115 if (class1 == X86_64_NO_CLASS)
3117 if (class2 == X86_64_NO_CLASS)
3120 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
3121 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
3122 return X86_64_MEMORY_CLASS;
3124 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
3125 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
3126 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
3127 return X86_64_INTEGERSI_CLASS;
3128 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
3129 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
3130 return X86_64_INTEGER_CLASS;
3132 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
3134 if (class1 == X86_64_X87_CLASS
3135 || class1 == X86_64_X87UP_CLASS
3136 || class1 == X86_64_COMPLEX_X87_CLASS
3137 || class2 == X86_64_X87_CLASS
3138 || class2 == X86_64_X87UP_CLASS
3139 || class2 == X86_64_COMPLEX_X87_CLASS)
3140 return X86_64_MEMORY_CLASS;
3142 /* Rule #6: Otherwise class SSE is used. */
3143 return X86_64_SSE_CLASS;
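/* For instance, merging INTEGERSI with SSESF yields INTEGERSI by rule
   #4, while merging X87 with SSE falls through to MEMORY by rule #5.  */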
3146 /* Classify the argument of type TYPE and mode MODE.
3147 CLASSES will be filled by the register class used to pass each word
3148 of the operand. The number of words is returned. In case the parameter
3149 should be passed in memory, 0 is returned. As a special case for zero
3150 sized containers, classes[0] will be NO_CLASS and 1 is returned.
3152 BIT_OFFSET is used internally for handling records; it specifies the
3153 offset of the argument in bits, modulo 256, to avoid overflow cases.
3155 See the x86-64 PS ABI for details.
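/* For example, a 24-byte aggregate (editorial sketch):

     struct big { long a, b, c; };

   exceeds the 16-byte limit checked below, so classification fails, 0
   is returned, and the argument is passed in memory.  */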
3159 classify_argument (enum machine_mode mode, tree type,
3160 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
3162 HOST_WIDE_INT bytes =
3163 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3164 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
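/* Illustrative arithmetic: with UNITS_PER_WORD == 8, a 12-byte struct
   at bit_offset 0 gives words = (12 + 0 + 8 - 1) / 8 == 2 eightbytes
   to classify.  */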
3166 /* Variable sized entities are always passed/returned in memory. */
3170 if (mode != VOIDmode
3171 && targetm.calls.must_pass_in_stack (mode, type))
3174 if (type && AGGREGATE_TYPE_P (type))
3178 enum x86_64_reg_class subclasses[MAX_CLASSES];
3180 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
3184 for (i = 0; i < words; i++)
3185 classes[i] = X86_64_NO_CLASS;
3187 /* Zero sized arrays or structures are NO_CLASS.  We return 0 to
3188 signal the memory class, so handle it as a special case. */
3191 classes[0] = X86_64_NO_CLASS;
3195 /* Classify each field of record and merge classes. */
3196 switch (TREE_CODE (type))
3199 /* And now merge the fields of the structure. */
3200 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3202 if (TREE_CODE (field) == FIELD_DECL)
3206 if (TREE_TYPE (field) == error_mark_node)
3209 /* Bitfields are always classified as integer. Handle them
3210 early, since later code would consider them to be
3211 misaligned integers. */
3212 if (DECL_BIT_FIELD (field))
3214 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
3215 i < ((int_bit_position (field) + (bit_offset % 64))
3216 + tree_low_cst (DECL_SIZE (field), 0)
3219 merge_classes (X86_64_INTEGER_CLASS,
3224 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3225 TREE_TYPE (field), subclasses,
3226 (int_bit_position (field)
3227 + bit_offset) % 256);
3230 for (i = 0; i < num; i++)
3233 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
3235 merge_classes (subclasses[i], classes[i + pos]);
3243 /* Arrays are handled as small records. */
3246 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
3247 TREE_TYPE (type), subclasses, bit_offset);
3251 /* The partial classes are now full classes. */
3252 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
3253 subclasses[0] = X86_64_SSE_CLASS;
3254 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
3255 subclasses[0] = X86_64_INTEGER_CLASS;
3257 for (i = 0; i < words; i++)
3258 classes[i] = subclasses[i % num];
3263 case QUAL_UNION_TYPE:
3264 /* Unions are similar to RECORD_TYPE, but the offset is always 0.
3266 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3268 if (TREE_CODE (field) == FIELD_DECL)
3272 if (TREE_TYPE (field) == error_mark_node)
3275 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3276 TREE_TYPE (field), subclasses,
3280 for (i = 0; i < num; i++)
3281 classes[i] = merge_classes (subclasses[i], classes[i]);
3290 /* Final merger cleanup. */
3291 for (i = 0; i < words; i++)
3293 /* If one class is MEMORY, everything should be passed in
3295 if (classes[i] == X86_64_MEMORY_CLASS)
3298 /* The X86_64_SSEUP_CLASS should always be preceded by
3299 X86_64_SSE_CLASS. */
3300 if (classes[i] == X86_64_SSEUP_CLASS
3301 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
3302 classes[i] = X86_64_SSE_CLASS;
3304 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
3305 if (classes[i] == X86_64_X87UP_CLASS
3306 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
3307 classes[i] = X86_64_SSE_CLASS;
3312 /* Compute the alignment needed.  We align all types to their natural
3313 boundaries, with the exception of XFmode, which is aligned to 64 bits. */
3314 if (mode != VOIDmode && mode != BLKmode)
3316 int mode_alignment = GET_MODE_BITSIZE (mode);
3319 mode_alignment = 128;
3320 else if (mode == XCmode)
3321 mode_alignment = 256;
3322 if (COMPLEX_MODE_P (mode))
3323 mode_alignment /= 2;
3324 /* Misaligned fields are always returned in memory. */
3325 if (bit_offset % mode_alignment)
3329 /* For V1xx modes, just use the base mode.  */
3330 if (VECTOR_MODE_P (mode)
3331 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
3332 mode = GET_MODE_INNER (mode);
3334 /* Classification of atomic types. */
3339 classes[0] = X86_64_SSE_CLASS;
3342 classes[0] = X86_64_SSE_CLASS;
3343 classes[1] = X86_64_SSEUP_CLASS;
3352 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3353 classes[0] = X86_64_INTEGERSI_CLASS;
3355 classes[0] = X86_64_INTEGER_CLASS;
3359 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
3364 if (!(bit_offset % 64))
3365 classes[0] = X86_64_SSESF_CLASS;
3367 classes[0] = X86_64_SSE_CLASS;
3370 classes[0] = X86_64_SSEDF_CLASS;
3373 classes[0] = X86_64_X87_CLASS;
3374 classes[1] = X86_64_X87UP_CLASS;
3377 classes[0] = X86_64_SSE_CLASS;
3378 classes[1] = X86_64_SSEUP_CLASS;
3381 classes[0] = X86_64_SSE_CLASS;
3384 classes[0] = X86_64_SSEDF_CLASS;
3385 classes[1] = X86_64_SSEDF_CLASS;
3388 classes[0] = X86_64_COMPLEX_X87_CLASS;
3391 /* These modes are larger than 16 bytes. */
3399 classes[0] = X86_64_SSE_CLASS;
3400 classes[1] = X86_64_SSEUP_CLASS;
3406 classes[0] = X86_64_SSE_CLASS;
3412 gcc_assert (VECTOR_MODE_P (mode));
3417 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
3419 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3420 classes[0] = X86_64_INTEGERSI_CLASS;
3422 classes[0] = X86_64_INTEGER_CLASS;
3423 classes[1] = X86_64_INTEGER_CLASS;
3424 return 1 + (bytes > 8);
3428 /* Examine the argument and set the number of registers required in each
3429 class.  Return 0 iff the parameter should be passed in memory. */
3431 examine_argument (enum machine_mode mode, tree type, int in_return,
3432 int *int_nregs, int *sse_nregs)
3434 enum x86_64_reg_class class[MAX_CLASSES];
3435 int n = classify_argument (mode, type, class, 0);
3441 for (n--; n >= 0; n--)
3444 case X86_64_INTEGER_CLASS:
3445 case X86_64_INTEGERSI_CLASS:
3448 case X86_64_SSE_CLASS:
3449 case X86_64_SSESF_CLASS:
3450 case X86_64_SSEDF_CLASS:
3453 case X86_64_NO_CLASS:
3454 case X86_64_SSEUP_CLASS:
3456 case X86_64_X87_CLASS:
3457 case X86_64_X87UP_CLASS:
3461 case X86_64_COMPLEX_X87_CLASS:
3462 return in_return ? 2 : 0;
3463 case X86_64_MEMORY_CLASS:
3469 /* Construct the container for the argument as used by the GCC interface.
3470 See FUNCTION_ARG for the detailed description. */
3473 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
3474 tree type, int in_return, int nintregs, int nsseregs,
3475 const int *intreg, int sse_regno)
3477 /* The following variables hold the static issued_error state. */
3478 static bool issued_sse_arg_error;
3479 static bool issued_sse_ret_error;
3480 static bool issued_x87_ret_error;
3482 enum machine_mode tmpmode;
3484 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3485 enum x86_64_reg_class class[MAX_CLASSES];
3489 int needed_sseregs, needed_intregs;
3490 rtx exp[MAX_CLASSES];
3493 n = classify_argument (mode, type, class, 0);
3496 if (!examine_argument (mode, type, in_return, &needed_intregs,
3499 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
3502 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
3503 some less clueful developer tries to use floating-point anyway. */
3504 if (needed_sseregs && !TARGET_SSE)
3508 if (!issued_sse_ret_error)
3510 error ("SSE register return with SSE disabled");
3511 issued_sse_ret_error = true;
3514 else if (!issued_sse_arg_error)
3516 error ("SSE register argument with SSE disabled");
3517 issued_sse_arg_error = true;
3522 /* Likewise, error if the ABI requires us to return values in the
3523 x87 registers and the user specified -mno-80387. */
3524 if (!TARGET_80387 && in_return)
3525 for (i = 0; i < n; i++)
3526 if (class[i] == X86_64_X87_CLASS
3527 || class[i] == X86_64_X87UP_CLASS
3528 || class[i] == X86_64_COMPLEX_X87_CLASS)
3530 if (!issued_x87_ret_error)
3532 error ("x87 register return with x87 disabled");
3533 issued_x87_ret_error = true;
3538 /* First construct the simple cases.  Avoid SCmode, since we want to use a
3539 single register to pass this type. */
3540 if (n == 1 && mode != SCmode)
3543 case X86_64_INTEGER_CLASS:
3544 case X86_64_INTEGERSI_CLASS:
3545 return gen_rtx_REG (mode, intreg[0]);
3546 case X86_64_SSE_CLASS:
3547 case X86_64_SSESF_CLASS:
3548 case X86_64_SSEDF_CLASS:
3549 return gen_reg_or_parallel (mode, orig_mode, SSE_REGNO (sse_regno));
3550 case X86_64_X87_CLASS:
3551 case X86_64_COMPLEX_X87_CLASS:
3552 return gen_rtx_REG (mode, FIRST_STACK_REG);
3553 case X86_64_NO_CLASS:
3554 /* Zero sized array, struct or class. */
3559 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS
3561 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
3564 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
3565 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
3566 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
3567 && class[1] == X86_64_INTEGER_CLASS
3568 && (mode == CDImode || mode == TImode || mode == TFmode)
3569 && intreg[0] + 1 == intreg[1])
3570 return gen_rtx_REG (mode, intreg[0]);
3572 /* Otherwise figure out the entries of the PARALLEL. */
3573 for (i = 0; i < n; i++)
3577 case X86_64_NO_CLASS:
3579 case X86_64_INTEGER_CLASS:
3580 case X86_64_INTEGERSI_CLASS:
3581 /* Merge TImodes on aligned occasions here too. */
3582 if (i * 8 + 8 > bytes)
3583 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
3584 else if (class[i] == X86_64_INTEGERSI_CLASS)
3588 /* We've requested 24 bytes for which we don't have a mode. Use DImode. */
3589 if (tmpmode == BLKmode)
3591 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3592 gen_rtx_REG (tmpmode, *intreg),
3596 case X86_64_SSESF_CLASS:
3597 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3598 gen_rtx_REG (SFmode,
3599 SSE_REGNO (sse_regno)),
3603 case X86_64_SSEDF_CLASS:
3604 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3605 gen_rtx_REG (DFmode,
3606 SSE_REGNO (sse_regno)),
3610 case X86_64_SSE_CLASS:
3611 if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
3615 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3616 gen_rtx_REG (tmpmode,
3617 SSE_REGNO (sse_regno)),
3619 if (tmpmode == TImode)
3628 /* Empty aligned struct, union or class. */
3632 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
3633 for (i = 0; i < nexps; i++)
3634 XVECEXP (ret, 0, i) = exp [i];
3638 /* Update the data in CUM to advance over an argument of mode MODE
3639 and data type TYPE. (TYPE is null for libcalls where that information
3640 may not be available.) */
3643 function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3644 tree type, HOST_WIDE_INT bytes, HOST_WIDE_INT words)
3660 cum->words += words;
3661 cum->nregs -= words;
3662 cum->regno += words;
3664 if (cum->nregs <= 0)
3672 if (cum->float_in_sse < 2)
3675 if (cum->float_in_sse < 1)
3686 if (!type || !AGGREGATE_TYPE_P (type))
3688 cum->sse_words += words;
3689 cum->sse_nregs -= 1;
3690 cum->sse_regno += 1;
3691 if (cum->sse_nregs <= 0)
3703 if (!type || !AGGREGATE_TYPE_P (type))
3705 cum->mmx_words += words;
3706 cum->mmx_nregs -= 1;
3707 cum->mmx_regno += 1;
3708 if (cum->mmx_nregs <= 0)
3719 function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3720 tree type, HOST_WIDE_INT words)
3722 int int_nregs, sse_nregs;
3724 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
3725 cum->words += words;
3726 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
3728 cum->nregs -= int_nregs;
3729 cum->sse_nregs -= sse_nregs;
3730 cum->regno += int_nregs;
3731 cum->sse_regno += sse_nregs;
3734 cum->words += words;
3738 function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3739 tree type, int named ATTRIBUTE_UNUSED)
3741 HOST_WIDE_INT bytes, words;
3743 if (mode == BLKmode)
3744 bytes = int_size_in_bytes (type);
3746 bytes = GET_MODE_SIZE (mode);
3747 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3750 mode = type_natural_mode (type);
3753 function_arg_advance_64 (cum, mode, type, words);
3755 function_arg_advance_32 (cum, mode, type, bytes, words);
3758 /* Define where to put the arguments to a function.
3759 Value is zero to push the argument on the stack,
3760 or a hard register in which to store the argument.
3762 MODE is the argument's machine mode.
3763 TYPE is the data type of the argument (as a tree).
3764 This is null for libcalls where that information may not be available.
3766 CUM is a variable of type CUMULATIVE_ARGS which gives info about
3767 the preceding args and about the function being called.
3768 NAMED is nonzero if this argument is a named parameter
3769 (otherwise it is an extra parameter matching an ellipsis). */
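/* For example, under the 64-bit ABI a call f (1, 2.5) passes the int in
   %edi and the double in %xmm0, while the plain 32-bit ABI pushes both on
   the stack. A sketch of how expand code consumes this interface (an
   illustrative call site only, with CUM a CUMULATIVE_ARGS that has
   already been initialized):

       rtx reg = FUNCTION_ARG (cum, SImode, integer_type_node, 1);
       if (reg == 0)
         ... zero means: push the argument on the stack ...
       FUNCTION_ARG_ADVANCE (cum, SImode, integer_type_node, 1);  */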
3772 function_arg_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3773 enum machine_mode orig_mode, tree type,
3774 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
3776 static bool warnedsse, warnedmmx;
3778 /* Avoid the AL settings for the Unix64 ABI. */
3779 if (mode == VOIDmode)
3795 if (words <= cum->nregs)
3797 int regno = cum->regno;
3799 /* Fastcall allocates the first two DWORD (SImode) or
3800 smaller arguments to ECX and EDX. */
3803 if (mode == BLKmode || mode == DImode)
3806 /* ECX, not EAX, is the first allocated register. */
3810 return gen_rtx_REG (mode, regno);
3815 if (cum->float_in_sse < 2)
3818 if (cum->float_in_sse < 1)
3828 if (!type || !AGGREGATE_TYPE_P (type))
3830 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
3833 warning (0, "SSE vector argument without SSE enabled "
3837 return gen_reg_or_parallel (mode, orig_mode,
3838 cum->sse_regno + FIRST_SSE_REG);
3846 if (!type || !AGGREGATE_TYPE_P (type))
3848 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
3851 warning (0, "MMX vector argument without MMX enabled "
3855 return gen_reg_or_parallel (mode, orig_mode,
3856 cum->mmx_regno + FIRST_MMX_REG);
3865 function_arg_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3866 enum machine_mode orig_mode, tree type)
3868 /* Handle the hidden AL argument containing the number of SSE registers
3869 used when calling varargs x86-64 functions. */
3870 if (mode == VOIDmode)
3871 return GEN_INT (cum->maybe_vaarg
3872 ? (cum->sse_nregs < 0
3877 return construct_container (mode, orig_mode, type, 0, cum->nregs,
3879 &x86_64_int_parameter_registers [cum->regno],
3884 function_arg (CUMULATIVE_ARGS *cum, enum machine_mode omode,
3885 tree type, int named ATTRIBUTE_UNUSED)
3887 enum machine_mode mode = omode;
3888 HOST_WIDE_INT bytes, words;
3890 if (mode == BLKmode)
3891 bytes = int_size_in_bytes (type);
3893 bytes = GET_MODE_SIZE (mode);
3894 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3896 /* To simplify the code below, represent vector types with a vector mode
3897 even if MMX/SSE are not active. */
3898 if (type && TREE_CODE (type) == VECTOR_TYPE)
3899 mode = type_natural_mode (type);
3902 return function_arg_64 (cum, mode, omode, type);
3904 return function_arg_32 (cum, mode, omode, type, bytes, words);
3907 /* A C expression that indicates when an argument must be passed by
3908 reference. If nonzero for an argument, a copy of that argument is
3909 made in memory and a pointer to the argument is passed instead of
3910 the argument itself. The pointer is passed in whatever way is
3911 appropriate for passing a pointer to that type. */
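/* On x86-64 the check below fires only for types of variable size, for
   which int_size_in_bytes returns -1; every fixed-size object, however
   large, is classified and passed by value (in registers or memory).
   So, for instance, a 1kB struct is still passed by value, while a
   variably modified type is passed by invisible reference. */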
3914 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
3915 enum machine_mode mode ATTRIBUTE_UNUSED,
3916 tree type, bool named ATTRIBUTE_UNUSED)
3918 if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
3924 /* Return true when TYPE should be 128-bit aligned for the 32-bit
3925 argument-passing ABI. Only called if TARGET_SSE. */
3927 contains_128bit_aligned_vector_p (tree type)
3929 enum machine_mode mode = TYPE_MODE (type);
3930 if (SSE_REG_MODE_P (mode)
3931 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
3933 if (TYPE_ALIGN (type) < 128)
3936 if (AGGREGATE_TYPE_P (type))
3938 /* Walk the aggregates recursively. */
3939 switch (TREE_CODE (type))
3943 case QUAL_UNION_TYPE:
3947 /* Walk all the structure fields. */
3948 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3950 if (TREE_CODE (field) == FIELD_DECL
3951 && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
3958 /* Just for use in case some language passes arrays by value. */
3959 if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
3970 /* Gives the alignment boundary, in bits, of an argument with the
3971 specified mode and type. */
3974 ix86_function_arg_boundary (enum machine_mode mode, tree type)
3978 align = TYPE_ALIGN (type);
3980 align = GET_MODE_ALIGNMENT (mode);
3981 if (align < PARM_BOUNDARY)
3982 align = PARM_BOUNDARY;
3985 /* The i386 ABI defines all arguments to be 4-byte aligned. We have to
3986 make an exception for SSE modes since these require 128-bit
3989 The handling here differs from field_alignment. ICC aligns MMX
3990 arguments to 4-byte boundaries, while structure fields are aligned
3991 to 8-byte boundaries. */
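/* Worked example of the rules above: a double argument stays on the
   4-byte PARM_BOUNDARY, an __m64 (MMX) argument likewise, an __m128
   (SSE) argument gets a 128-bit boundary, and an aggregate is bumped to
   128 bits only when contains_128bit_aligned_vector_p finds an SSE
   vector inside it. */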
3993 align = PARM_BOUNDARY;
3996 if (!SSE_REG_MODE_P (mode))
3997 align = PARM_BOUNDARY;
4001 if (!contains_128bit_aligned_vector_p (type))
4002 align = PARM_BOUNDARY;
4010 /* Return true if N is a possible register number of function value. */
4013 ix86_function_value_regno_p (int regno)
4020 case FIRST_FLOAT_REG:
4021 return TARGET_FLOAT_RETURNS_IN_80387;
4027 if (TARGET_MACHO || TARGET_64BIT)
4035 /* Define how to find the value returned by a function.
4036 VALTYPE is the data type of the value (as a tree).
4037 If the precise function being called is known, FUNC is its FUNCTION_DECL;
4038 otherwise, FUNC is 0. */
4041 function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
4042 tree fntype, tree fn)
4046 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
4047 we normally prevent this case when MMX is not available. However,
4048 some ABIs may require the result to be returned like DImode. */
4049 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
4050 regno = TARGET_MMX ? FIRST_MMX_REG : 0;
4052 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
4053 we prevent this case when SSE is not available. However, some ABIs
4054 may require the result to be returned like integer TImode. */
4055 else if (mode == TImode
4056 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
4057 regno = TARGET_SSE ? FIRST_SSE_REG : 0;
4059 /* Decimal floating point values can go in %eax, unlike other float modes. */
4060 else if (DECIMAL_FLOAT_MODE_P (mode))
4063 /* Most things go in %eax, except (unless -mno-fp-ret-in-387) fp values. */
4064 else if (!SCALAR_FLOAT_MODE_P (mode) || !TARGET_FLOAT_RETURNS_IN_80387)
4067 /* Floating point return values in %st(0), except for local functions when
4068 SSE math is enabled or for functions with sseregparm attribute. */
4071 regno = FIRST_FLOAT_REG;
4073 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
4075 int sse_level = ix86_function_sseregparm (fntype, fn);
4076 if ((sse_level >= 1 && mode == SFmode)
4077 || (sse_level == 2 && mode == DFmode))
4078 regno = FIRST_SSE_REG;
4082 return gen_rtx_REG (orig_mode, regno);
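/* Summary by example (a sketch of the cases above): with the default
   -mfp-ret-in-387, "double f (void)" returns in %st(0); if f uses the
   sseregparm convention at level 2, the same value returns in %xmm0; a
   decimal float value returns in the integer registers starting at
   %eax; and a 16-byte vector value returns in %xmm0 when SSE is
   enabled. */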
4086 function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
4091 /* Handle libcalls, which don't provide a type node. */
4092 if (valtype == NULL)
4104 return gen_rtx_REG (mode, FIRST_SSE_REG);
4107 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
4111 return gen_rtx_REG (mode, 0);
4115 ret = construct_container (mode, orig_mode, valtype, 1,
4116 REGPARM_MAX, SSE_REGPARM_MAX,
4117 x86_64_int_return_registers, 0);
4119 /* For zero-sized structures, construct_container returns NULL, but we
4120 need to keep the rest of the compiler happy by returning a meaningful value. */
4122 ret = gen_rtx_REG (orig_mode, 0);
4128 ix86_function_value_1 (tree valtype, tree fntype_or_decl,
4129 enum machine_mode orig_mode, enum machine_mode mode)
4134 if (fntype_or_decl && DECL_P (fntype_or_decl))
4135 fn = fntype_or_decl;
4136 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
4139 return function_value_64 (orig_mode, mode, valtype);
4141 return function_value_32 (orig_mode, mode, fntype, fn);
4145 ix86_function_value (tree valtype, tree fntype_or_decl,
4146 bool outgoing ATTRIBUTE_UNUSED)
4148 enum machine_mode mode, orig_mode;
4150 orig_mode = TYPE_MODE (valtype);
4151 mode = type_natural_mode (valtype);
4152 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
4156 ix86_libcall_value (enum machine_mode mode)
4158 return ix86_function_value_1 (NULL, NULL, mode, mode);
4161 /* Return true iff type is returned in memory. */
4164 return_in_memory_32 (tree type, enum machine_mode mode)
4168 if (mode == BLKmode)
4171 size = int_size_in_bytes (type);
4173 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
4176 if (VECTOR_MODE_P (mode) || mode == TImode)
4178 /* User-created vectors small enough to fit in EAX. */
4182 /* MMX/3dNow values are returned in MM0,
4183 except when it doesn't exist. */
4185 return (TARGET_MMX ? 0 : 1);
4187 /* SSE values are returned in XMM0, except when it doesn't exist. */
4189 return (TARGET_SSE ? 0 : 1);
4204 return_in_memory_64 (tree type, enum machine_mode mode)
4206 int needed_intregs, needed_sseregs;
4207 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
4211 ix86_return_in_memory (tree type)
4213 enum machine_mode mode = type_natural_mode (type);
4216 return return_in_memory_64 (type, mode);
4218 return return_in_memory_32 (type, mode);
4221 /* When returning SSE vector types, we have a choice of either
4222 (1) being ABI incompatible with a -march switch, or
4223 (2) generating an error.
4224 Given no good solution, I think the safest thing is one warning.
4225 The user won't be able to use -Werror, but....
4227 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
4228 called in response to actually generating a caller or callee that
4229 uses such a type. As opposed to RETURN_IN_MEMORY, which is called
4230 via aggregate_value_p for general type probing from tree-ssa. */
4233 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
4235 static bool warnedsse, warnedmmx;
4237 if (!TARGET_64BIT && type)
4239 /* Look at the return type of the function, not the function type. */
4240 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
4242 if (!TARGET_SSE && !warnedsse)
4245 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
4248 warning (0, "SSE vector return without SSE enabled "
4253 if (!TARGET_MMX && !warnedmmx)
4255 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
4258 warning (0, "MMX vector return without MMX enabled "
4268 /* Create the va_list data type. */
4271 ix86_build_builtin_va_list (void)
4273 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
4275 /* For i386 we use a plain pointer to the argument area. */
4277 return build_pointer_type (char_type_node);
4279 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
4280 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
4282 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
4283 unsigned_type_node);
4284 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
4285 unsigned_type_node);
4286 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
4288 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
4291 va_list_gpr_counter_field = f_gpr;
4292 va_list_fpr_counter_field = f_fpr;
4294 DECL_FIELD_CONTEXT (f_gpr) = record;
4295 DECL_FIELD_CONTEXT (f_fpr) = record;
4296 DECL_FIELD_CONTEXT (f_ovf) = record;
4297 DECL_FIELD_CONTEXT (f_sav) = record;
4299 TREE_CHAIN (record) = type_decl;
4300 TYPE_NAME (record) = type_decl;
4301 TYPE_FIELDS (record) = f_gpr;
4302 TREE_CHAIN (f_gpr) = f_fpr;
4303 TREE_CHAIN (f_fpr) = f_ovf;
4304 TREE_CHAIN (f_ovf) = f_sav;
4306 layout_type (record);
4308 /* The correct type is an array type of one element. */
4309 return build_array_type (record, build_index_type (size_zero_node));
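/* The record built above corresponds to the x86-64 psABI declaration
   (a C rendering, for reference):

       typedef struct __va_list_tag {
         unsigned int gp_offset;
         unsigned int fp_offset;
         void *overflow_arg_area;
         void *reg_save_area;
       } __builtin_va_list[1];
*/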
4312 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
4315 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
4325 if (! cfun->va_list_gpr_size && ! cfun->va_list_fpr_size)
4328 /* Indicate that we need to allocate space on the stack for the varargs save area. */
4329 ix86_save_varrargs_registers = 1;
4330 cfun->stack_alignment_needed = 128;
4332 save_area = frame_pointer_rtx;
4333 set = get_varargs_alias_set ();
4335 for (i = cum->regno;
4337 && i < cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
4340 mem = gen_rtx_MEM (Pmode,
4341 plus_constant (save_area, i * UNITS_PER_WORD));
4342 MEM_NOTRAP_P (mem) = 1;
4343 set_mem_alias_set (mem, set);
4344 emit_move_insn (mem, gen_rtx_REG (Pmode,
4345 x86_64_int_parameter_registers[i]));
4348 if (cum->sse_nregs && cfun->va_list_fpr_size)
4350 /* Now emit code to save SSE registers. The AX parameter contains the
4351 number of SSE parameter registers used to call this function. We use
4352 the sse_prologue_save insn template, which produces a computed jump
4353 across the SSE saves. We need some preparation work to get this working. */
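/* Why the computed jump works (inferred from the address formula below):
   the sse_prologue_save template emits one fixed-size save insn per %xmm
   register -- 5 bytes each, as the factor in the formula implies -- so an
   address of the form label - 5*eax + nnamed_sse_arguments*5 enters the
   save block at a point that stores just the registers which may actually
   carry varargs values, skipping the rest. */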
4355 label = gen_label_rtx ();
4356 label_ref = gen_rtx_LABEL_REF (Pmode, label);
4358 /* Compute the address to jump to:
4359 label - 5*eax + nnamed_sse_arguments*5. */
4360 tmp_reg = gen_reg_rtx (Pmode);
4361 nsse_reg = gen_reg_rtx (Pmode);
4362 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
4363 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
4364 gen_rtx_MULT (Pmode, nsse_reg,
4369 gen_rtx_CONST (DImode,
4370 gen_rtx_PLUS (DImode,
4372 GEN_INT (cum->sse_regno * 4))));
4374 emit_move_insn (nsse_reg, label_ref);
4375 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
4377 /* Compute the address of the memory block we save into. We always use a
4378 pointer pointing 127 bytes after the first byte to store; this keeps
4379 the displacements small enough that each save instruction stays within 4 bytes. */
4380 tmp_reg = gen_reg_rtx (Pmode);
4381 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
4382 plus_constant (save_area,
4383 8 * REGPARM_MAX + 127)));
4384 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
4385 MEM_NOTRAP_P (mem) = 1;
4386 set_mem_alias_set (mem, set);
4387 set_mem_align (mem, BITS_PER_WORD);
4389 /* And finally do the dirty job! */
4390 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
4391 GEN_INT (cum->sse_regno), label));
4396 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4397 tree type, int *pretend_size ATTRIBUTE_UNUSED,
4400 CUMULATIVE_ARGS next_cum;
4404 /* This argument doesn't appear to be used anymore, which is good,
4405 because the old code here didn't suppress rtl generation. */
4406 gcc_assert (!no_rtl);
4411 fntype = TREE_TYPE (current_function_decl);
4412 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
4413 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
4414 != void_type_node));
4416 /* For varargs, we do not want to skip the dummy va_dcl argument.
4417 For stdargs, we do want to skip the last named argument. */
4420 function_arg_advance (&next_cum, mode, type, 1);
4422 setup_incoming_varargs_64 (&next_cum);
4425 /* Implement va_start. */
4428 ix86_va_start (tree valist, rtx nextarg)
4430 HOST_WIDE_INT words, n_gpr, n_fpr;
4431 tree f_gpr, f_fpr, f_ovf, f_sav;
4432 tree gpr, fpr, ovf, sav, t;
4435 /* Only the 64-bit target needs anything special. */
4438 std_expand_builtin_va_start (valist, nextarg);
4442 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4443 f_fpr = TREE_CHAIN (f_gpr);
4444 f_ovf = TREE_CHAIN (f_fpr);
4445 f_sav = TREE_CHAIN (f_ovf);
4447 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
4448 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
4449 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
4450 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
4451 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
4453 /* Count number of gp and fp argument registers used. */
4454 words = current_function_args_info.words;
4455 n_gpr = current_function_args_info.regno;
4456 n_fpr = current_function_args_info.sse_regno;
4458 if (cfun->va_list_gpr_size)
4460 type = TREE_TYPE (gpr);
4461 t = build2 (GIMPLE_MODIFY_STMT, type, gpr,
4462 build_int_cst (type, n_gpr * 8));
4463 TREE_SIDE_EFFECTS (t) = 1;
4464 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4467 if (cfun->va_list_fpr_size)
4469 type = TREE_TYPE (fpr);
4470 t = build2 (GIMPLE_MODIFY_STMT, type, fpr,
4471 build_int_cst (type, n_fpr * 16 + 8*REGPARM_MAX));
4472 TREE_SIDE_EFFECTS (t) = 1;
4473 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4476 /* Find the overflow area. */
4477 type = TREE_TYPE (ovf);
4478 t = make_tree (type, virtual_incoming_args_rtx);
4480 t = build2 (PLUS_EXPR, type, t,
4481 build_int_cst (type, words * UNITS_PER_WORD));
4482 t = build2 (GIMPLE_MODIFY_STMT, type, ovf, t);
4483 TREE_SIDE_EFFECTS (t) = 1;
4484 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4486 if (cfun->va_list_gpr_size || cfun->va_list_fpr_size)
4488 /* Find the register save area.
4489 The function prologue saves it right above the stack frame. */
4490 type = TREE_TYPE (sav);
4491 t = make_tree (type, frame_pointer_rtx);
4492 t = build2 (GIMPLE_MODIFY_STMT, type, sav, t);
4493 TREE_SIDE_EFFECTS (t) = 1;
4494 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
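/* Worked example (assuming REGPARM_MAX == 6 and 16-byte FP save slots,
   matching the arithmetic above): for "int f (int a, char *b, ...)" the
   two named arguments consume two integer registers, so va_start stores
   gp_offset = 2 * 8 = 16 and fp_offset = 0 * 16 + 8 * 6 = 48, i.e. the
   first va_arg fetch starts at the third slot of the integer save area. */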
4498 /* Implement va_arg. */
4501 ix86_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p)
4503 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
4504 tree f_gpr, f_fpr, f_ovf, f_sav;
4505 tree gpr, fpr, ovf, sav, t;
4507 tree lab_false, lab_over = NULL_TREE;
4512 enum machine_mode nat_mode;
4514 /* Only the 64-bit target needs anything special. */
4516 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
4518 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4519 f_fpr = TREE_CHAIN (f_gpr);
4520 f_ovf = TREE_CHAIN (f_fpr);
4521 f_sav = TREE_CHAIN (f_ovf);
4523 valist = build_va_arg_indirect_ref (valist);
4524 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
4525 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
4526 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
4527 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
4529 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
4531 type = build_pointer_type (type);
4532 size = int_size_in_bytes (type);
4533 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4535 nat_mode = type_natural_mode (type);
4536 container = construct_container (nat_mode, TYPE_MODE (type), type, 0,
4537 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
4539 /* Pull the value out of the saved registers. */
4541 addr = create_tmp_var (ptr_type_node, "addr");
4542 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
4546 int needed_intregs, needed_sseregs;
4548 tree int_addr, sse_addr;
4550 lab_false = create_artificial_label ();
4551 lab_over = create_artificial_label ();
4553 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
4555 need_temp = (!REG_P (container)
4556 && ((needed_intregs && TYPE_ALIGN (type) > 64)
4557 || TYPE_ALIGN (type) > 128));
4559 /* In case we are passing a structure, verify that it is a consecutive block
4560 in the register save area. If not, we need to do moves. */
4561 if (!need_temp && !REG_P (container))
4563 /* Verify that all registers are strictly consecutive. */
4564 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
4568 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
4570 rtx slot = XVECEXP (container, 0, i);
4571 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
4572 || INTVAL (XEXP (slot, 1)) != i * 16)
4580 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
4582 rtx slot = XVECEXP (container, 0, i);
4583 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
4584 || INTVAL (XEXP (slot, 1)) != i * 8)
4596 int_addr = create_tmp_var (ptr_type_node, "int_addr");
4597 DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
4598 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
4599 DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();
4602 /* First ensure that we fit completely in registers. */
4605 t = build_int_cst (TREE_TYPE (gpr),
4606 (REGPARM_MAX - needed_intregs + 1) * 8);
4607 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
4608 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
4609 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
4610 gimplify_and_add (t, pre_p);
4614 t = build_int_cst (TREE_TYPE (fpr),
4615 (SSE_REGPARM_MAX - needed_sseregs + 1) * 16
4617 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
4618 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
4619 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
4620 gimplify_and_add (t, pre_p);
4623 /* Compute index to start of area used for integer regs. */
4626 /* int_addr = gpr + sav; */
4627 t = fold_convert (ptr_type_node, gpr);
4628 t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
4629 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, int_addr, t);
4630 gimplify_and_add (t, pre_p);
4634 /* sse_addr = fpr + sav; */
4635 t = fold_convert (ptr_type_node, fpr);
4636 t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
4637 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, sse_addr, t);
4638 gimplify_and_add (t, pre_p);
4643 tree temp = create_tmp_var (type, "va_arg_tmp");
4646 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
4647 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, addr, t);
4648 gimplify_and_add (t, pre_p);
4650 for (i = 0; i < XVECLEN (container, 0); i++)
4652 rtx slot = XVECEXP (container, 0, i);
4653 rtx reg = XEXP (slot, 0);
4654 enum machine_mode mode = GET_MODE (reg);
4655 tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
4656 tree addr_type = build_pointer_type (piece_type);
4659 tree dest_addr, dest;
4661 if (SSE_REGNO_P (REGNO (reg)))
4663 src_addr = sse_addr;
4664 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
4668 src_addr = int_addr;
4669 src_offset = REGNO (reg) * 8;
4671 src_addr = fold_convert (addr_type, src_addr);
4672 src_addr = fold_build2 (PLUS_EXPR, addr_type, src_addr,
4673 size_int (src_offset));
4674 src = build_va_arg_indirect_ref (src_addr);
4676 dest_addr = fold_convert (addr_type, addr);
4677 dest_addr = fold_build2 (PLUS_EXPR, addr_type, dest_addr,
4678 size_int (INTVAL (XEXP (slot, 1))));
4679 dest = build_va_arg_indirect_ref (dest_addr);
4681 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, dest, src);
4682 gimplify_and_add (t, pre_p);
4688 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
4689 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
4690 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (gpr), gpr, t);
4691 gimplify_and_add (t, pre_p);
4695 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
4696 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
4697 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (fpr), fpr, t);
4698 gimplify_and_add (t, pre_p);
4701 t = build1 (GOTO_EXPR, void_type_node, lab_over);
4702 gimplify_and_add (t, pre_p);
4704 t = build1 (LABEL_EXPR, void_type_node, lab_false);
4705 append_to_statement_list (t, pre_p);
4708 /* ... otherwise out of the overflow area. */
4710 /* Care for on-stack alignment if needed. */
4711 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64
4712 || integer_zerop (TYPE_SIZE (type)))
4716 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
4717 t = build2 (PLUS_EXPR, TREE_TYPE (ovf), ovf,
4718 build_int_cst (TREE_TYPE (ovf), align - 1));
4719 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
4720 build_int_cst (TREE_TYPE (t), -align));
4722 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
4724 t2 = build2 (GIMPLE_MODIFY_STMT, void_type_node, addr, t);
4725 gimplify_and_add (t2, pre_p);
4727 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
4728 build_int_cst (TREE_TYPE (t), rsize * UNITS_PER_WORD));
4729 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (ovf), ovf, t);
4730 gimplify_and_add (t, pre_p);
4734 t = build1 (LABEL_EXPR, void_type_node, lab_over);
4735 append_to_statement_list (t, pre_p);
4738 ptrtype = build_pointer_type (type);
4739 addr = fold_convert (ptrtype, addr);
4742 addr = build_va_arg_indirect_ref (addr);
4743 return build_va_arg_indirect_ref (addr);
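/* The GIMPLE built above amounts to this C sketch for va_arg (ap, int)
   (hypothetical local names; register path shown with REGPARM_MAX == 6):

       if (ap->gp_offset < 6 * 8)
         {
           addr = (char *) ap->reg_save_area + ap->gp_offset;
           ap->gp_offset += 8;
         }
       else
         {
           addr = ap->overflow_arg_area;
           ap->overflow_arg_area = (char *) ap->overflow_arg_area + 8;
         }
       result = *(int *) addr;
*/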
4746 /* Return nonzero if OPNUM's MEM should be matched
4747 in movabs* patterns. */
4750 ix86_check_movabs (rtx insn, int opnum)
4754 set = PATTERN (insn);
4755 if (GET_CODE (set) == PARALLEL)
4756 set = XVECEXP (set, 0, 0);
4757 gcc_assert (GET_CODE (set) == SET);
4758 mem = XEXP (set, opnum);
4759 while (GET_CODE (mem) == SUBREG)
4760 mem = SUBREG_REG (mem);
4761 gcc_assert (MEM_P (mem));
4762 return (volatile_ok || !MEM_VOLATILE_P (mem));
4765 /* Initialize the table of extra 80387 mathematical constants. */
4768 init_ext_80387_constants (void)
4770 static const char * cst[5] =
4772 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
4773 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
4774 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
4775 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
4776 "3.1415926535897932385128089594061862044", /* 4: fldpi */
4780 for (i = 0; i < 5; i++)
4782 real_from_string (&ext_80387_constants_table[i], cst[i]);
4783 /* Ensure each constant is rounded to XFmode precision. */
4784 real_convert (&ext_80387_constants_table[i],
4785 XFmode, &ext_80387_constants_table[i]);
4788 ext_80387_constants_init = 1;
4791 /* Return true if the constant is something that can be loaded with
4792 a special instruction. */
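/* Usage sketch: standard_80387_constant_p returns 1 for +0.0 and 2 for
   +1.0, which standard_80387_constant_opcode below renders as "fldz" and
   "fld1"; the five table entries above map, in order, to the fldlg2,
   fldln2, fldl2e, fldl2t and fldpi opcodes. */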
4795 standard_80387_constant_p (rtx x)
4799 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
4802 if (x == CONST0_RTX (GET_MODE (x)))
4804 if (x == CONST1_RTX (GET_MODE (x)))
4807 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
4809 /* For XFmode constants, try to find a special 80387 instruction when
4810 optimizing for size or on those CPUs that benefit from them. */
4811 if (GET_MODE (x) == XFmode
4812 && (optimize_size || TARGET_EXT_80387_CONSTANTS))
4816 if (! ext_80387_constants_init)
4817 init_ext_80387_constants ();
4819 for (i = 0; i < 5; i++)
4820 if (real_identical (&r, &ext_80387_constants_table[i]))
4824 /* A load of the constant -0.0 or -1.0 will be split into an
4825 fldz;fchs or fld1;fchs sequence. */
4826 if (real_isnegzero (&r))
4828 if (real_identical (&r, &dconstm1))
4834 /* Return the opcode of the special instruction to be used to load the constant X. */
4838 standard_80387_constant_opcode (rtx x)
4840 switch (standard_80387_constant_p (x))
4864 /* Return the CONST_DOUBLE representing the 80387 constant that is
4865 loaded by the specified special instruction. The argument IDX
4866 matches the return value from standard_80387_constant_p. */
4869 standard_80387_constant_rtx (int idx)
4873 if (! ext_80387_constants_init)
4874 init_ext_80387_constants ();
4890 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
4894 /* Return 1 if mode is a valid mode for SSE. */
4896 standard_sse_mode_p (enum machine_mode mode)
4913 /* Return 1 if X is an FP constant that we can load into an SSE register without using memory. */
4916 standard_sse_constant_p (rtx x)
4918 enum machine_mode mode = GET_MODE (x);
4920 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
4922 if (vector_all_ones_operand (x, mode)
4923 && standard_sse_mode_p (mode))
4924 return TARGET_SSE2 ? 2 : -1;
4929 /* Return the opcode of the special instruction to be used to load the constant X. */
4933 standard_sse_constant_opcode (rtx insn, rtx x)
4935 switch (standard_sse_constant_p (x))
4938 if (get_attr_mode (insn) == MODE_V4SF)
4939 return "xorps\t%0, %0";
4940 else if (get_attr_mode (insn) == MODE_V2DF)
4941 return "xorpd\t%0, %0";
4943 return "pxor\t%0, %0";
4945 return "pcmpeqd\t%0, %0";
4950 /* Returns 1 if OP contains a symbol reference. */
4953 symbolic_reference_mentioned_p (rtx op)
4958 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
4961 fmt = GET_RTX_FORMAT (GET_CODE (op));
4962 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
4968 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
4969 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
4973 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
4980 /* Return 1 if it is appropriate to emit `ret' instructions in the
4981 body of a function. Do this only if the epilogue is simple, needing a
4982 couple of insns. Prior to reloading, we can't tell how many registers
4983 must be saved, so return 0 then. Return 0 if there is no frame
4984 marker to de-allocate. */
4987 ix86_can_use_return_insn_p (void)
4989 struct ix86_frame frame;
4991 if (! reload_completed || frame_pointer_needed)
4994 /* Don't allow more than 32k bytes of popped arguments, since that's all
4995 we can do with one instruction. */
4996 if (current_function_pops_args
4997 && current_function_args_size >= 32768)
5000 ix86_compute_frame_layout (&frame);
5001 return frame.to_allocate == 0 && frame.nregs == 0;
5004 /* Value should be nonzero if functions must have frame pointers.
5005 Zero means the frame pointer need not be set up (and parms may
5006 be accessed via the stack pointer) in functions that seem suitable. */
5009 ix86_frame_pointer_required (void)
5011 /* If we accessed previous frames, then the generated code expects
5012 to be able to access the saved ebp value in our frame. */
5013 if (cfun->machine->accesses_prev_frame)
5016 /* Several x86 OSes need a frame pointer for other reasons,
5017 usually pertaining to setjmp. */
5018 if (SUBTARGET_FRAME_POINTER_REQUIRED)
5021 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
5022 the frame pointer by default. Turn it back on now if we've not
5023 got a leaf function. */
5024 if (TARGET_OMIT_LEAF_FRAME_POINTER
5025 && (!current_function_is_leaf
5026 || ix86_current_function_calls_tls_descriptor))
5029 if (current_function_profile)
5035 /* Record that the current function accesses previous call frames. */
5038 ix86_setup_frame_addresses (void)
5040 cfun->machine->accesses_prev_frame = 1;
5043 #if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
5044 # define USE_HIDDEN_LINKONCE 1
5046 # define USE_HIDDEN_LINKONCE 0
5049 static int pic_labels_used;
5051 /* Fills in the label name that should be used for a pc thunk for
5052 the given register. */
5055 get_pc_thunk_name (char name[32], unsigned int regno)
5057 gcc_assert (!TARGET_64BIT);
5059 if (USE_HIDDEN_LINKONCE)
5060 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
5062 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
5066 /* This function generates code for -fpic that loads %ebx with
5067 the return address of the caller and then returns. */
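/* A sketch of the thunk emitted below, in AT&T syntax for %ebx:

       __i686.get_pc_thunk.bx:
               movl    (%esp), %ebx
               ret

   A "call" to the thunk leaves the caller's return address -- i.e. the
   current pc -- in the register, ready for output_set_got to add the
   _GLOBAL_OFFSET_TABLE_ offset. */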
5070 ix86_file_end (void)
5075 for (regno = 0; regno < 8; ++regno)
5079 if (! ((pic_labels_used >> regno) & 1))
5082 get_pc_thunk_name (name, regno);
5087 switch_to_section (darwin_sections[text_coal_section]);
5088 fputs ("\t.weak_definition\t", asm_out_file);
5089 assemble_name (asm_out_file, name);
5090 fputs ("\n\t.private_extern\t", asm_out_file);
5091 assemble_name (asm_out_file, name);
5092 fputs ("\n", asm_out_file);
5093 ASM_OUTPUT_LABEL (asm_out_file, name);
5097 if (USE_HIDDEN_LINKONCE)
5101 decl = build_decl (FUNCTION_DECL, get_identifier (name),
5103 TREE_PUBLIC (decl) = 1;
5104 TREE_STATIC (decl) = 1;
5105 DECL_ONE_ONLY (decl) = 1;
5107 (*targetm.asm_out.unique_section) (decl, 0);
5108 switch_to_section (get_named_section (decl, NULL, 0));
5110 (*targetm.asm_out.globalize_label) (asm_out_file, name);
5111 fputs ("\t.hidden\t", asm_out_file);
5112 assemble_name (asm_out_file, name);
5113 fputc ('\n', asm_out_file);
5114 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
5118 switch_to_section (text_section);
5119 ASM_OUTPUT_LABEL (asm_out_file, name);
5122 xops[0] = gen_rtx_REG (SImode, regno);
5123 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
5124 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
5125 output_asm_insn ("ret", xops);
5128 if (NEED_INDICATE_EXEC_STACK)
5129 file_end_indicate_exec_stack ();
5132 /* Emit code for the SET_GOT patterns. */
5135 output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
5141 if (TARGET_VXWORKS_RTP && flag_pic)
5143 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
5144 xops[2] = gen_rtx_MEM (Pmode,
5145 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
5146 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
5148 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
5149 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
5150 an unadorned address. */
5151 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
5152 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
5153 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
5157 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
5159 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
5161 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
5164 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
5166 output_asm_insn ("call\t%a2", xops);
5169 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
5170 is what will be referenced by the Mach-O PIC subsystem. */
5172 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
5175 (*targetm.asm_out.internal_label) (asm_out_file, "L",
5176 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
5179 output_asm_insn ("pop{l}\t%0", xops);
5184 get_pc_thunk_name (name, REGNO (dest));
5185 pic_labels_used |= 1 << REGNO (dest);
5187 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
5188 xops[2] = gen_rtx_MEM (QImode, xops[2]);
5189 output_asm_insn ("call\t%X2", xops);
5190 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
5191 is what will be referenced by the Mach-O PIC subsystem. */
5194 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
5196 targetm.asm_out.internal_label (asm_out_file, "L",
5197 CODE_LABEL_NUMBER (label));
5204 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
5205 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
5207 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
5212 /* Generate a "push" pattern for input ARG. */
5217 return gen_rtx_SET (VOIDmode,
5219 gen_rtx_PRE_DEC (Pmode,
5220 stack_pointer_rtx)),
5224 /* Return >= 0 if there is an unused call-clobbered register available
5225 for the entire function. */
5228 ix86_select_alt_pic_regnum (void)
5230 if (current_function_is_leaf && !current_function_profile
5231 && !ix86_current_function_calls_tls_descriptor)
5234 for (i = 2; i >= 0; --i)
5235 if (!regs_ever_live[i])
5239 return INVALID_REGNUM;
5242 /* Return 1 if we need to save REGNO. */
5244 ix86_save_reg (unsigned int regno, int maybe_eh_return)
5246 if (pic_offset_table_rtx
5247 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
5248 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
5249 || current_function_profile
5250 || current_function_calls_eh_return
5251 || current_function_uses_const_pool))
5253 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
5258 if (current_function_calls_eh_return && maybe_eh_return)
5263 unsigned test = EH_RETURN_DATA_REGNO (i);
5264 if (test == INVALID_REGNUM)
5271 if (cfun->machine->force_align_arg_pointer
5272 && regno == REGNO (cfun->machine->force_align_arg_pointer))
5275 return (regs_ever_live[regno]
5276 && !call_used_regs[regno]
5277 && !fixed_regs[regno]
5278 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
5281 /* Return the number of registers to be saved on the stack. */
5284 ix86_nsaved_regs (void)
5289 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
5290 if (ix86_save_reg (regno, true))
5295 /* Return the offset between two registers, one to be eliminated, and the other
5296 its replacement, at the start of a routine. */
5299 ix86_initial_elimination_offset (int from, int to)
5301 struct ix86_frame frame;
5302 ix86_compute_frame_layout (&frame);
5304 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
5305 return frame.hard_frame_pointer_offset;
5306 else if (from == FRAME_POINTER_REGNUM
5307 && to == HARD_FRAME_POINTER_REGNUM)
5308 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
5311 gcc_assert (to == STACK_POINTER_REGNUM);
5313 if (from == ARG_POINTER_REGNUM)
5314 return frame.stack_pointer_offset;
5316 gcc_assert (from == FRAME_POINTER_REGNUM);
5317 return frame.stack_pointer_offset - frame.frame_pointer_offset;
5321 /* Fill the ix86_frame structure describing the frame of the currently compiled function. */
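/* A sketch of the layout computed here (stack grows downward; labels on
   the right are fields of struct ix86_frame):

       return address
       saved frame pointer (if frame_pointer_needed)
                                   <- hard_frame_pointer_offset
       register save area          (nregs * UNITS_PER_WORD)
       va_arg registers save area  (va_arg_size)
       padding1
                                   <- frame_pointer_offset
       local variables             (size)
       outgoing arguments          (outgoing_arguments_size)
       padding2
                                   <- stack_pointer_offset

   (reconstructed from the offset arithmetic below; the red zone, when
   usable, is subtracted from to_allocate at the end). */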
5324 ix86_compute_frame_layout (struct ix86_frame *frame)
5326 HOST_WIDE_INT total_size;
5327 unsigned int stack_alignment_needed;
5328 HOST_WIDE_INT offset;
5329 unsigned int preferred_alignment;
5330 HOST_WIDE_INT size = get_frame_size ();
5332 frame->nregs = ix86_nsaved_regs ();
5335 stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
5336 preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
5338 /* During reload iteration the number of registers saved can change.
5339 Recompute the value as needed. Do not recompute when the number of
5340 registers didn't change, as reload does multiple calls to the function
5341 and does not expect the decision to change within a single iteration. */
5343 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
5345 int count = frame->nregs;
5347 cfun->machine->use_fast_prologue_epilogue_nregs = count;
5348 /* The fast prologue uses move instead of push to save registers. This
5349 is significantly longer, but also executes faster as modern hardware
5350 can execute the moves in parallel, but can't do that for push/pop.
5352 Be careful about choosing which prologue to emit: when a function takes
5353 many instructions to execute, we may use the slow version, as well as
5354 when the function is known to be outside a hot spot (known only with
5355 profile feedback). Weight the size of the function by the number of
5356 registers to save, as it is cheap to use one or two push instructions
5357 but very slow to use many of them. */
5359 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
5360 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
5361 || (flag_branch_probabilities
5362 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
5363 cfun->machine->use_fast_prologue_epilogue = false;
5365 cfun->machine->use_fast_prologue_epilogue
5366 = !expensive_function_p (count);
5368 if (TARGET_PROLOGUE_USING_MOVE
5369 && cfun->machine->use_fast_prologue_epilogue)
5370 frame->save_regs_using_mov = true;
5372 frame->save_regs_using_mov = false;
5375 /* Skip return address and saved base pointer. */
5376 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
5378 frame->hard_frame_pointer_offset = offset;
5380 /* Do some sanity checking of stack_alignment_needed and
5381 preferred_alignment, since the i386 port is the only one using these
5382 features, and they may break easily. */
5384 gcc_assert (!size || stack_alignment_needed);
5385 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
5386 gcc_assert (preferred_alignment <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
5387 gcc_assert (stack_alignment_needed
5388 <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
5390 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
5391 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
5393 /* Register save area. */
5394 offset += frame->nregs * UNITS_PER_WORD;
5397 if (ix86_save_varrargs_registers)
5399 offset += X86_64_VARARGS_SIZE;
5400 frame->va_arg_size = X86_64_VARARGS_SIZE;
5403 frame->va_arg_size = 0;
5405 /* Align the start of the frame for the local function. */
5406 frame->padding1 = ((offset + stack_alignment_needed - 1)
5407 & -stack_alignment_needed) - offset;
5409 offset += frame->padding1;
5411 /* Frame pointer points here. */
5412 frame->frame_pointer_offset = offset;
5416 /* Add the outgoing arguments area. It can be skipped if we eliminated
5417 all the function calls as dead code.
5418 Skipping is, however, impossible when the function calls alloca: the
5419 alloca expander assumes that the last current_function_outgoing_args_size
5420 bytes of the stack frame are unused. */
5421 if (ACCUMULATE_OUTGOING_ARGS
5422 && (!current_function_is_leaf || current_function_calls_alloca
5423 || ix86_current_function_calls_tls_descriptor))
5425 offset += current_function_outgoing_args_size;
5426 frame->outgoing_arguments_size = current_function_outgoing_args_size;
5429 frame->outgoing_arguments_size = 0;
5431 /* Align the stack boundary. Only needed if we're calling another function or using alloca. */
5433 if (!current_function_is_leaf || current_function_calls_alloca
5434 || ix86_current_function_calls_tls_descriptor)
5435 frame->padding2 = ((offset + preferred_alignment - 1)
5436 & -preferred_alignment) - offset;
5438 frame->padding2 = 0;
5440 offset += frame->padding2;
5442 /* We've reached end of stack frame. */
5443 frame->stack_pointer_offset = offset;
5445 /* The size the prologue needs to allocate. */
5446 frame->to_allocate =
5447 (size + frame->padding1 + frame->padding2
5448 + frame->outgoing_arguments_size + frame->va_arg_size);
5450 if ((!frame->to_allocate && frame->nregs <= 1)
5451 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
5452 frame->save_regs_using_mov = false;
5454 if (TARGET_RED_ZONE && current_function_sp_is_unchanging
5455 && current_function_is_leaf
5456 && !ix86_current_function_calls_tls_descriptor)
5458 frame->red_zone_size = frame->to_allocate;
5459 if (frame->save_regs_using_mov)
5460 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
5461 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
5462 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
5465 frame->red_zone_size = 0;
5466 frame->to_allocate -= frame->red_zone_size;
5467 frame->stack_pointer_offset -= frame->red_zone_size;
5469 fprintf (stderr, "\n");
5470 fprintf (stderr, "nregs: %ld\n", (long)frame->nregs);
5471 fprintf (stderr, "size: %ld\n", (long)size);
5472 fprintf (stderr, "alignment1: %ld\n", (long)stack_alignment_needed);
5473 fprintf (stderr, "padding1: %ld\n", (long)frame->padding1);
5474 fprintf (stderr, "va_arg: %ld\n", (long)frame->va_arg_size);
5475 fprintf (stderr, "padding2: %ld\n", (long)frame->padding2);
5476 fprintf (stderr, "to_allocate: %ld\n", (long)frame->to_allocate);
5477 fprintf (stderr, "red_zone_size: %ld\n", (long)frame->red_zone_size);
5478 fprintf (stderr, "frame_pointer_offset: %ld\n", (long)frame->frame_pointer_offset);
5479 fprintf (stderr, "hard_frame_pointer_offset: %ld\n",
5480 (long)frame->hard_frame_pointer_offset);
5481 fprintf (stderr, "stack_pointer_offset: %ld\n", (long)frame->stack_pointer_offset);
5482 fprintf (stderr, "current_function_is_leaf: %ld\n", (long)current_function_is_leaf);
5483 fprintf (stderr, "current_function_calls_alloca: %ld\n", (long)current_function_calls_alloca);
5484 fprintf (stderr, "x86_current_function_calls_tls_descriptor: %ld\n", (long)ix86_current_function_calls_tls_descriptor);
5488 /* Emit code to save registers in the prologue. */
5491 ix86_emit_save_regs (void)
5496 for (regno = FIRST_PSEUDO_REGISTER; regno-- > 0; )
5497 if (ix86_save_reg (regno, true))
5499 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
5500 RTX_FRAME_RELATED_P (insn) = 1;
5504 /* Emit code to save registers using MOV insns. The first register
5505 is stored at POINTER + OFFSET. */
5507 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
5512 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5513 if (ix86_save_reg (regno, true))
5515 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
5517 gen_rtx_REG (Pmode, regno));
5518 RTX_FRAME_RELATED_P (insn) = 1;
5519 offset += UNITS_PER_WORD;
5523 /* Expand prologue or epilogue stack adjustment.
5524 The pattern exists to put a dependency on all ebp-based memory accesses.
5525 STYLE should be negative if instructions should be marked as frame related,
5526 zero if the %r11 register is live and cannot be freely used, and positive otherwise. */
5530 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
5535 insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
5536 else if (x86_64_immediate_operand (offset, DImode))
5537 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
5541 /* r11 is used by indirect sibcall return as well, set before the
5542 epilogue and used after the epilogue. At the moment, an indirect sibcall
5543 shouldn't be used together with huge frame sizes in one
5544 function because of the frame_size check in sibcall.c. */
5546 r11 = gen_rtx_REG (DImode, R11_REG);
5547 insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
5549 RTX_FRAME_RELATED_P (insn) = 1;
5550 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
5554 RTX_FRAME_RELATED_P (insn) = 1;
5557 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
5560 ix86_internal_arg_pointer (void)
5562 bool has_force_align_arg_pointer =
5563 (0 != lookup_attribute (ix86_force_align_arg_pointer_string,
5564 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))));
5565 if ((FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN
5566 && DECL_NAME (current_function_decl)
5567 && MAIN_NAME_P (DECL_NAME (current_function_decl))
5568 && DECL_FILE_SCOPE_P (current_function_decl))
5569 || ix86_force_align_arg_pointer
5570 || has_force_align_arg_pointer)
5572 /* Nested functions can't realign the stack due to a register conflict. */
5574 if (DECL_CONTEXT (current_function_decl)
5575 && TREE_CODE (DECL_CONTEXT (current_function_decl)) == FUNCTION_DECL)
5577 if (ix86_force_align_arg_pointer)
5578 warning (0, "-mstackrealign ignored for nested functions");
5579 if (has_force_align_arg_pointer)
5580 error ("%s not supported for nested functions",
5581 ix86_force_align_arg_pointer_string);
5582 return virtual_incoming_args_rtx;
5584 cfun->machine->force_align_arg_pointer = gen_rtx_REG (Pmode, 2);
5585 return copy_to_reg (cfun->machine->force_align_arg_pointer);
5588 return virtual_incoming_args_rtx;
5591 /* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook.
5592 This is called from dwarf2out.c to emit call frame instructions
5593 for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs. */
5595 ix86_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
5597 rtx unspec = SET_SRC (pattern);
5598 gcc_assert (GET_CODE (unspec) == UNSPEC);
5602 case UNSPEC_REG_SAVE:
5603 dwarf2out_reg_save_reg (label, XVECEXP (unspec, 0, 0),
5604 SET_DEST (pattern));
5606 case UNSPEC_DEF_CFA:
5607 dwarf2out_def_cfa (label, REGNO (SET_DEST (pattern)),
5608 INTVAL (XVECEXP (unspec, 0, 0)));
5615 /* Expand the prologue into a bunch of separate insns. */
5618 ix86_expand_prologue (void)
5622 struct ix86_frame frame;
5623 HOST_WIDE_INT allocate;
5625 ix86_compute_frame_layout (&frame);
5627 if (cfun->machine->force_align_arg_pointer)
5631 /* Grab the argument pointer. */
5632 x = plus_constant (stack_pointer_rtx, 4);
5633 y = cfun->machine->force_align_arg_pointer;
5634 insn = emit_insn (gen_rtx_SET (VOIDmode, y, x));
5635 RTX_FRAME_RELATED_P (insn) = 1;
5637 /* The unwind info consists of two parts: install the fafp as the cfa,
5638 and record the fafp as the "save register" of the stack pointer.
5639 The latter is there in order that the unwinder can see where it
5640 should restore the stack pointer across the stack-aligning and insn. */
5641 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx), UNSPEC_DEF_CFA);
5642 x = gen_rtx_SET (VOIDmode, y, x);
5643 RTX_FRAME_RELATED_P (x) = 1;
5644 y = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, stack_pointer_rtx),
5646 y = gen_rtx_SET (VOIDmode, cfun->machine->force_align_arg_pointer, y);
5647 RTX_FRAME_RELATED_P (y) = 1;
5648 x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y));
5649 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
5650 REG_NOTES (insn) = x;
5652 /* Align the stack. */
5653 emit_insn (gen_andsi3 (stack_pointer_rtx, stack_pointer_rtx,
5656 /* And here we cheat like madmen with the unwind info. We force the
5657 cfa register back to sp+4, which is exactly what it was at the
5658 start of the function. Re-pushing the return address results in
5659 the return at the same spot relative to the cfa, and thus is
5660 correct wrt the unwind info. */
5661 x = cfun->machine->force_align_arg_pointer;
5662 x = gen_frame_mem (Pmode, plus_constant (x, -4));
5663 insn = emit_insn (gen_push (x));
5664 RTX_FRAME_RELATED_P (insn) = 1;
5667 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, x), UNSPEC_DEF_CFA);
5668 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
5669 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
5670 REG_NOTES (insn) = x;
5673 /* Note: AT&T enter does NOT have reversed args. Enter is probably
5674 slower on all targets. Also sdb doesn't like it. */
5676 if (frame_pointer_needed)
5678 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
5679 RTX_FRAME_RELATED_P (insn) = 1;
5681 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
5682 RTX_FRAME_RELATED_P (insn) = 1;
5685 allocate = frame.to_allocate;
5687 if (!frame.save_regs_using_mov)
5688 ix86_emit_save_regs ();
5690 allocate += frame.nregs * UNITS_PER_WORD;
5692 /* When using the red zone we may start saving registers before allocating
5693 the stack frame, saving one cycle of the prologue. */
5694 if (TARGET_RED_ZONE && frame.save_regs_using_mov)
5695 ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
5696 : stack_pointer_rtx,
5697 -frame.nregs * UNITS_PER_WORD);
5701 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
5702 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5703 GEN_INT (-allocate), -1);
5706 /* Only valid for Win32. */
5707 rtx eax = gen_rtx_REG (SImode, 0);
5708 bool eax_live = ix86_eax_live_at_start_p ();
5711 gcc_assert (!TARGET_64BIT);
5715 emit_insn (gen_push (eax));
5719 emit_move_insn (eax, GEN_INT (allocate));
5721 insn = emit_insn (gen_allocate_stack_worker (eax));
5722 RTX_FRAME_RELATED_P (insn) = 1;
5723 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
5724 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
5725 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
5726 t, REG_NOTES (insn));
5730 if (frame_pointer_needed)
5731 t = plus_constant (hard_frame_pointer_rtx,
5734 - frame.nregs * UNITS_PER_WORD);
5736 t = plus_constant (stack_pointer_rtx, allocate);
5737 emit_move_insn (eax, gen_rtx_MEM (SImode, t));
5741 if (frame.save_regs_using_mov && !TARGET_RED_ZONE)
5743 if (!frame_pointer_needed || !frame.to_allocate)
5744 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
5746 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
5747 -frame.nregs * UNITS_PER_WORD);
5750 pic_reg_used = false;
5751 if (pic_offset_table_rtx
5752 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
5753 || current_function_profile))
5755 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
5757 if (alt_pic_reg_used != INVALID_REGNUM)
5758 REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
5760 pic_reg_used = true;
5767 if (ix86_cmodel == CM_LARGE_PIC)
5769 rtx tmp_reg = gen_rtx_REG (DImode,
5770 FIRST_REX_INT_REG + 3 /* R11 */);
5771 rtx label = gen_label_rtx ();
5773 LABEL_PRESERVE_P (label) = 1;
5774 gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
5775 insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx, label));
5776 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
5777 insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
5778 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
5779 insn = emit_insn (gen_adddi3 (pic_offset_table_rtx,
5780 pic_offset_table_rtx, tmp_reg));
5783 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
5786 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
5788 /* Even with accurate pre-reload life analysis, we can wind up
5789 deleting all references to the pic register after reload.
5790 Consider if cross-jumping unifies two sides of a branch
5791 controlled by a comparison vs the only read from a global.
5792 In which case, allow the set_got to be deleted, though we're
5793 too late to do anything about the ebx save in the prologue. */
5794 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
5797 /* Prevent function calls from being scheduled before the call to mcount.
5798 In the pic_reg_used case, make sure that the got load isn't deleted. */
5799 if (current_function_profile)
5800 emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
5803 /* Emit code to restore saved registers using MOV insns. The first register
5804 is restored from POINTER + OFFSET. */
5806 ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
5807 int maybe_eh_return)
5810 rtx base_address = gen_rtx_MEM (Pmode, pointer);
5812 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5813 if (ix86_save_reg (regno, maybe_eh_return))
5815 /* Ensure that adjust_address won't be forced to produce a pointer
5816 outside of the range allowed by the x86-64 instruction set. */
5817 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
5821 r11 = gen_rtx_REG (DImode, R11_REG);
5822 emit_move_insn (r11, GEN_INT (offset));
5823 emit_insn (gen_adddi3 (r11, r11, pointer));
5824 base_address = gen_rtx_MEM (Pmode, r11);
5827 emit_move_insn (gen_rtx_REG (Pmode, regno),
5828 adjust_address (base_address, Pmode, offset));
5829 offset += UNITS_PER_WORD;
5833 /* Restore function stack, frame, and registers. */
5836 ix86_expand_epilogue (int style)
5839 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
5840 struct ix86_frame frame;
5841 HOST_WIDE_INT offset;
5843 ix86_compute_frame_layout (&frame);
5845 /* Calculate start of saved registers relative to ebp. Special care
5846 must be taken for the normal return case of a function using
5847 eh_return: the eax and edx registers are marked as saved, but not
5848 restored along this path. */
5849 offset = frame.nregs;
5850 if (current_function_calls_eh_return && style != 2)
5852 offset *= -UNITS_PER_WORD;
5854 /* If we're only restoring one register and sp is not valid, then
5855 use a move instruction to restore the register, since it's
5856 less work than reloading sp and popping the register.
5858 The default code results in a stack adjustment using add/lea instructions,
5859 while this code results in a LEAVE instruction (or discrete equivalent),
5860 so it is profitable in some other cases as well. Especially when there
5861 are no registers to restore. We also use this code when TARGET_USE_LEAVE
5862 and there is exactly one register to pop. This heuristic may need some
5863 tuning in the future. */
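/* As an illustrative sketch (not emitted verbatim by this function), the
   two strategies compare roughly as follows on ia32 for two saved regs:

     mov-based restore:                 pop-based restore:
       movl -8(%ebp), %ebx               addl $frame, %esp
       movl -4(%ebp), %esi               popl %ebx
       leave                             popl %esi
                                         popl %ebp

   where "leave" itself is equivalent to "movl %ebp, %esp; popl %ebp".  */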
5864 if ((!sp_valid && frame.nregs <= 1)
5865 || (TARGET_EPILOGUE_USING_MOVE
5866 && cfun->machine->use_fast_prologue_epilogue
5867 && (frame.nregs > 1 || frame.to_allocate))
5868 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
5869 || (frame_pointer_needed && TARGET_USE_LEAVE
5870 && cfun->machine->use_fast_prologue_epilogue
5871 && frame.nregs == 1)
5872 || current_function_calls_eh_return)
5874 /* Restore registers. We can use ebp or esp to address the memory
5875 locations. If both are available, default to ebp, since offsets
5876 are known to be small. The only exception is esp pointing directly to the
5877 end of the block of saved registers, where we may simplify the addressing mode. */
5880 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
5881 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
5882 frame.to_allocate, style == 2);
5884 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
5885 offset, style == 2);
5887 /* eh_return epilogues need %ecx added to the stack pointer. */
5890 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
5892 if (frame_pointer_needed)
5894 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
5895 tmp = plus_constant (tmp, UNITS_PER_WORD);
5896 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
5898 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
5899 emit_move_insn (hard_frame_pointer_rtx, tmp);
5901 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
5906 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
5907 tmp = plus_constant (tmp, (frame.to_allocate
5908 + frame.nregs * UNITS_PER_WORD));
5909 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
5912 else if (!frame_pointer_needed)
5913 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5914 GEN_INT (frame.to_allocate
5915 + frame.nregs * UNITS_PER_WORD),
5917 /* If not an i386, mov & pop is faster than "leave". */
5918 else if (TARGET_USE_LEAVE || optimize_size
5919 || !cfun->machine->use_fast_prologue_epilogue)
5920 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5923 pro_epilogue_adjust_stack (stack_pointer_rtx,
5924 hard_frame_pointer_rtx,
5927 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5929 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
5934 /* First step is to deallocate the stack frame so that we can
5935 pop the registers. */
5938 gcc_assert (frame_pointer_needed);
5939 pro_epilogue_adjust_stack (stack_pointer_rtx,
5940 hard_frame_pointer_rtx,
5941 GEN_INT (offset), style);
5943 else if (frame.to_allocate)
5944 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5945 GEN_INT (frame.to_allocate), style);
5947 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5948 if (ix86_save_reg (regno, false))
5951 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
5953 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
5955 if (frame_pointer_needed)
5957 /* Leave results in shorter dependency chains on CPUs that are
5958 able to grok it fast. */
5959 if (TARGET_USE_LEAVE)
5960 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5961 else if (TARGET_64BIT)
5962 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5964 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
5968 if (cfun->machine->force_align_arg_pointer)
5970 emit_insn (gen_addsi3 (stack_pointer_rtx,
5971 cfun->machine->force_align_arg_pointer,
5975 /* Sibcall epilogues don't want a return instruction. */
5979 if (current_function_pops_args && current_function_args_size)
5981 rtx popc = GEN_INT (current_function_pops_args);
5983 /* i386 can only pop 64K bytes. If asked to pop more, pop
5984 return address, do an explicit add, and jump indirectly to the caller. */
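/* A sketch of the sequence emitted below (ia32 register names assumed):

     popl  %ecx          ; pop the return address
     addl  $popc, %esp   ; pop the >= 64K of arguments
     jmp   *%ecx         ; return to the caller

   versus the usual "ret $popc", whose immediate is only 16 bits wide.  */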
5987 if (current_function_pops_args >= 65536)
5989 rtx ecx = gen_rtx_REG (SImode, 2);
5991 /* There is no "pascal" calling convention in 64bit ABI. */
5992 gcc_assert (!TARGET_64BIT);
5994 emit_insn (gen_popsi1 (ecx));
5995 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
5996 emit_jump_insn (gen_return_indirect_internal (ecx));
5999 emit_jump_insn (gen_return_pop_internal (popc));
6002 emit_jump_insn (gen_return_internal ());
6005 /* Reset from the function's potential modifications. */
6008 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
6009 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
6011 if (pic_offset_table_rtx)
6012 REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
6014 /* Mach-O doesn't support labels at the end of objects, so if
6015 it looks like we might want one, insert a NOP. */
6017 rtx insn = get_last_insn ();
6020 && NOTE_LINE_NUMBER (insn) != NOTE_INSN_DELETED_LABEL)
6021 insn = PREV_INSN (insn);
6025 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_DELETED_LABEL)))
6026 fputs ("\tnop\n", file);
6032 /* Extract the parts of an RTL expression that is a valid memory address
6033 for an instruction. Return 0 if the structure of the address is
6034 grossly off. Return -1 if the address contains ASHIFT, so it is not
6035 strictly valid, but is still used for computing the length of the lea instruction. */
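/* For example, the ia32 address "disp(%ecx,%edx,4)" arrives here in the
   canonical form (a sketch, using hard register names):

     (plus (plus (mult (reg %edx) (const_int 4))
                 (reg %ecx))
           (const_int disp))

   and decomposes into out->base = %ecx, out->index = %edx,
   out->scale = 4 and out->disp = (const_int disp).  */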
6038 ix86_decompose_address (rtx addr, struct ix86_address *out)
6040 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
6041 rtx base_reg, index_reg;
6042 HOST_WIDE_INT scale = 1;
6043 rtx scale_rtx = NULL_RTX;
6045 enum ix86_address_seg seg = SEG_DEFAULT;
6047 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
6049 else if (GET_CODE (addr) == PLUS)
6059 addends[n++] = XEXP (op, 1);
6062 while (GET_CODE (op) == PLUS);
6067 for (i = n; i >= 0; --i)
6070 switch (GET_CODE (op))
6075 index = XEXP (op, 0);
6076 scale_rtx = XEXP (op, 1);
6080 if (XINT (op, 1) == UNSPEC_TP
6081 && TARGET_TLS_DIRECT_SEG_REFS
6082 && seg == SEG_DEFAULT)
6083 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
6112 else if (GET_CODE (addr) == MULT)
6114 index = XEXP (addr, 0); /* index*scale */
6115 scale_rtx = XEXP (addr, 1);
6117 else if (GET_CODE (addr) == ASHIFT)
6121 /* We're called for lea too, which implements ashift on occasion. */
6122 index = XEXP (addr, 0);
6123 tmp = XEXP (addr, 1);
6124 if (!CONST_INT_P (tmp))
6126 scale = INTVAL (tmp);
6127 if ((unsigned HOST_WIDE_INT) scale > 3)
6133 disp = addr; /* displacement */
6135 /* Extract the integral value of scale. */
6138 if (!CONST_INT_P (scale_rtx))
6140 scale = INTVAL (scale_rtx);
6143 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
6144 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
6146 /* Allow arg pointer and stack pointer as index if there is no scaling. */
6147 if (base_reg && index_reg && scale == 1
6148 && (index_reg == arg_pointer_rtx
6149 || index_reg == frame_pointer_rtx
6150 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
6153 tmp = base, base = index, index = tmp;
6154 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
6157 /* Special case: %ebp cannot be encoded as a base without a displacement. */
6158 if ((base_reg == hard_frame_pointer_rtx
6159 || base_reg == frame_pointer_rtx
6160 || base_reg == arg_pointer_rtx) && !disp)
6163 /* Special case: on K6, [%esi] causes the instruction to be vector decoded.
6164 Avoid this by transforming to [%esi+0]. */
6165 if (ix86_tune == PROCESSOR_K6 && !optimize_size
6166 && base_reg && !index_reg && !disp
6168 && REGNO_REG_CLASS (REGNO (base_reg)) == SIREG)
6171 /* Special case: encode reg+reg instead of reg*2. */
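  /* E.g. prefer "lea (%eax,%eax), %edx" over "lea (,%eax,2), %edx":
     a SIB byte with no base forces a 32-bit displacement, so the
     reg+reg form encodes shorter (an illustrative sketch).  */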
6172 if (!base && index && scale && scale == 2)
6173 base = index, base_reg = index_reg, scale = 1;
6175 /* Special case: scaling cannot be encoded without base or displacement. */
6176 if (!base && !disp && index && scale != 1)
6188 /* Return cost of the memory address x.
6189 For i386, it is better to use a complex address than let gcc copy
6190 the address into a reg and make a new pseudo. But not if the address
6191 requires two regs - that would mean more pseudos with longer lifetimes. */
6194 ix86_address_cost (rtx x)
6196 struct ix86_address parts;
6198 int ok = ix86_decompose_address (x, &parts);
6202 if (parts.base && GET_CODE (parts.base) == SUBREG)
6203 parts.base = SUBREG_REG (parts.base);
6204 if (parts.index && GET_CODE (parts.index) == SUBREG)
6205 parts.index = SUBREG_REG (parts.index);
6207 /* More complex memory references are better. */
6208 if (parts.disp && parts.disp != const0_rtx)
6210 if (parts.seg != SEG_DEFAULT)
6213 /* Attempt to minimize number of registers in the address. */
6215 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
6217 && (!REG_P (parts.index)
6218 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
6222 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
6224 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
6225 && parts.base != parts.index)
6228 /* AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
6229 since its predecode logic can't detect the length of instructions
6230 and it degenerates to vector decoded. Increase the cost of such
6231 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
6232 to split such addresses or even refuse such addresses at all.
6234 The following addressing modes are affected:
6235 [base+scale*index]
6236 [scale*index+disp]
6237 [base+index]
6239 The first and last case may be avoidable by explicitly coding the zero in
6240 the memory address, but I don't have an AMD-K6 machine handy to check this
6241 theory. */
6244 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
6245 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
6246 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
6252 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
6253 this is used to form addresses to local data when -fPIC is in effect. */
6257 darwin_local_data_pic (rtx disp)
6259 if (GET_CODE (disp) == MINUS)
6261 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
6262 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
6263 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
6265 const char *sym_name = XSTR (XEXP (disp, 1), 0);
6266 if (! strcmp (sym_name, "<pic base>"))
6274 /* Determine if a given RTX is a valid constant. We already know this
6275 satisfies CONSTANT_P. */
6278 legitimate_constant_p (rtx x)
6280 switch (GET_CODE (x))
6285 if (GET_CODE (x) == PLUS)
6287 if (!CONST_INT_P (XEXP (x, 1)))
6292 if (TARGET_MACHO && darwin_local_data_pic (x))
6295 /* Only some unspecs are valid as "constants". */
6296 if (GET_CODE (x) == UNSPEC)
6297 switch (XINT (x, 1))
6302 return TARGET_64BIT;
6305 x = XVECEXP (x, 0, 0);
6306 return (GET_CODE (x) == SYMBOL_REF
6307 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
6309 x = XVECEXP (x, 0, 0);
6310 return (GET_CODE (x) == SYMBOL_REF
6311 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
6316 /* We must have drilled down to a symbol. */
6317 if (GET_CODE (x) == LABEL_REF)
6319 if (GET_CODE (x) != SYMBOL_REF)
6324 /* TLS symbols are never valid. */
6325 if (SYMBOL_REF_TLS_MODEL (x))
6330 if (GET_MODE (x) == TImode
6331 && x != CONST0_RTX (TImode)
6337 if (x == CONST0_RTX (GET_MODE (x)))
6345 /* Otherwise we handle everything else in the move patterns. */
6349 /* Determine if it's legal to put X into the constant pool. This
6350 is not possible for the address of thread-local symbols, which
6351 is checked above. */
6354 ix86_cannot_force_const_mem (rtx x)
6356 /* We can always put integral constants and vectors in memory. */
6357 switch (GET_CODE (x))
6367 return !legitimate_constant_p (x);
6370 /* Determine if a given RTX is a valid constant address. */
6373 constant_address_p (rtx x)
6375 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
6378 /* Nonzero if the constant value X is a legitimate general operand
6379 when generating PIC code. It is given that flag_pic is on and
6380 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
6383 legitimate_pic_operand_p (rtx x)
6387 switch (GET_CODE (x))
6390 inner = XEXP (x, 0);
6391 if (GET_CODE (inner) == PLUS
6392 && CONST_INT_P (XEXP (inner, 1)))
6393 inner = XEXP (inner, 0);
6395 /* Only some unspecs are valid as "constants". */
6396 if (GET_CODE (inner) == UNSPEC)
6397 switch (XINT (inner, 1))
6402 return TARGET_64BIT;
6404 x = XVECEXP (inner, 0, 0);
6405 return (GET_CODE (x) == SYMBOL_REF
6406 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
6414 return legitimate_pic_address_disp_p (x);
6421 /* Determine if a given CONST RTX is a valid memory displacement in PIC mode. */
6425 legitimate_pic_address_disp_p (rtx disp)
6429 /* In 64bit mode we can allow direct addresses of symbols and labels
6430 when they are not dynamic symbols. */
6433 rtx op0 = disp, op1;
6435 switch (GET_CODE (disp))
6441 if (GET_CODE (XEXP (disp, 0)) != PLUS)
6443 op0 = XEXP (XEXP (disp, 0), 0);
6444 op1 = XEXP (XEXP (disp, 0), 1);
6445 if (!CONST_INT_P (op1)
6446 || INTVAL (op1) >= 16*1024*1024
6447 || INTVAL (op1) < -16*1024*1024)
6449 if (GET_CODE (op0) == LABEL_REF)
6451 if (GET_CODE (op0) != SYMBOL_REF)
6456 /* TLS references should always be enclosed in UNSPEC. */
6457 if (SYMBOL_REF_TLS_MODEL (op0))
6459 if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0)
6460 && ix86_cmodel != CM_LARGE_PIC)
6468 if (GET_CODE (disp) != CONST)
6470 disp = XEXP (disp, 0);
6474 /* It is unsafe to allow PLUS expressions here; refusing them limits the
6475 allowed distance of GOT references. We should not need these anyway. */
6476 if (GET_CODE (disp) != UNSPEC
6477 || (XINT (disp, 1) != UNSPEC_GOTPCREL
6478 && XINT (disp, 1) != UNSPEC_GOTOFF
6479 && XINT (disp, 1) != UNSPEC_PLTOFF))
6482 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
6483 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
6489 if (GET_CODE (disp) == PLUS)
6491 if (!CONST_INT_P (XEXP (disp, 1)))
6493 disp = XEXP (disp, 0);
6497 if (TARGET_MACHO && darwin_local_data_pic (disp))
6500 if (GET_CODE (disp) != UNSPEC)
6503 switch (XINT (disp, 1))
6508 /* We need to check for both symbols and labels because VxWorks loads
6509 text labels with @GOT rather than @GOTOFF. See gotoff_operand for details. */
6511 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
6512 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
6514 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
6515 While the ABI also specifies a 32bit relocation, we don't produce
6516 it in the small PIC model at all. */
6517 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
6518 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
6520 return gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
6522 case UNSPEC_GOTTPOFF:
6523 case UNSPEC_GOTNTPOFF:
6524 case UNSPEC_INDNTPOFF:
6527 disp = XVECEXP (disp, 0, 0);
6528 return (GET_CODE (disp) == SYMBOL_REF
6529 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
6531 disp = XVECEXP (disp, 0, 0);
6532 return (GET_CODE (disp) == SYMBOL_REF
6533 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
6535 disp = XVECEXP (disp, 0, 0);
6536 return (GET_CODE (disp) == SYMBOL_REF
6537 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
6543 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
6544 memory address for an instruction. The MODE argument is the machine mode
6545 for the MEM expression that wants to use this address.
6547 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
6548 convert common non-canonical forms to canonical form so that they will
6549 be recognized. */
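/* For instance, (plus (reg) (ashift (reg) (const_int 2))) is not accepted
   here; the ashift is expected to have been rewritten into the canonical
   (mult (reg) (const_int 4)) form first -- a sketch; see the shift
   canonicalization in legitimize_address below.  */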
6552 legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
6553 rtx addr, int strict)
6555 struct ix86_address parts;
6556 rtx base, index, disp;
6557 HOST_WIDE_INT scale;
6558 const char *reason = NULL;
6559 rtx reason_rtx = NULL_RTX;
6561 if (ix86_decompose_address (addr, &parts) <= 0)
6563 reason = "decomposition failed";
6568 index = parts.index;
6570 scale = parts.scale;
6572 /* Validate base register.
6574 Don't allow SUBREG's that span more than a word here. It can lead to spill
6575 failures when the base is one word out of a two word structure, which is
6576 represented internally as a DImode int. */
6585 else if (GET_CODE (base) == SUBREG
6586 && REG_P (SUBREG_REG (base))
6587 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
6589 reg = SUBREG_REG (base);
6592 reason = "base is not a register";
6596 if (GET_MODE (base) != Pmode)
6598 reason = "base is not in Pmode";
6602 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
6603 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
6605 reason = "base is not valid";
6610 /* Validate index register.
6612 Don't allow SUBREG's that span more than a word here -- same as above. */
6621 else if (GET_CODE (index) == SUBREG
6622 && REG_P (SUBREG_REG (index))
6623 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
6625 reg = SUBREG_REG (index);
6628 reason = "index is not a register";
6632 if (GET_MODE (index) != Pmode)
6634 reason = "index is not in Pmode";
6638 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
6639 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
6641 reason = "index is not valid";
6646 /* Validate scale factor. */
6649 reason_rtx = GEN_INT (scale);
6652 reason = "scale without index";
6656 if (scale != 2 && scale != 4 && scale != 8)
6658 reason = "scale is not a valid multiplier";
6663 /* Validate displacement. */
6668 if (GET_CODE (disp) == CONST
6669 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
6670 switch (XINT (XEXP (disp, 0), 1))
6672 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
6673 used. While the ABI also specifies 32bit relocations, we don't produce
6674 them at all and use IP relative addressing instead. */
6677 gcc_assert (flag_pic);
6679 goto is_legitimate_pic;
6680 reason = "64bit address unspec";
6683 case UNSPEC_GOTPCREL:
6684 gcc_assert (flag_pic);
6685 goto is_legitimate_pic;
6687 case UNSPEC_GOTTPOFF:
6688 case UNSPEC_GOTNTPOFF:
6689 case UNSPEC_INDNTPOFF:
6695 reason = "invalid address unspec";
6699 else if (SYMBOLIC_CONST (disp)
6703 && MACHOPIC_INDIRECT
6704 && !machopic_operand_p (disp)
6710 if (TARGET_64BIT && (index || base))
6712 /* foo@dtpoff(%rX) is ok. */
6713 if (GET_CODE (disp) != CONST
6714 || GET_CODE (XEXP (disp, 0)) != PLUS
6715 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
6716 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
6717 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
6718 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
6720 reason = "non-constant pic memory reference";
6724 else if (! legitimate_pic_address_disp_p (disp))
6726 reason = "displacement is an invalid pic construct";
6730 /* This code used to verify that a symbolic pic displacement
6731 includes the pic_offset_table_rtx register.
6733 While this is a good idea, unfortunately these constructs may
6734 be created by the "adds using lea" optimization for incorrect
6735 code like:
6737 int a;
6738 int foo(int i)
6739 {
6740 return *(&a+i);
6741 }
6743 This code is nonsensical, but results in addressing
6744 GOT table with pic_offset_table_rtx base. We can't
6745 just refuse it easily, since it gets matched by the
6746 "addsi3" pattern, which later gets split to lea when the
6747 output register differs from the input. While this
6748 could be handled by a separate addsi pattern for this case
6749 that never results in lea, disabling this test seems to be
6750 the easier and correct fix for the crash. */
6752 else if (GET_CODE (disp) != LABEL_REF
6753 && !CONST_INT_P (disp)
6754 && (GET_CODE (disp) != CONST
6755 || !legitimate_constant_p (disp))
6756 && (GET_CODE (disp) != SYMBOL_REF
6757 || !legitimate_constant_p (disp)))
6759 reason = "displacement is not constant";
6762 else if (TARGET_64BIT
6763 && !x86_64_immediate_operand (disp, VOIDmode))
6765 reason = "displacement is out of range";
6770 /* Everything looks valid. */
6777 /* Return a unique alias set for the GOT. */
6779 static HOST_WIDE_INT
6780 ix86_GOT_alias_set (void)
6782 static HOST_WIDE_INT set = -1;
6784 set = new_alias_set ();
6788 /* Return a legitimate reference for ORIG (an address) using the
6789 register REG. If REG is 0, a new pseudo is generated.
6791 There are two types of references that must be handled:
6793 1. Global data references must load the address from the GOT, via
6794 the PIC reg. An insn is emitted to do this load, and the reg is
6797 2. Static data references, constant pool addresses, and code labels
6798 compute the address as an offset from the GOT, whose base is in
6799 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
6800 differentiate them from global data objects. The returned
6801 address is the PIC reg + an unspec constant.
6803 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
6804 reg also appears in the address. */
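/* A rough ia32 sketch of the two cases, assuming %ebx holds the PIC reg:

     1. global data:  movl foo@GOT(%ebx), %reg     ; load address from GOT
     2. local data:   leal foo@GOTOFF(%ebx), %reg  ; PIC base + offset  */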
6807 legitimize_pic_address (rtx orig, rtx reg)
6814 if (TARGET_MACHO && !TARGET_64BIT)
6817 reg = gen_reg_rtx (Pmode);
6818 /* Use the generic Mach-O PIC machinery. */
6819 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
6823 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
6825 else if (TARGET_64BIT
6826 && ix86_cmodel != CM_SMALL_PIC
6827 && gotoff_operand (addr, Pmode))
6830 /* This symbol may be referenced via a displacement from the PIC
6831 base address (@GOTOFF). */
6833 if (reload_in_progress)
6834 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6835 if (GET_CODE (addr) == CONST)
6836 addr = XEXP (addr, 0);
6837 if (GET_CODE (addr) == PLUS)
6839 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
6840 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
6843 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
6844 new = gen_rtx_CONST (Pmode, new);
6846 tmpreg = gen_reg_rtx (Pmode);
6849 emit_move_insn (tmpreg, new);
6853 new = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
6854 tmpreg, 1, OPTAB_DIRECT);
6857 else new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
6859 else if (!TARGET_64BIT && gotoff_operand (addr, Pmode))
6861 /* This symbol may be referenced via a displacement from the PIC
6862 base address (@GOTOFF). */
6864 if (reload_in_progress)
6865 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6866 if (GET_CODE (addr) == CONST)
6867 addr = XEXP (addr, 0);
6868 if (GET_CODE (addr) == PLUS)
6870 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
6871 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
6874 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
6875 new = gen_rtx_CONST (Pmode, new);
6876 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6880 emit_move_insn (reg, new);
6884 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
6885 /* We can't use @GOTOFF for text labels on VxWorks;
6886 see gotoff_operand. */
6887 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
6889 if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
6891 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
6892 new = gen_rtx_CONST (Pmode, new);
6893 new = gen_const_mem (Pmode, new);
6894 set_mem_alias_set (new, ix86_GOT_alias_set ());
6897 reg = gen_reg_rtx (Pmode);
6898 /* Use gen_movsi directly; otherwise the address is loaded
6899 into a register for CSE. We don't want to CSE these addresses;
6900 instead we CSE addresses from the GOT table, so skip this. */
6901 emit_insn (gen_movsi (reg, new));
6906 /* This symbol must be referenced via a load from the
6907 Global Offset Table (@GOT). */
6909 if (reload_in_progress)
6910 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6911 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
6912 new = gen_rtx_CONST (Pmode, new);
6914 new = force_reg (Pmode, new);
6915 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6916 new = gen_const_mem (Pmode, new);
6917 set_mem_alias_set (new, ix86_GOT_alias_set ());
6920 reg = gen_reg_rtx (Pmode);
6921 emit_move_insn (reg, new);
6927 if (CONST_INT_P (addr)
6928 && !x86_64_immediate_operand (addr, VOIDmode))
6932 emit_move_insn (reg, addr);
6936 new = force_reg (Pmode, addr);
6938 else if (GET_CODE (addr) == CONST)
6940 addr = XEXP (addr, 0);
6942 /* We must match stuff we generate before. Assume the only
6943 unspecs that can get here are ours. Not that we could do
6944 anything with them anyway.... */
6945 if (GET_CODE (addr) == UNSPEC
6946 || (GET_CODE (addr) == PLUS
6947 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
6949 gcc_assert (GET_CODE (addr) == PLUS);
6951 if (GET_CODE (addr) == PLUS)
6953 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
6955 /* Check first to see if this is a constant offset from a @GOTOFF
6956 symbol reference. */
6957 if (gotoff_operand (op0, Pmode)
6958 && CONST_INT_P (op1))
6962 if (reload_in_progress)
6963 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6964 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
6966 new = gen_rtx_PLUS (Pmode, new, op1);
6967 new = gen_rtx_CONST (Pmode, new);
6968 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6972 emit_move_insn (reg, new);
6978 if (INTVAL (op1) < -16*1024*1024
6979 || INTVAL (op1) >= 16*1024*1024)
6981 if (!x86_64_immediate_operand (op1, Pmode))
6982 op1 = force_reg (Pmode, op1);
6983 new = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
6989 base = legitimize_pic_address (XEXP (addr, 0), reg);
6990 new = legitimize_pic_address (XEXP (addr, 1),
6991 base == reg ? NULL_RTX : reg);
6993 if (CONST_INT_P (new))
6994 new = plus_constant (base, INTVAL (new));
6997 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
6999 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
7000 new = XEXP (new, 1);
7002 new = gen_rtx_PLUS (Pmode, base, new);
7010 /* Load the thread pointer. If TO_REG is true, force it into a register. */
7013 get_thread_pointer (int to_reg)
7017 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
7021 reg = gen_reg_rtx (Pmode);
7022 insn = gen_rtx_SET (VOIDmode, reg, tp);
7023 insn = emit_insn (insn);
7028 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
7029 false if we expect this to be used for a memory address and true if
7030 we expect to load the address into a register. */
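/* Rough ia32 sketches of the sequences implied by each model (the exact
   code varies with TARGET_64BIT and TARGET_GNU2_TLS):

     global dynamic:  leal x@tlsgd(,%ebx,1), %eax; call ___tls_get_addr
     local dynamic:   leal x@tlsldm(%ebx), %eax; call ___tls_get_addr,
                      then x is addressed as %eax + x@dtpoff
     initial exec:    movl %gs:0, %reg; addl x@gotntpoff(%ebx), %reg
     local exec:      movl %gs:0, %reg; leal x@ntpoff(%reg), %reg  */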
7033 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
7035 rtx dest, base, off, pic, tp;
7040 case TLS_MODEL_GLOBAL_DYNAMIC:
7041 dest = gen_reg_rtx (Pmode);
7042 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
7044 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
7046 rtx rax = gen_rtx_REG (Pmode, 0), insns;
7049 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
7050 insns = get_insns ();
7053 emit_libcall_block (insns, dest, rax, x);
7055 else if (TARGET_64BIT && TARGET_GNU2_TLS)
7056 emit_insn (gen_tls_global_dynamic_64 (dest, x));
7058 emit_insn (gen_tls_global_dynamic_32 (dest, x));
7060 if (TARGET_GNU2_TLS)
7062 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
7064 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
7068 case TLS_MODEL_LOCAL_DYNAMIC:
7069 base = gen_reg_rtx (Pmode);
7070 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
7072 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
7074 rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
7077 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
7078 insns = get_insns ();
7081 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
7082 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
7083 emit_libcall_block (insns, base, rax, note);
7085 else if (TARGET_64BIT && TARGET_GNU2_TLS)
7086 emit_insn (gen_tls_local_dynamic_base_64 (base));
7088 emit_insn (gen_tls_local_dynamic_base_32 (base));
7090 if (TARGET_GNU2_TLS)
7092 rtx x = ix86_tls_module_base ();
7094 set_unique_reg_note (get_last_insn (), REG_EQUIV,
7095 gen_rtx_MINUS (Pmode, x, tp));
7098 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
7099 off = gen_rtx_CONST (Pmode, off);
7101 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
7103 if (TARGET_GNU2_TLS)
7105 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
7107 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
7112 case TLS_MODEL_INITIAL_EXEC:
7116 type = UNSPEC_GOTNTPOFF;
7120 if (reload_in_progress)
7121 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
7122 pic = pic_offset_table_rtx;
7123 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
7125 else if (!TARGET_ANY_GNU_TLS)
7127 pic = gen_reg_rtx (Pmode);
7128 emit_insn (gen_set_got (pic));
7129 type = UNSPEC_GOTTPOFF;
7134 type = UNSPEC_INDNTPOFF;
7137 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
7138 off = gen_rtx_CONST (Pmode, off);
7140 off = gen_rtx_PLUS (Pmode, pic, off);
7141 off = gen_const_mem (Pmode, off);
7142 set_mem_alias_set (off, ix86_GOT_alias_set ());
7144 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7146 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
7147 off = force_reg (Pmode, off);
7148 return gen_rtx_PLUS (Pmode, base, off);
7152 base = get_thread_pointer (true);
7153 dest = gen_reg_rtx (Pmode);
7154 emit_insn (gen_subsi3 (dest, base, off));
7158 case TLS_MODEL_LOCAL_EXEC:
7159 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
7160 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7161 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
7162 off = gen_rtx_CONST (Pmode, off);
7164 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7166 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
7167 return gen_rtx_PLUS (Pmode, base, off);
7171 base = get_thread_pointer (true);
7172 dest = gen_reg_rtx (Pmode);
7173 emit_insn (gen_subsi3 (dest, base, off));
7184 /* Try machine-dependent ways of modifying an illegitimate address
7185 to be legitimate. If we find one, return the new, valid address.
7186 This macro is used in only one place: `memory_address' in explow.c.
7188 OLDX is the address as it was before break_out_memory_refs was called.
7189 In some cases it is useful to look at this to decide what needs to be done.
7191 MODE and WIN are passed so that this macro can use
7192 GO_IF_LEGITIMATE_ADDRESS.
7194 It is always safe for this macro to do nothing. It exists to recognize
7195 opportunities to optimize the output.
7197 For the 80386, we handle X+REG by loading X into a register R and
7198 using R+REG. R will go in a general reg and indexing will be used.
7199 However, if REG is a broken-out memory address or multiplication,
7200 nothing needs to be done because REG can certainly go in a general reg.
7202 When -fpic is used, special handling is needed for symbolic references.
7203 See comments by legitimize_pic_address in i386.c for details. */
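/* For example, given (plus (symbol_ref "x") (mult (reg) (const_int 4))),
   the symbol is forced into a fresh register R and the result becomes
   (plus R (mult (reg) (const_int 4))), which the hardware can encode
   directly (an illustrative sketch).  */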
7206 legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
7211 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
7213 return legitimize_tls_address (x, log, false);
7214 if (GET_CODE (x) == CONST
7215 && GET_CODE (XEXP (x, 0)) == PLUS
7216 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
7217 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
7219 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0), log, false);
7220 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
7223 if (flag_pic && SYMBOLIC_CONST (x))
7224 return legitimize_pic_address (x, 0);
7226 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
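  /* E.g. (ashift (reg) (const_int 3)) becomes (mult (reg) (const_int 8)),
     matching the index*scale form that the addressing hardware encodes.  */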
7227 if (GET_CODE (x) == ASHIFT
7228 && CONST_INT_P (XEXP (x, 1))
7229 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
7232 log = INTVAL (XEXP (x, 1));
7233 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
7234 GEN_INT (1 << log));
7237 if (GET_CODE (x) == PLUS)
7239 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
7241 if (GET_CODE (XEXP (x, 0)) == ASHIFT
7242 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
7243 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
7246 log = INTVAL (XEXP (XEXP (x, 0), 1));
7247 XEXP (x, 0) = gen_rtx_MULT (Pmode,
7248 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
7249 GEN_INT (1 << log));
7252 if (GET_CODE (XEXP (x, 1)) == ASHIFT
7253 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
7254 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
7257 log = INTVAL (XEXP (XEXP (x, 1), 1));
7258 XEXP (x, 1) = gen_rtx_MULT (Pmode,
7259 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
7260 GEN_INT (1 << log));
7263 /* Put multiply first if it isn't already. */
7264 if (GET_CODE (XEXP (x, 1)) == MULT)
7266 rtx tmp = XEXP (x, 0);
7267 XEXP (x, 0) = XEXP (x, 1);
7272 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
7273 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
7274 created by virtual register instantiation, register elimination, and
7275 similar optimizations. */
7276 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
7279 x = gen_rtx_PLUS (Pmode,
7280 gen_rtx_PLUS (Pmode, XEXP (x, 0),
7281 XEXP (XEXP (x, 1), 0)),
7282 XEXP (XEXP (x, 1), 1));
7286 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
7287 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
7288 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
7289 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
7290 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
7291 && CONSTANT_P (XEXP (x, 1)))
7294 rtx other = NULL_RTX;
7296 if (CONST_INT_P (XEXP (x, 1)))
7298 constant = XEXP (x, 1);
7299 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
7301 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
7303 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
7304 other = XEXP (x, 1);
7312 x = gen_rtx_PLUS (Pmode,
7313 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
7314 XEXP (XEXP (XEXP (x, 0), 1), 0)),
7315 plus_constant (other, INTVAL (constant)));
7319 if (changed && legitimate_address_p (mode, x, FALSE))
7322 if (GET_CODE (XEXP (x, 0)) == MULT)
7325 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
7328 if (GET_CODE (XEXP (x, 1)) == MULT)
7331 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
7335 && REG_P (XEXP (x, 1))
7336 && REG_P (XEXP (x, 0)))
7339 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
7342 x = legitimize_pic_address (x, 0);
7345 if (changed && legitimate_address_p (mode, x, FALSE))
7348 if (REG_P (XEXP (x, 0)))
7350 rtx temp = gen_reg_rtx (Pmode);
7351 rtx val = force_operand (XEXP (x, 1), temp);
7353 emit_move_insn (temp, val);
7359 else if (REG_P (XEXP (x, 1)))
7361 rtx temp = gen_reg_rtx (Pmode);
7362 rtx val = force_operand (XEXP (x, 0), temp);
7364 emit_move_insn (temp, val);
7374 /* Print an integer constant expression in assembler syntax. Addition
7375 and subtraction are the only arithmetic that may appear in these
7376 expressions. FILE is the stdio stream to write to, X is the rtx, and
7377 CODE is the operand print code from the output string. */
7380 output_pic_addr_const (FILE *file, rtx x, int code)
7384 switch (GET_CODE (x))
7387 gcc_assert (flag_pic);
7392 if (! TARGET_MACHO || TARGET_64BIT)
7393 output_addr_const (file, x);
7396 const char *name = XSTR (x, 0);
7398 /* Mark the decl as referenced so that cgraph will output the function. */
7399 if (SYMBOL_REF_DECL (x))
7400 mark_decl_referenced (SYMBOL_REF_DECL (x));
7403 if (MACHOPIC_INDIRECT
7404 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
7405 name = machopic_indirection_name (x, /*stub_p=*/true);
7407 assemble_name (file, name);
7409 if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
7410 fputs ("@PLT", file);
7417 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
7418 assemble_name (asm_out_file, buf);
7422 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
7426 /* This used to output parentheses around the expression,
7427 but that does not work on the 386 (either ATT or BSD assembler). */
7428 output_pic_addr_const (file, XEXP (x, 0), code);
7432 if (GET_MODE (x) == VOIDmode)
7434 /* We can use %d if the number is <32 bits and positive. */
7435 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
7436 fprintf (file, "0x%lx%08lx",
7437 (unsigned long) CONST_DOUBLE_HIGH (x),
7438 (unsigned long) CONST_DOUBLE_LOW (x));
7440 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
7443 /* We can't handle floating point constants;
7444 PRINT_OPERAND must handle them. */
7445 output_operand_lossage ("floating constant misused");
7449 /* Some assemblers need integer constants to appear first. */
7450 if (CONST_INT_P (XEXP (x, 0)))
7452 output_pic_addr_const (file, XEXP (x, 0), code);
7454 output_pic_addr_const (file, XEXP (x, 1), code);
7458 gcc_assert (CONST_INT_P (XEXP (x, 1)));
7459 output_pic_addr_const (file, XEXP (x, 1), code);
7461 output_pic_addr_const (file, XEXP (x, 0), code);
7467 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
7468 output_pic_addr_const (file, XEXP (x, 0), code);
7470 output_pic_addr_const (file, XEXP (x, 1), code);
7472 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
7476 gcc_assert (XVECLEN (x, 0) == 1);
7477 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
7478 switch (XINT (x, 1))
7481 fputs ("@GOT", file);
7484 fputs ("@GOTOFF", file);
7487 fputs ("@PLTOFF", file);
7489 case UNSPEC_GOTPCREL:
7490 fputs ("@GOTPCREL(%rip)", file);
7492 case UNSPEC_GOTTPOFF:
7493 /* FIXME: This might be @TPOFF in Sun ld too. */
7494 fputs ("@GOTTPOFF", file);
7497 fputs ("@TPOFF", file);
7501 fputs ("@TPOFF", file);
7503 fputs ("@NTPOFF", file);
7506 fputs ("@DTPOFF", file);
7508 case UNSPEC_GOTNTPOFF:
7510 fputs ("@GOTTPOFF(%rip)", file);
7512 fputs ("@GOTNTPOFF", file);
7514 case UNSPEC_INDNTPOFF:
7515 fputs ("@INDNTPOFF", file);
7518 output_operand_lossage ("invalid UNSPEC as operand");
7524 output_operand_lossage ("invalid expression as operand");
7528 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
7529 We need to emit DTP-relative relocations. */
7531 static void ATTRIBUTE_UNUSED
7532 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
7534 fputs (ASM_LONG, file);
7535 output_addr_const (file, x);
7536 fputs ("@DTPOFF", file);
7542 fputs (", 0", file);
7549 /* In the name of slightly smaller debug output, and to cater to
7550 general assembler lossage, recognize PIC+GOTOFF and turn it back
7551 into a direct symbol reference.
7553 On Darwin, this is necessary to avoid a crash, because Darwin
7554 has a different PIC label for each routine but the DWARF debugging
7555 information is not associated with any particular routine, so it's
7556 necessary to remove references to the PIC label from RTL stored by
7557 the DWARF output code. */
7560 ix86_delegitimize_address (rtx orig_x)
7563 /* reg_addend is NULL or a multiple of some register. */
7564 rtx reg_addend = NULL_RTX;
7565 /* const_addend is NULL or a const_int. */
7566 rtx const_addend = NULL_RTX;
7567 /* This is the result, or NULL. */
7568 rtx result = NULL_RTX;
7575 if (GET_CODE (x) != CONST
7576 || GET_CODE (XEXP (x, 0)) != UNSPEC
7577 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
7580 return XVECEXP (XEXP (x, 0), 0, 0);
7583 if (GET_CODE (x) != PLUS
7584 || GET_CODE (XEXP (x, 1)) != CONST)
7587 if (REG_P (XEXP (x, 0))
7588 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
7589 /* %ebx + GOT/GOTOFF */
7591 else if (GET_CODE (XEXP (x, 0)) == PLUS)
7593 /* %ebx + %reg * scale + GOT/GOTOFF */
7594 reg_addend = XEXP (x, 0);
7595 if (REG_P (XEXP (reg_addend, 0))
7596 && REGNO (XEXP (reg_addend, 0)) == PIC_OFFSET_TABLE_REGNUM)
7597 reg_addend = XEXP (reg_addend, 1);
7598 else if (REG_P (XEXP (reg_addend, 1))
7599 && REGNO (XEXP (reg_addend, 1)) == PIC_OFFSET_TABLE_REGNUM)
7600 reg_addend = XEXP (reg_addend, 0);
7603 if (!REG_P (reg_addend)
7604 && GET_CODE (reg_addend) != MULT
7605 && GET_CODE (reg_addend) != ASHIFT)
7611 x = XEXP (XEXP (x, 1), 0);
7612 if (GET_CODE (x) == PLUS
7613 && CONST_INT_P (XEXP (x, 1)))
7615 const_addend = XEXP (x, 1);
7619 if (GET_CODE (x) == UNSPEC
7620 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x))
7621 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))))
7622 result = XVECEXP (x, 0, 0);
7624 if (TARGET_MACHO && darwin_local_data_pic (x)
7626 result = XEXP (x, 0);
7632 result = gen_rtx_PLUS (Pmode, result, const_addend);
7634 result = gen_rtx_PLUS (Pmode, reg_addend, result);
7638 /* If X is a machine specific address (i.e. a symbol or label being
7639 referenced as a displacement from the GOT implemented using an
7640 UNSPEC), then return the base term. Otherwise return X. */
7643 ix86_find_base_term (rtx x)
7649 if (GET_CODE (x) != CONST)
7652 if (GET_CODE (term) == PLUS
7653 && (CONST_INT_P (XEXP (term, 1))
7654 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
7655 term = XEXP (term, 0);
7656 if (GET_CODE (term) != UNSPEC
7657 || XINT (term, 1) != UNSPEC_GOTPCREL)
7660 term = XVECEXP (term, 0, 0);
7662 if (GET_CODE (term) != SYMBOL_REF
7663 && GET_CODE (term) != LABEL_REF)
7669 term = ix86_delegitimize_address (x);
7671 if (GET_CODE (term) != SYMBOL_REF
7672 && GET_CODE (term) != LABEL_REF)
7679 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
7684 if (mode == CCFPmode || mode == CCFPUmode)
7686 enum rtx_code second_code, bypass_code;
7687 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
7688 gcc_assert (bypass_code == UNKNOWN && second_code == UNKNOWN);
7689 code = ix86_fp_compare_code_to_integer (code);
7693 code = reverse_condition (code);
7704 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
7708 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
7709 Those same assemblers have the same but opposite lossage on cmov. */
7710 gcc_assert (mode == CCmode);
7711 suffix = fp ? "nbe" : "a";
7731 gcc_assert (mode == CCmode);
7753 gcc_assert (mode == CCmode);
7754 suffix = fp ? "nb" : "ae";
7757 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
7761 gcc_assert (mode == CCmode);
7765 suffix = fp ? "u" : "p";
7768 suffix = fp ? "nu" : "np";
7773 fputs (suffix, file);
7776 /* Print the name of register X to FILE based on its machine mode and number.
7777 If CODE is 'w', pretend the mode is HImode.
7778 If CODE is 'b', pretend the mode is QImode.
7779 If CODE is 'k', pretend the mode is SImode.
7780 If CODE is 'q', pretend the mode is DImode.
7781 If CODE is 'h', pretend the reg is the 'high' byte register.
7782 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op. */
7785 print_reg (rtx x, int code, FILE *file)
7787 gcc_assert (REGNO (x) != ARG_POINTER_REGNUM
7788 && REGNO (x) != FRAME_POINTER_REGNUM
7789 && REGNO (x) != FLAGS_REG
7790 && REGNO (x) != FPSR_REG
7791 && REGNO (x) != FPCR_REG);
7793 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
7796 if (code == 'w' || MMX_REG_P (x))
7798 else if (code == 'b')
7800 else if (code == 'k')
7802 else if (code == 'q')
7804 else if (code == 'y')
7806 else if (code == 'h')
7809 code = GET_MODE_SIZE (GET_MODE (x));
7811 /* Irritatingly, AMD extended registers use a different naming convention
7812 from the normal registers. */
7813 if (REX_INT_REG_P (x))
7815 gcc_assert (TARGET_64BIT);
7819 error ("extended registers have no high halves");
7822 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
7825 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
7828 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
7831 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
7834 error ("unsupported operand size for extended register");
7842 if (STACK_TOP_P (x))
7844 fputs ("st(0)", file);
7851 if (! ANY_FP_REG_P (x))
7852 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
7857 fputs (hi_reg_name[REGNO (x)], file);
7860 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
7862 fputs (qi_reg_name[REGNO (x)], file);
7865 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
7867 fputs (qi_high_reg_name[REGNO (x)], file);
7874 /* Locate some local-dynamic symbol still in use by this function
7875 so that we can print its name in some tls_local_dynamic_base pattern. */
7879 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
7883 if (GET_CODE (x) == SYMBOL_REF
7884 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
7886 cfun->machine->some_ld_name = XSTR (x, 0);
7894 get_some_local_dynamic_name (void)
7898 if (cfun->machine->some_ld_name)
7899 return cfun->machine->some_ld_name;
7901 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
7903 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
7904 return cfun->machine->some_ld_name;
7910 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
7911 C -- print opcode suffix for set/cmov insn.
7912 c -- like C, but print reversed condition
7913 F,f -- likewise, but for floating-point.
7914 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.", otherwise nothing.
7916 R -- print the prefix for register names.
7917 z -- print the opcode suffix for the size of the current operand.
7918 * -- print a star (in certain assembler syntax)
7919 A -- print an absolute memory reference.
7920 w -- print the operand as if it's a "word" (HImode) even if it isn't.
7921 s -- print a shift double count, followed by the assembler's argument delimiter.
7923 b -- print the QImode name of the register for the indicated operand.
7924 %b0 would print %al if operands[0] is reg 0.
7925 w -- likewise, print the HImode name of the register.
7926 k -- likewise, print the SImode name of the register.
7927 q -- likewise, print the DImode name of the register.
7928 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
7929 y -- print "st(0)" instead of "st" as a register.
7930 D -- print condition for SSE cmp instruction.
7931 P -- if PIC, print an @PLT suffix.
7932 X -- don't print any sort of PIC '@' suffix for a symbol.
7933 & -- print some in-use local-dynamic symbol name.
7934 H -- print a memory address offset by 8; used for sse high-parts. */
7938 print_operand (FILE *file, rtx x, int code)
7945 if (ASSEMBLER_DIALECT == ASM_ATT)
7950 assemble_name (file, get_some_local_dynamic_name ());
7954 switch (ASSEMBLER_DIALECT)
7961 /* Intel syntax. For absolute addresses, registers should not
7962 be surrounded by braces. */
7966 PRINT_OPERAND (file, x, 0);
7976 PRINT_OPERAND (file, x, 0);
7981 if (ASSEMBLER_DIALECT == ASM_ATT)
7986 if (ASSEMBLER_DIALECT == ASM_ATT)
7991 if (ASSEMBLER_DIALECT == ASM_ATT)
7996 if (ASSEMBLER_DIALECT == ASM_ATT)
8001 if (ASSEMBLER_DIALECT == ASM_ATT)
8006 if (ASSEMBLER_DIALECT == ASM_ATT)
8011 /* 387 opcodes don't get size suffixes if the operands are registers. */
8013 if (STACK_REG_P (x))
8016 /* Likewise if using Intel opcodes. */
8017 if (ASSEMBLER_DIALECT == ASM_INTEL)
8020 /* Derive the size suffix from the size of the operand. */
8021 switch (GET_MODE_SIZE (GET_MODE (x)))
8028 #ifdef HAVE_GAS_FILDS_FISTS
8034 if (GET_MODE (x) == SFmode)
8049 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
8051 #ifdef GAS_MNEMONICS
8077 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
8079 PRINT_OPERAND (file, x, 0);
8085 /* A little bit of braindamage here. The SSE compare instructions
8086 use completely different names for the comparisons than the
8087 fp conditional moves. */
8088 switch (GET_CODE (x))
8103 fputs ("unord", file);
8107 fputs ("neq", file);
8111 fputs ("nlt", file);
8115 fputs ("nle", file);
8118 fputs ("ord", file);
8125 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8126 if (ASSEMBLER_DIALECT == ASM_ATT)
8128 switch (GET_MODE (x))
8130 case HImode: putc ('w', file); break;
8132 case SFmode: putc ('l', file); break;
8134 case DFmode: putc ('q', file); break;
8135 default: gcc_unreachable ();
8142 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
8145 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8146 if (ASSEMBLER_DIALECT == ASM_ATT)
8149 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
8152 /* Like above, but reverse condition */
8154 /* Check to see if argument to %c is really a constant
8155 and not a condition code which needs to be reversed. */
8156 if (!COMPARISON_P (x))
8158 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
8161 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
8164 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8165 if (ASSEMBLER_DIALECT == ASM_ATT)
8168 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
8172 /* It doesn't actually matter what mode we use here, as we're
8173 only going to use this for printing. */
8174 x = adjust_address_nv (x, DImode, 8);
8181 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
8184 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
8187 int pred_val = INTVAL (XEXP (x, 0));
8189 if (pred_val < REG_BR_PROB_BASE * 45 / 100
8190 || pred_val > REG_BR_PROB_BASE * 55 / 100)
8192 int taken = pred_val > REG_BR_PROB_BASE / 2;
8193 int cputaken = final_forward_branch_p (current_output_insn) == 0;
8195 /* Emit hints only in the case default branch prediction
8196 heuristics would fail. */
8197 if (taken != cputaken)
8199 /* We use 3e (DS) prefix for taken branches and
8200 2e (CS) prefix for not taken branches. */
8202 fputs ("ds ; ", file);
8204 fputs ("cs ; ", file);
8211 output_operand_lossage ("invalid operand code '%c'", code);
8216 print_reg (x, code, file);
8220 /* No `byte ptr' prefix for call instructions. */
8221 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
8224 switch (GET_MODE_SIZE (GET_MODE (x)))
8226 case 1: size = "BYTE"; break;
8227 case 2: size = "WORD"; break;
8228 case 4: size = "DWORD"; break;
8229 case 8: size = "QWORD"; break;
8230 case 12: size = "XWORD"; break;
8231 case 16: size = "XMMWORD"; break;
8236 /* Check for explicit size override (codes 'b', 'w' and 'k') */
8239 else if (code == 'w')
8241 else if (code == 'k')
8245 fputs (" PTR ", file);
8249 /* Avoid (%rip) for call operands. */
8250 if (CONSTANT_ADDRESS_P (x) && code == 'P'
8251 && !CONST_INT_P (x))
8252 output_addr_const (file, x);
8253 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
8254 output_operand_lossage ("invalid constraints for operand");
8259 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
8264 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8265 REAL_VALUE_TO_TARGET_SINGLE (r, l);
8267 if (ASSEMBLER_DIALECT == ASM_ATT)
8269 fprintf (file, "0x%08lx", l);
8272 /* These float cases don't actually occur as immediate operands. */
8273 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
8277 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
8278 fprintf (file, "%s", dstr);
8281 else if (GET_CODE (x) == CONST_DOUBLE
8282 && GET_MODE (x) == XFmode)
8286 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
8287 fprintf (file, "%s", dstr);
8292 /* We have patterns that allow zero sets of memory, for instance.
8293 In 64-bit mode, we should probably support all 8-byte vectors,
8294 since we can in fact encode that into an immediate. */
8295 if (GET_CODE (x) == CONST_VECTOR)
8297 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
8303 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
8305 if (ASSEMBLER_DIALECT == ASM_ATT)
8308 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
8309 || GET_CODE (x) == LABEL_REF)
8311 if (ASSEMBLER_DIALECT == ASM_ATT)
8314 fputs ("OFFSET FLAT:", file);
8317 if (CONST_INT_P (x))
8318 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
8320 output_pic_addr_const (file, x, code);
8322 output_addr_const (file, x);
8326 /* Print a memory operand whose address is ADDR. */
8329 print_operand_address (FILE *file, rtx addr)
8331 struct ix86_address parts;
8332 rtx base, index, disp;
8334 int ok = ix86_decompose_address (addr, &parts);
8339 index = parts.index;
8341 scale = parts.scale;
8349 if (USER_LABEL_PREFIX[0] == 0)
8351 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
8357 if (!base && !index)
8359 /* A displacement-only address requires special attention. */
8361 if (CONST_INT_P (disp))
8363 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
8365 if (USER_LABEL_PREFIX[0] == 0)
8367 fputs ("ds:", file);
8369 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
8372 output_pic_addr_const (file, disp, 0);
8374 output_addr_const (file, disp);
8376 /* Use one byte shorter RIP relative addressing for 64bit mode. */
8379 if (GET_CODE (disp) == CONST
8380 && GET_CODE (XEXP (disp, 0)) == PLUS
8381 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
8382 disp = XEXP (XEXP (disp, 0), 0);
8383 if (GET_CODE (disp) == LABEL_REF
8384 || (GET_CODE (disp) == SYMBOL_REF
8385 && SYMBOL_REF_TLS_MODEL (disp) == 0))
8386 fputs ("(%rip)", file);
8391 if (ASSEMBLER_DIALECT == ASM_ATT)
8396 output_pic_addr_const (file, disp, 0);
8397 else if (GET_CODE (disp) == LABEL_REF)
8398 output_asm_label (disp);
8400 output_addr_const (file, disp);
8405 print_reg (base, 0, file);
8409 print_reg (index, 0, file);
8411 fprintf (file, ",%d", scale);
8417 rtx offset = NULL_RTX;
8421 /* Pull out the offset of a symbol; print any symbol itself. */
8422 if (GET_CODE (disp) == CONST
8423 && GET_CODE (XEXP (disp, 0)) == PLUS
8424 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
8426 offset = XEXP (XEXP (disp, 0), 1);
8427 disp = gen_rtx_CONST (VOIDmode,
8428 XEXP (XEXP (disp, 0), 0));
8432 output_pic_addr_const (file, disp, 0);
8433 else if (GET_CODE (disp) == LABEL_REF)
8434 output_asm_label (disp);
8435 else if (CONST_INT_P (disp))
8438 output_addr_const (file, disp);
8444 print_reg (base, 0, file);
8447 if (INTVAL (offset) >= 0)
8449 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
8453 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
8460 print_reg (index, 0, file);
8462 fprintf (file, "*%d", scale);
8470 output_addr_const_extra (FILE *file, rtx x)
8474 if (GET_CODE (x) != UNSPEC)
8477 op = XVECEXP (x, 0, 0);
8478 switch (XINT (x, 1))
8480 case UNSPEC_GOTTPOFF:
8481 output_addr_const (file, op);
8482 /* FIXME: This might be @TPOFF in Sun ld. */
8483 fputs ("@GOTTPOFF", file);
8486 output_addr_const (file, op);
8487 fputs ("@TPOFF", file);
8490 output_addr_const (file, op);
8492 fputs ("@TPOFF", file);
8494 fputs ("@NTPOFF", file);
8497 output_addr_const (file, op);
8498 fputs ("@DTPOFF", file);
8500 case UNSPEC_GOTNTPOFF:
8501 output_addr_const (file, op);
8503 fputs ("@GOTTPOFF(%rip)", file);
8505 fputs ("@GOTNTPOFF", file);
8507 case UNSPEC_INDNTPOFF:
8508 output_addr_const (file, op);
8509 fputs ("@INDNTPOFF", file);
8519 /* Split one or more DImode RTL references into pairs of SImode
8520 references. The RTL can be REG, offsettable MEM, integer constant, or
8521 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
8522 split and "num" is its length. lo_half and hi_half are output arrays
8523 that parallel "operands". */
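/* For example, on ia32 the register pair (reg:DI ax) splits into
   lo = (reg:SI ax) and hi = (reg:SI dx), while an offsettable (mem:DI a)
   splits into (mem:SI a) and (mem:SI a+4) -- a sketch of the
   little-endian layout assumed here.  */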
8526 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
8530 rtx op = operands[num];
8532 /* simplify_subreg refuses to split volatile memory references,
8533 but we still have to handle them. */
8536 lo_half[num] = adjust_address (op, SImode, 0);
8537 hi_half[num] = adjust_address (op, SImode, 4);
8541 lo_half[num] = simplify_gen_subreg (SImode, op,
8542 GET_MODE (op) == VOIDmode
8543 ? DImode : GET_MODE (op), 0);
8544 hi_half[num] = simplify_gen_subreg (SImode, op,
8545 GET_MODE (op) == VOIDmode
8546 ? DImode : GET_MODE (op), 4);
8550 /* Split one or more TImode RTL references into pairs of DImode
8551 references. The RTL can be REG, offsettable MEM, integer constant, or
8552 CONST_DOUBLE. "operands" is a pointer to an array of TImode RTL to
8553 split and "num" is its length. lo_half and hi_half are output arrays
8554 that parallel "operands". */
8557 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
8561 rtx op = operands[num];
8563 /* simplify_subreg refuses to split volatile memory references, but we
8564 still have to handle them. */
8567 lo_half[num] = adjust_address (op, DImode, 0);
8568 hi_half[num] = adjust_address (op, DImode, 8);
8572 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
8573 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
8578 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
8579 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
8580 is the expression of the binary operation. The output may either be
8581 emitted here, or returned to the caller, like all output_* functions.
8583 There is no guarantee that the operands are the same mode, as they
8584 might be within FLOAT or FLOAT_EXTEND expressions. */
8586 #ifndef SYSV386_COMPAT
8587 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
8588 wants to fix the assemblers because that causes incompatibility
8589 with gcc. No-one wants to fix gcc because that causes
8590 incompatibility with assemblers... You can use the option of
8591 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
8592 #define SYSV386_COMPAT 1
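/* The {att|intel} braces in the templates below select the assembler
   dialect at output time: e.g. "fsub" followed by "p\t{%2, %0|%0, %2}"
   prints "fsubp %st(2), %st" under ATT syntax but "fsubp st, st(2)"
   under Intel syntax (an illustrative sketch; see ASSEMBLER_DIALECT).  */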
8596 output_387_binary_op (rtx insn, rtx *operands)
8598 static char buf[30];
8601 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
8603 #ifdef ENABLE_CHECKING
8604 /* Even if we do not want to check the inputs, this documents the input
8605 constraints, which helps in understanding the following code. */
8606 if (STACK_REG_P (operands[0])
8607 && ((REG_P (operands[1])
8608 && REGNO (operands[0]) == REGNO (operands[1])
8609 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
8610 || (REG_P (operands[2])
8611 && REGNO (operands[0]) == REGNO (operands[2])
8612 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
8613 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
8616 gcc_assert (is_sse);
8619 switch (GET_CODE (operands[3]))
8622 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8623 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8631 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8632 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8640 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8641 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8649 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8650 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8664 if (GET_MODE (operands[0]) == SFmode)
8665 strcat (buf, "ss\t{%2, %0|%0, %2}");
8667 strcat (buf, "sd\t{%2, %0|%0, %2}");
8672 switch (GET_CODE (operands[3]))
8676 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
8678 rtx temp = operands[2];
8679 operands[2] = operands[1];
8683 /* We know operands[0] == operands[1]. */
8685 if (MEM_P (operands[2]))
8691 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
8693 if (STACK_TOP_P (operands[0]))
8694 /* How is it that we are storing to a dead operand[2]?
8695 Well, presumably operands[1] is dead too. We can't
8696 store the result to st(0) as st(0) gets popped on this
8697 instruction. Instead store to operands[2] (which I
8698 think has to be st(1)). st(1) will be popped later.
8699 gcc <= 2.8.1 didn't have this check and generated
8700 assembly code that the Unixware assembler rejected. */
8701 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
8703 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
8707 if (STACK_TOP_P (operands[0]))
8708 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
8710 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
8715 if (MEM_P (operands[1]))
8721 if (MEM_P (operands[2]))
8727 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
8730 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
8731 derived assemblers, confusingly reverse the direction of
8732 the operation for fsub{r} and fdiv{r} when the
8733 destination register is not st(0). The Intel assembler
8734 doesn't have this brain damage. Read !SYSV386_COMPAT to
8735 figure out what the hardware really does. */
8736 if (STACK_TOP_P (operands[0]))
8737 p = "{p\t%0, %2|rp\t%2, %0}";
8739 p = "{rp\t%2, %0|p\t%0, %2}";
8741 if (STACK_TOP_P (operands[0]))
8742 /* As above for fmul/fadd, we can't store to st(0). */
8743 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
8745 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
8750 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
8753 if (STACK_TOP_P (operands[0]))
8754 p = "{rp\t%0, %1|p\t%1, %0}";
8756 p = "{p\t%1, %0|rp\t%0, %1}";
8758 if (STACK_TOP_P (operands[0]))
8759 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
8761 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
8766 if (STACK_TOP_P (operands[0]))
8768 if (STACK_TOP_P (operands[1]))
8769 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
8771 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
8774 else if (STACK_TOP_P (operands[1]))
8777 p = "{\t%1, %0|r\t%0, %1}";
8779 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
8785 p = "{r\t%2, %0|\t%0, %2}";
8787 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
8800 /* Return the mode needed for the entity in the optimize_mode_switching pass. */
8803 ix86_mode_needed (int entity, rtx insn)
8805 enum attr_i387_cw mode;
8807 /* The mode UNINITIALIZED is used to store the control word after a
8808 function call or ASM pattern. The mode ANY specifies that the function
8809 has no requirements on the control word and makes no changes in the
8810 bits we are interested in. */
8813 || (NONJUMP_INSN_P (insn)
8814 && (asm_noperands (PATTERN (insn)) >= 0
8815 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
8816 return I387_CW_UNINITIALIZED;
8818 if (recog_memoized (insn) < 0)
8821 mode = get_attr_i387_cw (insn);
8826 if (mode == I387_CW_TRUNC)
8831 if (mode == I387_CW_FLOOR)
8836 if (mode == I387_CW_CEIL)
8841 if (mode == I387_CW_MASK_PM)
8852 /* Output code to initialize control word copies used by trunc?f?i and
8853 rounding patterns. CURRENT_MODE is set to the current control word,
8854 while NEW_MODE is set to the new control word. */
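/* For reference, a sketch of the x87 control word fields these masks
   touch (standard FPU layout): bits 10-11 form the rounding-control
   field -- 00 = to nearest, 01 = down toward -oo (0x0400), 10 = up
   toward +oo (0x0800), 11 = toward zero/truncate (0x0c00) -- and
   bit 5 (0x0020) is the precision-exception mask used for nearbyint.  */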
8857 emit_i387_cw_initialization (int mode)
8859 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
8864 rtx reg = gen_reg_rtx (HImode);
8866 emit_insn (gen_x86_fnstcw_1 (stored_mode));
8867 emit_move_insn (reg, copy_rtx (stored_mode));
8869 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL || optimize_size)
8874 /* round toward zero (truncate) */
8875 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
8876 slot = SLOT_CW_TRUNC;
8880 /* round down toward -oo */
8881 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
8882 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
8883 slot = SLOT_CW_FLOOR;
8887 /* round up toward +oo */
8888 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
8889 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
8890 slot = SLOT_CW_CEIL;
8893 case I387_CW_MASK_PM:
8894 /* mask precision exception for nearbyint() */
8895 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
8896 slot = SLOT_CW_MASK_PM;
8908 /* round toward zero (truncate) */
8909 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
8910 slot = SLOT_CW_TRUNC;
8914 /* round down toward -oo */
8915 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
8916 slot = SLOT_CW_FLOOR;
8920 /* round up toward +oo */
8921 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
8922 slot = SLOT_CW_CEIL;
8925 case I387_CW_MASK_PM:
8926 /* mask precision exception for nearbyint() */
8927 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
8928 slot = SLOT_CW_MASK_PM;
8936 gcc_assert (slot < MAX_386_STACK_LOCALS);
8938 new_mode = assign_386_stack_local (HImode, slot);
8939 emit_move_insn (new_mode, reg);
8942 /* Output code for INSN to convert a float to a signed int. OPERANDS
8943 are the insn operands. The output may be [HSD]Imode and the input
8944 operand may be [SDX]Fmode. */
8947 output_fix_trunc (rtx insn, rtx *operands, int fisttp)
8949 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
8950 int dimode_p = GET_MODE (operands[0]) == DImode;
8951 int round_mode = get_attr_i387_cw (insn);
8953 /* Jump through a hoop or two for DImode, since the hardware has no
8954 non-popping instruction. We used to do this a different way, but
8955 that was somewhat fragile and broke with post-reload splitters. */
8956 if ((dimode_p || fisttp) && !stack_top_dies)
8957 output_asm_insn ("fld\t%y1", operands);
8959 gcc_assert (STACK_TOP_P (operands[1]));
8960 gcc_assert (MEM_P (operands[0]));
8963 output_asm_insn ("fisttp%z0\t%0", operands);
8966 if (round_mode != I387_CW_ANY)
8967 output_asm_insn ("fldcw\t%3", operands);
8968 if (stack_top_dies || dimode_p)
8969 output_asm_insn ("fistp%z0\t%0", operands);
8971 output_asm_insn ("fist%z0\t%0", operands);
8972 if (round_mode != I387_CW_ANY)
8973 output_asm_insn ("fldcw\t%2", operands);
8979 /* Output code for x87 ffreep insn. The OPNO argument, which may only
8980 have the values zero or one, indicates the ffreep insn's operand
8981 from the OPERANDS array. */
8984 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
8986 if (TARGET_USE_FFREEP)
8987 #if HAVE_AS_IX86_FFREEP
8988 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
8991 static char retval[] = ".word\t0xc_df";
8992 int regno = REGNO (operands[opno]);
8994 gcc_assert (FP_REGNO_P (regno));
8996 retval[9] = '0' + (regno - FIRST_STACK_REG);
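/* A sketch of why the .word trick works, assuming the usual DF C0+i
   encoding of ffreep %st(i): patching the '_' gives e.g. ".word 0xc1df",
   which a little-endian assembler emits as the bytes DF C1, i.e.
   ffreep %st(1), for assemblers that do not know the mnemonic.  */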
9001 return opno ? "fstp\t%y1" : "fstp\t%y0";
9005 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
9006 should be used. UNORDERED_P is true when fucom should be used. */
9009 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
9012 rtx cmp_op0, cmp_op1;
9013 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
9017 cmp_op0 = operands[0];
9018 cmp_op1 = operands[1];
9022 cmp_op0 = operands[1];
9023 cmp_op1 = operands[2];
9028 if (GET_MODE (operands[0]) == SFmode)
9030 return "ucomiss\t{%1, %0|%0, %1}";
9032 return "comiss\t{%1, %0|%0, %1}";
9035 return "ucomisd\t{%1, %0|%0, %1}";
9037 return "comisd\t{%1, %0|%0, %1}";
9040 gcc_assert (STACK_TOP_P (cmp_op0));
9042 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
9044 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
9048 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
9049 return output_387_ffreep (operands, 1);
9052 return "ftst\n\tfnstsw\t%0";
9055 if (STACK_REG_P (cmp_op1)
9057 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
9058 && REGNO (cmp_op1) != FIRST_STACK_REG)
9060 /* If the top of the 387 stack dies, and the other operand
9061 is also a stack register that dies, then this must be a
9062 `fcompp' float compare */
9066 /* There is no double popping fcomi variant. Fortunately,
9067 eflags is immune from the fstp's cc clobbering. */
9069 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
9071 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
9072 return output_387_ffreep (operands, 0);
9077 return "fucompp\n\tfnstsw\t%0";
9079 return "fcompp\n\tfnstsw\t%0";
9084 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
9086 static const char * const alt[16] =
9088 "fcom%z2\t%y2\n\tfnstsw\t%0",
9089 "fcomp%z2\t%y2\n\tfnstsw\t%0",
9090 "fucom%z2\t%y2\n\tfnstsw\t%0",
9091 "fucomp%z2\t%y2\n\tfnstsw\t%0",
9093 "ficom%z2\t%y2\n\tfnstsw\t%0",
9094 "ficomp%z2\t%y2\n\tfnstsw\t%0",
9098 "fcomi\t{%y1, %0|%0, %y1}",
9099 "fcomip\t{%y1, %0|%0, %y1}",
9100 "fucomi\t{%y1, %0|%0, %y1}",
9101 "fucomip\t{%y1, %0|%0, %y1}",
9112 mask = eflags_p << 3;
9113 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
9114 mask |= unordered_p << 1;
9115 mask |= stack_top_dies;
9117 gcc_assert (mask < 16);
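/* A worked example (sketch): for an fcomi-style compare with
   eflags_p = 1, a non-integer register operand, unordered_p = 1 and a
   dying stack top, mask = (1 << 3) | (0 << 2) | (1 << 1) | 1 = 11,
   selecting "fucomip\t{%y1, %0|%0, %y1}" from the table above.  */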
9126 ix86_output_addr_vec_elt (FILE *file, int value)
9128 const char *directive = ASM_LONG;
9132 directive = ASM_QUAD;
9134 gcc_assert (!TARGET_64BIT);
9137 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
9141 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
9143 const char *directive = ASM_LONG;
9146 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
9147 directive = ASM_QUAD;
9149 gcc_assert (!TARGET_64BIT);
9151 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
9152 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
9153 fprintf (file, "%s%s%d-%s%d\n",
9154 directive, LPREFIX, value, LPREFIX, rel);
9155 else if (HAVE_AS_GOTOFF_IN_DATA)
9156 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
9158 else if (TARGET_MACHO)
9160 fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
9161 machopic_output_function_base_name (file);
9162 fprintf(file, "\n");
9166 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
9167 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
9170 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
9174 ix86_expand_clear (rtx dest)
9178 /* We play register width games, which are only valid after reload. */
9179 gcc_assert (reload_completed);
9181 /* Avoid HImode and its attendant prefix byte. */
9182 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
9183 dest = gen_rtx_REG (SImode, REGNO (dest));
9185 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
9187 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
9188 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
9190 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
9191 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
9197 /* X is an unchanging MEM. If it is a constant pool reference, return
9198 the constant pool rtx, else NULL. */
9201 maybe_get_pool_constant (rtx x)
9203 x = ix86_delegitimize_address (XEXP (x, 0));
9205 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
9206 return get_pool_constant (x);
9212 ix86_expand_move (enum machine_mode mode, rtx operands[])
9214 int strict = (reload_in_progress || reload_completed);
9216 enum tls_model model;
9221 if (GET_CODE (op1) == SYMBOL_REF)
9223 model = SYMBOL_REF_TLS_MODEL (op1);
9226 op1 = legitimize_tls_address (op1, model, true);
9227 op1 = force_operand (op1, op0);
9232 else if (GET_CODE (op1) == CONST
9233 && GET_CODE (XEXP (op1, 0)) == PLUS
9234 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
9236 model = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (op1, 0), 0));
9239 rtx addend = XEXP (XEXP (op1, 0), 1);
9240 op1 = legitimize_tls_address (XEXP (XEXP (op1, 0), 0), model, true);
9241 op1 = force_operand (op1, NULL);
9242 op1 = expand_simple_binop (Pmode, PLUS, op1, addend,
9243 op0, 1, OPTAB_DIRECT);
9249 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
9251 if (TARGET_MACHO && !TARGET_64BIT)
9256 rtx temp = ((reload_in_progress
9257 || ((op0 && REG_P (op0))
9259 ? op0 : gen_reg_rtx (Pmode));
9260 op1 = machopic_indirect_data_reference (op1, temp);
9261 op1 = machopic_legitimize_pic_address (op1, mode,
9262 temp == op1 ? 0 : temp);
9264 else if (MACHOPIC_INDIRECT)
9265 op1 = machopic_indirect_data_reference (op1, 0);
9273 op1 = force_reg (Pmode, op1);
9274 else if (!TARGET_64BIT || !x86_64_movabs_operand (op1, Pmode))
9276 rtx reg = no_new_pseudos ? op0 : NULL_RTX;
9277 op1 = legitimize_pic_address (op1, reg);
9286 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
9287 || !push_operand (op0, mode))
9289 op1 = force_reg (mode, op1);
9291 if (push_operand (op0, mode)
9292 && ! general_no_elim_operand (op1, mode))
9293 op1 = copy_to_mode_reg (mode, op1);
9295 /* Force large constants in 64bit compilation into registers
9296 to get them CSEed. */
9297 if (TARGET_64BIT && mode == DImode
9298 && immediate_operand (op1, mode)
9299 && !x86_64_zext_immediate_operand (op1, VOIDmode)
9300 && !register_operand (op0, mode)
9301 && optimize && !reload_completed && !reload_in_progress)
9302 op1 = copy_to_mode_reg (mode, op1);
9304 if (FLOAT_MODE_P (mode))
9306 /* If we are loading a floating point constant to a register,
9307 force the value to memory now, since we'll get better code
9308 out of the back end. */
9312 else if (GET_CODE (op1) == CONST_DOUBLE)
9314 op1 = validize_mem (force_const_mem (mode, op1));
9315 if (!register_operand (op0, mode))
9317 rtx temp = gen_reg_rtx (mode);
9318 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
9319 emit_move_insn (op0, temp);
9326 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
9330 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
9332 rtx op0 = operands[0], op1 = operands[1];
9334 /* Force constants other than zero into memory. We do not know how
9335 the instructions used to build constants modify the upper 64 bits
9336 of the register; once we have that information we may be able
9337 to handle some of them more efficiently. */
9338 if ((reload_in_progress | reload_completed) == 0
9339 && register_operand (op0, mode)
9341 && standard_sse_constant_p (op1) <= 0)
9342 op1 = validize_mem (force_const_mem (mode, op1));
9344 /* Make operand1 a register if it isn't already. */
9346 && !register_operand (op0, mode)
9347 && !register_operand (op1, mode))
9349 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
9353 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
9356 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
9357 straight to ix86_expand_vector_move. */
9358 /* Code generation for scalar reg-reg moves of single and double precision data:
9359 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
9363 if (x86_sse_partial_reg_dependency == true)
9368 Code generation for scalar loads of double precision data:
9369 if (x86_sse_split_regs == true)
9370 movlpd mem, reg (gas syntax)
9374 Code generation for unaligned packed loads of single precision data
9375 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
9376 if (x86_sse_unaligned_move_optimal)
9379 if (x86_sse_partial_reg_dependency == true)
9391 Code generation for unaligned packed loads of double precision data
9392 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
9393 if (x86_sse_unaligned_move_optimal)
9396 if (x86_sse_split_regs == true)
9409 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
9418 /* If we're optimizing for size, movups is the smallest. */
9421 op0 = gen_lowpart (V4SFmode, op0);
9422 op1 = gen_lowpart (V4SFmode, op1);
9423 emit_insn (gen_sse_movups (op0, op1));
9427 /* ??? If we have typed data, then it would appear that using
9428 movdqu is the only way to get unaligned data loaded with
9430 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
9432 op0 = gen_lowpart (V16QImode, op0);
9433 op1 = gen_lowpart (V16QImode, op1);
9434 emit_insn (gen_sse2_movdqu (op0, op1));
9438 if (TARGET_SSE2 && mode == V2DFmode)
9442 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
9444 op0 = gen_lowpart (V2DFmode, op0);
9445 op1 = gen_lowpart (V2DFmode, op1);
9446 emit_insn (gen_sse2_movupd (op0, op1));
9450 /* When SSE registers are split into halves, we can avoid
9451 writing to the top half twice. */
9452 if (TARGET_SSE_SPLIT_REGS)
9454 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
9459 /* ??? Not sure about the best option for the Intel chips.
9460 The following would seem to satisfy; the register is
9461 entirely cleared, breaking the dependency chain. We
9462 then store to the upper half, with a dependency depth
9463 of one. A rumor has it that Intel recommends two movsd
9464 followed by an unpacklpd, but this is unconfirmed. And
9465 given that the dependency depth of the unpacklpd would
9466 still be one, I'm not sure why this would be better. */
9467 zero = CONST0_RTX (V2DFmode);
9470 m = adjust_address (op1, DFmode, 0);
9471 emit_insn (gen_sse2_loadlpd (op0, zero, m));
9472 m = adjust_address (op1, DFmode, 8);
9473 emit_insn (gen_sse2_loadhpd (op0, op0, m));
9477 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
9479 op0 = gen_lowpart (V4SFmode, op0);
9480 op1 = gen_lowpart (V4SFmode, op1);
9481 emit_insn (gen_sse_movups (op0, op1));
9485 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
9486 emit_move_insn (op0, CONST0_RTX (mode));
9488 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
9490 if (mode != V4SFmode)
9491 op0 = gen_lowpart (V4SFmode, op0);
9492 m = adjust_address (op1, V2SFmode, 0);
9493 emit_insn (gen_sse_loadlps (op0, op0, m));
9494 m = adjust_address (op1, V2SFmode, 8);
9495 emit_insn (gen_sse_loadhps (op0, op0, m));
9498 else if (MEM_P (op0))
9500 /* If we're optimizing for size, movups is the smallest. */
9503 op0 = gen_lowpart (V4SFmode, op0);
9504 op1 = gen_lowpart (V4SFmode, op1);
9505 emit_insn (gen_sse_movups (op0, op1));
9509 /* ??? Similar to above, only less clear because of quote
9510 typeless stores unquote. */
9511 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
9512 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
9514 op0 = gen_lowpart (V16QImode, op0);
9515 op1 = gen_lowpart (V16QImode, op1);
9516 emit_insn (gen_sse2_movdqu (op0, op1));
9520 if (TARGET_SSE2 && mode == V2DFmode)
9522 m = adjust_address (op0, DFmode, 0);
9523 emit_insn (gen_sse2_storelpd (m, op1));
9524 m = adjust_address (op0, DFmode, 8);
9525 emit_insn (gen_sse2_storehpd (m, op1));
9529 if (mode != V4SFmode)
9530 op1 = gen_lowpart (V4SFmode, op1);
9531 m = adjust_address (op0, V2SFmode, 0);
9532 emit_insn (gen_sse_storelps (m, op1));
9533 m = adjust_address (op0, V2SFmode, 8);
9534 emit_insn (gen_sse_storehps (m, op1));
9541 /* Expand a push in MODE. This is some mode for which we do not support
9542 proper push instructions, at least from the registers that we expect
9543 the value to live in. */
9546 ix86_expand_push (enum machine_mode mode, rtx x)
9550 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
9551 GEN_INT (-GET_MODE_SIZE (mode)),
9552 stack_pointer_rtx, 1, OPTAB_DIRECT);
9553 if (tmp != stack_pointer_rtx)
9554 emit_move_insn (stack_pointer_rtx, tmp);
9556 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
9557 emit_move_insn (tmp, x);
9560 /* Helper function of ix86_fixup_binary_operands to canonicalize
9561 operand order. Returns true if the operands should be swapped. */
9564 ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
9567 rtx dst = operands[0];
9568 rtx src1 = operands[1];
9569 rtx src2 = operands[2];
9571 /* If the operation is not commutative, we can't do anything. */
9572 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
9575 /* Highest priority is that src1 should match dst. */
9576 if (rtx_equal_p (dst, src1))
9578 if (rtx_equal_p (dst, src2))
9581 /* Next highest priority is that immediate constants come second. */
9582 if (immediate_operand (src2, mode))
9584 if (immediate_operand (src1, mode))
9587 /* Lowest priority is that memory references should come second. */
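/* For example (a sketch): given (plus:SI (mem:SI ...) (reg:SI 1)) with
   dst = (reg:SI 1), matching dst against src2 wins, so the operands are
   swapped and the addition becomes reg += mem, which suits the
   two-address x86 form.  */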
9597 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
9598 destination to use for the operation. If different from the true
9599 destination in operands[0], a copy operation will be required. */
9602 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
9605 rtx dst = operands[0];
9606 rtx src1 = operands[1];
9607 rtx src2 = operands[2];
9609 /* Canonicalize operand order. */
9610 if (ix86_swap_binary_operands_p (code, mode, operands))
9617 /* Both source operands cannot be in memory. */
9618 if (MEM_P (src1) && MEM_P (src2))
9620 /* Optimization: Only read from memory once. */
9621 if (rtx_equal_p (src1, src2))
9623 src2 = force_reg (mode, src2);
9627 src2 = force_reg (mode, src2);
9630 /* If the destination is memory, and we do not have matching source
9631 operands, do things in registers. */
9632 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
9633 dst = gen_reg_rtx (mode);
9635 /* Source 1 cannot be a constant. */
9636 if (CONSTANT_P (src1))
9637 src1 = force_reg (mode, src1);
9639 /* Source 1 cannot be a non-matching memory. */
9640 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
9641 src1 = force_reg (mode, src1);
9648 /* Similarly, but assume that the destination has already been
9652 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
9653 enum machine_mode mode, rtx operands[])
9655 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
9656 gcc_assert (dst == operands[0]);
9659 /* Attempt to expand a binary operator. Make the expansion closer to the
9660 actual machine, than just general_operand, which will allow 3 separate
9661 memory references (one output, two input) in a single insn. */
9664 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
9667 rtx src1, src2, dst, op, clob;
9669 dst = ix86_fixup_binary_operands (code, mode, operands);
9673 /* Emit the instruction. */
9675 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
9676 if (reload_in_progress)
9678 /* Reload doesn't know about the flags register, and doesn't know that
9679 it doesn't want to clobber it. We can only do this with PLUS. */
9680 gcc_assert (code == PLUS);
9685 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
9686 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
9689 /* Fix up the destination if needed. */
9690 if (dst != operands[0])
9691 emit_move_insn (operands[0], dst);
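/* The common case above therefore emits RTL of the shape (sketch):
     (parallel [(set (reg:SI dst) (plus:SI (reg:SI src1) (reg:SI src2)))
                (clobber (reg:CC FLAGS_REG))])
   matching the flags-clobbering arithmetic patterns in i386.md.  */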
9694 /* Return TRUE or FALSE depending on whether the binary operator meets the
9695 appropriate constraints. */
9698 ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
9701 rtx dst = operands[0];
9702 rtx src1 = operands[1];
9703 rtx src2 = operands[2];
9705 /* Both source operands cannot be in memory. */
9706 if (MEM_P (src1) && MEM_P (src2))
9709 /* Canonicalize operand order for commutative operators. */
9710 if (ix86_swap_binary_operands_p (code, mode, operands))
9717 /* If the destination is memory, we must have a matching source operand. */
9718 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
9721 /* Source 1 cannot be a constant. */
9722 if (CONSTANT_P (src1))
9725 /* Source 1 cannot be a non-matching memory. */
9726 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
9732 /* Attempt to expand a unary operator. Make the expansion closer to the
9733 actual machine, than just general_operand, which will allow 2 separate
9734 memory references (one output, one input) in a single insn. */
9737 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
9740 int matching_memory;
9741 rtx src, dst, op, clob;
9746 /* If the destination is memory, and we do not have matching source
9747 operands, do things in registers. */
9748 matching_memory = 0;
9751 if (rtx_equal_p (dst, src))
9752 matching_memory = 1;
9754 dst = gen_reg_rtx (mode);
9757 /* When source operand is memory, destination must match. */
9758 if (MEM_P (src) && !matching_memory)
9759 src = force_reg (mode, src);
9761 /* Emit the instruction. */
9763 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
9764 if (reload_in_progress || code == NOT)
9766 /* Reload doesn't know about the flags register, and doesn't know that
9767 it doesn't want to clobber it. */
9768 gcc_assert (code == NOT);
9773 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
9774 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
9777 /* Fix up the destination if needed. */
9778 if (dst != operands[0])
9779 emit_move_insn (operands[0], dst);
9782 /* Return TRUE or FALSE depending on whether the unary operator meets the
9783 appropriate constraints. */
9786 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
9787 enum machine_mode mode ATTRIBUTE_UNUSED,
9788 rtx operands[2] ATTRIBUTE_UNUSED)
9790 /* If one of the operands is memory, source and destination must match. */
9791 if ((MEM_P (operands[0])
9792 || MEM_P (operands[1]))
9793 && ! rtx_equal_p (operands[0], operands[1]))
9798 /* Post-reload splitter for converting an SF or DFmode value in an
9799 SSE register into an unsigned SImode. */
9802 ix86_split_convert_uns_si_sse (rtx operands[])
9804 enum machine_mode vecmode;
9805 rtx value, large, zero_or_two31, input, two31, x;
9807 large = operands[1];
9808 zero_or_two31 = operands[2];
9809 input = operands[3];
9810 two31 = operands[4];
9811 vecmode = GET_MODE (large);
9812 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
9814 /* Load up the value into the low element. We must ensure that the other
9815 elements are valid floats -- zero is the easiest such value. */
9818 if (vecmode == V4SFmode)
9819 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
9821 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
9825 input = gen_rtx_REG (vecmode, REGNO (input));
9826 emit_move_insn (value, CONST0_RTX (vecmode));
9827 if (vecmode == V4SFmode)
9828 emit_insn (gen_sse_movss (value, value, input));
9830 emit_insn (gen_sse2_movsd (value, value, input));
9833 emit_move_insn (large, two31);
9834 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
9836 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
9837 emit_insn (gen_rtx_SET (VOIDmode, large, x));
9839 x = gen_rtx_AND (vecmode, zero_or_two31, large);
9840 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
9842 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
9843 emit_insn (gen_rtx_SET (VOIDmode, value, x));
9845 large = gen_rtx_REG (V4SImode, REGNO (large));
9846 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
9848 x = gen_rtx_REG (V4SImode, REGNO (value));
9849 if (vecmode == V4SFmode)
9850 emit_insn (gen_sse2_cvttps2dq (x, value));
9852 emit_insn (gen_sse2_cvttpd2dq (x, value));
9855 emit_insn (gen_xorv4si3 (value, value, large));
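/* A worked example of the sequence above (sketch): for the input
   3000000000.0 >= 2^31, LARGE becomes all-ones, ZERO_OR_TWO31 stays
   2^31, value - 2^31 = 852516352.0 converts exactly, and the final
   xor with LARGE shifted left 31 (i.e. 0x80000000) restores the high
   bit: 852516352 ^ 0x80000000 = 3000000000.  */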
9858 /* Convert an unsigned DImode value into a DFmode, using only SSE.
9859 Expects the 64-bit DImode to be supplied in a pair of integral
9860 registers. Requires SSE2; will use SSE3 if available. For x86_32,
9861 -mfpmath=sse, !optimize_size only. */
9864 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
9866 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
9867 rtx int_xmm, fp_xmm;
9868 rtx biases, exponents;
9871 int_xmm = gen_reg_rtx (V4SImode);
9872 if (TARGET_INTER_UNIT_MOVES)
9873 emit_insn (gen_movdi_to_sse (int_xmm, input));
9874 else if (TARGET_SSE_SPLIT_REGS)
9876 emit_insn (gen_rtx_CLOBBER (VOIDmode, int_xmm));
9877 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
9881 x = gen_reg_rtx (V2DImode);
9882 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
9883 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
9886 x = gen_rtx_CONST_VECTOR (V4SImode,
9887 gen_rtvec (4, GEN_INT (0x43300000UL),
9888 GEN_INT (0x45300000UL),
9889 const0_rtx, const0_rtx));
9890 exponents = validize_mem (force_const_mem (V4SImode, x));
9892 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
9893 emit_insn (gen_sse2_punpckldq (int_xmm, int_xmm, exponents));
9895 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_lo_xmm)
9896 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
9897 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
9898 (0x1.0p84 + double(fp_value_hi_xmm)).
9899 Note these exponents differ by 32. */
9901 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
9903 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
9904 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
9905 real_ldexp (&bias_lo_rvt, &dconst1, 52);
9906 real_ldexp (&bias_hi_rvt, &dconst1, 84);
9907 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
9908 x = const_double_from_real_value (bias_hi_rvt, DFmode);
9909 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
9910 biases = validize_mem (force_const_mem (V2DFmode, biases));
9911 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
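/* A numeric sanity check (sketch): for the input 2^32 + 5 the halves
   are lo = 5, hi = 1; the doubles formed above are 2^52 + 5 and
   2^84 + 2^32, the bias subtraction leaves 5.0 and 4294967296.0, and
   the final add below yields 4294967301.0 == 2^32 + 5 exactly.  */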
9913 /* Add the upper and lower DFmode values together. */
9915 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
9918 x = copy_to_mode_reg (V2DFmode, fp_xmm);
9919 emit_insn (gen_sse2_unpckhpd (fp_xmm, fp_xmm, fp_xmm));
9920 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
9923 ix86_expand_vector_extract (false, target, fp_xmm, 0);
9926 /* Convert an unsigned SImode value into a DFmode. Only currently used
9927 for SSE, but applicable anywhere. */
9930 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
9932 REAL_VALUE_TYPE TWO31r;
9935 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
9936 NULL, 1, OPTAB_DIRECT);
9938 fp = gen_reg_rtx (DFmode);
9939 emit_insn (gen_floatsidf2 (fp, x));
9941 real_ldexp (&TWO31r, &dconst1, 31);
9942 x = const_double_from_real_value (TWO31r, DFmode);
9944 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
9946 emit_move_insn (target, x);
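/* A worked example (sketch): the input 0xffffffff wraps to the signed
   value 2147483647 after adding -2^31, floatsidf gives 2147483647.0,
   and adding TWO31r = 2147483648.0 recovers 4294967295.0 exactly.  */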
9949 /* Convert a signed DImode value into a DFmode. Only used for SSE in
9950 32-bit mode; otherwise we have a direct convert instruction. */
9953 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
9955 REAL_VALUE_TYPE TWO32r;
9956 rtx fp_lo, fp_hi, x;
9958 fp_lo = gen_reg_rtx (DFmode);
9959 fp_hi = gen_reg_rtx (DFmode);
9961 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
9963 real_ldexp (&TWO32r, &dconst1, 32);
9964 x = const_double_from_real_value (TWO32r, DFmode);
9965 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
9967 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
9969 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
9972 emit_move_insn (target, x);
9975 /* Convert an unsigned SImode value into a SFmode, using only SSE.
9976 For x86_32, -mfpmath=sse, !optimize_size only. */
9978 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
9980 REAL_VALUE_TYPE ONE16r;
9981 rtx fp_hi, fp_lo, int_hi, int_lo, x;
9983 real_ldexp (&ONE16r, &dconst1, 16);
9984 x = const_double_from_real_value (ONE16r, SFmode);
9985 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
9986 NULL, 0, OPTAB_DIRECT);
9987 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
9988 NULL, 0, OPTAB_DIRECT);
9989 fp_hi = gen_reg_rtx (SFmode);
9990 fp_lo = gen_reg_rtx (SFmode);
9991 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
9992 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
9993 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
9995 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
9997 if (!rtx_equal_p (target, fp_hi))
9998 emit_move_insn (target, fp_hi);
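/* A worked example (sketch): the input 0x12345678 splits into
   int_hi = 0x1234 and int_lo = 0x5678; both convert to SFmode exactly
   (at most 16 bits), fp_hi * 2^16 is still exact, and only the final
   add 0x1234 * 2^16 + 0x5678 = 305419896 can round -- which is why the
   16/16 split is used rather than converting the full 32-bit value.  */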
10001 /* A subroutine of ix86_build_signbit_mask_vector. If VECT is true,
10002 then replicate the value for all elements of the vector
10006 ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
10013 v = gen_rtvec (4, value, value, value, value);
10015 v = gen_rtvec (4, value, CONST0_RTX (SFmode),
10016 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
10017 return gen_rtx_CONST_VECTOR (V4SFmode, v);
10021 v = gen_rtvec (2, value, value);
10023 v = gen_rtvec (2, value, CONST0_RTX (DFmode));
10024 return gen_rtx_CONST_VECTOR (V2DFmode, v);
10027 gcc_unreachable ();
10031 /* A subroutine of ix86_expand_fp_absneg_operator and copysign expanders.
10032 Create a mask for the sign bit in MODE for an SSE register. If VECT is
10033 true, then replicate the mask for all elements of the vector register.
10034 If INVERT is true, then create a mask excluding the sign bit. */
10037 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
10039 enum machine_mode vec_mode;
10040 HOST_WIDE_INT hi, lo;
10045 /* Find the sign bit, sign extended to 2*HWI. */
10046 if (mode == SFmode)
10047 lo = 0x80000000, hi = lo < 0;
10048 else if (HOST_BITS_PER_WIDE_INT >= 64)
10049 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
10051 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
10054 lo = ~lo, hi = ~hi;
10056 /* Force this value into the low part of a fp vector constant. */
10057 mask = immed_double_const (lo, hi, mode == SFmode ? SImode : DImode);
10058 mask = gen_lowpart (mode, mask);
10060 v = ix86_build_const_vector (mode, vect, mask);
10061 vec_mode = (mode == SFmode) ? V4SFmode : V2DFmode;
10062 return force_reg (vec_mode, v);
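/* Example masks produced here (sketch): SFmode gives 0x80000000 per
   element (or 0x7fffffff when INVERT), DFmode gives
   0x8000000000000000; NEG is then a single xor with the mask and ABS
   an and with the inverted mask, as used by
   ix86_expand_fp_absneg_operator below.  */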
10065 /* Generate code for floating point ABS or NEG. */
10068 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
10071 rtx mask, set, use, clob, dst, src;
10072 bool matching_memory;
10073 bool use_sse = false;
10074 bool vector_mode = VECTOR_MODE_P (mode);
10075 enum machine_mode elt_mode = mode;
10079 elt_mode = GET_MODE_INNER (mode);
10082 else if (TARGET_SSE_MATH)
10083 use_sse = SSE_FLOAT_MODE_P (mode);
10085 /* NEG and ABS performed with SSE use bitwise mask operations.
10086 Create the appropriate mask now. */
10088 mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS);
10095 /* If the destination is memory, and we don't have matching source
10096 operands or we're using the x87, do things in registers. */
10097 matching_memory = false;
10100 if (use_sse && rtx_equal_p (dst, src))
10101 matching_memory = true;
10103 dst = gen_reg_rtx (mode);
10105 if (MEM_P (src) && !matching_memory)
10106 src = force_reg (mode, src);
10110 set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
10111 set = gen_rtx_SET (VOIDmode, dst, set);
10116 set = gen_rtx_fmt_e (code, mode, src);
10117 set = gen_rtx_SET (VOIDmode, dst, set);
10120 use = gen_rtx_USE (VOIDmode, mask);
10121 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
10122 emit_insn (gen_rtx_PARALLEL (VOIDmode,
10123 gen_rtvec (3, set, use, clob)));
10129 if (dst != operands[0])
10130 emit_move_insn (operands[0], dst);
10133 /* Expand a copysign operation. Special case operand 0 being a constant. */
10136 ix86_expand_copysign (rtx operands[])
10138 enum machine_mode mode, vmode;
10139 rtx dest, op0, op1, mask, nmask;
10141 dest = operands[0];
10145 mode = GET_MODE (dest);
10146 vmode = mode == SFmode ? V4SFmode : V2DFmode;
10148 if (GET_CODE (op0) == CONST_DOUBLE)
10152 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
10153 op0 = simplify_unary_operation (ABS, mode, op0, mode);
10155 if (op0 == CONST0_RTX (mode))
10156 op0 = CONST0_RTX (vmode);
10159 if (mode == SFmode)
10160 v = gen_rtvec (4, op0, CONST0_RTX (SFmode),
10161 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
10163 v = gen_rtvec (2, op0, CONST0_RTX (DFmode));
10164 op0 = force_reg (vmode, gen_rtx_CONST_VECTOR (vmode, v));
10167 mask = ix86_build_signbit_mask (mode, 0, 0);
10169 if (mode == SFmode)
10170 emit_insn (gen_copysignsf3_const (dest, op0, op1, mask));
10172 emit_insn (gen_copysigndf3_const (dest, op0, op1, mask));
10176 nmask = ix86_build_signbit_mask (mode, 0, 1);
10177 mask = ix86_build_signbit_mask (mode, 0, 0);
10179 if (mode == SFmode)
10180 emit_insn (gen_copysignsf3_var (dest, NULL, op0, op1, nmask, mask));
10182 emit_insn (gen_copysigndf3_var (dest, NULL, op0, op1, nmask, mask));
10186 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
10187 be a constant, and so has already been expanded into a vector constant. */
10190 ix86_split_copysign_const (rtx operands[])
10192 enum machine_mode mode, vmode;
10193 rtx dest, op0, op1, mask, x;
10195 dest = operands[0];
10198 mask = operands[3];
10200 mode = GET_MODE (dest);
10201 vmode = GET_MODE (mask);
10203 dest = simplify_gen_subreg (vmode, dest, mode, 0);
10204 x = gen_rtx_AND (vmode, dest, mask);
10205 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10207 if (op0 != CONST0_RTX (vmode))
10209 x = gen_rtx_IOR (vmode, dest, op0);
10210 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10214 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
10215 so we have to do two masks. */
10218 ix86_split_copysign_var (rtx operands[])
10220 enum machine_mode mode, vmode;
10221 rtx dest, scratch, op0, op1, mask, nmask, x;
10223 dest = operands[0];
10224 scratch = operands[1];
10227 nmask = operands[4];
10228 mask = operands[5];
10230 mode = GET_MODE (dest);
10231 vmode = GET_MODE (mask);
10233 if (rtx_equal_p (op0, op1))
10235 /* Shouldn't happen often (it's useless, obviously), but when it does
10236 we'd generate incorrect code if we continue below. */
10237 emit_move_insn (dest, op0);
10241 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
10243 gcc_assert (REGNO (op1) == REGNO (scratch));
10245 x = gen_rtx_AND (vmode, scratch, mask);
10246 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
10249 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
10250 x = gen_rtx_NOT (vmode, dest);
10251 x = gen_rtx_AND (vmode, x, op0);
10252 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10256 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
10258 x = gen_rtx_AND (vmode, scratch, mask);
10260 else /* alternative 2,4 */
10262 gcc_assert (REGNO (mask) == REGNO (scratch));
10263 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
10264 x = gen_rtx_AND (vmode, scratch, op1);
10266 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
10268 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
10270 dest = simplify_gen_subreg (vmode, op0, mode, 0);
10271 x = gen_rtx_AND (vmode, dest, nmask);
10273 else /* alternative 3,4 */
10275 gcc_assert (REGNO (nmask) == REGNO (dest));
10277 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
10278 x = gen_rtx_AND (vmode, dest, op0);
10280 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10283 x = gen_rtx_IOR (vmode, dest, scratch);
10284 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10287 /* Return TRUE or FALSE depending on whether the first SET in INSN
10288 has source and destination with matching CC modes, and whether the
10289 CC mode is at least as constrained as REQ_MODE. */
10292 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
10295 enum machine_mode set_mode;
10297 set = PATTERN (insn);
10298 if (GET_CODE (set) == PARALLEL)
10299 set = XVECEXP (set, 0, 0);
10300 gcc_assert (GET_CODE (set) == SET);
10301 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
10303 set_mode = GET_MODE (SET_DEST (set));
10307 if (req_mode != CCNOmode
10308 && (req_mode != CCmode
10309 || XEXP (SET_SRC (set), 1) != const0_rtx))
10313 if (req_mode == CCGCmode)
10317 if (req_mode == CCGOCmode || req_mode == CCNOmode)
10321 if (req_mode == CCZmode)
10328 gcc_unreachable ();
10331 return (GET_MODE (SET_SRC (set)) == set_mode);
10334 /* Generate insn patterns to do an integer compare of OPERANDS. */
10337 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
10339 enum machine_mode cmpmode;
10342 cmpmode = SELECT_CC_MODE (code, op0, op1);
10343 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
10345 /* This is very simple, but making the interface the same as in the
10346 FP case makes the rest of the code easier. */
10347 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
10348 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
10350 /* Return the test that should be put into the flags user, i.e.
10351 the bcc, scc, or cmov instruction. */
10352 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
10355 /* Figure out whether to use ordered or unordered fp comparisons.
10356 Return the appropriate mode to use. */
10359 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
10361 /* ??? In order to make all comparisons reversible, we do all comparisons
10362 non-trapping when compiling for IEEE. Once gcc is able to distinguish
10363 all forms of trapping and nontrapping comparisons, we can make inequality
10364 comparisons trapping again, since it results in better code when using
10365 FCOM based compares. */
10366 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
10370 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
10372 if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
10373 return ix86_fp_compare_mode (code);
10376 /* Only zero flag is needed. */
10377 case EQ: /* ZF=0 */
10378 case NE: /* ZF!=0 */
10380 /* Codes needing carry flag. */
10381 case GEU: /* CF=0 */
10382 case GTU: /* CF=0 & ZF=0 */
10383 case LTU: /* CF=1 */
10384 case LEU: /* CF=1 | ZF=1 */
10386 /* Codes possibly doable only with the sign flag when
10387 comparing against zero. */
10388 case GE: /* SF=OF or SF=0 */
10389 case LT: /* SF<>OF or SF=1 */
10390 if (op1 == const0_rtx)
10393 /* For other cases the carry flag is not required. */
10395 /* Codes doable only with the sign flag when comparing
10396 against zero, but for which we lack a jump instruction,
10397 so we need to use relational tests against the overflow
10398 flag, which thus needs to be zero. */
10399 case GT: /* ZF=0 & SF=OF */
10400 case LE: /* ZF=1 | SF<>OF */
10401 if (op1 == const0_rtx)
10405 /* The strcmp pattern does (use flags), and combine may ask us for a proper
10410 gcc_unreachable ();
10414 /* Return the fixed registers used for condition codes. */
10417 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
10424 /* If two condition code modes are compatible, return a condition code
10425 mode which is compatible with both. Otherwise, return
10428 static enum machine_mode
10429 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
10434 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
10437 if ((m1 == CCGCmode && m2 == CCGOCmode)
10438 || (m1 == CCGOCmode && m2 == CCGCmode))
10444 gcc_unreachable ();
10466 /* These are only compatible with themselves, which we already
10472 /* Split comparison code CODE into comparisons we can do using branch
10473 instructions. BYPASS_CODE is the comparison code for the branch that will
10474 branch around FIRST_CODE and SECOND_CODE. If one of the branches
10475 is not required, its code is set to UNKNOWN.
10476 We never require more than two branches. */
10479 ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
10480 enum rtx_code *first_code,
10481 enum rtx_code *second_code)
10483 *first_code = code;
10484 *bypass_code = UNKNOWN;
10485 *second_code = UNKNOWN;
10487 /* The fcomi comparison sets flags as follows:
10497 case GT: /* GTU - CF=0 & ZF=0 */
10498 case GE: /* GEU - CF=0 */
10499 case ORDERED: /* PF=0 */
10500 case UNORDERED: /* PF=1 */
10501 case UNEQ: /* EQ - ZF=1 */
10502 case UNLT: /* LTU - CF=1 */
10503 case UNLE: /* LEU - CF=1 | ZF=1 */
10504 case LTGT: /* EQ - ZF=0 */
10506 case LT: /* LTU - CF=1 - fails on unordered */
10507 *first_code = UNLT;
10508 *bypass_code = UNORDERED;
10510 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
10511 *first_code = UNLE;
10512 *bypass_code = UNORDERED;
10514 case EQ: /* EQ - ZF=1 - fails on unordered */
10515 *first_code = UNEQ;
10516 *bypass_code = UNORDERED;
10518 case NE: /* NE - ZF=0 - fails on unordered */
10519 *first_code = LTGT;
10520 *second_code = UNORDERED;
10522 case UNGE: /* GEU - CF=0 - fails on unordered */
10524 *second_code = UNORDERED;
10526 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
10528 *second_code = UNORDERED;
10531 gcc_unreachable ();
10533 if (!TARGET_IEEE_FP)
10535 *second_code = UNKNOWN;
10536 *bypass_code = UNKNOWN;
10540 /* Return the cost of a comparison done with fcom + arithmetic operations on AX.
10541 All of the following functions use the number of instructions as a cost metric.
10542 In the future this should be tweaked to compute bytes for optimize_size and
10543 take into account the performance of various instructions on various CPUs. */
10545 ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
10547 if (!TARGET_IEEE_FP)
10549 /* The cost of code output by ix86_expand_fp_compare. */
10573 gcc_unreachable ();
10577 /* Return cost of comparison done using fcomi operation.
10578 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10580 ix86_fp_comparison_fcomi_cost (enum rtx_code code)
10582 enum rtx_code bypass_code, first_code, second_code;
10583 /* Return an arbitrarily high cost when the instruction is not supported - this
10584 prevents gcc from using it. */
10587 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10588 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 2;
10591 /* Return cost of comparison done using sahf operation.
10592 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10594 ix86_fp_comparison_sahf_cost (enum rtx_code code)
10596 enum rtx_code bypass_code, first_code, second_code;
10597 /* Return an arbitrarily high cost when the instruction is not preferred - this
10598 keeps gcc from using it. */
10599 if (!(TARGET_SAHF && (TARGET_USE_SAHF || optimize_size)))
10601 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10602 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 3;
10605 /* Compute cost of the comparison done using any method.
10606 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10608 ix86_fp_comparison_cost (enum rtx_code code)
10610 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
10613 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
10614 sahf_cost = ix86_fp_comparison_sahf_cost (code);
10616 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
10617 if (min > sahf_cost)
10619 if (min > fcomi_cost)
10624 /* Return true if we should use an FCOMI instruction for this
10628 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
10630 enum rtx_code swapped_code = swap_condition (code);
10632 return ((ix86_fp_comparison_cost (code)
10633 == ix86_fp_comparison_fcomi_cost (code))
10634 || (ix86_fp_comparison_cost (swapped_code)
10635 == ix86_fp_comparison_fcomi_cost (swapped_code)));
10638 /* Swap, force into registers, or otherwise massage the two operands
10639 to a fp comparison. The operands are updated in place; the new
10640 comparison code is returned. */
10642 static enum rtx_code
10643 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
10645 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
10646 rtx op0 = *pop0, op1 = *pop1;
10647 enum machine_mode op_mode = GET_MODE (op0);
10648 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
10650 /* All of the unordered compare instructions only work on registers.
10651 The same is true of the fcomi compare instructions. The XFmode
10652 compare instructions require registers except when comparing
10653 against zero or when converting operand 1 from fixed point to
10657 && (fpcmp_mode == CCFPUmode
10658 || (op_mode == XFmode
10659 && ! (standard_80387_constant_p (op0) == 1
10660 || standard_80387_constant_p (op1) == 1)
10661 && GET_CODE (op1) != FLOAT)
10662 || ix86_use_fcomi_compare (code)))
10664 op0 = force_reg (op_mode, op0);
10665 op1 = force_reg (op_mode, op1);
10669 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
10670 things around if they appear profitable, otherwise force op0
10671 into a register. */
10673 if (standard_80387_constant_p (op0) == 0
10675 && ! (standard_80387_constant_p (op1) == 0
10679 tmp = op0, op0 = op1, op1 = tmp;
10680 code = swap_condition (code);
10684 op0 = force_reg (op_mode, op0);
10686 if (CONSTANT_P (op1))
10688 int tmp = standard_80387_constant_p (op1);
10690 op1 = validize_mem (force_const_mem (op_mode, op1));
10694 op1 = force_reg (op_mode, op1);
10697 op1 = force_reg (op_mode, op1);
10701 /* Try to rearrange the comparison to make it cheaper. */
10702 if (ix86_fp_comparison_cost (code)
10703 > ix86_fp_comparison_cost (swap_condition (code))
10704 && (REG_P (op1) || !no_new_pseudos))
10707 tmp = op0, op0 = op1, op1 = tmp;
10708 code = swap_condition (code);
10710 op0 = force_reg (op_mode, op0);
10718 /* Convert the comparison codes we use to represent FP comparisons to the integer
10719 code that will result in a proper branch. Return UNKNOWN if no such code
10723 ix86_fp_compare_code_to_integer (enum rtx_code code)
10752 /* Generate insn patterns to do a floating point compare of OPERANDS. */
10755 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
10756 rtx *second_test, rtx *bypass_test)
10758 enum machine_mode fpcmp_mode, intcmp_mode;
10760 int cost = ix86_fp_comparison_cost (code);
10761 enum rtx_code bypass_code, first_code, second_code;
10763 fpcmp_mode = ix86_fp_compare_mode (code);
10764 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
10767 *second_test = NULL_RTX;
10769 *bypass_test = NULL_RTX;
10771 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10773 /* Do fcomi/sahf based test when profitable. */
10774 if ((TARGET_CMOVE || TARGET_SAHF)
10775 && (bypass_code == UNKNOWN || bypass_test)
10776 && (second_code == UNKNOWN || second_test)
10777 && ix86_fp_comparison_arithmetics_cost (code) > cost)
10781 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
10782 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
10788 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
10789 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
10791 scratch = gen_reg_rtx (HImode);
10792 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
10793 emit_insn (gen_x86_sahf_1 (scratch));
10796 /* The FP codes work out to act like unsigned. */
10797 intcmp_mode = fpcmp_mode;
10799 if (bypass_code != UNKNOWN)
10800 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
10801 gen_rtx_REG (intcmp_mode, FLAGS_REG),
10803 if (second_code != UNKNOWN)
10804 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
10805 gen_rtx_REG (intcmp_mode, FLAGS_REG),
10810 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
10811 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
10812 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
10814 scratch = gen_reg_rtx (HImode);
10815 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
10817 /* In the unordered case, we have to check C2 for NaN's, which
10818 doesn't happen to work out to anything nice combination-wise.
10819 So do some bit twiddling on the value we've got in AH to come
10820 up with an appropriate set of condition codes. */
10822 intcmp_mode = CCNOmode;
10827 if (code == GT || !TARGET_IEEE_FP)
10829 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
10834 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10835 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
10836 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
10837 intcmp_mode = CCmode;
10843 if (code == LT && TARGET_IEEE_FP)
10845 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10846 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
10847 intcmp_mode = CCmode;
10852 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
10858 if (code == GE || !TARGET_IEEE_FP)
10860 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
10865 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10866 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
10873 if (code == LE && TARGET_IEEE_FP)
10875 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10876 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
10877 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
10878 intcmp_mode = CCmode;
10883 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
10889 if (code == EQ && TARGET_IEEE_FP)
10891 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10892 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
10893 intcmp_mode = CCmode;
10898 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
10905 if (code == NE && TARGET_IEEE_FP)
10907 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10908 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
10914 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
10920 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
10924 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
10929 gcc_unreachable ();
10933 /* Return the test that should be put into the flags user, i.e.
10934 the bcc, scc, or cmov instruction. */
10935 return gen_rtx_fmt_ee (code, VOIDmode,
10936 gen_rtx_REG (intcmp_mode, FLAGS_REG),
10941 ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
10944 op0 = ix86_compare_op0;
10945 op1 = ix86_compare_op1;
10948 *second_test = NULL_RTX;
10950 *bypass_test = NULL_RTX;
10952 if (ix86_compare_emitted)
10954 ret = gen_rtx_fmt_ee (code, VOIDmode, ix86_compare_emitted, const0_rtx);
10955 ix86_compare_emitted = NULL_RTX;
10957 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
10958 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
10959 second_test, bypass_test);
10961 ret = ix86_expand_int_compare (code, op0, op1);
10966 /* Return true if the CODE will result in a nontrivial jump sequence. */
10968 ix86_fp_jump_nontrivial_p (enum rtx_code code)
10970 enum rtx_code bypass_code, first_code, second_code;
10973 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10974 return bypass_code != UNKNOWN || second_code != UNKNOWN;
10978 ix86_expand_branch (enum rtx_code code, rtx label)
10982 /* If we have emitted a compare insn, go straight to simple.
10983 ix86_expand_compare won't emit anything if ix86_compare_emitted
10985 if (ix86_compare_emitted)
10988 switch (GET_MODE (ix86_compare_op0))
10994 tmp = ix86_expand_compare (code, NULL, NULL);
10995 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
10996 gen_rtx_LABEL_REF (VOIDmode, label),
10998 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
11007 enum rtx_code bypass_code, first_code, second_code;
11009 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
11010 &ix86_compare_op1);
11012 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11014 /* Check whether we will use the natural sequence with one jump. If
11015 so, we can expand the jump early. Otherwise delay expansion by
11016 creating a compound insn so as not to confuse the optimizers. */
11017 if (bypass_code == UNKNOWN && second_code == UNKNOWN
11020 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
11021 gen_rtx_LABEL_REF (VOIDmode, label),
11022 pc_rtx, NULL_RTX, NULL_RTX);
11026 tmp = gen_rtx_fmt_ee (code, VOIDmode,
11027 ix86_compare_op0, ix86_compare_op1);
11028 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
11029 gen_rtx_LABEL_REF (VOIDmode, label),
11031 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
11033 use_fcomi = ix86_use_fcomi_compare (code);
11034 vec = rtvec_alloc (3 + !use_fcomi);
11035 RTVEC_ELT (vec, 0) = tmp;
11037 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
11039 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
11042 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
11044 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
11053 /* Expand DImode branch into multiple compare+branch. */
11055 rtx lo[2], hi[2], label2;
11056 enum rtx_code code1, code2, code3;
11057 enum machine_mode submode;
11059 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
11061 tmp = ix86_compare_op0;
11062 ix86_compare_op0 = ix86_compare_op1;
11063 ix86_compare_op1 = tmp;
11064 code = swap_condition (code);
11066 if (GET_MODE (ix86_compare_op0) == DImode)
11068 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
11069 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
11074 split_ti (&ix86_compare_op0, 1, lo+0, hi+0);
11075 split_ti (&ix86_compare_op1, 1, lo+1, hi+1);
11079 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
11080 avoid two branches. This costs one extra insn, so disable when
11081 optimizing for size. */
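/* For illustration (sketch): for a == b on DImode, hi0^hi1 and
   lo0^lo1 are both zero exactly when the respective halves match, so a
   single or + test of the combined value replaces two
   compare-and-branch pairs.  */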
11083 if ((code == EQ || code == NE)
11085 || hi[1] == const0_rtx || lo[1] == const0_rtx))
11090 if (hi[1] != const0_rtx)
11091 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
11092 NULL_RTX, 0, OPTAB_WIDEN);
11095 if (lo[1] != const0_rtx)
11096 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
11097 NULL_RTX, 0, OPTAB_WIDEN);
11099 tmp = expand_binop (submode, ior_optab, xor1, xor0,
11100 NULL_RTX, 0, OPTAB_WIDEN);
11102 ix86_compare_op0 = tmp;
11103 ix86_compare_op1 = const0_rtx;
11104 ix86_expand_branch (code, label);
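/* Illustrative sketch of the sequence built above (hypothetical
   registers, 32-bit DImode case): (a != b) becomes roughly

	xorl	hi(b), hi(a)
	xorl	lo(b), lo(a)
	orl	hi(a), lo(a)
	jne	label

   trading the second conditional jump for a single OR insn.  */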
11108 /* Otherwise, if we are doing a less-than or greater-than-or-equal
11109 comparison, op1 is a constant, and the low word is zero, then we can
11110 just examine the high word. */
11112 if (CONST_INT_P (hi[1]) && lo[1] == const0_rtx)
11115 case LT: case LTU: case GE: case GEU:
11116 ix86_compare_op0 = hi[0];
11117 ix86_compare_op1 = hi[1];
11118 ix86_expand_branch (code, label);
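/* Example: (unsigned long long) a < 0x300000000ULL has lo(op1) == 0,
   so the whole test reduces to the single-word compare hi(a) <u 3;
   the low word cannot change the result.  */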
11124 /* Otherwise, we need two or three jumps. */
11126 label2 = gen_label_rtx ();
11129 code2 = swap_condition (code);
11130 code3 = unsigned_condition (code);
11134 case LT: case GT: case LTU: case GTU:
11137 case LE: code1 = LT; code2 = GT; break;
11138 case GE: code1 = GT; code2 = LT; break;
11139 case LEU: code1 = LTU; code2 = GTU; break;
11140 case GEU: code1 = GTU; code2 = LTU; break;
11142 case EQ: code1 = UNKNOWN; code2 = NE; break;
11143 case NE: code2 = UNKNOWN; break;
11146 gcc_unreachable ();
11151 * if (hi(a) < hi(b)) goto true;
11152 * if (hi(a) > hi(b)) goto false;
11153 * if (lo(a) < lo(b)) goto true;
11157 ix86_compare_op0 = hi[0];
11158 ix86_compare_op1 = hi[1];
11160 if (code1 != UNKNOWN)
11161 ix86_expand_branch (code1, label);
11162 if (code2 != UNKNOWN)
11163 ix86_expand_branch (code2, label2);
11165 ix86_compare_op0 = lo[0];
11166 ix86_compare_op1 = lo[1];
11167 ix86_expand_branch (code3, label);
11169 if (code2 != UNKNOWN)
11170 emit_label (label2);
11175 gcc_unreachable ();
11179 /* Split branch based on floating point condition. */
11181 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
11182 rtx target1, rtx target2, rtx tmp, rtx pushed)
11184 rtx second, bypass;
11185 rtx label = NULL_RTX;
11187 int bypass_probability = -1, second_probability = -1, probability = -1;
11190 if (target2 != pc_rtx)
11193 code = reverse_condition_maybe_unordered (code);
11198 condition = ix86_expand_fp_compare (code, op1, op2,
11199 tmp, &second, &bypass);
11201 /* Remove pushed operand from stack. */
11203 ix86_free_from_memory (GET_MODE (pushed));
11205 if (split_branch_probability >= 0)
11207 /* Distribute the probabilities across the jumps.
11208 Assume that BYPASS and SECOND always test for UNORDERED. */
11210 probability = split_branch_probability;
11212 /* A value of 1 is low enough that the probability does not need
11213 to be updated. Later we may run some experiments and see
11214 whether unordered values are more frequent in practice. */
11216 bypass_probability = 1;
11218 second_probability = 1;
11220 if (bypass != NULL_RTX)
11222 label = gen_label_rtx ();
11223 i = emit_jump_insn (gen_rtx_SET
11225 gen_rtx_IF_THEN_ELSE (VOIDmode,
11227 gen_rtx_LABEL_REF (VOIDmode,
11230 if (bypass_probability >= 0)
11232 = gen_rtx_EXPR_LIST (REG_BR_PROB,
11233 GEN_INT (bypass_probability),
11236 i = emit_jump_insn (gen_rtx_SET
11238 gen_rtx_IF_THEN_ELSE (VOIDmode,
11239 condition, target1, target2)));
11240 if (probability >= 0)
11242 = gen_rtx_EXPR_LIST (REG_BR_PROB,
11243 GEN_INT (probability),
11245 if (second != NULL_RTX)
11247 i = emit_jump_insn (gen_rtx_SET
11249 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
11251 if (second_probability >= 0)
11253 = gen_rtx_EXPR_LIST (REG_BR_PROB,
11254 GEN_INT (second_probability),
11257 if (label != NULL_RTX)
11258 emit_label (label);
11262 ix86_expand_setcc (enum rtx_code code, rtx dest)
11264 rtx ret, tmp, tmpreg, equiv;
11265 rtx second_test, bypass_test;
11267 if (GET_MODE (ix86_compare_op0) == (TARGET_64BIT ? TImode : DImode))
11268 return 0; /* FAIL */
11270 gcc_assert (GET_MODE (dest) == QImode);
11272 ret = ix86_expand_compare (code, &second_test, &bypass_test);
11273 PUT_MODE (ret, QImode);
11278 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
11279 if (bypass_test || second_test)
11281 rtx test = second_test;
11283 rtx tmp2 = gen_reg_rtx (QImode);
11286 gcc_assert (!second_test);
11287 test = bypass_test;
11289 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
11291 PUT_MODE (test, QImode);
11292 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
11295 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
11297 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
11300 /* Attach a REG_EQUAL note describing the comparison result. */
11301 if (ix86_compare_op0 && ix86_compare_op1)
11303 equiv = simplify_gen_relational (code, QImode,
11304 GET_MODE (ix86_compare_op0),
11305 ix86_compare_op0, ix86_compare_op1);
11306 set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
11309 return 1; /* DONE */
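/* Roughly, for the two-test case above with TARGET_IEEE_FP, (a == b)
   on floats needs both "equal" and "ordered", so the expansion looks
   like (hypothetical registers):

	ucomiss	%xmm1, %xmm0
	sete	%al
	setnp	%cl
	andb	%cl, %al

   with an OR instead of the AND when a second (rather than bypass)
   test is produced.  */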
11312 /* Expand a comparison setting or clearing the carry flag. Return true when
11313 successful and set *POP to the comparison operation. */
11315 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
11317 enum machine_mode mode =
11318 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
11320 /* Do not handle DImode compares that go through the special path. Also, we
11321 can't deal with FP compares yet; it would be possible to add them. */
11322 if (mode == (TARGET_64BIT ? TImode : DImode))
11324 if (FLOAT_MODE_P (mode))
11326 rtx second_test = NULL, bypass_test = NULL;
11327 rtx compare_op, compare_seq;
11329 /* Shortcut: the following common codes never translate into carry-flag compares. */
11330 if (code == EQ || code == NE || code == UNEQ || code == LTGT
11331 || code == ORDERED || code == UNORDERED)
11334 /* These comparisons require the zero flag; swap the operands so they won't. */
11335 if ((code == GT || code == UNLE || code == LE || code == UNGT)
11336 && !TARGET_IEEE_FP)
11341 code = swap_condition (code);
11344 /* Try to expand the comparison and verify that we end up with a carry-flag
11345 based comparison. This fails to be true only when we decide to expand the
11346 comparison using arithmetic, which is not a common scenario. */
11348 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
11349 &second_test, &bypass_test);
11350 compare_seq = get_insns ();
11353 if (second_test || bypass_test)
11355 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
11356 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
11357 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
11359 code = GET_CODE (compare_op);
11360 if (code != LTU && code != GEU)
11362 emit_insn (compare_seq);
11366 if (!INTEGRAL_MODE_P (mode))
11374 /* Convert a==0 into (unsigned)a<1. */
11377 if (op1 != const0_rtx)
11380 code = (code == EQ ? LTU : GEU);
11383 /* Convert a>b into b<a or a>=b+1. */
11386 if (CONST_INT_P (op1))
11388 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
11389 /* Bail out on overflow. We can still swap operands, but that
11390 would force loading of the constant into a register. */
11391 if (op1 == const0_rtx
11392 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
11394 code = (code == GTU ? GEU : LTU);
11401 code = (code == GTU ? LTU : GEU);
11405 /* Convert a>=0 into (unsigned)a<0x80000000. */
11408 if (mode == DImode || op1 != const0_rtx)
11410 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
11411 code = (code == LT ? GEU : LTU);
11415 if (mode == DImode || op1 != constm1_rtx)
11417 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
11418 code = (code == LE ? GEU : LTU);
11424 /* Swapping operands may cause the constant to appear as the first operand. */
11425 if (!nonimmediate_operand (op0, VOIDmode))
11427 if (no_new_pseudos)
11429 op0 = force_reg (mode, op0);
11431 ix86_compare_op0 = op0;
11432 ix86_compare_op1 = op1;
11433 *pop = ix86_expand_compare (code, NULL, NULL);
11434 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
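/* Illustrating the conversions above (values unsigned where noted):
     a == 0      becomes  a <u 1           (LTU: carry set iff a == 0)
     a >u 5      becomes  a >=u 6          (GEU)
     a >=s 0     becomes  a <u 0x80000000  (LTU)
   so every accepted comparison ends up as LTU or GEU, i.e. a pure
   carry-flag test that sbb/adc can consume.  */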
11439 ix86_expand_int_movcc (rtx operands[])
11441 enum rtx_code code = GET_CODE (operands[1]), compare_code;
11442 rtx compare_seq, compare_op;
11443 rtx second_test, bypass_test;
11444 enum machine_mode mode = GET_MODE (operands[0]);
11445 bool sign_bit_compare_p = false;
11448 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
11449 compare_seq = get_insns ();
11452 compare_code = GET_CODE (compare_op);
11454 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
11455 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
11456 sign_bit_compare_p = true;
11458 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
11459 HImode insns, we'd be swallowed in word prefix ops. */
11461 if ((mode != HImode || TARGET_FAST_PREFIX)
11462 && (mode != (TARGET_64BIT ? TImode : DImode))
11463 && CONST_INT_P (operands[2])
11464 && CONST_INT_P (operands[3]))
11466 rtx out = operands[0];
11467 HOST_WIDE_INT ct = INTVAL (operands[2]);
11468 HOST_WIDE_INT cf = INTVAL (operands[3]);
11469 HOST_WIDE_INT diff;
11472 /* Sign bit compares are better done using shifts than by using sbb. */
11474 if (sign_bit_compare_p
11475 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
11476 ix86_compare_op1, &compare_op))
11478 /* Detect overlap between destination and compare sources. */
11481 if (!sign_bit_compare_p)
11483 bool fpcmp = false;
11485 compare_code = GET_CODE (compare_op);
11487 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
11488 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
11491 compare_code = ix86_fp_compare_code_to_integer (compare_code);
11494 /* To simplify rest of code, restrict to the GEU case. */
11495 if (compare_code == LTU)
11497 HOST_WIDE_INT tmp = ct;
11500 compare_code = reverse_condition (compare_code);
11501 code = reverse_condition (code);
11506 PUT_CODE (compare_op,
11507 reverse_condition_maybe_unordered
11508 (GET_CODE (compare_op)));
11510 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
11514 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
11515 || reg_overlap_mentioned_p (out, ix86_compare_op1))
11516 tmp = gen_reg_rtx (mode);
11518 if (mode == DImode)
11519 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
11521 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
11525 if (code == GT || code == GE)
11526 code = reverse_condition (code);
11529 HOST_WIDE_INT tmp = ct;
11534 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
11535 ix86_compare_op1, VOIDmode, 0, -1);
11548 tmp = expand_simple_binop (mode, PLUS,
11550 copy_rtx (tmp), 1, OPTAB_DIRECT);
11561 tmp = expand_simple_binop (mode, IOR,
11563 copy_rtx (tmp), 1, OPTAB_DIRECT);
11565 else if (diff == -1 && ct)
11575 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
11577 tmp = expand_simple_binop (mode, PLUS,
11578 copy_rtx (tmp), GEN_INT (cf),
11579 copy_rtx (tmp), 1, OPTAB_DIRECT);
11587 * andl cf - ct, dest
11597 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
11600 tmp = expand_simple_binop (mode, AND,
11602 gen_int_mode (cf - ct, mode),
11603 copy_rtx (tmp), 1, OPTAB_DIRECT);
11605 tmp = expand_simple_binop (mode, PLUS,
11606 copy_rtx (tmp), GEN_INT (ct),
11607 copy_rtx (tmp), 1, OPTAB_DIRECT);
11610 if (!rtx_equal_p (tmp, out))
11611 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
11613 return 1; /* DONE */
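	  /* A concrete instance of the sbb idiom built above, with
	     hypothetical registers: dest = (a <u b) ? ct : cf, where
	     diff = ct - cf, is roughly

		cmpl	%ebx, %eax	; carry set iff a <u b
		sbbl	%edx, %edx	; %edx = -1 if carry, else 0
		andl	$diff, %edx
		addl	$cf, %edx

	     so the conditional move needs no branch at all.  */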
11619 tmp = ct, ct = cf, cf = tmp;
11621 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
11623 /* We may be reversing an unordered compare to a normal compare, which
11624 is not valid in general (we may convert a non-trapping condition
11625 to a trapping one); however, on i386 we currently emit all
11626 comparisons unordered. */
11627 compare_code = reverse_condition_maybe_unordered (compare_code);
11628 code = reverse_condition_maybe_unordered (code);
11632 compare_code = reverse_condition (compare_code);
11633 code = reverse_condition (code);
11637 compare_code = UNKNOWN;
11638 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
11639 && CONST_INT_P (ix86_compare_op1))
11641 if (ix86_compare_op1 == const0_rtx
11642 && (code == LT || code == GE))
11643 compare_code = code;
11644 else if (ix86_compare_op1 == constm1_rtx)
11648 else if (code == GT)
11653 /* Optimize dest = (op0 < 0) ? -1 : cf. */
11654 if (compare_code != UNKNOWN
11655 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
11656 && (cf == -1 || ct == -1))
11658 /* If lea code below could be used, only optimize
11659 if it results in a 2 insn sequence. */
11661 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
11662 || diff == 3 || diff == 5 || diff == 9)
11663 || (compare_code == LT && ct == -1)
11664 || (compare_code == GE && cf == -1))
11667 * notl op1 (if necessary)
11675 code = reverse_condition (code);
11678 out = emit_store_flag (out, code, ix86_compare_op0,
11679 ix86_compare_op1, VOIDmode, 0, -1);
11681 out = expand_simple_binop (mode, IOR,
11683 out, 1, OPTAB_DIRECT);
11684 if (out != operands[0])
11685 emit_move_insn (operands[0], out);
11687 return 1; /* DONE */
11692 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
11693 || diff == 3 || diff == 5 || diff == 9)
11694 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
11696 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
11702 * lea cf(dest*(ct-cf)),dest
11706 * This also catches the degenerate setcc-only case.
11712 out = emit_store_flag (out, code, ix86_compare_op0,
11713 ix86_compare_op1, VOIDmode, 0, 1);
11716 /* On x86_64 the lea instruction operates on Pmode, so we need
11717 to get the arithmetic done in the proper mode to match. */
11719 tmp = copy_rtx (out);
11723 out1 = copy_rtx (out);
11724 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
11728 tmp = gen_rtx_PLUS (mode, tmp, out1);
11734 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
11737 if (!rtx_equal_p (tmp, out))
11740 out = force_operand (tmp, copy_rtx (out));
11742 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
11744 if (!rtx_equal_p (out, operands[0]))
11745 emit_move_insn (operands[0], copy_rtx (out));
11747 return 1; /* DONE */
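	  /* Illustration of the lea form above: for ct = 15, cf = 10
	     (diff = 5) the expansion is roughly

		xorl	%eax, %eax
		setcc	%al			; %eax = 0 or 1
		leal	10(%eax,%eax,4), %eax	; 10 + 5 * %eax

	     giving 10 or 15 with a single lea for the multiply-and-add.  */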
11751 * General case: Jumpful:
11752 * xorl dest,dest cmpl op1, op2
11753 * cmpl op1, op2 movl ct, dest
11754 * setcc dest jcc 1f
11755 * decl dest movl cf, dest
11756 * andl (cf-ct),dest 1:
11759 * Size 20. Size 14.
11761 * This is reasonably steep, but branch mispredict costs are
11762 * high on modern CPUs, so consider failing only if optimizing for size.
11766 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
11767 && BRANCH_COST >= 2)
11773 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
11774 /* We may be reversing an unordered compare to a normal compare,
11775 which is not valid in general (we may convert a non-trapping
11776 condition to a trapping one); however, on i386 we currently
11777 emit all comparisons unordered. */
11778 code = reverse_condition_maybe_unordered (code);
11781 code = reverse_condition (code);
11782 if (compare_code != UNKNOWN)
11783 compare_code = reverse_condition (compare_code);
11787 if (compare_code != UNKNOWN)
11789 /* notl op1 (if needed)
11794 For x < 0 (resp. x <= -1) there will be no notl,
11795 so if possible swap the constants to get rid of the complement.
11797 True/false will be -1/0 while code below (store flag
11798 followed by decrement) is 0/-1, so the constants need
11799 to be exchanged once more. */
11801 if (compare_code == GE || !cf)
11803 code = reverse_condition (code);
11808 HOST_WIDE_INT tmp = cf;
11813 out = emit_store_flag (out, code, ix86_compare_op0,
11814 ix86_compare_op1, VOIDmode, 0, -1);
11818 out = emit_store_flag (out, code, ix86_compare_op0,
11819 ix86_compare_op1, VOIDmode, 0, 1);
11821 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
11822 copy_rtx (out), 1, OPTAB_DIRECT);
11825 out = expand_simple_binop (mode, AND, copy_rtx (out),
11826 gen_int_mode (cf - ct, mode),
11827 copy_rtx (out), 1, OPTAB_DIRECT);
11829 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
11830 copy_rtx (out), 1, OPTAB_DIRECT);
11831 if (!rtx_equal_p (out, operands[0]))
11832 emit_move_insn (operands[0], copy_rtx (out));
11834 return 1; /* DONE */
11838 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
11840 /* Try a few more things with specific constants and a variable. */
11843 rtx var, orig_out, out, tmp;
11845 if (BRANCH_COST <= 2)
11846 return 0; /* FAIL */
11848 /* If one of the two operands is an interesting constant, load a
11849 constant with the above and mask it in with a logical operation. */
11851 if (CONST_INT_P (operands[2]))
11854 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
11855 operands[3] = constm1_rtx, op = and_optab;
11856 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
11857 operands[3] = const0_rtx, op = ior_optab;
11859 return 0; /* FAIL */
11861 else if (CONST_INT_P (operands[3]))
11864 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
11865 operands[2] = constm1_rtx, op = and_optab;
11866 else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
11867 operands[2] = const0_rtx, op = ior_optab;
11869 return 0; /* FAIL */
11872 return 0; /* FAIL */
11874 orig_out = operands[0];
11875 tmp = gen_reg_rtx (mode);
11878 /* Recurse to get the constant loaded. */
11879 if (ix86_expand_int_movcc (operands) == 0)
11880 return 0; /* FAIL */
11882 /* Mask in the interesting variable. */
11883 out = expand_binop (mode, op, var, tmp, orig_out, 0,
11885 if (!rtx_equal_p (out, orig_out))
11886 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
11888 return 1; /* DONE */
11892 * For comparison with above,
11902 if (! nonimmediate_operand (operands[2], mode))
11903 operands[2] = force_reg (mode, operands[2]);
11904 if (! nonimmediate_operand (operands[3], mode))
11905 operands[3] = force_reg (mode, operands[3]);
11907 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
11909 rtx tmp = gen_reg_rtx (mode);
11910 emit_move_insn (tmp, operands[3]);
11913 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
11915 rtx tmp = gen_reg_rtx (mode);
11916 emit_move_insn (tmp, operands[2]);
11920 if (! register_operand (operands[2], VOIDmode)
11922 || ! register_operand (operands[3], VOIDmode)))
11923 operands[2] = force_reg (mode, operands[2]);
11926 && ! register_operand (operands[3], VOIDmode))
11927 operands[3] = force_reg (mode, operands[3]);
11929 emit_insn (compare_seq);
11930 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
11931 gen_rtx_IF_THEN_ELSE (mode,
11932 compare_op, operands[2],
11935 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
11936 gen_rtx_IF_THEN_ELSE (mode,
11938 copy_rtx (operands[3]),
11939 copy_rtx (operands[0]))));
11941 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
11942 gen_rtx_IF_THEN_ELSE (mode,
11944 copy_rtx (operands[2]),
11945 copy_rtx (operands[0]))));
11947 return 1; /* DONE */
11950 /* Swap, force into registers, or otherwise massage the two operands
11951 to an sse comparison with a mask result. Thus we differ a bit from
11952 ix86_prepare_fp_compare_args which expects to produce a flags result.
11954 The DEST operand exists to help determine whether to commute commutative
11955 operators. The POP0/POP1 operands are updated in place. The new
11956 comparison code is returned, or UNKNOWN if not implementable. */
11958 static enum rtx_code
11959 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
11960 rtx *pop0, rtx *pop1)
11968 /* We have no LTGT as an operator. We could implement it with
11969 NE & ORDERED, but this requires an extra temporary. It's
11970 not clear that it's worth it. */
11977 /* These are supported directly. */
11984 /* For commutative operators, try to canonicalize the destination
11985 operand to be first in the comparison - this helps reload to
11986 avoid extra moves. */
11987 if (!dest || !rtx_equal_p (dest, *pop1))
11995 /* These are not supported directly. Swap the comparison operands
11996 to transform into something that is supported. */
12000 code = swap_condition (code);
12004 gcc_unreachable ();
12010 /* Detect conditional moves that exactly match min/max operational
12011 semantics. Note that this is IEEE safe, as long as we don't
12012 interchange the operands.
12014 Returns FALSE if this conditional move doesn't match a MIN/MAX,
12015 and TRUE if the operation is successful and instructions are emitted. */
12018 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
12019 rtx cmp_op1, rtx if_true, rtx if_false)
12021 enum machine_mode mode;
12027 else if (code == UNGE)
12030 if_true = if_false;
12036 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
12038 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
12043 mode = GET_MODE (dest);
12045 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
12046 but MODE may be a vector mode and thus not appropriate. */
12047 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
12049 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
12052 if_true = force_reg (mode, if_true);
12053 v = gen_rtvec (2, if_true, if_false);
12054 tmp = gen_rtx_UNSPEC (mode, v, u);
12058 code = is_min ? SMIN : SMAX;
12059 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
12062 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
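/* E.g. dest = (a < b) ? a : b maps directly onto minss/minsd (or the
   UNSPEC variant above when IEEE semantics must be preserved).  The
   hardware min/max is not commutative: when the operands compare
   unordered, or are -0.0 and +0.0, it resolves the tie by operand
   position, which is why the operands must never be interchanged.  */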
12066 /* Expand an sse vector comparison. Return the register with the result. */
12069 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
12070 rtx op_true, rtx op_false)
12072 enum machine_mode mode = GET_MODE (dest);
12075 cmp_op0 = force_reg (mode, cmp_op0);
12076 if (!nonimmediate_operand (cmp_op1, mode))
12077 cmp_op1 = force_reg (mode, cmp_op1);
12080 || reg_overlap_mentioned_p (dest, op_true)
12081 || reg_overlap_mentioned_p (dest, op_false))
12082 dest = gen_reg_rtx (mode);
12084 x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
12085 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
12090 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
12091 operations. This is used for both scalar and vector conditional moves. */
12094 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
12096 enum machine_mode mode = GET_MODE (dest);
12099 if (op_false == CONST0_RTX (mode))
12101 op_true = force_reg (mode, op_true);
12102 x = gen_rtx_AND (mode, cmp, op_true);
12103 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
12105 else if (op_true == CONST0_RTX (mode))
12107 op_false = force_reg (mode, op_false);
12108 x = gen_rtx_NOT (mode, cmp);
12109 x = gen_rtx_AND (mode, x, op_false);
12110 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
12114 op_true = force_reg (mode, op_true);
12115 op_false = force_reg (mode, op_false);
12117 t2 = gen_reg_rtx (mode);
12119 t3 = gen_reg_rtx (mode);
12123 x = gen_rtx_AND (mode, op_true, cmp);
12124 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
12126 x = gen_rtx_NOT (mode, cmp);
12127 x = gen_rtx_AND (mode, x, op_false);
12128 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
12130 x = gen_rtx_IOR (mode, t3, t2);
12131 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
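  /* The general case above is the classic mask blend
         dest = (cmp & op_true) | (~cmp & op_false)
     which on SSE2 integer masks comes out roughly as pand, pandn
     and por.  */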
12135 /* Expand a floating-point conditional move. Return true if successful. */
12138 ix86_expand_fp_movcc (rtx operands[])
12140 enum machine_mode mode = GET_MODE (operands[0]);
12141 enum rtx_code code = GET_CODE (operands[1]);
12142 rtx tmp, compare_op, second_test, bypass_test;
12144 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
12146 enum machine_mode cmode;
12148 /* Since we have no cmove for SSE registers, don't force bad register
12149 allocation just to gain access to it. Deny movcc when the
12150 comparison mode doesn't match the move mode. */
12151 cmode = GET_MODE (ix86_compare_op0);
12152 if (cmode == VOIDmode)
12153 cmode = GET_MODE (ix86_compare_op1);
12157 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
12159 &ix86_compare_op1);
12160 if (code == UNKNOWN)
12163 if (ix86_expand_sse_fp_minmax (operands[0], code, ix86_compare_op0,
12164 ix86_compare_op1, operands[2],
12168 tmp = ix86_expand_sse_cmp (operands[0], code, ix86_compare_op0,
12169 ix86_compare_op1, operands[2], operands[3]);
12170 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
12174 /* The floating point conditional move instructions don't directly
12175 support conditions resulting from a signed integer comparison. */
12177 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
12182 if (!fcmov_comparison_operator (compare_op, VOIDmode))
12184 gcc_assert (!second_test && !bypass_test);
12185 tmp = gen_reg_rtx (QImode);
12186 ix86_expand_setcc (code, tmp);
12188 ix86_compare_op0 = tmp;
12189 ix86_compare_op1 = const0_rtx;
12190 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
12192 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
12194 tmp = gen_reg_rtx (mode);
12195 emit_move_insn (tmp, operands[3]);
12198 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
12200 tmp = gen_reg_rtx (mode);
12201 emit_move_insn (tmp, operands[2]);
12205 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
12206 gen_rtx_IF_THEN_ELSE (mode, compare_op,
12207 operands[2], operands[3])));
12209 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
12210 gen_rtx_IF_THEN_ELSE (mode, bypass_test,
12211 operands[3], operands[0])));
12213 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
12214 gen_rtx_IF_THEN_ELSE (mode, second_test,
12215 operands[2], operands[0])));
12220 /* Expand a floating-point vector conditional move; a vcond operation
12221 rather than a movcc operation. */
12224 ix86_expand_fp_vcond (rtx operands[])
12226 enum rtx_code code = GET_CODE (operands[3]);
12229 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
12230 &operands[4], &operands[5]);
12231 if (code == UNKNOWN)
12234 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
12235 operands[5], operands[1], operands[2]))
12238 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
12239 operands[1], operands[2]);
12240 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
12244 /* Expand a signed integral vector conditional move. */
12247 ix86_expand_int_vcond (rtx operands[])
12249 enum machine_mode mode = GET_MODE (operands[0]);
12250 enum rtx_code code = GET_CODE (operands[3]);
12251 bool negate = false;
12254 cop0 = operands[4];
12255 cop1 = operands[5];
12257 /* Canonicalize the comparison to EQ, GT, GTU. */
12268 code = reverse_condition (code);
12274 code = reverse_condition (code);
12280 code = swap_condition (code);
12281 x = cop0, cop0 = cop1, cop1 = x;
12285 gcc_unreachable ();
12288 /* Unsigned parallel compare is not supported by the hardware. Play some
12289 tricks to turn this into a signed comparison against 0. */
12292 cop0 = force_reg (mode, cop0);
12300 /* Perform a parallel modulo subtraction. */
12301 t1 = gen_reg_rtx (mode);
12302 emit_insn (gen_subv4si3 (t1, cop0, cop1));
12304 /* Extract the original sign bit of op0. */
12305 mask = GEN_INT (-0x80000000);
12306 mask = gen_rtx_CONST_VECTOR (mode,
12307 gen_rtvec (4, mask, mask, mask, mask));
12308 mask = force_reg (mode, mask);
12309 t2 = gen_reg_rtx (mode);
12310 emit_insn (gen_andv4si3 (t2, cop0, mask));
12312 /* XOR it back into the result of the subtraction. This results
12313 in the sign bit set iff we saw unsigned underflow. */
12314 x = gen_reg_rtx (mode);
12315 emit_insn (gen_xorv4si3 (x, t1, t2));
12323 /* Perform a parallel unsigned saturating subtraction. */
12324 x = gen_reg_rtx (mode);
12325 emit_insn (gen_rtx_SET (VOIDmode, x,
12326 gen_rtx_US_MINUS (mode, cop0, cop1)));
12333 gcc_unreachable ();
12337 cop1 = CONST0_RTX (mode);
12340 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
12341 operands[1+negate], operands[2-negate]);
12343 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
12344 operands[2-negate]);
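/* For the V16QI/V8HI path above, the identity used is
   a >u b  iff  (a -us b) != 0: unsigned saturating subtraction clamps
   to zero exactly when a <= b.  E.g. for byte elements a = 7, b = 9 we
   get 7 -us 9 = 0, so a >u b is false and the EQ test (with the arms
   swapped via 'negate') selects the false operand.  */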
12348 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
12349 true if we should do zero extension, else sign extension. HIGH_P is
12350 true if we want the N/2 high elements, else the low elements. */
12353 ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
12355 enum machine_mode imode = GET_MODE (operands[1]);
12356 rtx (*unpack)(rtx, rtx, rtx);
12363 unpack = gen_vec_interleave_highv16qi;
12365 unpack = gen_vec_interleave_lowv16qi;
12369 unpack = gen_vec_interleave_highv8hi;
12371 unpack = gen_vec_interleave_lowv8hi;
12375 unpack = gen_vec_interleave_highv4si;
12377 unpack = gen_vec_interleave_lowv4si;
12380 gcc_unreachable ();
12383 dest = gen_lowpart (imode, operands[0]);
12386 se = force_reg (imode, CONST0_RTX (imode));
12388 se = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
12389 operands[1], pc_rtx, pc_rtx);
12391 emit_insn (unpack (dest, operands[1], se));
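/* For example, sign-extending the low half of a V8HImode vector to
   V4SImode first computes a per-element sign mask (0 > x yields
   0xffff for negative elements) and then interleaves each element
   with its mask, so punpcklwd effectively widens with the element's
   own sign bits; zero extension interleaves with a zero vector
   instead.  */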
12394 /* Expand a conditional increment or decrement using adc/sbb instructions.
12395 The default case using setcc followed by the conditional move can be
12396 done by generic code. */
12398 ix86_expand_int_addcc (rtx operands[])
12400 enum rtx_code code = GET_CODE (operands[1]);
12402 rtx val = const0_rtx;
12403 bool fpcmp = false;
12404 enum machine_mode mode = GET_MODE (operands[0]);
12406 if (operands[3] != const1_rtx
12407 && operands[3] != constm1_rtx)
12409 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
12410 ix86_compare_op1, &compare_op))
12412 code = GET_CODE (compare_op);
12414 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
12415 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
12418 code = ix86_fp_compare_code_to_integer (code);
12425 PUT_CODE (compare_op,
12426 reverse_condition_maybe_unordered
12427 (GET_CODE (compare_op)));
12429 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
12431 PUT_MODE (compare_op, mode);
12433 /* Construct either adc or sbb insn. */
12434 if ((code == LTU) == (operands[3] == constm1_rtx))
12436 switch (GET_MODE (operands[0]))
12439 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
12442 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
12445 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
12448 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
12451 gcc_unreachable ();
12456 switch (GET_MODE (operands[0]))
12459 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
12462 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
12465 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
12468 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
12471 gcc_unreachable ();
12474 return 1; /* DONE */
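/* Example with hypothetical registers: x += (a <u b) becomes

	cmpl	%ebx, %eax	; carry set iff a <u b
	adcl	$0, %ecx	; x += carry

   and x -= (a <u b) uses sbbl $0, %ecx instead.  */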
12478 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
12479 works for floating-point parameters and non-offsettable memories.
12480 For pushes, it returns just stack offsets; the values will be saved
12481 in the right order. At most three parts are generated. */
12484 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
12489 size = mode == XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
12491 size = (GET_MODE_SIZE (mode) + 4) / 8;
12493 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
12494 gcc_assert (size >= 2 && size <= 3);
12496 /* Optimize constant pool reference to immediates. This is used by fp
12497 moves, which force all constants to memory to allow combining. */
12498 if (MEM_P (operand) && MEM_READONLY_P (operand))
12500 rtx tmp = maybe_get_pool_constant (operand);
12505 if (MEM_P (operand) && !offsettable_memref_p (operand))
12507 /* The only non-offsettable memories we handle are pushes. */
12508 int ok = push_operand (operand, VOIDmode);
12512 operand = copy_rtx (operand);
12513 PUT_MODE (operand, Pmode);
12514 parts[0] = parts[1] = parts[2] = operand;
12518 if (GET_CODE (operand) == CONST_VECTOR)
12520 enum machine_mode imode = int_mode_for_mode (mode);
12521 /* Caution: if we looked through a constant pool memory above,
12522 the operand may actually have a different mode now. That's
12523 ok, since we want to pun this all the way back to an integer. */
12524 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
12525 gcc_assert (operand != NULL);
12531 if (mode == DImode)
12532 split_di (&operand, 1, &parts[0], &parts[1]);
12535 if (REG_P (operand))
12537 gcc_assert (reload_completed);
12538 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
12539 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
12541 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
12543 else if (offsettable_memref_p (operand))
12545 operand = adjust_address (operand, SImode, 0);
12546 parts[0] = operand;
12547 parts[1] = adjust_address (operand, SImode, 4);
12549 parts[2] = adjust_address (operand, SImode, 8);
12551 else if (GET_CODE (operand) == CONST_DOUBLE)
12556 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
12560 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
12561 parts[2] = gen_int_mode (l[2], SImode);
12564 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
12567 gcc_unreachable ();
12569 parts[1] = gen_int_mode (l[1], SImode);
12570 parts[0] = gen_int_mode (l[0], SImode);
12573 gcc_unreachable ();
12578 if (mode == TImode)
12579 split_ti (&operand, 1, &parts[0], &parts[1]);
12580 if (mode == XFmode || mode == TFmode)
12582 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
12583 if (REG_P (operand))
12585 gcc_assert (reload_completed);
12586 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
12587 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
12589 else if (offsettable_memref_p (operand))
12591 operand = adjust_address (operand, DImode, 0);
12592 parts[0] = operand;
12593 parts[1] = adjust_address (operand, upper_mode, 8);
12595 else if (GET_CODE (operand) == CONST_DOUBLE)
12600 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
12601 real_to_target (l, &r, mode);
12603 /* Do not use a shift by 32, to avoid a warning on 32-bit systems. */
12604 if (HOST_BITS_PER_WIDE_INT >= 64)
12607 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
12608 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
12611 parts[0] = immed_double_const (l[0], l[1], DImode);
12613 if (upper_mode == SImode)
12614 parts[1] = gen_int_mode (l[2], SImode);
12615 else if (HOST_BITS_PER_WIDE_INT >= 64)
12618 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
12619 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
12622 parts[1] = immed_double_const (l[2], l[3], DImode);
12625 gcc_unreachable ();
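/* E.g. an XFmode value on a 32-bit target splits into three SImode
   parts (size 3); on a 64-bit target XFmode splits into a DImode part
   plus an SImode upper part, and TFmode into two DImode parts.  A
   DImode value on 32-bit splits into two SImode words, just as
   split_di would do.  */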
12632 /* Emit insns to perform a move or push of DI, DF, and XF values.
12633 Return false when normal moves are needed; true when all required
12634 insns have been emitted. Operands 2-4 contain the input values
12635 in the correct order; operands 5-7 contain the output values. */
12638 ix86_split_long_move (rtx operands[])
12643 int collisions = 0;
12644 enum machine_mode mode = GET_MODE (operands[0]);
12646 /* The DFmode expanders may ask us to move a double.
12647 For a 64-bit target this is a single move. By hiding the fact
12648 here we simplify the i386.md splitters. */
12649 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
12651 /* Optimize constant pool reference to immediates. This is used by
12652 fp moves, which force all constants to memory to allow combining. */
12654 if (MEM_P (operands[1])
12655 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
12656 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
12657 operands[1] = get_pool_constant (XEXP (operands[1], 0));
12658 if (push_operand (operands[0], VOIDmode))
12660 operands[0] = copy_rtx (operands[0]);
12661 PUT_MODE (operands[0], Pmode);
12664 operands[0] = gen_lowpart (DImode, operands[0]);
12665 operands[1] = gen_lowpart (DImode, operands[1]);
12666 emit_move_insn (operands[0], operands[1]);
12670 /* The only non-offsettable memory we handle is push. */
12671 if (push_operand (operands[0], VOIDmode))
12674 gcc_assert (!MEM_P (operands[0])
12675 || offsettable_memref_p (operands[0]));
12677 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
12678 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
12680 /* When emitting a push, watch out for source operands on the stack. */
12681 if (push && MEM_P (operands[1])
12682 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
12685 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
12686 XEXP (part[1][2], 0));
12687 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
12688 XEXP (part[1][1], 0));
12691 /* We need to do copy in the right order in case an address register
12692 of the source overlaps the destination. */
12693 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
12695 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
12697 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
12700 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
12703 /* Collision in the middle part can be handled by reordering. */
12704 if (collisions == 1 && nparts == 3
12705 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
12708 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
12709 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
12712 /* If there are more collisions, we can't handle it by reordering.
12713 Do an lea to the last part and use only one colliding move. */
12714 else if (collisions > 1)
12720 base = part[0][nparts - 1];
12722 /* Handle the case when the last part isn't valid for lea.
12723 Happens in 64-bit mode storing the 12-byte XFmode. */
12724 if (GET_MODE (base) != Pmode)
12725 base = gen_rtx_REG (Pmode, REGNO (base));
12727 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
12728 part[1][0] = replace_equiv_address (part[1][0], base);
12729 part[1][1] = replace_equiv_address (part[1][1],
12730 plus_constant (base, UNITS_PER_WORD));
12732 part[1][2] = replace_equiv_address (part[1][2],
12733 plus_constant (base, 8));
12743 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
12744 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
12745 emit_move_insn (part[0][2], part[1][2]);
12750 /* In 64-bit mode we don't have a 32-bit push available. If this is a
12751 register, that is OK: we just use the larger counterpart. We also
12752 retype the memory references; these come from an attempt to avoid the
12753 REX prefix when moving the second half of a TFmode value. */
12754 if (GET_MODE (part[1][1]) == SImode)
12756 switch (GET_CODE (part[1][1]))
12759 part[1][1] = adjust_address (part[1][1], DImode, 0);
12763 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
12767 gcc_unreachable ();
12770 if (GET_MODE (part[1][0]) == SImode)
12771 part[1][0] = part[1][1];
12774 emit_move_insn (part[0][1], part[1][1]);
12775 emit_move_insn (part[0][0], part[1][0]);
12779 /* Choose the correct order so as not to overwrite the source before it is copied. */
12780 if ((REG_P (part[0][0])
12781 && REG_P (part[1][1])
12782 && (REGNO (part[0][0]) == REGNO (part[1][1])
12784 && REGNO (part[0][0]) == REGNO (part[1][2]))))
12786 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
12790 operands[2] = part[0][2];
12791 operands[3] = part[0][1];
12792 operands[4] = part[0][0];
12793 operands[5] = part[1][2];
12794 operands[6] = part[1][1];
12795 operands[7] = part[1][0];
12799 operands[2] = part[0][1];
12800 operands[3] = part[0][0];
12801 operands[5] = part[1][1];
12802 operands[6] = part[1][0];
12809 operands[2] = part[0][0];
12810 operands[3] = part[0][1];
12811 operands[4] = part[0][2];
12812 operands[5] = part[1][0];
12813 operands[6] = part[1][1];
12814 operands[7] = part[1][2];
12818 operands[2] = part[0][0];
12819 operands[3] = part[0][1];
12820 operands[5] = part[1][0];
12821 operands[6] = part[1][1];
12825 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
12828 if (CONST_INT_P (operands[5])
12829 && operands[5] != const0_rtx
12830 && REG_P (operands[2]))
12832 if (CONST_INT_P (operands[6])
12833 && INTVAL (operands[6]) == INTVAL (operands[5]))
12834 operands[6] = operands[2];
12837 && CONST_INT_P (operands[7])
12838 && INTVAL (operands[7]) == INTVAL (operands[5]))
12839 operands[7] = operands[2];
12843 && CONST_INT_P (operands[6])
12844 && operands[6] != const0_rtx
12845 && REG_P (operands[3])
12846 && CONST_INT_P (operands[7])
12847 && INTVAL (operands[7]) == INTVAL (operands[6]))
12848 operands[7] = operands[3];
12851 emit_move_insn (operands[2], operands[5]);
12852 emit_move_insn (operands[3], operands[6]);
12854 emit_move_insn (operands[4], operands[7]);
12859 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
12860 left shift by a constant, either using a single shift or
12861 a sequence of add instructions. */
12864 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
12868 emit_insn ((mode == DImode
12870 : gen_adddi3) (operand, operand, operand));
12872 else if (!optimize_size
12873 && count * ix86_cost->add <= ix86_cost->shift_const)
12876 for (i=0; i<count; i++)
12878 emit_insn ((mode == DImode
12880 : gen_adddi3) (operand, operand, operand));
12884 emit_insn ((mode == DImode
12886 : gen_ashldi3) (operand, operand, GEN_INT (count)));
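/* E.g. a constant shift by 2 becomes two add insns (add reg,reg
   doubles the value) when 2 * ix86_cost->add does not exceed
   ix86_cost->shift_const and we are not optimizing for size;
   otherwise a single shl is emitted.  A shift by 1 is always a
   single add.  */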
12890 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
12892 rtx low[2], high[2];
12894 const int single_width = mode == DImode ? 32 : 64;
12896 if (CONST_INT_P (operands[2]))
12898 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
12899 count = INTVAL (operands[2]) & (single_width * 2 - 1);
12901 if (count >= single_width)
12903 emit_move_insn (high[0], low[1]);
12904 emit_move_insn (low[0], const0_rtx);
12906 if (count > single_width)
12907 ix86_expand_ashl_const (high[0], count - single_width, mode);
12911 if (!rtx_equal_p (operands[0], operands[1]))
12912 emit_move_insn (operands[0], operands[1]);
12913 emit_insn ((mode == DImode
12915 : gen_x86_64_shld) (high[0], low[0], GEN_INT (count)));
12916 ix86_expand_ashl_const (low[0], count, mode);
12921 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
12923 if (operands[1] == const1_rtx)
12925 /* Assuming we've chosen QImode-capable registers, 1 << N
12926 can be done with two 32/64-bit shifts, no branches, no cmoves. */
12927 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
12929 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
12931 ix86_expand_clear (low[0]);
12932 ix86_expand_clear (high[0]);
12933 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (single_width)));
12935 d = gen_lowpart (QImode, low[0]);
12936 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
12937 s = gen_rtx_EQ (QImode, flags, const0_rtx);
12938 emit_insn (gen_rtx_SET (VOIDmode, d, s));
12940 d = gen_lowpart (QImode, high[0]);
12941 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
12942 s = gen_rtx_NE (QImode, flags, const0_rtx);
12943 emit_insn (gen_rtx_SET (VOIDmode, d, s));
12946 /* Otherwise, we can get the same results by manually performing
12947 a bit extract operation on bit 5/6, and then performing the two
12948 shifts. The two methods of getting 0/1 into low/high are exactly
12949 the same size. Avoiding the shift in the bit extract case helps
12950 pentium4 a bit; no one else seems to care much either way. */
12955 if (TARGET_PARTIAL_REG_STALL && !optimize_size)
12956 x = gen_rtx_ZERO_EXTEND (mode == DImode ? SImode : DImode, operands[2]);
12958 x = gen_lowpart (mode == DImode ? SImode : DImode, operands[2]);
12959 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
12961 emit_insn ((mode == DImode
12963 : gen_lshrdi3) (high[0], high[0], GEN_INT (mode == DImode ? 5 : 6)));
12964 emit_insn ((mode == DImode
12966 : gen_anddi3) (high[0], high[0], GEN_INT (1)));
12967 emit_move_insn (low[0], high[0]);
12968 emit_insn ((mode == DImode
12970 : gen_xordi3) (low[0], low[0], GEN_INT (1)));
12973 emit_insn ((mode == DImode
12975 : gen_ashldi3) (low[0], low[0], operands[2]));
12976 emit_insn ((mode == DImode
12978 : gen_ashldi3) (high[0], high[0], operands[2]));
12982 if (operands[1] == constm1_rtx)
12984 /* For -1 << N, we can avoid the shld instruction, because we
12985 know that we're shifting 0...31/63 ones into a -1. */
12986 emit_move_insn (low[0], constm1_rtx);
12988 emit_move_insn (high[0], low[0]);
12990 emit_move_insn (high[0], constm1_rtx);
12994 if (!rtx_equal_p (operands[0], operands[1]))
12995 emit_move_insn (operands[0], operands[1]);
12997 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
12998 emit_insn ((mode == DImode
13000 : gen_x86_64_shld) (high[0], low[0], operands[2]));
13003 emit_insn ((mode == DImode ? gen_ashlsi3 : gen_ashldi3) (low[0], low[0], operands[2]));
13005 if (TARGET_CMOVE && scratch)
13007 ix86_expand_clear (scratch);
13008 emit_insn ((mode == DImode
13009 ? gen_x86_shift_adj_1
13010 : gen_x86_64_shift_adj) (high[0], low[0], operands[2], scratch));
13013 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
13017 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
13019 rtx low[2], high[2];
13021 const int single_width = mode == DImode ? 32 : 64;
13023 if (CONST_INT_P (operands[2]))
13025 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
13026 count = INTVAL (operands[2]) & (single_width * 2 - 1);
13028 if (count == single_width * 2 - 1)
13030 emit_move_insn (high[0], high[1]);
13031 emit_insn ((mode == DImode
13033 : gen_ashrdi3) (high[0], high[0],
13034 GEN_INT (single_width - 1)));
13035 emit_move_insn (low[0], high[0]);
13038 else if (count >= single_width)
13040 emit_move_insn (low[0], high[1]);
13041 emit_move_insn (high[0], low[0]);
13042 emit_insn ((mode == DImode
13044 : gen_ashrdi3) (high[0], high[0],
13045 GEN_INT (single_width - 1)));
13046 if (count > single_width)
13047 emit_insn ((mode == DImode
13049 : gen_ashrdi3) (low[0], low[0],
13050 GEN_INT (count - single_width)));
13054 if (!rtx_equal_p (operands[0], operands[1]))
13055 emit_move_insn (operands[0], operands[1]);
13056 emit_insn ((mode == DImode
13058 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
13059 emit_insn ((mode == DImode
13061 : gen_ashrdi3) (high[0], high[0], GEN_INT (count)));
13066 if (!rtx_equal_p (operands[0], operands[1]))
13067 emit_move_insn (operands[0], operands[1]);
13069 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
13071 emit_insn ((mode == DImode
13073 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
13074 emit_insn ((mode == DImode
13076 : gen_ashrdi3) (high[0], high[0], operands[2]));
13078 if (TARGET_CMOVE && scratch)
13080 emit_move_insn (scratch, high[0]);
13081 emit_insn ((mode == DImode
13083 : gen_ashrdi3) (scratch, scratch,
13084 GEN_INT (single_width - 1)));
13085 emit_insn ((mode == DImode
13086 ? gen_x86_shift_adj_1
13087 : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
13091 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
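/* The variable-count path above thus emits, for the 32-bit DImode
   case, roughly

	shrdl	%cl, high, low		; 64-bit funnel shift right
	sarl	%cl, high
	; if the count is 32..63:  low = high, high = high >> 31

   where the final fixup uses cmov when available and a conditional
   jump (gen_x86_shift_adj_3) otherwise.  */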
13096 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
13098 rtx low[2], high[2];
13100 const int single_width = mode == DImode ? 32 : 64;
13102 if (CONST_INT_P (operands[2]))
13104 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
13105 count = INTVAL (operands[2]) & (single_width * 2 - 1);
13107 if (count >= single_width)
13109 emit_move_insn (low[0], high[1]);
13110 ix86_expand_clear (high[0]);
13112 if (count > single_width)
13113 emit_insn ((mode == DImode
13115 : gen_lshrdi3) (low[0], low[0],
13116 GEN_INT (count - single_width)));
13120 if (!rtx_equal_p (operands[0], operands[1]))
13121 emit_move_insn (operands[0], operands[1]);
13122 emit_insn ((mode == DImode
13124 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
13125 emit_insn ((mode == DImode
13127 : gen_lshrdi3) (high[0], high[0], GEN_INT (count)));
13132 if (!rtx_equal_p (operands[0], operands[1]))
13133 emit_move_insn (operands[0], operands[1]);
13135 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
13137 emit_insn ((mode == DImode
13139 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
13140 emit_insn ((mode == DImode
13142 : gen_lshrdi3) (high[0], high[0], operands[2]));
13144 /* Heh. By reversing the arguments, we can reuse this pattern. */
13145 if (TARGET_CMOVE && scratch)
13147 ix86_expand_clear (scratch);
13148 emit_insn ((mode == DImode
13149 ? gen_x86_shift_adj_1
13150 : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
13154 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
13158 /* Predict the just-emitted jump instruction to be taken with probability PROB. */
13160 predict_jump (int prob)
13162 rtx insn = get_last_insn ();
13163 gcc_assert (JUMP_P (insn));
13165 = gen_rtx_EXPR_LIST (REG_BR_PROB,
13170 /* Helper function for the string operations below. Test whether VARIABLE
13171 is aligned to VALUE bytes. If true, jump to the label. */
13173 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
13175 rtx label = gen_label_rtx ();
13176 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
13177 if (GET_MODE (variable) == DImode)
13178 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
13180 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
13181 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
13184 predict_jump (REG_BR_PROB_BASE * 50 / 100);
13186 predict_jump (REG_BR_PROB_BASE * 90 / 100);
13190 /* Adjust COUNTREG by the VALUE. */
13192 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
13194 if (GET_MODE (countreg) == DImode)
13195 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
13197 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
13200 /* Zero-extend the possibly-SImode EXP to a Pmode register. */
13202 ix86_zero_extend_to_Pmode (rtx exp)
13205 if (GET_MODE (exp) == VOIDmode)
13206 return force_reg (Pmode, exp);
13207 if (GET_MODE (exp) == Pmode)
13208 return copy_to_mode_reg (Pmode, exp);
13209 r = gen_reg_rtx (Pmode);
13210 emit_insn (gen_zero_extendsidi2 (r, exp));
13214 /* Divide COUNTREG by SCALE. */
13216 scale_counter (rtx countreg, int scale)
13219 rtx piece_size_mask;
13223 if (CONST_INT_P (countreg))
13224 return GEN_INT (INTVAL (countreg) / scale);
13225 gcc_assert (REG_P (countreg));
13227 piece_size_mask = GEN_INT (scale - 1);
13228 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
13229 GEN_INT (exact_log2 (scale)),
13230 NULL, 1, OPTAB_DIRECT);
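/* E.g. copying 64 bytes in SImode chunks calls
   scale_counter (count, 4): a constant count folds to 16 directly,
   while a register count becomes a logical shift right by 2.  */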
13234 /* Return the mode for the memcpy/memset loop counter. Prefer SImode over
13235 DImode for constant loop counts. */
13237 static enum machine_mode
13238 counter_mode (rtx count_exp)
13240 if (GET_MODE (count_exp) != VOIDmode)
13241 return GET_MODE (count_exp);
13242 if (GET_CODE (count_exp) != CONST_INT)
13244 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
13249 /* When SRCPTR is non-NULL, output a simple loop that moves memory
13250 from SRCPTR to DESTPTR in chunks of MODE, unrolled UNROLL times;
13251 the overall size is COUNT, specified in bytes. When SRCPTR is NULL, output
13252 the equivalent loop that sets memory to VALUE (supposed to be in MODE).
13254 The size is rounded down to a whole number of chunks moved at once.
13255 SRCMEM and DESTMEM provide the MEM rtx to feed proper aliasing info. */
13259 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
13260 rtx destptr, rtx srcptr, rtx value,
13261 rtx count, enum machine_mode mode, int unroll,
13264 rtx out_label, top_label, iter, tmp;
13265 enum machine_mode iter_mode = counter_mode (count);
13266 rtx piece_size = GEN_INT (GET_MODE_SIZE (mode) * unroll);
13267 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
13273 top_label = gen_label_rtx ();
13274 out_label = gen_label_rtx ();
13275 iter = gen_reg_rtx (iter_mode);
13277 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
13278 NULL, 1, OPTAB_DIRECT);
13279 /* Those two should combine. */
13280 if (piece_size == const1_rtx)
13282 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
13284 predict_jump (REG_BR_PROB_BASE * 10 / 100);
13286 emit_move_insn (iter, const0_rtx);
13288 emit_label (top_label);
13290 tmp = convert_modes (Pmode, iter_mode, iter, true);
13291 x_addr = gen_rtx_PLUS (Pmode, destptr, tmp);
13292 destmem = change_address (destmem, mode, x_addr);
13296 y_addr = gen_rtx_PLUS (Pmode, srcptr, copy_rtx (tmp));
13297 srcmem = change_address (srcmem, mode, y_addr);
13299 /* When unrolling for chips that reorder memory reads and writes,
13300 we can save registers by using a single temporary.
13301 Also, using four temporaries is overkill in 32-bit mode. */
13302 if (!TARGET_64BIT && 0)
13304 for (i = 0; i < unroll; i++)
13309 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
13311 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
13313 emit_move_insn (destmem, srcmem);
13319 gcc_assert (unroll <= 4);
13320 for (i = 0; i < unroll; i++)
13322 tmpreg[i] = gen_reg_rtx (mode);
13326 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
13328 emit_move_insn (tmpreg[i], srcmem);
13330 for (i = 0; i < unroll; i++)
13335 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
13337 emit_move_insn (destmem, tmpreg[i]);
13342 for (i = 0; i < unroll; i++)
13346 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
13347 emit_move_insn (destmem, value);
13350 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
13351 true, OPTAB_LIB_WIDEN);
13353 emit_move_insn (iter, tmp);
13355 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
13357 if (expected_size != -1)
13359 expected_size /= GET_MODE_SIZE (mode) * unroll;
13360 if (expected_size == 0)
13362 else if (expected_size > REG_BR_PROB_BASE)
13363 predict_jump (REG_BR_PROB_BASE - 1);
13365 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
13368 predict_jump (REG_BR_PROB_BASE * 80 / 100);
13369 iter = ix86_zero_extend_to_Pmode (iter);
13370 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
13371 true, OPTAB_LIB_WIDEN);
13372 if (tmp != destptr)
13373 emit_move_insn (destptr, tmp);
13376 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
13377 true, OPTAB_LIB_WIDEN);
13379 emit_move_insn (srcptr, tmp);
13381 emit_label (out_label);
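/* The emitted control flow corresponds roughly to this C sketch,
   where 'chunk' is GET_MODE_SIZE (mode):

	size = count & ~(chunk * unroll - 1);
	for (iter = 0; iter < size; iter += chunk * unroll)
	  ... 'unroll' chunk-sized moves (or stores of VALUE) ...
	dest += size;  src += size;

   Only the rounded-down portion is handled here; the caller's
   epilogue deals with the remaining count % (chunk * unroll)
   bytes.  */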
13384 /* Output "rep; mov" instruction.
13385 Arguments have the same meaning as for the previous function. */
13387 expand_movmem_via_rep_mov (rtx destmem, rtx srcmem,
13388 rtx destptr, rtx srcptr,
13390 enum machine_mode mode)
13396 /* If the size is known, it is shorter to use rep movs. */
13397 if (mode == QImode && CONST_INT_P (count)
13398 && !(INTVAL (count) & 3))
13401 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
13402 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
13403 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
13404 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
13405 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
13406 if (mode != QImode)
13408 destexp = gen_rtx_ASHIFT (Pmode, countreg,
13409 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
13410 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
13411 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
13412 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
13413 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
13417 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
13418 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
13420 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
13424 /* Output "rep; stos" instruction.
13425 Arguments have the same meaning as for the previous function. */
13427 expand_setmem_via_rep_stos (rtx destmem, rtx destptr, rtx value,
13429 enum machine_mode mode)
13434 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
13435 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
13436 value = force_reg (mode, gen_lowpart (mode, value));
13437 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
13438 if (mode != QImode)
13440 destexp = gen_rtx_ASHIFT (Pmode, countreg,
13441 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
13442 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
13445 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
13446 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
13450 emit_strmov (rtx destmem, rtx srcmem,
13451 rtx destptr, rtx srcptr, enum machine_mode mode, int offset)
13453 rtx src = adjust_automodify_address_nv (srcmem, mode, srcptr, offset);
13454 rtx dest = adjust_automodify_address_nv (destmem, mode, destptr, offset);
13455 emit_insn (gen_strmov (destptr, dest, srcptr, src));
13458 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
13460 expand_movmem_epilogue (rtx destmem, rtx srcmem,
13461 rtx destptr, rtx srcptr, rtx count, int max_size)
13464 if (CONST_INT_P (count))
13466 HOST_WIDE_INT countval = INTVAL (count);
13469 if ((countval & 0x10) && max_size > 16)
13473 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
13474 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset + 8);
13477 gcc_unreachable ();
13480 if ((countval & 0x08) && max_size > 8)
13483 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
13486 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
13487 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset + 4);
13491 if ((countval & 0x04) && max_size > 4)
13493 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
13496 if ((countval & 0x02) && max_size > 2)
13498 emit_strmov (destmem, srcmem, destptr, srcptr, HImode, offset);
13501 if ((countval & 0x01) && max_size > 1)
13503 emit_strmov (destmem, srcmem, destptr, srcptr, QImode, offset);
13510 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
13511 count, 1, OPTAB_DIRECT);
13512 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
13513 count, QImode, 1, 4);
13517 /* When there are stringops, we can cheaply increase dest and src pointers.
13518 Otherwise we save code size by maintaining offset (zero is readily
13519 available from preceding rep operation) and using x86 addressing modes.
13521 if (TARGET_SINGLE_STRINGOP)
13525 rtx label = ix86_expand_aligntest (count, 4, true);
13526 src = change_address (srcmem, SImode, srcptr);
13527 dest = change_address (destmem, SImode, destptr);
13528 emit_insn (gen_strmov (destptr, dest, srcptr, src));
13529 emit_label (label);
13530 LABEL_NUSES (label) = 1;
13534 rtx label = ix86_expand_aligntest (count, 2, true);
13535 src = change_address (srcmem, HImode, srcptr);
13536 dest = change_address (destmem, HImode, destptr);
13537 emit_insn (gen_strmov (destptr, dest, srcptr, src));
13538 emit_label (label);
13539 LABEL_NUSES (label) = 1;
13543 rtx label = ix86_expand_aligntest (count, 1, true);
13544 src = change_address (srcmem, QImode, srcptr);
13545 dest = change_address (destmem, QImode, destptr);
13546 emit_insn (gen_strmov (destptr, dest, srcptr, src));
13547 emit_label (label);
13548 LABEL_NUSES (label) = 1;
13553 rtx offset = force_reg (Pmode, const0_rtx);
13558 rtx label = ix86_expand_aligntest (count, 4, true);
13559 src = change_address (srcmem, SImode, srcptr);
13560 dest = change_address (destmem, SImode, destptr);
13561 emit_move_insn (dest, src);
13562 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
13563 true, OPTAB_LIB_WIDEN);
13565 emit_move_insn (offset, tmp);
13566 emit_label (label);
13567 LABEL_NUSES (label) = 1;
13571 rtx label = ix86_expand_aligntest (count, 2, true);
13572 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
13573 src = change_address (srcmem, HImode, tmp);
13574 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
13575 dest = change_address (destmem, HImode, tmp);
13576 emit_move_insn (dest, src);
13577 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
13578 true, OPTAB_LIB_WIDEN);
13580 emit_move_insn (offset, tmp);
13581 emit_label (label);
13582 LABEL_NUSES (label) = 1;
13586 rtx label = ix86_expand_aligntest (count, 1, true);
13587 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
13588 src = change_address (srcmem, QImode, tmp);
13589 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
13590 dest = change_address (destmem, QImode, tmp);
13591 emit_move_insn (dest, src);
13592 emit_label (label);
13593 LABEL_NUSES (label) = 1;
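/* The constant-count branch of the epilogue above amounts to the
   following plain-C shape (an illustrative sketch, not compiler code):
   one unconditional move per set bit of the remaining count, so at most
   max_size - 1 bytes are copied and no loop is needed.  */
#if 0
static void
copy_tail (char *dest, const char *src, int countval, int max_size)
{
  int offset = 0;
  if ((countval & 0x08) && max_size > 8)
    { __builtin_memcpy (dest + offset, src + offset, 8); offset += 8; }
  if ((countval & 0x04) && max_size > 4)
    { __builtin_memcpy (dest + offset, src + offset, 4); offset += 4; }
  if ((countval & 0x02) && max_size > 2)
    { __builtin_memcpy (dest + offset, src + offset, 2); offset += 2; }
  if ((countval & 0x01) && max_size > 1)
    dest[offset] = src[offset];
}
#endif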
13598 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
13600 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
13601 rtx count, int max_size)
13604 expand_simple_binop (counter_mode (count), AND, count,
13605 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
13606 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
13607 gen_lowpart (QImode, value), count, QImode,
13611 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
13613 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx count, int max_size)
13617 if (CONST_INT_P (count))
13619 HOST_WIDE_INT countval = INTVAL (count);
13622 if ((countval & 0x10) && max_size > 16)
13626 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
13627 emit_insn (gen_strset (destptr, dest, value));
13628 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset + 8);
13629 emit_insn (gen_strset (destptr, dest, value));
13632 gcc_unreachable ();
13635 if ((countval & 0x08) && max_size > 8)
13639 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
13640 emit_insn (gen_strset (destptr, dest, value));
13644 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
13645 emit_insn (gen_strset (destptr, dest, value));
13646 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset + 4);
13647 emit_insn (gen_strset (destptr, dest, value));
13651 if ((countval & 0x04) && max_size > 4)
13653 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
13654 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
13657 if ((countval & 0x02) && max_size > 2)
13659 dest = adjust_automodify_address_nv (destmem, HImode, destptr, offset);
13660 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
13663 if ((countval & 0x01) && max_size > 1)
13665 dest = adjust_automodify_address_nv (destmem, QImode, destptr, offset);
13666 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
13673 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
13678 rtx label = ix86_expand_aligntest (count, 16, true);
13681 dest = change_address (destmem, DImode, destptr);
13682 emit_insn (gen_strset (destptr, dest, value));
13683 emit_insn (gen_strset (destptr, dest, value));
13687 dest = change_address (destmem, SImode, destptr);
13688 emit_insn (gen_strset (destptr, dest, value));
13689 emit_insn (gen_strset (destptr, dest, value));
13690 emit_insn (gen_strset (destptr, dest, value));
13691 emit_insn (gen_strset (destptr, dest, value));
13693 emit_label (label);
13694 LABEL_NUSES (label) = 1;
13698 rtx label = ix86_expand_aligntest (count, 8, true);
13701 dest = change_address (destmem, DImode, destptr);
13702 emit_insn (gen_strset (destptr, dest, value));
13706 dest = change_address (destmem, SImode, destptr);
13707 emit_insn (gen_strset (destptr, dest, value));
13708 emit_insn (gen_strset (destptr, dest, value));
13710 emit_label (label);
13711 LABEL_NUSES (label) = 1;
13715 rtx label = ix86_expand_aligntest (count, 4, true);
13716 dest = change_address (destmem, SImode, destptr);
13717 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
13718 emit_label (label);
13719 LABEL_NUSES (label) = 1;
13723 rtx label = ix86_expand_aligntest (count, 2, true);
13724 dest = change_address (destmem, HImode, destptr);
13725 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
13726 emit_label (label);
13727 LABEL_NUSES (label) = 1;
13731 rtx label = ix86_expand_aligntest (count, 1, true);
13732 dest = change_address (destmem, QImode, destptr);
13733 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
13734 emit_label (label);
13735 LABEL_NUSES (label) = 1;
13739 /* Copy enough bytes from SRC to DEST to align DEST, known to be aligned
13740 by ALIGN, to DESIRED_ALIGNMENT. */
13742 expand_movmem_prologue (rtx destmem, rtx srcmem,
13743 rtx destptr, rtx srcptr, rtx count,
13744 int align, int desired_alignment)
13746 if (align <= 1 && desired_alignment > 1)
13748 rtx label = ix86_expand_aligntest (destptr, 1, false);
13749 srcmem = change_address (srcmem, QImode, srcptr);
13750 destmem = change_address (destmem, QImode, destptr);
13751 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
13752 ix86_adjust_counter (count, 1);
13753 emit_label (label);
13754 LABEL_NUSES (label) = 1;
13756 if (align <= 2 && desired_alignment > 2)
13758 rtx label = ix86_expand_aligntest (destptr, 2, false);
13759 srcmem = change_address (srcmem, HImode, srcptr);
13760 destmem = change_address (destmem, HImode, destptr);
13761 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
13762 ix86_adjust_counter (count, 2);
13763 emit_label (label);
13764 LABEL_NUSES (label) = 1;
13766 if (align <= 4 && desired_alignment > 4)
13768 rtx label = ix86_expand_aligntest (destptr, 4, false);
13769 srcmem = change_address (srcmem, SImode, srcptr);
13770 destmem = change_address (destmem, SImode, destptr);
13771 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
13772 ix86_adjust_counter (count, 4);
13773 emit_label (label);
13774 LABEL_NUSES (label) = 1;
13776 gcc_assert (desired_alignment <= 8);
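/* In plain C, the prologue above peels at most one element per power of
   two until the destination is aligned (an illustrative sketch, not
   compiler code; desired_align is assumed to be a power of two <= 8):  */
#if 0
static int
align_dest_model (char *dest, const char *src, int desired_align)
{
  int off = 0;
  if (desired_align > 1 && (((unsigned long) dest + off) & 1))
    { dest[off] = src[off]; off += 1; }
  if (desired_align > 2 && (((unsigned long) dest + off) & 2))
    { __builtin_memcpy (dest + off, src + off, 2); off += 2; }
  if (desired_align > 4 && (((unsigned long) dest + off) & 4))
    { __builtin_memcpy (dest + off, src + off, 4); off += 4; }
  return off;			/* at most desired_align - 1 bytes moved */
}
#endif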
13779 /* Store enough bytes at DEST to align DEST, known to be aligned
13780 by ALIGN, to DESIRED_ALIGNMENT. */
13782 expand_setmem_prologue (rtx destmem, rtx destptr, rtx value, rtx count,
13783 int align, int desired_alignment)
13785 if (align <= 1 && desired_alignment > 1)
13787 rtx label = ix86_expand_aligntest (destptr, 1, false);
13788 destmem = change_address (destmem, QImode, destptr);
13789 emit_insn (gen_strset (destptr, destmem, gen_lowpart (QImode, value)));
13790 ix86_adjust_counter (count, 1);
13791 emit_label (label);
13792 LABEL_NUSES (label) = 1;
13794 if (align <= 2 && desired_alignment > 2)
13796 rtx label = ix86_expand_aligntest (destptr, 2, false);
13797 destmem = change_address (destmem, HImode, destptr);
13798 emit_insn (gen_strset (destptr, destmem, gen_lowpart (HImode, value)));
13799 ix86_adjust_counter (count, 2);
13800 emit_label (label);
13801 LABEL_NUSES (label) = 1;
13803 if (align <= 4 && desired_alignment > 4)
13805 rtx label = ix86_expand_aligntest (destptr, 4, false);
13806 destmem = change_address (destmem, SImode, destptr);
13807 emit_insn (gen_strset (destptr, destmem, gen_lowpart (SImode, value)));
13808 ix86_adjust_counter (count, 4);
13809 emit_label (label);
13810 LABEL_NUSES (label) = 1;
13812 gcc_assert (desired_alignment <= 8);
13815 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
13816 static enum stringop_alg
13817 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
13818 int *dynamic_check)
13820 const struct stringop_algs * algs;
13822 *dynamic_check = -1;
13823 if (memset)
13824 algs = &ix86_cost->memset[TARGET_64BIT != 0];
13825 else
13826 algs = &ix86_cost->memcpy[TARGET_64BIT != 0];
13827 if (stringop_alg != no_stringop)
13828 return stringop_alg;
13829 /* rep; movq or rep; movl is the smallest variant. */
13830 else if (optimize_size)
13832 if (!count || (count & 3))
13833 return rep_prefix_1_byte;
13835 return rep_prefix_4_byte;
13837 /* Very tiny blocks are best handled via the loop; REP is expensive to set up. */
13839 else if (expected_size != -1 && expected_size < 4)
13840 return loop_1_byte;
13841 else if (expected_size != -1)
13844 enum stringop_alg alg = libcall;
13845 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
13847 gcc_assert (algs->size[i].max);
13848 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
13850 if (algs->size[i].alg != libcall)
13851 alg = algs->size[i].alg;
13852 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
13853 the last non-libcall inline algorithm. */
13854 if (TARGET_INLINE_ALL_STRINGOPS)
13856 /* When the current size is best copied by a libcall, but we
13857 are still forced to inline, run the heuristic below
13858 that will pick code for medium-sized blocks. */
13859 if (alg != libcall)
13864 return algs->size[i].alg;
13867 gcc_assert (TARGET_INLINE_ALL_STRINGOPS);
13869 /* When asked to inline the call anyway, try to pick a meaningful choice.
13870 We look for the maximal size of block that is faster to copy by hand and
13871 take blocks of at most that size, guessing that the average size will
13872 be roughly half of the maximum.
13874 If this turns out to be bad, we might simply specify the preferred
13875 choice in ix86_costs. */
13876 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
13877 && algs->unknown_size == libcall)
13880 enum stringop_alg alg;
13883 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
13884 if (algs->size[i].alg != libcall && algs->size[i].alg)
13885 max = algs->size[i].max;
13888 alg = decide_alg (count, max / 2, memset, dynamic_check);
13889 gcc_assert (*dynamic_check == -1);
13890 gcc_assert (alg != libcall);
13891 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
13892 *dynamic_check = max;
13895 return algs->unknown_size;
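/* Reduced to plain C, the size-table walk above is (an illustrative
   sketch that ignores the TARGET_INLINE_ALL_STRINGOPS handling):  */
#if 0
static enum stringop_alg
pick_alg (const struct stringop_algs *algs, HOST_WIDE_INT expected_size)
{
  int i;
  /* Entries are ordered by max; an entry with max == -1 terminates the
     table and catches every remaining size.  */
  for (i = 0; i < MAX_STRINGOP_ALGS; i++)
    if (algs->size[i].max == -1 || algs->size[i].max >= expected_size)
      return algs->size[i].alg;
  gcc_unreachable ();
}
#endif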
13898 /* Decide on alignment. We know that the operand is already aligned to ALIGN
13899 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
13901 decide_alignment (int align,
13902 enum stringop_alg alg,
13905 int desired_align = 0;
13909 gcc_unreachable ();
13911 case unrolled_loop:
13912 desired_align = GET_MODE_SIZE (Pmode);
13914 case rep_prefix_8_byte:
13917 case rep_prefix_4_byte:
13918 /* PentiumPro has special logic triggering for 8-byte-aligned blocks,
13919 copying a whole cache line at once. */
13920 if (TARGET_PENTIUMPRO)
13925 case rep_prefix_1_byte:
13926 /* PentiumPro has special logic triggering for 8-byte-aligned blocks,
13927 copying a whole cache line at once. */
13928 if (TARGET_PENTIUMPRO)
13942 if (desired_align < align)
13943 desired_align = align;
13944 if (expected_size != -1 && expected_size < 4)
13945 desired_align = align;
13946 return desired_align;
13949 /* Return the smallest power of 2 greater than VAL. */
13951 smallest_pow2_greater_than (int val)
13959 /* Expand string move (memcpy) operation. Use i386 string operations when
13960 profitable. ix86_expand_setmem contains similar code. The code depends upon
13961 the architecture, block size and alignment, but always has the same
13962 overall structure:
13964 1) Prologue guard: a conditional that jumps to the epilogues for small
13965 blocks that can be handled by the epilogue alone. This is faster but
13966 also needed for correctness, since the prologue assumes the block is larger
13967 than the desired alignment.
13969 An optional dynamic check for size and a libcall for large
13970 blocks are emitted here too, with -minline-stringops-dynamically.
13972 2) Prologue: copy the first few bytes in order to get the destination
13973 aligned to DESIRED_ALIGN. It is emitted only when ALIGN is less than
13974 DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be copied.
13975 We emit either a jump tree on power-of-two-sized blocks, or a byte loop.
13977 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
13978 with the specified algorithm.
13980 4) Epilogue: code copying the tail of the block that is too small to be
13981 handled by the main body (or up to the size guarded by the prologue guard). */
13984 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
13985 rtx expected_align_exp, rtx expected_size_exp)
13991 rtx jump_around_label = NULL;
13992 HOST_WIDE_INT align = 1;
13993 unsigned HOST_WIDE_INT count = 0;
13994 HOST_WIDE_INT expected_size = -1;
13995 int size_needed = 0, epilogue_size_needed;
13996 int desired_align = 0;
13997 enum stringop_alg alg;
14000 if (CONST_INT_P (align_exp))
14001 align = INTVAL (align_exp);
14002 /* i386 can do misaligned accesses at reasonably increased cost. */
14003 if (CONST_INT_P (expected_align_exp)
14004 && INTVAL (expected_align_exp) > align)
14005 align = INTVAL (expected_align_exp);
14006 if (CONST_INT_P (count_exp))
14007 count = expected_size = INTVAL (count_exp);
14008 if (CONST_INT_P (expected_size_exp) && count == 0)
14009 expected_size = INTVAL (expected_size_exp);
14011 /* Step 0: Decide on preferred algorithm, desired alignment and
14012 size of chunks to be copied by main loop. */
14014 alg = decide_alg (count, expected_size, false, &dynamic_check);
14015 desired_align = decide_alignment (align, alg, expected_size);
14017 if (!TARGET_ALIGN_STRINGOPS)
14018 align = desired_align;
14020 if (alg == libcall)
14022 gcc_assert (alg != no_stringop);
14024 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
14025 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
14026 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
14031 gcc_unreachable ();
14033 size_needed = GET_MODE_SIZE (Pmode);
14035 case unrolled_loop:
14036 size_needed = GET_MODE_SIZE (Pmode) * (TARGET_64BIT ? 4 : 2);
14038 case rep_prefix_8_byte:
14041 case rep_prefix_4_byte:
14044 case rep_prefix_1_byte:
14050 epilogue_size_needed = size_needed;
14052 /* Step 1: Prologue guard. */
14054 /* Alignment code needs count to be in register. */
14055 if (CONST_INT_P (count_exp) && desired_align > align)
14057 enum machine_mode mode = SImode;
14058 if (TARGET_64BIT && (count & ~0xffffffff))
14060 count_exp = force_reg (mode, count_exp);
14062 gcc_assert (desired_align >= 1 && align >= 1);
14064 /* Ensure that alignment prologue won't copy past end of block. */
14065 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
14067 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
14068 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
14069 Make sure it is power of 2. */
14070 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
14072 label = gen_label_rtx ();
14073 emit_cmp_and_jump_insns (count_exp,
14074 GEN_INT (epilogue_size_needed),
14075 LTU, 0, counter_mode (count_exp), 1, label);
14076 if (GET_CODE (count_exp) == CONST_INT)
14078 else if (expected_size == -1 || expected_size < epilogue_size_needed)
14079 predict_jump (REG_BR_PROB_BASE * 60 / 100);
14081 predict_jump (REG_BR_PROB_BASE * 20 / 100);
14083 /* Emit code to decide on runtime whether library call or inline should be
14085 if (dynamic_check != -1)
14087 rtx hot_label = gen_label_rtx ();
14088 jump_around_label = gen_label_rtx ();
14089 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
14090 LEU, 0, GET_MODE (count_exp), 1, hot_label);
14091 predict_jump (REG_BR_PROB_BASE * 90 / 100);
14092 emit_block_move_via_libcall (dst, src, count_exp, false);
14093 emit_jump (jump_around_label);
14094 emit_label (hot_label);
14097 /* Step 2: Alignment prologue. */
14099 if (desired_align > align)
14101 /* Except for the first move in the epilogue, we no longer know
14102 the constant offset in aliasing info. It doesn't seem worth
14103 the pain to maintain it for the first move, so throw away
14104 the info early. */
14105 src = change_address (src, BLKmode, srcreg);
14106 dst = change_address (dst, BLKmode, destreg);
14107 expand_movmem_prologue (dst, src, destreg, srcreg, count_exp, align,
14110 if (label && size_needed == 1)
14112 emit_label (label);
14113 LABEL_NUSES (label) = 1;
14117 /* Step 3: Main loop. */
14123 gcc_unreachable ();
14125 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
14126 count_exp, QImode, 1, expected_size);
14129 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
14130 count_exp, Pmode, 1, expected_size);
14132 case unrolled_loop:
14133 /* Unroll only by a factor of 2 in 32-bit mode, since we don't have enough
14134 registers for 4 temporaries anyway. */
14135 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
14136 count_exp, Pmode, TARGET_64BIT ? 4 : 2,
14139 case rep_prefix_8_byte:
14140 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
14143 case rep_prefix_4_byte:
14144 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
14147 case rep_prefix_1_byte:
14148 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
14152 /* Properly adjust the offsets of the src and dest memory for aliasing. */
14153 if (CONST_INT_P (count_exp))
14155 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
14156 (count / size_needed) * size_needed);
14157 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
14158 (count / size_needed) * size_needed);
14162 src = change_address (src, BLKmode, srcreg);
14163 dst = change_address (dst, BLKmode, destreg);
14166 /* Step 4: Epilogue to copy the remaining bytes. */
14170 /* When the main loop is done, COUNT_EXP might hold the original count,
14171 while we want to copy only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
14172 The epilogue code will actually copy COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
14173 bytes. Compensate if needed. */
14175 if (size_needed < epilogue_size_needed)
14178 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
14179 GEN_INT (size_needed - 1), count_exp, 1,
14181 if (tmp != count_exp)
14182 emit_move_insn (count_exp, tmp);
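	  /* For example, count == 100 with size_needed == 16 leaves the
	     main loop having copied 96 bytes; masking COUNT_EXP down to
	     100 & 15 == 4 makes the epilogue finish the remaining 4.  */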
14184 emit_label (label);
14185 LABEL_NUSES (label) = 1;
14188 if (count_exp != const0_rtx && epilogue_size_needed > 1)
14189 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
14190 epilogue_size_needed);
14191 if (jump_around_label)
14192 emit_label (jump_around_label);
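/* The overall shape of the emitted code, as an equivalent plain-C
   skeleton (an illustrative sketch, not compiler code; CHUNK stands for
   SIZE_NEEDED and is assumed to be a power of two):  */
#if 0
static void
movmem_shape (char *dst, const char *src, unsigned long n,
	      unsigned long chunk)
{
  unsigned long done = 0;
  if (n >= chunk)			/* 1: prologue guard */
    {
      /* 2: alignment prologue */
      while ((((unsigned long) dst) & (chunk - 1)) && n)
	{ *dst++ = *src++; n--; }
      /* 3: main loop, CHUNK bytes at a time */
      for (; done + chunk <= n; done += chunk)
	__builtin_memcpy (dst + done, src + done, chunk);
      dst += done; src += done; n -= done;
    }
  /* 4: epilogue for the remaining n < CHUNK bytes */
  while (n--)
    *dst++ = *src++;
}
#endif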
14196 /* Helper function for memset. For the QImode value 0xXY produce
14197 0xXYXYXYXY of the width specified by MODE. This is essentially
14198 VAL * 0x01010101, but we can do slightly better than
14199 synth_mult by unwinding the sequence by hand on CPUs with
14200 slow multiply. */
14202 promote_duplicated_reg (enum machine_mode mode, rtx val)
14204 enum machine_mode valmode = GET_MODE (val);
14206 int nops = mode == DImode ? 3 : 2;
14208 gcc_assert (mode == SImode || mode == DImode);
14209 if (val == const0_rtx)
14210 return copy_to_mode_reg (mode, const0_rtx);
14211 if (CONST_INT_P (val))
14213 HOST_WIDE_INT v = INTVAL (val) & 255;
14217 if (mode == DImode)
14218 v |= (v << 16) << 16;
14219 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
14222 if (valmode == VOIDmode)
14224 if (valmode != QImode)
14225 val = gen_lowpart (QImode, val);
14226 if (mode == QImode)
14228 if (!TARGET_PARTIAL_REG_STALL)
14230 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
14231 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
14232 <= (ix86_cost->shift_const + ix86_cost->add) * nops
14233 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
14235 rtx reg = convert_modes (mode, QImode, val, true);
14236 tmp = promote_duplicated_reg (mode, const1_rtx);
14237 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
14242 rtx reg = convert_modes (mode, QImode, val, true);
14244 if (!TARGET_PARTIAL_REG_STALL)
14245 if (mode == SImode)
14246 emit_insn (gen_movsi_insv_1 (reg, reg));
14248 emit_insn (gen_movdi_insv_1_rex64 (reg, reg));
14251 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
14252 NULL, 1, OPTAB_DIRECT);
14254 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
14256 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
14257 NULL, 1, OPTAB_DIRECT);
14258 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
14259 if (mode == SImode)
14261 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
14262 NULL, 1, OPTAB_DIRECT);
14263 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
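/* For SImode the shift-and-or sequence above computes (an illustrative
   sketch of the arithmetic, not compiler code):  */
#if 0
static unsigned int
dup_byte (unsigned char b)
{
  unsigned int v = b;
  v |= v << 8;			/* 0x000000XY -> 0x0000XYXY */
  v |= v << 16;			/* 0x0000XYXY -> 0xXYXYXYXY */
  return v;			/* == b * 0x01010101 */
}
#endif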
14268 /* Duplicate the value VAL using promote_duplicated_reg into the maximal
14269 size needed by the main loop copying SIZE_NEEDED chunks and by the
14270 prologue getting the alignment from ALIGN to DESIRED_ALIGN. */
14272 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align, int align)
14277 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
14278 promoted_val = promote_duplicated_reg (DImode, val);
14279 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
14280 promoted_val = promote_duplicated_reg (SImode, val);
14281 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
14282 promoted_val = promote_duplicated_reg (HImode, val);
14284 promoted_val = val;
14286 return promoted_val;
14289 /* Expand string set operation (memset). Use i386 string operations
14290 when profitable. See the ix86_expand_movmem comment for an explanation
14291 of the individual steps performed. */
14293 ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
14294 rtx expected_align_exp, rtx expected_size_exp)
14299 rtx jump_around_label = NULL;
14300 HOST_WIDE_INT align = 1;
14301 unsigned HOST_WIDE_INT count = 0;
14302 HOST_WIDE_INT expected_size = -1;
14303 int size_needed = 0, epilogue_size_needed;
14304 int desired_align = 0;
14305 enum stringop_alg alg;
14306 rtx promoted_val = NULL;
14307 bool force_loopy_epilogue = false;
14310 if (CONST_INT_P (align_exp))
14311 align = INTVAL (align_exp);
14312 /* i386 can do misaligned accesses at reasonably increased cost. */
14313 if (CONST_INT_P (expected_align_exp)
14314 && INTVAL (expected_align_exp) > align)
14315 align = INTVAL (expected_align_exp);
14316 if (CONST_INT_P (count_exp))
14317 count = expected_size = INTVAL (count_exp);
14318 if (CONST_INT_P (expected_size_exp) && count == 0)
14319 expected_size = INTVAL (expected_size_exp);
14321 /* Step 0: Decide on preferred algorithm, desired alignment and
14322 size of chunks to be copied by main loop. */
14324 alg = decide_alg (count, expected_size, true, &dynamic_check);
14325 desired_align = decide_alignment (align, alg, expected_size);
14327 if (!TARGET_ALIGN_STRINGOPS)
14328 align = desired_align;
14330 if (alg == libcall)
14332 gcc_assert (alg != no_stringop);
14334 count_exp = copy_to_mode_reg (counter_mode (count_exp), count_exp);
14335 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
14340 gcc_unreachable ();
14342 size_needed = GET_MODE_SIZE (Pmode);
14344 case unrolled_loop:
14345 size_needed = GET_MODE_SIZE (Pmode) * 4;
14347 case rep_prefix_8_byte:
14350 case rep_prefix_4_byte:
14353 case rep_prefix_1_byte:
14358 epilogue_size_needed = size_needed;
14360 /* Step 1: Prologue guard. */
14362 /* Alignment code needs count to be in register. */
14363 if (CONST_INT_P (count_exp) && desired_align > align)
14365 enum machine_mode mode = SImode;
14366 if (TARGET_64BIT && (count & ~0xffffffff))
14368 count_exp = force_reg (mode, count_exp);
14370 /* Do the cheap promotion to allow better CSE across the
14371 main loop and epilogue (i.e., one load of the big constant in
14372 front of all the code). */
14373 if (CONST_INT_P (val_exp))
14374 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
14375 desired_align, align);
14376 /* Ensure that alignment prologue won't copy past end of block. */
14377 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
14379 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
14380 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
14381 Make sure it is power of 2. */
14382 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
14384 /* To improve performance of small blocks, we jump around the VAL
14385 promoting code. This means that if the promoted VAL is not constant,
14386 we might not use it in the epilogue and have to use the byte loop variant. */
14388 if (epilogue_size_needed > 2 && !promoted_val)
14389 force_loopy_epilogue = true;
14390 label = gen_label_rtx ();
14391 emit_cmp_and_jump_insns (count_exp,
14392 GEN_INT (epilogue_size_needed),
14393 LTU, 0, counter_mode (count_exp), 1, label);
14394 if (GET_CODE (count_exp) == CONST_INT)
14396 else if (expected_size == -1 || expected_size <= epilogue_size_needed)
14397 predict_jump (REG_BR_PROB_BASE * 60 / 100);
14399 predict_jump (REG_BR_PROB_BASE * 20 / 100);
14401 if (dynamic_check != -1)
14403 rtx hot_label = gen_label_rtx ();
14404 jump_around_label = gen_label_rtx ();
14405 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
14406 LEU, 0, counter_mode (count_exp), 1, hot_label);
14407 predict_jump (REG_BR_PROB_BASE * 90 / 100);
14408 set_storage_via_libcall (dst, count_exp, val_exp, false);
14409 emit_jump (jump_around_label);
14410 emit_label (hot_label);
14413 /* Step 2: Alignment prologue. */
14415 /* Do the expensive promotion once we have branched off the small blocks. */
14417 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
14418 desired_align, align);
14419 gcc_assert (desired_align >= 1 && align >= 1);
14421 if (desired_align > align)
14423 /* Except for the first move in the epilogue, we no longer know
14424 the constant offset in aliasing info. It doesn't seem worth
14425 the pain to maintain it for the first move, so throw away
14426 the info early. */
14427 dst = change_address (dst, BLKmode, destreg);
14428 expand_setmem_prologue (dst, destreg, promoted_val, count_exp, align,
14431 if (label && size_needed == 1)
14433 emit_label (label);
14434 LABEL_NUSES (label) = 1;
14438 /* Step 3: Main loop. */
14444 gcc_unreachable ();
14446 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
14447 count_exp, QImode, 1, expected_size);
14450 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
14451 count_exp, Pmode, 1, expected_size);
14453 case unrolled_loop:
14454 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
14455 count_exp, Pmode, 4, expected_size);
14457 case rep_prefix_8_byte:
14458 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
14461 case rep_prefix_4_byte:
14462 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
14465 case rep_prefix_1_byte:
14466 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
14470 /* Properly adjust the offset of the dest memory for aliasing. */
14471 if (CONST_INT_P (count_exp))
14472 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
14473 (count / size_needed) * size_needed);
14475 dst = change_address (dst, BLKmode, destreg);
14477 /* Step 4: Epilogue to copy the remaining bytes. */
14481 /* When the main loop is done, COUNT_EXP might hold the original count,
14482 while we want to set only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
14483 The epilogue code will actually set COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
14484 bytes. Compensate if needed. */
14486 if (size_needed < desired_align - align)
14489 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
14490 GEN_INT (size_needed - 1), count_exp, 1,
14492 size_needed = desired_align - align + 1;
14493 if (tmp != count_exp)
14494 emit_move_insn (count_exp, tmp);
14496 emit_label (label);
14497 LABEL_NUSES (label) = 1;
14499 if (count_exp != const0_rtx && epilogue_size_needed > 1)
14501 if (force_loopy_epilogue)
14502 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
14505 expand_setmem_epilogue (dst, destreg, promoted_val, count_exp,
14508 if (jump_around_label)
14509 emit_label (jump_around_label);
14513 /* Expand the appropriate insns for doing strlen if not just doing
14514 repnz; scasb
14516 out = result, initialized with the start address
14517 align_rtx = alignment of the address.
14518 scratch = scratch register, initialized with the start address when
14519 not aligned, otherwise undefined
14521 This is just the body. It needs the initializations mentioned above and
14522 some address computing at the end. These things are done in i386.md. */
14525 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
14529 rtx align_2_label = NULL_RTX;
14530 rtx align_3_label = NULL_RTX;
14531 rtx align_4_label = gen_label_rtx ();
14532 rtx end_0_label = gen_label_rtx ();
14534 rtx tmpreg = gen_reg_rtx (SImode);
14535 rtx scratch = gen_reg_rtx (SImode);
14539 if (CONST_INT_P (align_rtx))
14540 align = INTVAL (align_rtx);
14542 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
14544 /* Is there a known alignment and is it less than 4? */
14547 rtx scratch1 = gen_reg_rtx (Pmode);
14548 emit_move_insn (scratch1, out);
14549 /* Is there a known alignment and is it not 2? */
14552 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
14553 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
14555 /* Leave just the 3 lower bits. */
14556 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
14557 NULL_RTX, 0, OPTAB_WIDEN);
14559 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
14560 Pmode, 1, align_4_label);
14561 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
14562 Pmode, 1, align_2_label);
14563 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
14564 Pmode, 1, align_3_label);
14568 /* Since the alignment is 2, we have to check 2 or 0 bytes;
14569 check whether it is aligned to a 4-byte boundary. */
14571 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
14572 NULL_RTX, 0, OPTAB_WIDEN);
14574 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
14575 Pmode, 1, align_4_label);
14578 mem = change_address (src, QImode, out);
14580 /* Now compare the bytes. */
14582 /* Compare the first n unaligned byte on a byte per byte basis. */
14583 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
14584 QImode, 1, end_0_label);
14586 /* Increment the address. */
14588 emit_insn (gen_adddi3 (out, out, const1_rtx));
14590 emit_insn (gen_addsi3 (out, out, const1_rtx));
14592 /* Not needed with an alignment of 2 */
14595 emit_label (align_2_label);
14597 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
14601 emit_insn (gen_adddi3 (out, out, const1_rtx));
14603 emit_insn (gen_addsi3 (out, out, const1_rtx));
14605 emit_label (align_3_label);
14608 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
14612 emit_insn (gen_adddi3 (out, out, const1_rtx));
14614 emit_insn (gen_addsi3 (out, out, const1_rtx));
14617 /* Generate loop to check 4 bytes at a time. It is not a good idea to
14618 align this loop; that only makes the program larger and does not help
14619 it converge faster. */
14620 emit_label (align_4_label);
14622 mem = change_address (src, SImode, out);
14623 emit_move_insn (scratch, mem);
14625 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
14627 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
14629 /* This formula yields a nonzero result iff one of the bytes is zero.
14630 This saves three branches inside the loop and many cycles. */
14632 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
14633 emit_insn (gen_one_cmplsi2 (scratch, scratch));
14634 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
14635 emit_insn (gen_andsi3 (tmpreg, tmpreg,
14636 gen_int_mode (0x80808080, SImode)));
14637 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
14642 rtx reg = gen_reg_rtx (SImode);
14643 rtx reg2 = gen_reg_rtx (Pmode);
14644 emit_move_insn (reg, tmpreg);
14645 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
14647 /* If zero is not in the first two bytes, move two bytes forward. */
14648 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
14649 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
14650 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
14651 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
14652 gen_rtx_IF_THEN_ELSE (SImode, tmp,
14655 /* Emit lea manually to avoid clobbering of flags. */
14656 emit_insn (gen_rtx_SET (SImode, reg2,
14657 gen_rtx_PLUS (Pmode, out, const2_rtx)));
14659 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
14660 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
14661 emit_insn (gen_rtx_SET (VOIDmode, out,
14662 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
14669 rtx end_2_label = gen_label_rtx ();
14670 /* Is zero in the first two bytes? */
14672 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
14673 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
14674 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
14675 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
14676 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
14678 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
14679 JUMP_LABEL (tmp) = end_2_label;
14681 /* Not in the first two. Move two bytes forward. */
14682 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
14684 emit_insn (gen_adddi3 (out, out, const2_rtx));
14686 emit_insn (gen_addsi3 (out, out, const2_rtx));
14688 emit_label (end_2_label);
14692 /* Avoid branch in fixing the byte. */
14693 tmpreg = gen_lowpart (QImode, tmpreg);
14694 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
14695 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, FLAGS_REG), const0_rtx);
14697 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
14699 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));
14701 emit_label (end_0_label);
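/* The zero-byte test emitted in the loop above, in plain C (an
   illustrative sketch): the expression is nonzero exactly when some
   byte of X is zero, so a single test replaces four byte comparisons
   per iteration.  */
#if 0
static int
contains_zero_byte (unsigned int x)
{
  return ((x - 0x01010101U) & ~x & 0x80808080U) != 0;
}
#endif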
14704 /* Expand strlen. */
14707 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
14709 rtx addr, scratch1, scratch2, scratch3, scratch4;
14711 /* The generic case of the strlen expander is long. Avoid its
14712 expansion unless TARGET_INLINE_ALL_STRINGOPS. */
14714 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
14715 && !TARGET_INLINE_ALL_STRINGOPS
14717 && (!CONST_INT_P (align) || INTVAL (align) < 4))
14720 addr = force_reg (Pmode, XEXP (src, 0));
14721 scratch1 = gen_reg_rtx (Pmode);
14723 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
14726 /* Well, it seems that some optimizers do not combine a call like
14727 foo (strlen (bar), strlen (bar));
14728 when the move and the subtraction are done here; the length does get
14729 calculated just once when these instructions are done inside of
14730 output_strlen_unroll(). But since &bar[strlen (bar)] is often used,
14731 and one fewer register is live across the lifetime of
14732 output_strlen_unroll(), this is better. */
14734 emit_move_insn (out, addr);
14736 ix86_expand_strlensi_unroll_1 (out, src, align);
14738 /* strlensi_unroll_1 returns the address of the zero at the end of
14739 the string, like memchr(), so compute the length by subtracting
14740 the start address. */
14742 emit_insn (gen_subdi3 (out, out, addr));
14744 emit_insn (gen_subsi3 (out, out, addr));
14749 scratch2 = gen_reg_rtx (Pmode);
14750 scratch3 = gen_reg_rtx (Pmode);
14751 scratch4 = force_reg (Pmode, constm1_rtx);
14753 emit_move_insn (scratch3, addr);
14754 eoschar = force_reg (QImode, eoschar);
14756 src = replace_equiv_address_nv (src, scratch3);
14758 /* If .md starts supporting :P, this can be done in .md. */
14759 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
14760 scratch4), UNSPEC_SCAS);
14761 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
14764 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
14765 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
14769 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
14770 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
14776 /* For a given symbol (function), construct code to compute the address
14777 of its PLT entry in the large x86-64 PIC model. */
14779 construct_plt_address (rtx symbol)
14781 rtx tmp = gen_reg_rtx (Pmode);
14782 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
14784 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
14785 gcc_assert (ix86_cmodel == CM_LARGE_PIC);
14787 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
14788 emit_insn (gen_adddi3 (tmp, tmp, pic_offset_table_rtx));
14793 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
14794 rtx callarg2 ATTRIBUTE_UNUSED,
14795 rtx pop, int sibcall)
14797 rtx use = NULL, call;
14799 if (pop == const0_rtx)
14801 gcc_assert (!TARGET_64BIT || !pop);
14803 if (TARGET_MACHO && !TARGET_64BIT)
14806 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
14807 fnaddr = machopic_indirect_call_target (fnaddr);
14812 /* Static functions and indirect calls don't need the pic register. */
14813 if (flag_pic && (!TARGET_64BIT || ix86_cmodel == CM_LARGE_PIC)
14814 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
14815 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
14816 use_reg (&use, pic_offset_table_rtx);
14819 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
14821 rtx al = gen_rtx_REG (QImode, 0);
14822 emit_move_insn (al, callarg2);
14823 use_reg (&use, al);
14826 if (ix86_cmodel == CM_LARGE_PIC
14827 && GET_CODE (fnaddr) == MEM
14828 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
14829 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
14830 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
14831 else if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
14833 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
14834 fnaddr = gen_rtx_MEM (QImode, fnaddr);
14836 if (sibcall && TARGET_64BIT
14837 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
14840 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
14841 fnaddr = gen_rtx_REG (Pmode, R11_REG);
14842 emit_move_insn (fnaddr, addr);
14843 fnaddr = gen_rtx_MEM (QImode, fnaddr);
14846 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
14848 call = gen_rtx_SET (VOIDmode, retval, call);
14851 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
14852 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
14853 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
14856 call = emit_call_insn (call);
14858 CALL_INSN_FUNCTION_USAGE (call) = use;
14862 /* Clear stack slot assignments remembered from previous functions.
14863 This is called from INIT_EXPANDERS once before RTL is emitted for each function. */
14866 static struct machine_function *
14867 ix86_init_machine_status (void)
14869 struct machine_function *f;
14871 f = ggc_alloc_cleared (sizeof (struct machine_function));
14872 f->use_fast_prologue_epilogue_nregs = -1;
14873 f->tls_descriptor_call_expanded_p = 0;
14878 /* Return a MEM corresponding to a stack slot with mode MODE.
14879 Allocate a new slot if necessary.
14881 The RTL for a function can have several slots available: N is
14882 which slot to use. */
14885 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
14887 struct stack_local_entry *s;
14889 gcc_assert (n < MAX_386_STACK_LOCALS);
14891 for (s = ix86_stack_locals; s; s = s->next)
14892 if (s->mode == mode && s->n == n)
14893 return copy_rtx (s->rtl);
14895 s = (struct stack_local_entry *)
14896 ggc_alloc (sizeof (struct stack_local_entry));
14899 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
14901 s->next = ix86_stack_locals;
14902 ix86_stack_locals = s;
14906 /* Construct the SYMBOL_REF for the tls_get_addr function. */
14908 static GTY(()) rtx ix86_tls_symbol;
14910 ix86_tls_get_addr (void)
14913 if (!ix86_tls_symbol)
14915 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
14916 (TARGET_ANY_GNU_TLS
14918 ? "___tls_get_addr"
14919 : "__tls_get_addr");
14922 return ix86_tls_symbol;
14925 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
14927 static GTY(()) rtx ix86_tls_module_base_symbol;
14929 ix86_tls_module_base (void)
14932 if (!ix86_tls_module_base_symbol)
14934 ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode,
14935 "_TLS_MODULE_BASE_");
14936 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
14937 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
14940 return ix86_tls_module_base_symbol;
14943 /* Calculate the length of the memory address in the instruction
14944 encoding. Does not include the one-byte modrm, opcode, or prefix. */
14947 memory_address_length (rtx addr)
14949 struct ix86_address parts;
14950 rtx base, index, disp;
14954 if (GET_CODE (addr) == PRE_DEC
14955 || GET_CODE (addr) == POST_INC
14956 || GET_CODE (addr) == PRE_MODIFY
14957 || GET_CODE (addr) == POST_MODIFY)
14960 ok = ix86_decompose_address (addr, &parts);
14963 if (parts.base && GET_CODE (parts.base) == SUBREG)
14964 parts.base = SUBREG_REG (parts.base);
14965 if (parts.index && GET_CODE (parts.index) == SUBREG)
14966 parts.index = SUBREG_REG (parts.index);
14969 index = parts.index;
14973 /* Rule of thumb:
14974 - esp as the base always wants an index,
14975 - ebp as the base always wants a displacement. */
14977 /* Register Indirect. */
14978 if (base && !index && !disp)
14980 /* esp (for its index) and ebp (for its displacement) need
14981 the two-byte modrm form. */
14982 if (addr == stack_pointer_rtx
14983 || addr == arg_pointer_rtx
14984 || addr == frame_pointer_rtx
14985 || addr == hard_frame_pointer_rtx)
14989 /* Direct Addressing. */
14990 else if (disp && !base && !index)
14995 /* Find the length of the displacement constant. */
14998 if (base && satisfies_constraint_K (disp))
15003 /* ebp always wants a displacement. */
15004 else if (base == hard_frame_pointer_rtx)
15007 /* An index requires the two-byte modrm form.... */
15009 /* ...like esp, which always wants an index. */
15010 || base == stack_pointer_rtx
15011 || base == arg_pointer_rtx
15012 || base == frame_pointer_rtx)
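/* Concrete encodings behind the rules above (for reference):
     movl (%eax), %ebx        -> modrm only
     movl (%esp), %ebx        -> modrm + SIB (esp as a base escapes
				 to the SIB byte)
     movl (%ebp), %ebx        -> modrm + disp8 of 0 (mod=00 with rm=ebp
				 would mean disp32 with no base)
     movl 4(%eax,%ecx), %ebx  -> modrm + SIB + disp8  */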
15019 /* Compute default value for "length_immediate" attribute. When SHORTFORM
15020 is set, expect that the insn has an 8-bit immediate alternative. */
15022 ix86_attr_length_immediate_default (rtx insn, int shortform)
15026 extract_insn_cached (insn);
15027 for (i = recog_data.n_operands - 1; i >= 0; --i)
15028 if (CONSTANT_P (recog_data.operand[i]))
15031 if (shortform && satisfies_constraint_K (recog_data.operand[i]))
15035 switch (get_attr_mode (insn))
15046 /* Immediates for DImode instructions are encoded as 32-bit sign-extended values. */
15051 fatal_insn ("unknown insn mode", insn);
15057 /* Compute default value for "length_address" attribute. */
15059 ix86_attr_length_address_default (rtx insn)
15063 if (get_attr_type (insn) == TYPE_LEA)
15065 rtx set = PATTERN (insn);
15067 if (GET_CODE (set) == PARALLEL)
15068 set = XVECEXP (set, 0, 0);
15070 gcc_assert (GET_CODE (set) == SET);
15072 return memory_address_length (SET_SRC (set));
15075 extract_insn_cached (insn);
15076 for (i = recog_data.n_operands - 1; i >= 0; --i)
15077 if (MEM_P (recog_data.operand[i]))
15079 return memory_address_length (XEXP (recog_data.operand[i], 0));
15085 /* Return the maximum number of instructions a cpu can issue. */
15088 ix86_issue_rate (void)
15092 case PROCESSOR_PENTIUM:
15096 case PROCESSOR_PENTIUMPRO:
15097 case PROCESSOR_PENTIUM4:
15098 case PROCESSOR_ATHLON:
15100 case PROCESSOR_AMDFAM10:
15101 case PROCESSOR_NOCONA:
15102 case PROCESSOR_GENERIC32:
15103 case PROCESSOR_GENERIC64:
15106 case PROCESSOR_CORE2:
15114 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
15115 by DEP_INSN and nothing else set by DEP_INSN. */
15118 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
15122 /* Simplify the test for uninteresting insns. */
15123 if (insn_type != TYPE_SETCC
15124 && insn_type != TYPE_ICMOV
15125 && insn_type != TYPE_FCMOV
15126 && insn_type != TYPE_IBR)
15129 if ((set = single_set (dep_insn)) != 0)
15131 set = SET_DEST (set);
15134 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
15135 && XVECLEN (PATTERN (dep_insn), 0) == 2
15136 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
15137 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
15139 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
15140 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
15145 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
15148 /* This test is true if the dependent insn reads the flags but
15149 not any other potentially set register. */
15150 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
15153 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
15159 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
15160 address with operands set by DEP_INSN. */
15163 ix86_agi_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
15167 if (insn_type == TYPE_LEA
15170 addr = PATTERN (insn);
15172 if (GET_CODE (addr) == PARALLEL)
15173 addr = XVECEXP (addr, 0, 0);
15175 gcc_assert (GET_CODE (addr) == SET);
15177 addr = SET_SRC (addr);
15182 extract_insn_cached (insn);
15183 for (i = recog_data.n_operands - 1; i >= 0; --i)
15184 if (MEM_P (recog_data.operand[i]))
15186 addr = XEXP (recog_data.operand[i], 0);
15193 return modified_in_p (addr, dep_insn);
15197 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
15199 enum attr_type insn_type, dep_insn_type;
15200 enum attr_memory memory;
15202 int dep_insn_code_number;
15204 /* Anti and output dependencies have zero cost on all CPUs. */
15205 if (REG_NOTE_KIND (link) != 0)
15208 dep_insn_code_number = recog_memoized (dep_insn);
15210 /* If we can't recognize the insns, we can't really do anything. */
15211 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
15214 insn_type = get_attr_type (insn);
15215 dep_insn_type = get_attr_type (dep_insn);
15219 case PROCESSOR_PENTIUM:
15220 /* Address Generation Interlock adds a cycle of latency. */
15221 if (ix86_agi_dependent (insn, dep_insn, insn_type))
15224 /* ??? Compares pair with jump/setcc. */
15225 if (ix86_flags_dependent (insn, dep_insn, insn_type))
15228 /* Floating point stores require value to be ready one cycle earlier. */
15229 if (insn_type == TYPE_FMOV
15230 && get_attr_memory (insn) == MEMORY_STORE
15231 && !ix86_agi_dependent (insn, dep_insn, insn_type))
15235 case PROCESSOR_PENTIUMPRO:
15236 memory = get_attr_memory (insn);
15238 /* INT->FP conversion is expensive. */
15239 if (get_attr_fp_int_src (dep_insn))
15242 /* There is one cycle extra latency between an FP op and a store. */
15243 if (insn_type == TYPE_FMOV
15244 && (set = single_set (dep_insn)) != NULL_RTX
15245 && (set2 = single_set (insn)) != NULL_RTX
15246 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
15247 && MEM_P (SET_DEST (set2)))
15250 /* Show the ability of the reorder buffer to hide the latency of a load
15251 by executing it in parallel with the previous instruction, in case the
15252 previous instruction is not needed to compute the address. */
15253 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
15254 && !ix86_agi_dependent (insn, dep_insn, insn_type))
15256 /* Claim moves to take one cycle, as the core can issue one load
15257 at a time and the next load can start a cycle later. */
15258 if (dep_insn_type == TYPE_IMOV
15259 || dep_insn_type == TYPE_FMOV)
15267 memory = get_attr_memory (insn);
15269 /* The esp dependency is resolved before the instruction is really finished. */
15271 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
15272 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
15275 /* INT->FP conversion is expensive. */
15276 if (get_attr_fp_int_src (dep_insn))
15279 /* Show the ability of the reorder buffer to hide the latency of a load
15280 by executing it in parallel with the previous instruction, in case the
15281 previous instruction is not needed to compute the address. */
15282 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
15283 && !ix86_agi_dependent (insn, dep_insn, insn_type))
15285 /* Claim moves to take one cycle, as the core can issue one load
15286 at a time and the next load can start a cycle later. */
15287 if (dep_insn_type == TYPE_IMOV
15288 || dep_insn_type == TYPE_FMOV)
15297 case PROCESSOR_ATHLON:
15299 case PROCESSOR_AMDFAM10:
15300 case PROCESSOR_GENERIC32:
15301 case PROCESSOR_GENERIC64:
15302 memory = get_attr_memory (insn);
15304 /* Show the ability of the reorder buffer to hide the latency of a load
15305 by executing it in parallel with the previous instruction, in case the
15306 previous instruction is not needed to compute the address. */
15307 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
15308 && !ix86_agi_dependent (insn, dep_insn, insn_type))
15310 enum attr_unit unit = get_attr_unit (insn);
15313 /* Because of the difference between the length of integer and
15314 floating unit pipeline preparation stages, the memory operands
15315 for floating point are cheaper.
15317 ??? For Athlon the difference is most probably 2. */
15318 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
15321 loadcost = TARGET_ATHLON ? 2 : 0;
15323 if (cost >= loadcost)
15336 /* How many alternative schedules to try. This should be as wide as the
15337 scheduling freedom in the DFA, but no wider. Making this value too
15338 large results in extra work for the scheduler. */
15341 ia32_multipass_dfa_lookahead (void)
15343 if (ix86_tune == PROCESSOR_PENTIUM)
15346 if (ix86_tune == PROCESSOR_PENTIUMPRO
15347 || ix86_tune == PROCESSOR_K6)
15355 /* Compute the alignment given to a constant that is being placed in memory.
15356 EXP is the constant and ALIGN is the alignment that the object would
15357 ordinarily have. The value of this function is used instead of that
15358 alignment to align the object. */
15362 ix86_constant_alignment (tree exp, int align)
15364 if (TREE_CODE (exp) == REAL_CST)
15366 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
15368 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
15371 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
15372 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
15373 return BITS_PER_WORD;
15378 /* Compute the alignment for a static variable.
15379 TYPE is the data type, and ALIGN is the alignment that
15380 the object would ordinarily have. The value of this function is used
15381 instead of that alignment to align the object. */
15384 ix86_data_alignment (tree type, int align)
15386 int max_align = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
15388 if (AGGREGATE_TYPE_P (type)
15389 && TYPE_SIZE (type)
15390 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
15391 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
15392 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
15393 && align < max_align)
15396 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
15397 to a 16-byte boundary. */
15400 if (AGGREGATE_TYPE_P (type)
15401 && TYPE_SIZE (type)
15402 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
15403 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
15404 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
15408 if (TREE_CODE (type) == ARRAY_TYPE)
15410 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
15412 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
15415 else if (TREE_CODE (type) == COMPLEX_TYPE)
15418 if (TYPE_MODE (type) == DCmode && align < 64)
15420 if (TYPE_MODE (type) == XCmode && align < 128)
15423 else if ((TREE_CODE (type) == RECORD_TYPE
15424 || TREE_CODE (type) == UNION_TYPE
15425 || TREE_CODE (type) == QUAL_UNION_TYPE)
15426 && TYPE_FIELDS (type))
15428 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
15430 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
15433 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
15434 || TREE_CODE (type) == INTEGER_TYPE)
15436 if (TYPE_MODE (type) == DFmode && align < 64)
15438 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
15445 /* Compute the alignment for a local variable.
15446 TYPE is the data type, and ALIGN is the alignment that
15447 the object would ordinarily have. The value of this macro is used
15448 instead of that alignment to align the object. */
15451 ix86_local_alignment (tree type, int align)
15453 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
15454 to a 16-byte boundary. */
15457 if (AGGREGATE_TYPE_P (type)
15458 && TYPE_SIZE (type)
15459 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
15460 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
15461 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
15464 if (TREE_CODE (type) == ARRAY_TYPE)
15466 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
15468 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
15471 else if (TREE_CODE (type) == COMPLEX_TYPE)
15473 if (TYPE_MODE (type) == DCmode && align < 64)
15475 if (TYPE_MODE (type) == XCmode && align < 128)
15478 else if ((TREE_CODE (type) == RECORD_TYPE
15479 || TREE_CODE (type) == UNION_TYPE
15480 || TREE_CODE (type) == QUAL_UNION_TYPE)
15481 && TYPE_FIELDS (type))
15483 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
15485 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
15488 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
15489 || TREE_CODE (type) == INTEGER_TYPE)
15492 if (TYPE_MODE (type) == DFmode && align < 64)
15494 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
15500 /* Emit RTL insns to initialize the variable parts of a trampoline.
15501 FNADDR is an RTX for the address of the function's pure code.
15502 CXT is an RTX for the static chain value for the function. */
15504 x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
15508 /* Compute offset from the end of the jmp to the target function. */
15509 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
15510 plus_constant (tramp, 10),
15511 NULL_RTX, 1, OPTAB_DIRECT);
15512 emit_move_insn (gen_rtx_MEM (QImode, tramp),
15513 gen_int_mode (0xb9, QImode));
15514 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
15515 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
15516 gen_int_mode (0xe9, QImode));
15517 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
15522 /* Try to load address using shorter movl instead of movabs.
15523 We may want to support movq for kernel mode, but the kernel does not use
15524 trampolines at the moment. */
15525 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
15527 fnaddr = copy_to_mode_reg (DImode, fnaddr);
15528 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
15529 gen_int_mode (0xbb41, HImode));
15530 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
15531 gen_lowpart (SImode, fnaddr));
15536 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
15537 gen_int_mode (0xbb49, HImode));
15538 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
15542 /* Load static chain using movabs to r10. */
15543 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
15544 gen_int_mode (0xba49, HImode));
15545 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
15548 /* Jump through r11. */
15549 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
15550 gen_int_mode (0xff49, HImode));
15551 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
15552 gen_int_mode (0xe3, QImode));
15554 gcc_assert (offset <= TRAMPOLINE_SIZE);
15557 #ifdef ENABLE_EXECUTE_STACK
15558 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
15559 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
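/* Annotation (assumption, not from the original source): when
   ENABLE_EXECUTE_STACK is defined the trampoline typically lives in
   stack memory that may be mapped non-executable, so this call gives
   the runtime's __enable_execute_stack a chance to make the
   containing page executable (for instance via mprotect). */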
15563 /* Codes for all the SSE/MMX builtins. */
15566 IX86_BUILTIN_ADDPS,
15567 IX86_BUILTIN_ADDSS,
15568 IX86_BUILTIN_DIVPS,
15569 IX86_BUILTIN_DIVSS,
15570 IX86_BUILTIN_MULPS,
15571 IX86_BUILTIN_MULSS,
15572 IX86_BUILTIN_SUBPS,
15573 IX86_BUILTIN_SUBSS,
15575 IX86_BUILTIN_CMPEQPS,
15576 IX86_BUILTIN_CMPLTPS,
15577 IX86_BUILTIN_CMPLEPS,
15578 IX86_BUILTIN_CMPGTPS,
15579 IX86_BUILTIN_CMPGEPS,
15580 IX86_BUILTIN_CMPNEQPS,
15581 IX86_BUILTIN_CMPNLTPS,
15582 IX86_BUILTIN_CMPNLEPS,
15583 IX86_BUILTIN_CMPNGTPS,
15584 IX86_BUILTIN_CMPNGEPS,
15585 IX86_BUILTIN_CMPORDPS,
15586 IX86_BUILTIN_CMPUNORDPS,
15587 IX86_BUILTIN_CMPEQSS,
15588 IX86_BUILTIN_CMPLTSS,
15589 IX86_BUILTIN_CMPLESS,
15590 IX86_BUILTIN_CMPNEQSS,
15591 IX86_BUILTIN_CMPNLTSS,
15592 IX86_BUILTIN_CMPNLESS,
15593 IX86_BUILTIN_CMPNGTSS,
15594 IX86_BUILTIN_CMPNGESS,
15595 IX86_BUILTIN_CMPORDSS,
15596 IX86_BUILTIN_CMPUNORDSS,
15598 IX86_BUILTIN_COMIEQSS,
15599 IX86_BUILTIN_COMILTSS,
15600 IX86_BUILTIN_COMILESS,
15601 IX86_BUILTIN_COMIGTSS,
15602 IX86_BUILTIN_COMIGESS,
15603 IX86_BUILTIN_COMINEQSS,
15604 IX86_BUILTIN_UCOMIEQSS,
15605 IX86_BUILTIN_UCOMILTSS,
15606 IX86_BUILTIN_UCOMILESS,
15607 IX86_BUILTIN_UCOMIGTSS,
15608 IX86_BUILTIN_UCOMIGESS,
15609 IX86_BUILTIN_UCOMINEQSS,
15611 IX86_BUILTIN_CVTPI2PS,
15612 IX86_BUILTIN_CVTPS2PI,
15613 IX86_BUILTIN_CVTSI2SS,
15614 IX86_BUILTIN_CVTSI642SS,
15615 IX86_BUILTIN_CVTSS2SI,
15616 IX86_BUILTIN_CVTSS2SI64,
15617 IX86_BUILTIN_CVTTPS2PI,
15618 IX86_BUILTIN_CVTTSS2SI,
15619 IX86_BUILTIN_CVTTSS2SI64,
15621 IX86_BUILTIN_MAXPS,
15622 IX86_BUILTIN_MAXSS,
15623 IX86_BUILTIN_MINPS,
15624 IX86_BUILTIN_MINSS,
15626 IX86_BUILTIN_LOADUPS,
15627 IX86_BUILTIN_STOREUPS,
15628 IX86_BUILTIN_MOVSS,
15630 IX86_BUILTIN_MOVHLPS,
15631 IX86_BUILTIN_MOVLHPS,
15632 IX86_BUILTIN_LOADHPS,
15633 IX86_BUILTIN_LOADLPS,
15634 IX86_BUILTIN_STOREHPS,
15635 IX86_BUILTIN_STORELPS,
15637 IX86_BUILTIN_MASKMOVQ,
15638 IX86_BUILTIN_MOVMSKPS,
15639 IX86_BUILTIN_PMOVMSKB,
15641 IX86_BUILTIN_MOVNTPS,
15642 IX86_BUILTIN_MOVNTQ,
15644 IX86_BUILTIN_LOADDQU,
15645 IX86_BUILTIN_STOREDQU,
15647 IX86_BUILTIN_PACKSSWB,
15648 IX86_BUILTIN_PACKSSDW,
15649 IX86_BUILTIN_PACKUSWB,
15651 IX86_BUILTIN_PADDB,
15652 IX86_BUILTIN_PADDW,
15653 IX86_BUILTIN_PADDD,
15654 IX86_BUILTIN_PADDQ,
15655 IX86_BUILTIN_PADDSB,
15656 IX86_BUILTIN_PADDSW,
15657 IX86_BUILTIN_PADDUSB,
15658 IX86_BUILTIN_PADDUSW,
15659 IX86_BUILTIN_PSUBB,
15660 IX86_BUILTIN_PSUBW,
15661 IX86_BUILTIN_PSUBD,
15662 IX86_BUILTIN_PSUBQ,
15663 IX86_BUILTIN_PSUBSB,
15664 IX86_BUILTIN_PSUBSW,
15665 IX86_BUILTIN_PSUBUSB,
15666 IX86_BUILTIN_PSUBUSW,
15669 IX86_BUILTIN_PANDN,
15673 IX86_BUILTIN_PAVGB,
15674 IX86_BUILTIN_PAVGW,
15676 IX86_BUILTIN_PCMPEQB,
15677 IX86_BUILTIN_PCMPEQW,
15678 IX86_BUILTIN_PCMPEQD,
15679 IX86_BUILTIN_PCMPGTB,
15680 IX86_BUILTIN_PCMPGTW,
15681 IX86_BUILTIN_PCMPGTD,
15683 IX86_BUILTIN_PMADDWD,
15685 IX86_BUILTIN_PMAXSW,
15686 IX86_BUILTIN_PMAXUB,
15687 IX86_BUILTIN_PMINSW,
15688 IX86_BUILTIN_PMINUB,
15690 IX86_BUILTIN_PMULHUW,
15691 IX86_BUILTIN_PMULHW,
15692 IX86_BUILTIN_PMULLW,
15694 IX86_BUILTIN_PSADBW,
15695 IX86_BUILTIN_PSHUFW,
15697 IX86_BUILTIN_PSLLW,
15698 IX86_BUILTIN_PSLLD,
15699 IX86_BUILTIN_PSLLQ,
15700 IX86_BUILTIN_PSRAW,
15701 IX86_BUILTIN_PSRAD,
15702 IX86_BUILTIN_PSRLW,
15703 IX86_BUILTIN_PSRLD,
15704 IX86_BUILTIN_PSRLQ,
15705 IX86_BUILTIN_PSLLWI,
15706 IX86_BUILTIN_PSLLDI,
15707 IX86_BUILTIN_PSLLQI,
15708 IX86_BUILTIN_PSRAWI,
15709 IX86_BUILTIN_PSRADI,
15710 IX86_BUILTIN_PSRLWI,
15711 IX86_BUILTIN_PSRLDI,
15712 IX86_BUILTIN_PSRLQI,
15714 IX86_BUILTIN_PUNPCKHBW,
15715 IX86_BUILTIN_PUNPCKHWD,
15716 IX86_BUILTIN_PUNPCKHDQ,
15717 IX86_BUILTIN_PUNPCKLBW,
15718 IX86_BUILTIN_PUNPCKLWD,
15719 IX86_BUILTIN_PUNPCKLDQ,
15721 IX86_BUILTIN_SHUFPS,
15723 IX86_BUILTIN_RCPPS,
15724 IX86_BUILTIN_RCPSS,
15725 IX86_BUILTIN_RSQRTPS,
15726 IX86_BUILTIN_RSQRTSS,
15727 IX86_BUILTIN_SQRTPS,
15728 IX86_BUILTIN_SQRTSS,
15730 IX86_BUILTIN_UNPCKHPS,
15731 IX86_BUILTIN_UNPCKLPS,
15733 IX86_BUILTIN_ANDPS,
15734 IX86_BUILTIN_ANDNPS,
15736 IX86_BUILTIN_XORPS,
15739 IX86_BUILTIN_LDMXCSR,
15740 IX86_BUILTIN_STMXCSR,
15741 IX86_BUILTIN_SFENCE,
15743 /* 3DNow! Original */
15744 IX86_BUILTIN_FEMMS,
15745 IX86_BUILTIN_PAVGUSB,
15746 IX86_BUILTIN_PF2ID,
15747 IX86_BUILTIN_PFACC,
15748 IX86_BUILTIN_PFADD,
15749 IX86_BUILTIN_PFCMPEQ,
15750 IX86_BUILTIN_PFCMPGE,
15751 IX86_BUILTIN_PFCMPGT,
15752 IX86_BUILTIN_PFMAX,
15753 IX86_BUILTIN_PFMIN,
15754 IX86_BUILTIN_PFMUL,
15755 IX86_BUILTIN_PFRCP,
15756 IX86_BUILTIN_PFRCPIT1,
15757 IX86_BUILTIN_PFRCPIT2,
15758 IX86_BUILTIN_PFRSQIT1,
15759 IX86_BUILTIN_PFRSQRT,
15760 IX86_BUILTIN_PFSUB,
15761 IX86_BUILTIN_PFSUBR,
15762 IX86_BUILTIN_PI2FD,
15763 IX86_BUILTIN_PMULHRW,
15765 /* 3DNow! Athlon Extensions */
15766 IX86_BUILTIN_PF2IW,
15767 IX86_BUILTIN_PFNACC,
15768 IX86_BUILTIN_PFPNACC,
15769 IX86_BUILTIN_PI2FW,
15770 IX86_BUILTIN_PSWAPDSI,
15771 IX86_BUILTIN_PSWAPDSF,
15774 IX86_BUILTIN_ADDPD,
15775 IX86_BUILTIN_ADDSD,
15776 IX86_BUILTIN_DIVPD,
15777 IX86_BUILTIN_DIVSD,
15778 IX86_BUILTIN_MULPD,
15779 IX86_BUILTIN_MULSD,
15780 IX86_BUILTIN_SUBPD,
15781 IX86_BUILTIN_SUBSD,
15783 IX86_BUILTIN_CMPEQPD,
15784 IX86_BUILTIN_CMPLTPD,
15785 IX86_BUILTIN_CMPLEPD,
15786 IX86_BUILTIN_CMPGTPD,
15787 IX86_BUILTIN_CMPGEPD,
15788 IX86_BUILTIN_CMPNEQPD,
15789 IX86_BUILTIN_CMPNLTPD,
15790 IX86_BUILTIN_CMPNLEPD,
15791 IX86_BUILTIN_CMPNGTPD,
15792 IX86_BUILTIN_CMPNGEPD,
15793 IX86_BUILTIN_CMPORDPD,
15794 IX86_BUILTIN_CMPUNORDPD,
15795 IX86_BUILTIN_CMPEQSD,
15796 IX86_BUILTIN_CMPLTSD,
15797 IX86_BUILTIN_CMPLESD,
15798 IX86_BUILTIN_CMPNEQSD,
15799 IX86_BUILTIN_CMPNLTSD,
15800 IX86_BUILTIN_CMPNLESD,
15801 IX86_BUILTIN_CMPORDSD,
15802 IX86_BUILTIN_CMPUNORDSD,
15804 IX86_BUILTIN_COMIEQSD,
15805 IX86_BUILTIN_COMILTSD,
15806 IX86_BUILTIN_COMILESD,
15807 IX86_BUILTIN_COMIGTSD,
15808 IX86_BUILTIN_COMIGESD,
15809 IX86_BUILTIN_COMINEQSD,
15810 IX86_BUILTIN_UCOMIEQSD,
15811 IX86_BUILTIN_UCOMILTSD,
15812 IX86_BUILTIN_UCOMILESD,
15813 IX86_BUILTIN_UCOMIGTSD,
15814 IX86_BUILTIN_UCOMIGESD,
15815 IX86_BUILTIN_UCOMINEQSD,
15817 IX86_BUILTIN_MAXPD,
15818 IX86_BUILTIN_MAXSD,
15819 IX86_BUILTIN_MINPD,
15820 IX86_BUILTIN_MINSD,
15822 IX86_BUILTIN_ANDPD,
15823 IX86_BUILTIN_ANDNPD,
15825 IX86_BUILTIN_XORPD,
15827 IX86_BUILTIN_SQRTPD,
15828 IX86_BUILTIN_SQRTSD,
15830 IX86_BUILTIN_UNPCKHPD,
15831 IX86_BUILTIN_UNPCKLPD,
15833 IX86_BUILTIN_SHUFPD,
15835 IX86_BUILTIN_LOADUPD,
15836 IX86_BUILTIN_STOREUPD,
15837 IX86_BUILTIN_MOVSD,
15839 IX86_BUILTIN_LOADHPD,
15840 IX86_BUILTIN_LOADLPD,
15842 IX86_BUILTIN_CVTDQ2PD,
15843 IX86_BUILTIN_CVTDQ2PS,
15845 IX86_BUILTIN_CVTPD2DQ,
15846 IX86_BUILTIN_CVTPD2PI,
15847 IX86_BUILTIN_CVTPD2PS,
15848 IX86_BUILTIN_CVTTPD2DQ,
15849 IX86_BUILTIN_CVTTPD2PI,
15851 IX86_BUILTIN_CVTPI2PD,
15852 IX86_BUILTIN_CVTSI2SD,
15853 IX86_BUILTIN_CVTSI642SD,
15855 IX86_BUILTIN_CVTSD2SI,
15856 IX86_BUILTIN_CVTSD2SI64,
15857 IX86_BUILTIN_CVTSD2SS,
15858 IX86_BUILTIN_CVTSS2SD,
15859 IX86_BUILTIN_CVTTSD2SI,
15860 IX86_BUILTIN_CVTTSD2SI64,
15862 IX86_BUILTIN_CVTPS2DQ,
15863 IX86_BUILTIN_CVTPS2PD,
15864 IX86_BUILTIN_CVTTPS2DQ,
15866 IX86_BUILTIN_MOVNTI,
15867 IX86_BUILTIN_MOVNTPD,
15868 IX86_BUILTIN_MOVNTDQ,
15871 IX86_BUILTIN_MASKMOVDQU,
15872 IX86_BUILTIN_MOVMSKPD,
15873 IX86_BUILTIN_PMOVMSKB128,
15875 IX86_BUILTIN_PACKSSWB128,
15876 IX86_BUILTIN_PACKSSDW128,
15877 IX86_BUILTIN_PACKUSWB128,
15879 IX86_BUILTIN_PADDB128,
15880 IX86_BUILTIN_PADDW128,
15881 IX86_BUILTIN_PADDD128,
15882 IX86_BUILTIN_PADDQ128,
15883 IX86_BUILTIN_PADDSB128,
15884 IX86_BUILTIN_PADDSW128,
15885 IX86_BUILTIN_PADDUSB128,
15886 IX86_BUILTIN_PADDUSW128,
15887 IX86_BUILTIN_PSUBB128,
15888 IX86_BUILTIN_PSUBW128,
15889 IX86_BUILTIN_PSUBD128,
15890 IX86_BUILTIN_PSUBQ128,
15891 IX86_BUILTIN_PSUBSB128,
15892 IX86_BUILTIN_PSUBSW128,
15893 IX86_BUILTIN_PSUBUSB128,
15894 IX86_BUILTIN_PSUBUSW128,
15896 IX86_BUILTIN_PAND128,
15897 IX86_BUILTIN_PANDN128,
15898 IX86_BUILTIN_POR128,
15899 IX86_BUILTIN_PXOR128,
15901 IX86_BUILTIN_PAVGB128,
15902 IX86_BUILTIN_PAVGW128,
15904 IX86_BUILTIN_PCMPEQB128,
15905 IX86_BUILTIN_PCMPEQW128,
15906 IX86_BUILTIN_PCMPEQD128,
15907 IX86_BUILTIN_PCMPGTB128,
15908 IX86_BUILTIN_PCMPGTW128,
15909 IX86_BUILTIN_PCMPGTD128,
15911 IX86_BUILTIN_PMADDWD128,
15913 IX86_BUILTIN_PMAXSW128,
15914 IX86_BUILTIN_PMAXUB128,
15915 IX86_BUILTIN_PMINSW128,
15916 IX86_BUILTIN_PMINUB128,
15918 IX86_BUILTIN_PMULUDQ,
15919 IX86_BUILTIN_PMULUDQ128,
15920 IX86_BUILTIN_PMULHUW128,
15921 IX86_BUILTIN_PMULHW128,
15922 IX86_BUILTIN_PMULLW128,
15924 IX86_BUILTIN_PSADBW128,
15925 IX86_BUILTIN_PSHUFHW,
15926 IX86_BUILTIN_PSHUFLW,
15927 IX86_BUILTIN_PSHUFD,
15929 IX86_BUILTIN_PSLLDQI128,
15930 IX86_BUILTIN_PSLLWI128,
15931 IX86_BUILTIN_PSLLDI128,
15932 IX86_BUILTIN_PSLLQI128,
15933 IX86_BUILTIN_PSRAWI128,
15934 IX86_BUILTIN_PSRADI128,
15935 IX86_BUILTIN_PSRLDQI128,
15936 IX86_BUILTIN_PSRLWI128,
15937 IX86_BUILTIN_PSRLDI128,
15938 IX86_BUILTIN_PSRLQI128,
15940 IX86_BUILTIN_PSLLDQ128,
15941 IX86_BUILTIN_PSLLW128,
15942 IX86_BUILTIN_PSLLD128,
15943 IX86_BUILTIN_PSLLQ128,
15944 IX86_BUILTIN_PSRAW128,
15945 IX86_BUILTIN_PSRAD128,
15946 IX86_BUILTIN_PSRLW128,
15947 IX86_BUILTIN_PSRLD128,
15948 IX86_BUILTIN_PSRLQ128,
15950 IX86_BUILTIN_PUNPCKHBW128,
15951 IX86_BUILTIN_PUNPCKHWD128,
15952 IX86_BUILTIN_PUNPCKHDQ128,
15953 IX86_BUILTIN_PUNPCKHQDQ128,
15954 IX86_BUILTIN_PUNPCKLBW128,
15955 IX86_BUILTIN_PUNPCKLWD128,
15956 IX86_BUILTIN_PUNPCKLDQ128,
15957 IX86_BUILTIN_PUNPCKLQDQ128,
15959 IX86_BUILTIN_CLFLUSH,
15960 IX86_BUILTIN_MFENCE,
15961 IX86_BUILTIN_LFENCE,
15963 /* Prescott New Instructions. */
15964 IX86_BUILTIN_ADDSUBPS,
15965 IX86_BUILTIN_HADDPS,
15966 IX86_BUILTIN_HSUBPS,
15967 IX86_BUILTIN_MOVSHDUP,
15968 IX86_BUILTIN_MOVSLDUP,
15969 IX86_BUILTIN_ADDSUBPD,
15970 IX86_BUILTIN_HADDPD,
15971 IX86_BUILTIN_HSUBPD,
15972 IX86_BUILTIN_LDDQU,
15974 IX86_BUILTIN_MONITOR,
15975 IX86_BUILTIN_MWAIT,
15978 IX86_BUILTIN_PHADDW,
15979 IX86_BUILTIN_PHADDD,
15980 IX86_BUILTIN_PHADDSW,
15981 IX86_BUILTIN_PHSUBW,
15982 IX86_BUILTIN_PHSUBD,
15983 IX86_BUILTIN_PHSUBSW,
15984 IX86_BUILTIN_PMADDUBSW,
15985 IX86_BUILTIN_PMULHRSW,
15986 IX86_BUILTIN_PSHUFB,
15987 IX86_BUILTIN_PSIGNB,
15988 IX86_BUILTIN_PSIGNW,
15989 IX86_BUILTIN_PSIGND,
15990 IX86_BUILTIN_PALIGNR,
15991 IX86_BUILTIN_PABSB,
15992 IX86_BUILTIN_PABSW,
15993 IX86_BUILTIN_PABSD,
15995 IX86_BUILTIN_PHADDW128,
15996 IX86_BUILTIN_PHADDD128,
15997 IX86_BUILTIN_PHADDSW128,
15998 IX86_BUILTIN_PHSUBW128,
15999 IX86_BUILTIN_PHSUBD128,
16000 IX86_BUILTIN_PHSUBSW128,
16001 IX86_BUILTIN_PMADDUBSW128,
16002 IX86_BUILTIN_PMULHRSW128,
16003 IX86_BUILTIN_PSHUFB128,
16004 IX86_BUILTIN_PSIGNB128,
16005 IX86_BUILTIN_PSIGNW128,
16006 IX86_BUILTIN_PSIGND128,
16007 IX86_BUILTIN_PALIGNR128,
16008 IX86_BUILTIN_PABSB128,
16009 IX86_BUILTIN_PABSW128,
16010 IX86_BUILTIN_PABSD128,
16012 /* AMDFAM10 - SSE4A New Instructions. */
16013 IX86_BUILTIN_MOVNTSD,
16014 IX86_BUILTIN_MOVNTSS,
16015 IX86_BUILTIN_EXTRQI,
16016 IX86_BUILTIN_EXTRQ,
16017 IX86_BUILTIN_INSERTQI,
16018 IX86_BUILTIN_INSERTQ,
16020 IX86_BUILTIN_VEC_INIT_V2SI,
16021 IX86_BUILTIN_VEC_INIT_V4HI,
16022 IX86_BUILTIN_VEC_INIT_V8QI,
16023 IX86_BUILTIN_VEC_EXT_V2DF,
16024 IX86_BUILTIN_VEC_EXT_V2DI,
16025 IX86_BUILTIN_VEC_EXT_V4SF,
16026 IX86_BUILTIN_VEC_EXT_V4SI,
16027 IX86_BUILTIN_VEC_EXT_V8HI,
16028 IX86_BUILTIN_VEC_EXT_V2SI,
16029 IX86_BUILTIN_VEC_EXT_V4HI,
16030 IX86_BUILTIN_VEC_SET_V8HI,
16031 IX86_BUILTIN_VEC_SET_V4HI,
16036 /* Table for the ix86 builtin decls. */
16037 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
16039 /* Add an ix86 target builtin function with CODE, NAME and TYPE. Do so
16040 * only if the target_flags include one of MASK. Store the function decl
16041 * in the ix86_builtins array.
16042 * Return the function decl, or NULL_TREE if the builtin was not added. */
16045 def_builtin (int mask, const char *name, tree type, enum ix86_builtins code)
16047 tree decl = NULL_TREE;
16049 if (mask & target_flags
16050 && (!(mask & MASK_64BIT) || TARGET_64BIT))
16052 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
16054 ix86_builtins[(int) code] = decl;
16060 /* Like def_builtin, but also marks the function decl "const". */
16063 def_builtin_const (int mask, const char *name, tree type,
16064 enum ix86_builtins code)
16066 tree decl = def_builtin (mask, name, type, code);
16067 if (decl)
16068 TREE_READONLY (decl) = 1;
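/* Annotation (not part of the original source): TREE_READONLY on a
   function decl is the equivalent of __attribute__((const)), so the
   optimizers may CSE or hoist calls to these builtins; it is used
   below for value-pure operations such as the cvt* conversion
   builtins. */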
16072 /* Bits for builtin_description.flag. */
16074 /* Set when we don't support the comparison natively, and should
16075 swap the comparison operands in order to support it. */
16076 #define BUILTIN_DESC_SWAP_OPERANDS 1
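/* Example (annotation, not part of the original source): SSE has no
   "compare greater" encoding, so __builtin_ia32_cmpgtps is listed in
   bdesc_2arg below as LT with BUILTIN_DESC_SWAP_OPERANDS set; the
   expander emits the LT compare with its operands exchanged, since
   a > b is the same as b < a. */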
16078 struct builtin_description
16080 const unsigned int mask;
16081 const enum insn_code icode;
16082 const char *const name;
16083 const enum ix86_builtins code;
16084 const enum rtx_code comparison;
16085 const unsigned int flag;
16088 static const struct builtin_description bdesc_comi[] =
16090 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
16091 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
16092 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
16093 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
16094 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
16095 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
16096 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
16097 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
16098 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
16099 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
16100 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
16101 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
16102 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
16103 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
16104 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
16105 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
16106 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
16107 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
16108 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
16109 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
16110 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
16111 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
16112 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
16113 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
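/* Annotation (not part of the original source): each entry above ties
   one comiss/comisd (or ucomiss/ucomisd) builtin to the rtx comparison
   code used to test the resulting flags; all of them are registered in
   ix86_init_mmx_sse_builtins below with an int (v4sf, v4sf) or
   int (v2df, v2df) signature, chosen by d->mask. */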
16116 static const struct builtin_description bdesc_2arg[] =
16119 { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
16120 { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
16121 { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
16122 { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
16123 { MASK_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
16124 { MASK_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
16125 { MASK_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
16126 { MASK_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
16128 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
16129 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
16130 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
16131 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT,
16132 BUILTIN_DESC_SWAP_OPERANDS },
16133 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE,
16134 BUILTIN_DESC_SWAP_OPERANDS },
16135 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
16136 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, 0 },
16137 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, 0 },
16138 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, 0 },
16139 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE,
16140 BUILTIN_DESC_SWAP_OPERANDS },
16141 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT,
16142 BUILTIN_DESC_SWAP_OPERANDS },
16143 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, 0 },
16144 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
16145 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
16146 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
16147 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
16148 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, 0 },
16149 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, 0 },
16150 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, 0 },
16151 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE,
16152 BUILTIN_DESC_SWAP_OPERANDS },
16153 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT,
16154 BUILTIN_DESC_SWAP_OPERANDS },
16155 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
16157 { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
16158 { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
16159 { MASK_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
16160 { MASK_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
16162 { MASK_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
16163 { MASK_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
16164 { MASK_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
16165 { MASK_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
16167 { MASK_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
16168 { MASK_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
16169 { MASK_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
16170 { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
16171 { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
16174 { MASK_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
16175 { MASK_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
16176 { MASK_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
16177 { MASK_SSE2, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, 0, 0 },
16178 { MASK_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
16179 { MASK_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
16180 { MASK_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
16181 { MASK_SSE2, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, 0, 0 },
16183 { MASK_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
16184 { MASK_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
16185 { MASK_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
16186 { MASK_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
16187 { MASK_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
16188 { MASK_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
16189 { MASK_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
16190 { MASK_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
16192 { MASK_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
16193 { MASK_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
16194 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
16196 { MASK_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
16197 { MASK_MMX, CODE_FOR_mmx_nandv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
16198 { MASK_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
16199 { MASK_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
16201 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
16202 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
16204 { MASK_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
16205 { MASK_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
16206 { MASK_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
16207 { MASK_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
16208 { MASK_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
16209 { MASK_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
16211 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
16212 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
16213 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
16214 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
16216 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
16217 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
16218 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
16219 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
16220 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
16221 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
16224 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
16225 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
16226 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
16228 { MASK_SSE, CODE_FOR_sse_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
16229 { MASK_SSE, CODE_FOR_sse_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
16230 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 },
16232 { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
16233 { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
16234 { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
16235 { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
16236 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
16237 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
16239 { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
16240 { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
16241 { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
16242 { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
16243 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
16244 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
16246 { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
16247 { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
16248 { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
16249 { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
16251 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
16252 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
16255 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
16256 { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
16257 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
16258 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
16259 { MASK_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
16260 { MASK_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
16261 { MASK_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
16262 { MASK_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
16264 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
16265 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
16266 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
16267 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT,
16268 BUILTIN_DESC_SWAP_OPERANDS },
16269 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE,
16270 BUILTIN_DESC_SWAP_OPERANDS },
16271 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
16272 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, 0 },
16273 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, 0 },
16274 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, 0 },
16275 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE,
16276 BUILTIN_DESC_SWAP_OPERANDS },
16277 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT,
16278 BUILTIN_DESC_SWAP_OPERANDS },
16279 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, 0 },
16280 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
16281 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
16282 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
16283 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
16284 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, 0 },
16285 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, 0 },
16286 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, 0 },
16287 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, 0 },
16289 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
16290 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
16291 { MASK_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
16292 { MASK_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
16294 { MASK_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
16295 { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
16296 { MASK_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
16297 { MASK_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
16299 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
16300 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
16301 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
16304 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
16305 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
16306 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
16307 { MASK_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
16308 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
16309 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
16310 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
16311 { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
16313 { MASK_MMX, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
16314 { MASK_MMX, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
16315 { MASK_MMX, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
16316 { MASK_MMX, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
16317 { MASK_MMX, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
16318 { MASK_MMX, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
16319 { MASK_MMX, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
16320 { MASK_MMX, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
16322 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
16323 { MASK_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
16325 { MASK_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
16326 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
16327 { MASK_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
16328 { MASK_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
16330 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
16331 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
16333 { MASK_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
16334 { MASK_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
16335 { MASK_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
16336 { MASK_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
16337 { MASK_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
16338 { MASK_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
16340 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
16341 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
16342 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
16343 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
16345 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
16346 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
16347 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
16348 { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
16349 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
16350 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
16351 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
16352 { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },
16354 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
16355 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
16356 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
16358 { MASK_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
16359 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
16361 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, 0, IX86_BUILTIN_PMULUDQ, 0, 0 },
16362 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, 0, IX86_BUILTIN_PMULUDQ128, 0, 0 },
16364 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
16365 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
16366 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
16368 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
16369 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
16370 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
16372 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
16373 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
16375 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
16377 { MASK_SSE2, CODE_FOR_sse2_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
16378 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 },
16379 { MASK_SSE2, CODE_FOR_sse2_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
16380 { MASK_SSE2, CODE_FOR_sse2_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 },
16383 { MASK_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, 0, 0 },
16384 { MASK_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, 0, 0 },
16385 { MASK_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, 0, 0 },
16386 { MASK_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, 0, 0 },
16387 { MASK_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, 0, 0 },
16388 { MASK_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, 0, 0 },
16391 { MASK_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, 0, 0 },
16392 { MASK_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, 0, 0 },
16393 { MASK_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, 0, 0 },
16394 { MASK_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, 0, 0 },
16395 { MASK_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, 0, 0 },
16396 { MASK_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, 0, 0 },
16397 { MASK_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, 0, 0 },
16398 { MASK_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, 0, 0 },
16399 { MASK_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, 0, 0 },
16400 { MASK_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, 0, 0 },
16401 { MASK_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, 0, 0 },
16402 { MASK_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, 0, 0 },
16403 { MASK_SSSE3, CODE_FOR_ssse3_pmaddubswv8hi3, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, 0, 0 },
16404 { MASK_SSSE3, CODE_FOR_ssse3_pmaddubswv4hi3, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, 0, 0 },
16405 { MASK_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, 0, 0 },
16406 { MASK_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, 0, 0 },
16407 { MASK_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, 0, 0 },
16408 { MASK_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, 0, 0 },
16409 { MASK_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, 0, 0 },
16410 { MASK_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, 0, 0 },
16411 { MASK_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, 0, 0 },
16412 { MASK_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, 0, 0 },
16413 { MASK_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, 0, 0 },
16414 { MASK_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, 0, 0 }
16417 static const struct builtin_description bdesc_1arg[] =
16419 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
16420 { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
16422 { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
16423 { MASK_SSE, CODE_FOR_sse_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
16424 { MASK_SSE, CODE_FOR_sse_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
16426 { MASK_SSE, CODE_FOR_sse_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
16427 { MASK_SSE, CODE_FOR_sse_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
16428 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 },
16429 { MASK_SSE, CODE_FOR_sse_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
16430 { MASK_SSE, CODE_FOR_sse_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
16431 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 },
16433 { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
16434 { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
16436 { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
16438 { MASK_SSE2, CODE_FOR_sse2_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
16439 { MASK_SSE2, CODE_FOR_sse2_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
16441 { MASK_SSE2, CODE_FOR_sse2_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
16442 { MASK_SSE2, CODE_FOR_sse2_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
16443 { MASK_SSE2, CODE_FOR_sse2_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
16444 { MASK_SSE2, CODE_FOR_sse2_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
16445 { MASK_SSE2, CODE_FOR_sse2_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
16447 { MASK_SSE2, CODE_FOR_sse2_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
16449 { MASK_SSE2, CODE_FOR_sse2_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
16450 { MASK_SSE2, CODE_FOR_sse2_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
16451 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 },
16452 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 },
16454 { MASK_SSE2, CODE_FOR_sse2_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
16455 { MASK_SSE2, CODE_FOR_sse2_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
16456 { MASK_SSE2, CODE_FOR_sse2_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },
16459 { MASK_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, 0, 0 },
16460 { MASK_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, 0, 0 },
16463 { MASK_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, 0, 0 },
16464 { MASK_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, 0, 0 },
16465 { MASK_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, 0, 0 },
16466 { MASK_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, 0, 0 },
16467 { MASK_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, 0, 0 },
16468 { MASK_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, 0, 0 },
16471 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
16472 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
16473 portion of the builtins. */
16475 ix86_init_mmx_sse_builtins (void)
16477 const struct builtin_description * d;
16480 tree V16QI_type_node = build_vector_type_for_mode (char_type_node, V16QImode);
16481 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
16482 tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
16483 tree V2DI_type_node
16484 = build_vector_type_for_mode (long_long_integer_type_node, V2DImode);
16485 tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
16486 tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
16487 tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
16488 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
16489 tree V8QI_type_node = build_vector_type_for_mode (char_type_node, V8QImode);
16490 tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);
16492 tree pchar_type_node = build_pointer_type (char_type_node);
16493 tree pcchar_type_node = build_pointer_type (
16494 build_type_variant (char_type_node, 1, 0));
16495 tree pfloat_type_node = build_pointer_type (float_type_node);
16496 tree pcfloat_type_node = build_pointer_type (
16497 build_type_variant (float_type_node, 1, 0));
16498 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
16499 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
16500 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
16503 tree int_ftype_v4sf_v4sf
16504 = build_function_type_list (integer_type_node,
16505 V4SF_type_node, V4SF_type_node, NULL_TREE);
16506 tree v4si_ftype_v4sf_v4sf
16507 = build_function_type_list (V4SI_type_node,
16508 V4SF_type_node, V4SF_type_node, NULL_TREE);
16509 /* MMX/SSE/integer conversions. */
16510 tree int_ftype_v4sf
16511 = build_function_type_list (integer_type_node,
16512 V4SF_type_node, NULL_TREE);
16513 tree int64_ftype_v4sf
16514 = build_function_type_list (long_long_integer_type_node,
16515 V4SF_type_node, NULL_TREE);
16516 tree int_ftype_v8qi
16517 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
16518 tree v4sf_ftype_v4sf_int
16519 = build_function_type_list (V4SF_type_node,
16520 V4SF_type_node, integer_type_node, NULL_TREE);
16521 tree v4sf_ftype_v4sf_int64
16522 = build_function_type_list (V4SF_type_node,
16523 V4SF_type_node, long_long_integer_type_node,
16525 tree v4sf_ftype_v4sf_v2si
16526 = build_function_type_list (V4SF_type_node,
16527 V4SF_type_node, V2SI_type_node, NULL_TREE);
16529 /* Miscellaneous. */
16530 tree v8qi_ftype_v4hi_v4hi
16531 = build_function_type_list (V8QI_type_node,
16532 V4HI_type_node, V4HI_type_node, NULL_TREE);
16533 tree v4hi_ftype_v2si_v2si
16534 = build_function_type_list (V4HI_type_node,
16535 V2SI_type_node, V2SI_type_node, NULL_TREE);
16536 tree v4sf_ftype_v4sf_v4sf_int
16537 = build_function_type_list (V4SF_type_node,
16538 V4SF_type_node, V4SF_type_node,
16539 integer_type_node, NULL_TREE);
16540 tree v2si_ftype_v4hi_v4hi
16541 = build_function_type_list (V2SI_type_node,
16542 V4HI_type_node, V4HI_type_node, NULL_TREE);
16543 tree v4hi_ftype_v4hi_int
16544 = build_function_type_list (V4HI_type_node,
16545 V4HI_type_node, integer_type_node, NULL_TREE);
16546 tree v4hi_ftype_v4hi_di
16547 = build_function_type_list (V4HI_type_node,
16548 V4HI_type_node, long_long_unsigned_type_node,
16550 tree v2si_ftype_v2si_di
16551 = build_function_type_list (V2SI_type_node,
16552 V2SI_type_node, long_long_unsigned_type_node,
16554 tree void_ftype_void
16555 = build_function_type (void_type_node, void_list_node);
16556 tree void_ftype_unsigned
16557 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
16558 tree void_ftype_unsigned_unsigned
16559 = build_function_type_list (void_type_node, unsigned_type_node,
16560 unsigned_type_node, NULL_TREE);
16561 tree void_ftype_pcvoid_unsigned_unsigned
16562 = build_function_type_list (void_type_node, const_ptr_type_node,
16563 unsigned_type_node, unsigned_type_node,
16565 tree unsigned_ftype_void
16566 = build_function_type (unsigned_type_node, void_list_node);
16567 tree v2si_ftype_v4sf
16568 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
16569 /* Loads/stores. */
16570 tree void_ftype_v8qi_v8qi_pchar
16571 = build_function_type_list (void_type_node,
16572 V8QI_type_node, V8QI_type_node,
16573 pchar_type_node, NULL_TREE);
16574 tree v4sf_ftype_pcfloat
16575 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
16576 /* @@@ the type is bogus */
16577 tree v4sf_ftype_v4sf_pv2si
16578 = build_function_type_list (V4SF_type_node,
16579 V4SF_type_node, pv2si_type_node, NULL_TREE);
16580 tree void_ftype_pv2si_v4sf
16581 = build_function_type_list (void_type_node,
16582 pv2si_type_node, V4SF_type_node, NULL_TREE);
16583 tree void_ftype_pfloat_v4sf
16584 = build_function_type_list (void_type_node,
16585 pfloat_type_node, V4SF_type_node, NULL_TREE);
16586 tree void_ftype_pdi_di
16587 = build_function_type_list (void_type_node,
16588 pdi_type_node, long_long_unsigned_type_node,
16590 tree void_ftype_pv2di_v2di
16591 = build_function_type_list (void_type_node,
16592 pv2di_type_node, V2DI_type_node, NULL_TREE);
16593 /* Normal vector unops. */
16594 tree v4sf_ftype_v4sf
16595 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
16596 tree v16qi_ftype_v16qi
16597 = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE);
16598 tree v8hi_ftype_v8hi
16599 = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE);
16600 tree v4si_ftype_v4si
16601 = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
16602 tree v8qi_ftype_v8qi
16603 = build_function_type_list (V8QI_type_node, V8QI_type_node, NULL_TREE);
16604 tree v4hi_ftype_v4hi
16605 = build_function_type_list (V4HI_type_node, V4HI_type_node, NULL_TREE);
16607 /* Normal vector binops. */
16608 tree v4sf_ftype_v4sf_v4sf
16609 = build_function_type_list (V4SF_type_node,
16610 V4SF_type_node, V4SF_type_node, NULL_TREE);
16611 tree v8qi_ftype_v8qi_v8qi
16612 = build_function_type_list (V8QI_type_node,
16613 V8QI_type_node, V8QI_type_node, NULL_TREE);
16614 tree v4hi_ftype_v4hi_v4hi
16615 = build_function_type_list (V4HI_type_node,
16616 V4HI_type_node, V4HI_type_node, NULL_TREE);
16617 tree v2si_ftype_v2si_v2si
16618 = build_function_type_list (V2SI_type_node,
16619 V2SI_type_node, V2SI_type_node, NULL_TREE);
16620 tree di_ftype_di_di
16621 = build_function_type_list (long_long_unsigned_type_node,
16622 long_long_unsigned_type_node,
16623 long_long_unsigned_type_node, NULL_TREE);
16625 tree di_ftype_di_di_int
16626 = build_function_type_list (long_long_unsigned_type_node,
16627 long_long_unsigned_type_node,
16628 long_long_unsigned_type_node,
16629 integer_type_node, NULL_TREE);
16631 tree v2si_ftype_v2sf
16632 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
16633 tree v2sf_ftype_v2si
16634 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
16635 tree v2si_ftype_v2si
16636 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
16637 tree v2sf_ftype_v2sf
16638 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
16639 tree v2sf_ftype_v2sf_v2sf
16640 = build_function_type_list (V2SF_type_node,
16641 V2SF_type_node, V2SF_type_node, NULL_TREE);
16642 tree v2si_ftype_v2sf_v2sf
16643 = build_function_type_list (V2SI_type_node,
16644 V2SF_type_node, V2SF_type_node, NULL_TREE);
16645 tree pint_type_node = build_pointer_type (integer_type_node);
16646 tree pdouble_type_node = build_pointer_type (double_type_node);
16647 tree pcdouble_type_node = build_pointer_type (
16648 build_type_variant (double_type_node, 1, 0));
16649 tree int_ftype_v2df_v2df
16650 = build_function_type_list (integer_type_node,
16651 V2DF_type_node, V2DF_type_node, NULL_TREE);
16653 tree void_ftype_pcvoid
16654 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
16655 tree v4sf_ftype_v4si
16656 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
16657 tree v4si_ftype_v4sf
16658 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
16659 tree v2df_ftype_v4si
16660 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
16661 tree v4si_ftype_v2df
16662 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
16663 tree v2si_ftype_v2df
16664 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
16665 tree v4sf_ftype_v2df
16666 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
16667 tree v2df_ftype_v2si
16668 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
16669 tree v2df_ftype_v4sf
16670 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
16671 tree int_ftype_v2df
16672 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
16673 tree int64_ftype_v2df
16674 = build_function_type_list (long_long_integer_type_node,
16675 V2DF_type_node, NULL_TREE);
16676 tree v2df_ftype_v2df_int
16677 = build_function_type_list (V2DF_type_node,
16678 V2DF_type_node, integer_type_node, NULL_TREE);
16679 tree v2df_ftype_v2df_int64
16680 = build_function_type_list (V2DF_type_node,
16681 V2DF_type_node, long_long_integer_type_node,
16683 tree v4sf_ftype_v4sf_v2df
16684 = build_function_type_list (V4SF_type_node,
16685 V4SF_type_node, V2DF_type_node, NULL_TREE);
16686 tree v2df_ftype_v2df_v4sf
16687 = build_function_type_list (V2DF_type_node,
16688 V2DF_type_node, V4SF_type_node, NULL_TREE);
16689 tree v2df_ftype_v2df_v2df_int
16690 = build_function_type_list (V2DF_type_node,
16691 V2DF_type_node, V2DF_type_node,
16694 tree v2df_ftype_v2df_pcdouble
16695 = build_function_type_list (V2DF_type_node,
16696 V2DF_type_node, pcdouble_type_node, NULL_TREE);
16697 tree void_ftype_pdouble_v2df
16698 = build_function_type_list (void_type_node,
16699 pdouble_type_node, V2DF_type_node, NULL_TREE);
16700 tree void_ftype_pint_int
16701 = build_function_type_list (void_type_node,
16702 pint_type_node, integer_type_node, NULL_TREE);
16703 tree void_ftype_v16qi_v16qi_pchar
16704 = build_function_type_list (void_type_node,
16705 V16QI_type_node, V16QI_type_node,
16706 pchar_type_node, NULL_TREE);
16707 tree v2df_ftype_pcdouble
16708 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
16709 tree v2df_ftype_v2df_v2df
16710 = build_function_type_list (V2DF_type_node,
16711 V2DF_type_node, V2DF_type_node, NULL_TREE);
16712 tree v16qi_ftype_v16qi_v16qi
16713 = build_function_type_list (V16QI_type_node,
16714 V16QI_type_node, V16QI_type_node, NULL_TREE);
16715 tree v8hi_ftype_v8hi_v8hi
16716 = build_function_type_list (V8HI_type_node,
16717 V8HI_type_node, V8HI_type_node, NULL_TREE);
16718 tree v4si_ftype_v4si_v4si
16719 = build_function_type_list (V4SI_type_node,
16720 V4SI_type_node, V4SI_type_node, NULL_TREE);
16721 tree v2di_ftype_v2di_v2di
16722 = build_function_type_list (V2DI_type_node,
16723 V2DI_type_node, V2DI_type_node, NULL_TREE);
16724 tree v2di_ftype_v2df_v2df
16725 = build_function_type_list (V2DI_type_node,
16726 V2DF_type_node, V2DF_type_node, NULL_TREE);
16727 tree v2df_ftype_v2df
16728 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
16729 tree v2di_ftype_v2di_int
16730 = build_function_type_list (V2DI_type_node,
16731 V2DI_type_node, integer_type_node, NULL_TREE);
16732 tree v2di_ftype_v2di_v2di_int
16733 = build_function_type_list (V2DI_type_node, V2DI_type_node,
16734 V2DI_type_node, integer_type_node, NULL_TREE);
16735 tree v4si_ftype_v4si_int
16736 = build_function_type_list (V4SI_type_node,
16737 V4SI_type_node, integer_type_node, NULL_TREE);
16738 tree v8hi_ftype_v8hi_int
16739 = build_function_type_list (V8HI_type_node,
16740 V8HI_type_node, integer_type_node, NULL_TREE);
16741 tree v4si_ftype_v8hi_v8hi
16742 = build_function_type_list (V4SI_type_node,
16743 V8HI_type_node, V8HI_type_node, NULL_TREE);
16744 tree di_ftype_v8qi_v8qi
16745 = build_function_type_list (long_long_unsigned_type_node,
16746 V8QI_type_node, V8QI_type_node, NULL_TREE);
16747 tree di_ftype_v2si_v2si
16748 = build_function_type_list (long_long_unsigned_type_node,
16749 V2SI_type_node, V2SI_type_node, NULL_TREE);
16750 tree v2di_ftype_v16qi_v16qi
16751 = build_function_type_list (V2DI_type_node,
16752 V16QI_type_node, V16QI_type_node, NULL_TREE);
16753 tree v2di_ftype_v4si_v4si
16754 = build_function_type_list (V2DI_type_node,
16755 V4SI_type_node, V4SI_type_node, NULL_TREE);
16756 tree int_ftype_v16qi
16757 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
16758 tree v16qi_ftype_pcchar
16759 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
16760 tree void_ftype_pchar_v16qi
16761 = build_function_type_list (void_type_node,
16762 pchar_type_node, V16QI_type_node, NULL_TREE);
16764 tree v2di_ftype_v2di_unsigned_unsigned
16765 = build_function_type_list (V2DI_type_node, V2DI_type_node,
16766 unsigned_type_node, unsigned_type_node,
16768 tree v2di_ftype_v2di_v2di_unsigned_unsigned
16769 = build_function_type_list (V2DI_type_node, V2DI_type_node, V2DI_type_node,
16770 unsigned_type_node, unsigned_type_node,
16772 tree v2di_ftype_v2di_v16qi
16773 = build_function_type_list (V2DI_type_node, V2DI_type_node, V16QI_type_node,
16777 tree float128_type;
16780 /* The __float80 type. */
16781 if (TYPE_MODE (long_double_type_node) == XFmode)
16782 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
16786 /* The __float80 type. */
16787 float80_type = make_node (REAL_TYPE);
16788 TYPE_PRECISION (float80_type) = 80;
16789 layout_type (float80_type);
16790 (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
16795 float128_type = make_node (REAL_TYPE);
16796 TYPE_PRECISION (float128_type) = 128;
16797 layout_type (float128_type);
16798 (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
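/* Illustration (annotation, not part of the original source): after
   these registrations user code can declare the extended types like
   any built-in type, e.g.

       __float80  e;    80-bit x87 extended precision
       __float128 q;    128-bit type laid out by the REAL_TYPE above

   subject to the target conditions guarding each registration. */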
16801 /* Add all builtins that are more or less simple operations on two
16802 operands. */
16803 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
16805 /* Use one of the operands; the target can have a different mode for
16806 mask-generating compares. */
16807 enum machine_mode mode;
16812 mode = insn_data[d->icode].operand[1].mode;
16817 type = v16qi_ftype_v16qi_v16qi;
16820 type = v8hi_ftype_v8hi_v8hi;
16823 type = v4si_ftype_v4si_v4si;
16826 type = v2di_ftype_v2di_v2di;
16829 type = v2df_ftype_v2df_v2df;
16832 type = v4sf_ftype_v4sf_v4sf;
16835 type = v8qi_ftype_v8qi_v8qi;
16838 type = v4hi_ftype_v4hi_v4hi;
16841 type = v2si_ftype_v2si_v2si;
16844 type = di_ftype_di_di;
16848 gcc_unreachable ();
16851 /* Override for comparisons. */
16852 if (d->icode == CODE_FOR_sse_maskcmpv4sf3
16853 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3)
16854 type = v4si_ftype_v4sf_v4sf;
16856 if (d->icode == CODE_FOR_sse2_maskcmpv2df3
16857 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
16858 type = v2di_ftype_v2df_v2df;
16860 def_builtin (d->mask, d->name, type, d->code);
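
  /* Illustrative note (editor's sketch): a bdesc_2arg entry such as the
     one binding "__builtin_ia32_addps" to CODE_FOR_addv4sf3 has operand 1
     in V4SFmode and therefore receives v4sf_ftype_v4sf_v4sf; the
     mask-generating compares are the exception the override above handles,
     e.g. cmpeqps consumes two V4SF vectors but returns a V4SI bit mask.  */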

  /* Add all builtins that are more or less simple operations on 1 operand.  */
  for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
    {
      enum machine_mode mode;
      tree type;

      if (d->name == 0)
	continue;
      mode = insn_data[d->icode].operand[1].mode;

      switch (mode)
	{
	case V16QImode:
	  type = v16qi_ftype_v16qi;
	  break;
	case V8HImode:
	  type = v8hi_ftype_v8hi;
	  break;
	case V4SImode:
	  type = v4si_ftype_v4si;
	  break;
	case V2DFmode:
	  type = v2df_ftype_v2df;
	  break;
	case V4SFmode:
	  type = v4sf_ftype_v4sf;
	  break;
	case V8QImode:
	  type = v8qi_ftype_v8qi;
	  break;
	case V4HImode:
	  type = v4hi_ftype_v4hi;
	  break;
	case V2SImode:
	  type = v2si_ftype_v2si;
	  break;

	default:
	  gcc_unreachable ();
	}

      def_builtin (d->mask, d->name, type, d->code);
    }

  /* Add the remaining MMX insns with somewhat more complicated types.  */
  def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
  def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
  def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
  def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);

  def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
  def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
  def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);

  def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
  def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);

  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
  def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);

  /* comi/ucomi insns.  */
  for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
    if (d->mask == MASK_SSE2)
      def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
    else
      def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);

  def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
  def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
  def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);

  def_builtin (MASK_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
  def_builtin (MASK_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
  def_builtin_const (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
  def_builtin_const (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
  def_builtin_const (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
  def_builtin_const (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
  def_builtin_const (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
  def_builtin_const (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
  def_builtin_const (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
  def_builtin_const (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
  def_builtin_const (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);

  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);

  def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
  def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);

  def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
  def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
  def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
  def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);

  def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
  def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);

  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);

  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);

  def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
  def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
  def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
  def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
  def_builtin_const (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
  def_builtin_const (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);

  def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);

  /* Original 3DNow!  */
  def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);

  /* 3DNow! extension as used in the Athlon CPU.  */
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);

  /* SSE2 */
  def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);

  def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);

  def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADHPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADLPD);

  def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
  def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
  def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);

  def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
  def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
  def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
  def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);

  def_builtin_const (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
  def_builtin_const (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);

  def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);

  def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
  def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);

  def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
  def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
  def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
  def_builtin_const (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
  def_builtin_const (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);

  def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);

  def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
  def_builtin_const (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
  def_builtin_const (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
  def_builtin_const (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);

  def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
  def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
  def_builtin_const (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);

  def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
  def_builtin_const (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
  def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
  def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);

  def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
  def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
  def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);

  def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
  def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);

  def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq", di_ftype_v2si_v2si, IX86_BUILTIN_PMULUDQ);
  def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq128", v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULUDQ128);

  def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSLLW128);
  def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSLLD128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);

  def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSRLW128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSRLD128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);

  def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSRAW128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSRAD128);

  def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);

  /* Prescott New Instructions.  */
  def_builtin (MASK_SSE3, "__builtin_ia32_monitor",
	       void_ftype_pcvoid_unsigned_unsigned,
	       IX86_BUILTIN_MONITOR);
  def_builtin (MASK_SSE3, "__builtin_ia32_mwait",
	       void_ftype_unsigned_unsigned,
	       IX86_BUILTIN_MWAIT);
  def_builtin (MASK_SSE3, "__builtin_ia32_lddqu",
	       v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);

  /* SSSE3.  */
  def_builtin (MASK_SSSE3, "__builtin_ia32_palignr128",
	       v2di_ftype_v2di_v2di_int, IX86_BUILTIN_PALIGNR128);
  def_builtin (MASK_SSSE3, "__builtin_ia32_palignr", di_ftype_di_di_int,
	       IX86_BUILTIN_PALIGNR);

  /* AMDFAM10 SSE4A New built-ins.  */
  def_builtin (MASK_SSE4A, "__builtin_ia32_movntsd",
	       void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTSD);
  def_builtin (MASK_SSE4A, "__builtin_ia32_movntss",
	       void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTSS);
  def_builtin (MASK_SSE4A, "__builtin_ia32_extrqi",
	       v2di_ftype_v2di_unsigned_unsigned, IX86_BUILTIN_EXTRQI);
  def_builtin (MASK_SSE4A, "__builtin_ia32_extrq",
	       v2di_ftype_v2di_v16qi, IX86_BUILTIN_EXTRQ);
  def_builtin (MASK_SSE4A, "__builtin_ia32_insertqi",
	       v2di_ftype_v2di_v2di_unsigned_unsigned, IX86_BUILTIN_INSERTQI);
  def_builtin (MASK_SSE4A, "__builtin_ia32_insertq",
	       v2di_ftype_v2di_v2di, IX86_BUILTIN_INSERTQ);

  /* Access to the vec_init patterns.  */
  ftype = build_function_type_list (V2SI_type_node, integer_type_node,
				    integer_type_node, NULL_TREE);
  def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v2si",
	       ftype, IX86_BUILTIN_VEC_INIT_V2SI);

  ftype = build_function_type_list (V4HI_type_node, short_integer_type_node,
				    short_integer_type_node,
				    short_integer_type_node,
				    short_integer_type_node, NULL_TREE);
  def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v4hi",
	       ftype, IX86_BUILTIN_VEC_INIT_V4HI);

  ftype = build_function_type_list (V8QI_type_node, char_type_node,
				    char_type_node, char_type_node,
				    char_type_node, char_type_node,
				    char_type_node, char_type_node,
				    char_type_node, NULL_TREE);
  def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v8qi",
	       ftype, IX86_BUILTIN_VEC_INIT_V8QI);

  /* Access to the vec_extract patterns.  */
  ftype = build_function_type_list (double_type_node, V2DF_type_node,
				    integer_type_node, NULL_TREE);
  def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v2df",
	       ftype, IX86_BUILTIN_VEC_EXT_V2DF);

  ftype = build_function_type_list (long_long_integer_type_node,
				    V2DI_type_node, integer_type_node,
				    NULL_TREE);
  def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v2di",
	       ftype, IX86_BUILTIN_VEC_EXT_V2DI);

  ftype = build_function_type_list (float_type_node, V4SF_type_node,
				    integer_type_node, NULL_TREE);
  def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v4sf",
	       ftype, IX86_BUILTIN_VEC_EXT_V4SF);

  ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
				    integer_type_node, NULL_TREE);
  def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v4si",
	       ftype, IX86_BUILTIN_VEC_EXT_V4SI);

  ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
				    integer_type_node, NULL_TREE);
  def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v8hi",
	       ftype, IX86_BUILTIN_VEC_EXT_V8HI);

  ftype = build_function_type_list (intHI_type_node, V4HI_type_node,
				    integer_type_node, NULL_TREE);
  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_ext_v4hi",
	       ftype, IX86_BUILTIN_VEC_EXT_V4HI);

  ftype = build_function_type_list (intSI_type_node, V2SI_type_node,
				    integer_type_node, NULL_TREE);
  def_builtin (MASK_MMX, "__builtin_ia32_vec_ext_v2si",
	       ftype, IX86_BUILTIN_VEC_EXT_V2SI);

  /* Access to the vec_set patterns.  */
  ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
				    intHI_type_node,
				    integer_type_node, NULL_TREE);
  def_builtin (MASK_SSE, "__builtin_ia32_vec_set_v8hi",
	       ftype, IX86_BUILTIN_VEC_SET_V8HI);

  ftype = build_function_type_list (V4HI_type_node, V4HI_type_node,
				    intHI_type_node,
				    integer_type_node, NULL_TREE);
  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_set_v4hi",
	       ftype, IX86_BUILTIN_VEC_SET_V4HI);
}

static void
ix86_init_builtins (void)
{
  if (TARGET_MMX)
    ix86_init_mmx_sse_builtins ();
}

/* Errors in the source file can cause expand_expr to return const0_rtx
   where we expect a vector.  To avoid crashing, use one of the vector
   clear instructions.  */
static rtx
safe_vector_operand (rtx x, enum machine_mode mode)
{
  if (x == const0_rtx)
    x = CONST0_RTX (mode);
  return x;
}
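
/* Illustrative note (editor's sketch): if a type error makes an argument
   of e.g. __builtin_ia32_addps collapse to const0_rtx, the helper above
   substitutes CONST0_RTX (V4SFmode), a legitimate all-zero vector of the
   mode the insn expects, so expansion can continue after the diagnostic
   instead of ICEing.  */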

/* Subroutine of ix86_expand_builtin to take care of binop insns.  */

static rtx
ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
{
  rtx pat, xops[3];
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[icode].operand[2].mode;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  if (optimize || !target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  if (GET_MODE (op1) == SImode && mode1 == TImode)
    {
      rtx x = gen_reg_rtx (V4SImode);
      emit_insn (gen_sse2_loadd (x, op1));
      op1 = gen_lowpart (TImode, x);
    }

  /* The insn must want input operands in the same modes as the
     result.  */
  gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
	      && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode));

  if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  /* ??? Using ix86_fixup_binary_operands is problematic when
     we've got mismatched modes.  Fake it.  */

  xops[0] = target;
  xops[1] = op0;
  xops[2] = op1;

  if (tmode == mode0 && tmode == mode1)
    {
      target = ix86_fixup_binary_operands (UNKNOWN, tmode, xops);
      op0 = xops[1];
      op1 = xops[2];
    }
  else if (optimize || !ix86_binary_operator_ok (UNKNOWN, tmode, xops))
    {
      op0 = force_reg (mode0, op0);
      op1 = force_reg (mode1, op1);
      target = gen_reg_rtx (tmode);
    }

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
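
/* Illustrative note (editor's sketch): for __builtin_ia32_paddw128 (a, b)
   the expander above receives CODE_FOR_addv8hi3, forces both arguments
   into V8HImode registers, and emits a single paddw; the SImode-to-TImode
   special case loads a plain integer operand into a vector register via
   sse2_loadd for the insns whose second operand is TImode.  */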

/* Subroutine of ix86_expand_builtin to take care of stores.  */

static rtx
ix86_expand_store_builtin (enum insn_code icode, tree exp)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  enum machine_mode mode0 = insn_data[icode].operand[0].mode;
  enum machine_mode mode1 = insn_data[icode].operand[1].mode;

  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
  op1 = copy_to_mode_reg (mode1, op1);

  pat = GEN_FCN (icode) (op0, op1);
  if (pat)
    emit_insn (pat);
  return 0;
}

/* Subroutine of ix86_expand_builtin to take care of unop insns.  */

static rtx
ix86_expand_unop_builtin (enum insn_code icode, tree exp,
			  rtx target, int do_load)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  rtx op0 = expand_normal (arg0);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;

  if (optimize || !target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);
  if (do_load)
    op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
  else
    {
      if (VECTOR_MODE_P (mode0))
	op0 = safe_vector_operand (op0, mode0);

      if ((optimize && !register_operand (op0, mode0))
	  || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
    }

  pat = GEN_FCN (icode) (target, op0);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}

/* Subroutine of ix86_expand_builtin to take care of three special unop insns:
   sqrtss, rsqrtss, rcpss.  */

static rtx
ix86_expand_unop1_builtin (enum insn_code icode, tree exp, rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  rtx op1, op0 = expand_normal (arg0);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;

  if (optimize || !target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);

  if ((optimize && !register_operand (op0, mode0))
      || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);

  op1 = op0;
  if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
    op1 = copy_to_mode_reg (mode0, op1);

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
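
/* Illustrative note (editor's sketch): these vm (scalar) patterns compute
   only element 0 and copy the remaining elements from a second operand, so
   passing OP0 twice gives the usual intrinsic semantics: _mm_sqrt_ss (x)
   replaces x[0] with its square root and leaves x[1..3] untouched.  */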

/* Subroutine of ix86_expand_builtin to take care of comparison insns.  */

static rtx
ix86_expand_sse_compare (const struct builtin_description *d, tree exp,
			 rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  rtx op2;
  enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
  enum rtx_code comparison = d->comparison;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  /* Swap operands if we have a comparison that isn't available in
     hardware.  */
  if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
    {
      rtx tmp = gen_reg_rtx (mode1);
      emit_move_insn (tmp, op1);
      op1 = op0;
      op0 = tmp;
    }

  if (optimize || !target
      || GET_MODE (target) != tmode
      || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  if ((optimize && !register_operand (op0, mode0))
      || ! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if ((optimize && !register_operand (op1, mode1))
      || ! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
  pat = GEN_FCN (d->icode) (target, op0, op1, op2);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
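
/* Illustrative note (editor's sketch): the SSE compare patterns only
   implement the "less" family of predicates, so descriptors for the
   greater-than builtins carry BUILTIN_DESC_SWAP_OPERANDS, and e.g.
   __builtin_ia32_cmpgtps (a, b) is emitted as the equivalent cmpltps
   with A and B exchanged.  */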

/* Subroutine of ix86_expand_builtin to take care of comi insns.  */

static rtx
ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
		      rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  rtx op2;
  enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
  enum rtx_code comparison = d->comparison;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  /* Swap operands if we have a comparison that isn't available in
     hardware.  */
  if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
    {
      rtx tmp = op1;
      op1 = op0;
      op0 = tmp;
    }

  target = gen_reg_rtx (SImode);
  emit_move_insn (target, const0_rtx);
  target = gen_rtx_SUBREG (QImode, target, 0);

  if ((optimize && !register_operand (op0, mode0))
      || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if ((optimize && !register_operand (op1, mode1))
      || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
  pat = GEN_FCN (d->icode) (op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  emit_insn (gen_rtx_SET (VOIDmode,
			  gen_rtx_STRICT_LOW_PART (VOIDmode, target),
			  gen_rtx_fmt_ee (comparison, QImode,
					  SET_DEST (pat),
					  const0_rtx)));

  return SUBREG_REG (target);
}
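
/* Illustrative note (editor's sketch): comiss/ucomiss compare two scalars
   and only set EFLAGS, so the expander above materializes the result as an
   integer, roughly

	xorl	%eax, %eax
	comiss	%xmm1, %xmm0
	seta	%al

   for __builtin_ia32_comigt; zeroing first and writing only the low byte
   through the strict_low_part keeps the upper bits well defined.  */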

/* Return the integer constant in ARG.  Constrain it to be in the range
   of the subparts of VEC_TYPE; issue an error if not.  */

static unsigned HOST_WIDE_INT
get_element_number (tree vec_type, tree arg)
{
  unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;

  if (!host_integerp (arg, 1)
      || (elt = tree_low_cst (arg, 1), elt > max))
    {
      error ("selector must be an integer constant in the range 0..%wi", max);
      return 0;
    }

  return elt;
}
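
/* Illustrative note (editor's sketch): for a V4SF vector MAX is 3, so a
   call such as __builtin_ia32_vec_ext_v4sf (x, 4) is rejected with
   "selector must be an integer constant in the range 0..3".  */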

/* A subroutine of ix86_expand_builtin.  These builtins are a wrapper around
   ix86_expand_vector_init.  We DO have language-level syntax for this, in
   the form of (type){ init-list }.  Except that since we can't place emms
   instructions from inside the compiler, we can't allow the use of MMX
   registers unless the user explicitly asks for it.  So we do *not* define
   vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md.  Instead
   we have builtins invoked by mmintrin.h that give us license to emit
   these sorts of instructions.  */

static rtx
ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
{
  enum machine_mode tmode = TYPE_MODE (type);
  enum machine_mode inner_mode = GET_MODE_INNER (tmode);
  int i, n_elt = GET_MODE_NUNITS (tmode);
  rtvec v = rtvec_alloc (n_elt);

  gcc_assert (VECTOR_MODE_P (tmode));
  gcc_assert (call_expr_nargs (exp) == n_elt);

  for (i = 0; i < n_elt; ++i)
    {
      rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
      RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
    }

  if (!target || !register_operand (target, tmode))
    target = gen_reg_rtx (tmode);

  ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
  return target;
}
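
/* Illustrative note (editor's sketch): this path serves the mmintrin.h
   constructors, e.g. _mm_set_pi32 (hi, lo) expands to
   __builtin_ia32_vec_init_v2si (lo, hi); each scalar argument is narrowed
   with gen_lowpart to the SImode element type and the PARALLEL is handed
   to ix86_expand_vector_init.  */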

/* A subroutine of ix86_expand_builtin.  These builtins are a wrapper around
   ix86_expand_vector_extract.  They would be redundant (for non-MMX) if we
   had a language-level syntax for referencing vector elements.  */

static rtx
ix86_expand_vec_ext_builtin (tree exp, rtx target)
{
  enum machine_mode tmode, mode0;
  tree arg0, arg1;
  int elt;
  rtx op0;

  arg0 = CALL_EXPR_ARG (exp, 0);
  arg1 = CALL_EXPR_ARG (exp, 1);

  op0 = expand_normal (arg0);
  elt = get_element_number (TREE_TYPE (arg0), arg1);

  tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
  mode0 = TYPE_MODE (TREE_TYPE (arg0));
  gcc_assert (VECTOR_MODE_P (mode0));

  op0 = force_reg (mode0, op0);

  if (optimize || !target || !register_operand (target, tmode))
    target = gen_reg_rtx (tmode);

  ix86_expand_vector_extract (true, target, op0, elt);

  return target;
}

/* A subroutine of ix86_expand_builtin.  These builtins are a wrapper around
   ix86_expand_vector_set.  They would be redundant (for non-MMX) if we had
   a language-level syntax for referencing vector elements.  */

static rtx
ix86_expand_vec_set_builtin (tree exp)
{
  enum machine_mode tmode, mode1;
  tree arg0, arg1, arg2;
  int elt;
  rtx op0, op1;

  arg0 = CALL_EXPR_ARG (exp, 0);
  arg1 = CALL_EXPR_ARG (exp, 1);
  arg2 = CALL_EXPR_ARG (exp, 2);

  tmode = TYPE_MODE (TREE_TYPE (arg0));
  mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
  gcc_assert (VECTOR_MODE_P (tmode));

  op0 = expand_expr (arg0, NULL_RTX, tmode, 0);
  op1 = expand_expr (arg1, NULL_RTX, mode1, 0);
  elt = get_element_number (TREE_TYPE (arg0), arg2);

  if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
    op1 = convert_modes (mode1, GET_MODE (op1), op1, true);

  op0 = force_reg (tmode, op0);
  op1 = force_reg (mode1, op1);

  ix86_expand_vector_set (true, op0, op1, elt);

  return op0;
}
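
/* Illustrative note (editor's sketch): these two wrappers back the element
   intrinsics, e.g. _mm_insert_pi16 (v, s, 2) becomes
   __builtin_ia32_vec_set_v4hi (v, s, 2): the selector is validated by
   get_element_number, the scalar is converted to the HImode element type,
   and ix86_expand_vector_set rewrites element 2 in place.  */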

/* Expand an expression EXP that calls a built-in function,
   with result going to TARGET if that's convenient
   (and in mode MODE if that's convenient).
   SUBTARGET may be used as the target for computing one of EXP's operands.
   IGNORE is nonzero if the value is to be ignored.  */

static rtx
ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
		     enum machine_mode mode ATTRIBUTE_UNUSED,
		     int ignore ATTRIBUTE_UNUSED)
{
  const struct builtin_description *d;
  size_t i;
  enum insn_code icode;
  tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
  tree arg0, arg1, arg2, arg3;
  rtx op0, op1, op2, op3, pat;
  enum machine_mode tmode, mode0, mode1, mode2, mode3, mode4;
  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);

  switch (fcode)
    {
    case IX86_BUILTIN_EMMS:
      emit_insn (gen_mmx_emms ());
      return 0;

    case IX86_BUILTIN_SFENCE:
      emit_insn (gen_sse_sfence ());
      return 0;

    case IX86_BUILTIN_MASKMOVQ:
    case IX86_BUILTIN_MASKMOVDQU:
      icode = (fcode == IX86_BUILTIN_MASKMOVQ
	       ? CODE_FOR_mmx_maskmovq
	       : CODE_FOR_sse2_maskmovdqu);
      /* Note the arg order is different from the operand order.  */
      arg1 = CALL_EXPR_ARG (exp, 0);
      arg2 = CALL_EXPR_ARG (exp, 1);
      arg0 = CALL_EXPR_ARG (exp, 2);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      op2 = expand_normal (arg2);
      mode0 = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[2].mode;

      op0 = force_reg (Pmode, op0);
      op0 = gen_rtx_MEM (mode1, op0);

      if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
	op1 = copy_to_mode_reg (mode1, op1);
      if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
	op2 = copy_to_mode_reg (mode2, op2);
      pat = GEN_FCN (icode) (op0, op1, op2);
      if (! pat)
	return 0;
      emit_insn (pat);
      return 0;

    case IX86_BUILTIN_SQRTSS:
      return ix86_expand_unop1_builtin (CODE_FOR_sse_vmsqrtv4sf2, exp, target);
    case IX86_BUILTIN_RSQRTSS:
      return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrsqrtv4sf2, exp, target);
    case IX86_BUILTIN_RCPSS:
      return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrcpv4sf2, exp, target);

    case IX86_BUILTIN_LOADUPS:
      return ix86_expand_unop_builtin (CODE_FOR_sse_movups, exp, target, 1);

    case IX86_BUILTIN_STOREUPS:
      return ix86_expand_store_builtin (CODE_FOR_sse_movups, exp);

    case IX86_BUILTIN_LOADHPS:
    case IX86_BUILTIN_LOADLPS:
    case IX86_BUILTIN_LOADHPD:
    case IX86_BUILTIN_LOADLPD:
      icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_loadhps
	       : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_loadlps
	       : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_loadhpd
	       : CODE_FOR_sse2_loadlpd);
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;

      op0 = force_reg (mode0, op0);
      op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
      if (optimize || target == 0
	  || GET_MODE (target) != tmode
	  || !register_operand (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_STOREHPS:
    case IX86_BUILTIN_STORELPS:
      icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_storehps
	       : CODE_FOR_sse_storelps);
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      mode0 = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;

      op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
      op1 = force_reg (mode1, op1);

      pat = GEN_FCN (icode) (op0, op1);
      if (! pat)
	return 0;
      emit_insn (pat);
      return const0_rtx;

    case IX86_BUILTIN_MOVNTPS:
      return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, exp);
    case IX86_BUILTIN_MOVNTQ:
      return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, exp);

    case IX86_BUILTIN_LDMXCSR:
      op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
      target = assign_386_stack_local (SImode, SLOT_TEMP);
      emit_move_insn (target, op0);
      emit_insn (gen_sse_ldmxcsr (target));
      return 0;

    case IX86_BUILTIN_STMXCSR:
      target = assign_386_stack_local (SImode, SLOT_TEMP);
      emit_insn (gen_sse_stmxcsr (target));
      return copy_to_mode_reg (SImode, target);

    case IX86_BUILTIN_SHUFPS:
    case IX86_BUILTIN_SHUFPD:
      icode = (fcode == IX86_BUILTIN_SHUFPS
	       ? CODE_FOR_sse_shufps
	       : CODE_FOR_sse2_shufpd);
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      arg2 = CALL_EXPR_ARG (exp, 2);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      op2 = expand_normal (arg2);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;
      mode2 = insn_data[icode].operand[3].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      if ((optimize && !register_operand (op1, mode1))
	  || !(*insn_data[icode].operand[2].predicate) (op1, mode1))
	op1 = copy_to_mode_reg (mode1, op1);
      if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
	{
	  /* @@@ better error message */
	  error ("mask must be an immediate");
	  return gen_reg_rtx (tmode);
	}
      if (optimize || target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1, op2);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_PSHUFW:
    case IX86_BUILTIN_PSHUFD:
    case IX86_BUILTIN_PSHUFHW:
    case IX86_BUILTIN_PSHUFLW:
      icode = (  fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
	       : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
	       : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
	       : CODE_FOR_mmx_pshufw);
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      tmode = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
	op0 = copy_to_mode_reg (mode1, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
	{
	  /* @@@ better error message */
	  error ("mask must be an immediate");
	  return const0_rtx;
	}
      if (target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_PSLLWI128:
      icode = CODE_FOR_ashlv8hi3;
      goto do_pshifti;
    case IX86_BUILTIN_PSLLDI128:
      icode = CODE_FOR_ashlv4si3;
      goto do_pshifti;
    case IX86_BUILTIN_PSLLQI128:
      icode = CODE_FOR_ashlv2di3;
      goto do_pshifti;
    case IX86_BUILTIN_PSRAWI128:
      icode = CODE_FOR_ashrv8hi3;
      goto do_pshifti;
    case IX86_BUILTIN_PSRADI128:
      icode = CODE_FOR_ashrv4si3;
      goto do_pshifti;
    case IX86_BUILTIN_PSRLWI128:
      icode = CODE_FOR_lshrv8hi3;
      goto do_pshifti;
    case IX86_BUILTIN_PSRLDI128:
      icode = CODE_FOR_lshrv4si3;
      goto do_pshifti;
    case IX86_BUILTIN_PSRLQI128:
      icode = CODE_FOR_lshrv2di3;
      goto do_pshifti;
    do_pshifti:
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);

      if (!CONST_INT_P (op1))
	{
	  error ("shift must be an immediate");
	  return const0_rtx;
	}
      if (INTVAL (op1) < 0 || INTVAL (op1) > 255)
	op1 = GEN_INT (255);

      tmode = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
	op0 = copy_to_reg (op0);

      target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
      if (!pat)
	return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_PSLLW128:
      icode = CODE_FOR_ashlv8hi3;
      goto do_pshift;
    case IX86_BUILTIN_PSLLD128:
      icode = CODE_FOR_ashlv4si3;
      goto do_pshift;
    case IX86_BUILTIN_PSLLQ128:
      icode = CODE_FOR_ashlv2di3;
      goto do_pshift;
    case IX86_BUILTIN_PSRAW128:
      icode = CODE_FOR_ashrv8hi3;
      goto do_pshift;
    case IX86_BUILTIN_PSRAD128:
      icode = CODE_FOR_ashrv4si3;
      goto do_pshift;
    case IX86_BUILTIN_PSRLW128:
      icode = CODE_FOR_lshrv8hi3;
      goto do_pshift;
    case IX86_BUILTIN_PSRLD128:
      icode = CODE_FOR_lshrv4si3;
      goto do_pshift;
    case IX86_BUILTIN_PSRLQ128:
      icode = CODE_FOR_lshrv2di3;
      goto do_pshift;
    do_pshift:
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);

      tmode = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
	op0 = copy_to_reg (op0);

      op1 = simplify_gen_subreg (TImode, op1, GET_MODE (op1), 0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, TImode))
	op1 = copy_to_reg (op1);

      target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
      if (!pat)
	return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_PSLLDQI128:
    case IX86_BUILTIN_PSRLDQI128:
      icode = (fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
	       : CODE_FOR_sse2_lshrti3);
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      tmode = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
	{
	  op0 = copy_to_reg (op0);
	  op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
	}
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
	{
	  error ("shift must be an immediate");
	  return const0_rtx;
	}
      target = gen_reg_rtx (V2DImode);
      pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0),
			     op0, op1);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;
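
    /* Illustrative note (editor's sketch): the ashlti3/lshrti3 patterns
       shift the whole 128-bit value and take their count in bits, which
       is why the emmintrin.h wrappers scale the byte argument, roughly
       _mm_slli_si128 (x, n) => __builtin_ia32_pslldqi128 (x, (n) * 8).  */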

    case IX86_BUILTIN_FEMMS:
      emit_insn (gen_mmx_femms ());
      return 0;

    case IX86_BUILTIN_PAVGUSB:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_uavgv8qi3, exp, target);

    case IX86_BUILTIN_PF2ID:
      return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2id, exp, target, 0);

    case IX86_BUILTIN_PFACC:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_haddv2sf3, exp, target);

    case IX86_BUILTIN_PFADD:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_addv2sf3, exp, target);

    case IX86_BUILTIN_PFCMPEQ:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_eqv2sf3, exp, target);

    case IX86_BUILTIN_PFCMPGE:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_gev2sf3, exp, target);

    case IX86_BUILTIN_PFCMPGT:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_gtv2sf3, exp, target);

    case IX86_BUILTIN_PFMAX:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_smaxv2sf3, exp, target);

    case IX86_BUILTIN_PFMIN:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_sminv2sf3, exp, target);

    case IX86_BUILTIN_PFMUL:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_mulv2sf3, exp, target);

    case IX86_BUILTIN_PFRCP:
      return ix86_expand_unop_builtin (CODE_FOR_mmx_rcpv2sf2, exp, target, 0);

    case IX86_BUILTIN_PFRCPIT1:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit1v2sf3, exp, target);

    case IX86_BUILTIN_PFRCPIT2:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit2v2sf3, exp, target);

    case IX86_BUILTIN_PFRSQIT1:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_rsqit1v2sf3, exp, target);

    case IX86_BUILTIN_PFRSQRT:
      return ix86_expand_unop_builtin (CODE_FOR_mmx_rsqrtv2sf2, exp, target, 0);

    case IX86_BUILTIN_PFSUB:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_subv2sf3, exp, target);

    case IX86_BUILTIN_PFSUBR:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_subrv2sf3, exp, target);

    case IX86_BUILTIN_PI2FD:
      return ix86_expand_unop_builtin (CODE_FOR_mmx_floatv2si2, exp, target, 0);

    case IX86_BUILTIN_PMULHRW:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_pmulhrwv4hi3, exp, target);

    case IX86_BUILTIN_PF2IW:
      return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2iw, exp, target, 0);

    case IX86_BUILTIN_PFNACC:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_hsubv2sf3, exp, target);

    case IX86_BUILTIN_PFPNACC:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_addsubv2sf3, exp, target);

    case IX86_BUILTIN_PI2FW:
      return ix86_expand_unop_builtin (CODE_FOR_mmx_pi2fw, exp, target, 0);

    case IX86_BUILTIN_PSWAPDSI:
      return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2si2, exp, target, 0);

    case IX86_BUILTIN_PSWAPDSF:
      return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2sf2, exp, target, 0);

    case IX86_BUILTIN_SQRTSD:
      return ix86_expand_unop1_builtin (CODE_FOR_sse2_vmsqrtv2df2, exp, target);
    case IX86_BUILTIN_LOADUPD:
      return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, exp, target, 1);
    case IX86_BUILTIN_STOREUPD:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, exp);

    case IX86_BUILTIN_MFENCE:
      emit_insn (gen_sse2_mfence ());
      return 0;
    case IX86_BUILTIN_LFENCE:
      emit_insn (gen_sse2_lfence ());
      return 0;

    case IX86_BUILTIN_CLFLUSH:
      arg0 = CALL_EXPR_ARG (exp, 0);
      op0 = expand_normal (arg0);
      icode = CODE_FOR_sse2_clflush;
      if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
	op0 = copy_to_mode_reg (Pmode, op0);

      emit_insn (gen_sse2_clflush (op0));
      return 0;

    case IX86_BUILTIN_MOVNTPD:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, exp);
    case IX86_BUILTIN_MOVNTDQ:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, exp);
    case IX86_BUILTIN_MOVNTI:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, exp);

    case IX86_BUILTIN_LOADDQU:
      return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, exp, target, 1);
    case IX86_BUILTIN_STOREDQU:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, exp);

    case IX86_BUILTIN_MONITOR:
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      arg2 = CALL_EXPR_ARG (exp, 2);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      op2 = expand_normal (arg2);
      if (!REG_P (op0))
	op0 = copy_to_mode_reg (Pmode, op0);
      if (!REG_P (op1))
	op1 = copy_to_mode_reg (SImode, op1);
      if (!REG_P (op2))
	op2 = copy_to_mode_reg (SImode, op2);
      if (!TARGET_64BIT)
	emit_insn (gen_sse3_monitor (op0, op1, op2));
      else
	emit_insn (gen_sse3_monitor64 (op0, op1, op2));
      return 0;

    case IX86_BUILTIN_MWAIT:
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      if (!REG_P (op0))
	op0 = copy_to_mode_reg (SImode, op0);
      if (!REG_P (op1))
	op1 = copy_to_mode_reg (SImode, op1);
      emit_insn (gen_sse3_mwait (op0, op1));
      return 0;

    case IX86_BUILTIN_LDDQU:
      return ix86_expand_unop_builtin (CODE_FOR_sse3_lddqu, exp,
				       target, 1);

    case IX86_BUILTIN_PALIGNR:
    case IX86_BUILTIN_PALIGNR128:
      if (fcode == IX86_BUILTIN_PALIGNR)
	{
	  icode = CODE_FOR_ssse3_palignrdi;
	  mode = DImode;
	}
      else
	{
	  icode = CODE_FOR_ssse3_palignrti;
	  mode = V2DImode;
	}
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      arg2 = CALL_EXPR_ARG (exp, 2);
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[2].mode;
      mode3 = insn_data[icode].operand[3].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
	{
	  op0 = copy_to_reg (op0);
	  op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
	}
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
	{
	  op1 = copy_to_reg (op1);
	  op1 = simplify_gen_subreg (mode2, op1, GET_MODE (op1), 0);
	}
      if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
	{
	  error ("shift must be an immediate");
	  return const0_rtx;
	}
      target = gen_reg_rtx (mode);
      pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, mode, 0),
			     op0, op1, op2);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_MOVNTSD:
      return ix86_expand_store_builtin (CODE_FOR_sse4a_vmmovntv2df, exp);

    case IX86_BUILTIN_MOVNTSS:
      return ix86_expand_store_builtin (CODE_FOR_sse4a_vmmovntv4sf, exp);

    case IX86_BUILTIN_INSERTQ:
    case IX86_BUILTIN_EXTRQ:
      icode = (fcode == IX86_BUILTIN_EXTRQ
	       ? CODE_FOR_sse4a_extrq
	       : CODE_FOR_sse4a_insertq);
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      tmode = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[2].mode;
      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
	op0 = copy_to_mode_reg (mode1, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
	op1 = copy_to_mode_reg (mode2, op1);
      if (optimize || target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
      if (! pat)
	return NULL_RTX;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_EXTRQI:
      icode = CODE_FOR_sse4a_extrqi;
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      arg2 = CALL_EXPR_ARG (exp, 2);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      op2 = expand_normal (arg2);
      tmode = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[2].mode;
      mode3 = insn_data[icode].operand[3].mode;
      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
	op0 = copy_to_mode_reg (mode1, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
	{
	  error ("index mask must be an immediate");
	  return gen_reg_rtx (tmode);
	}
      if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
	{
	  error ("length mask must be an immediate");
	  return gen_reg_rtx (tmode);
	}
      if (optimize || target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1, op2);
      if (! pat)
	return NULL_RTX;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_INSERTQI:
      icode = CODE_FOR_sse4a_insertqi;
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      arg2 = CALL_EXPR_ARG (exp, 2);
      arg3 = CALL_EXPR_ARG (exp, 3);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      op2 = expand_normal (arg2);
      op3 = expand_normal (arg3);
      tmode = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[2].mode;
      mode3 = insn_data[icode].operand[3].mode;
      mode4 = insn_data[icode].operand[4].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
	op0 = copy_to_mode_reg (mode1, op0);

      if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
	op1 = copy_to_mode_reg (mode2, op1);

      if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
	{
	  error ("index mask must be an immediate");
	  return gen_reg_rtx (tmode);
	}
      if (! (*insn_data[icode].operand[4].predicate) (op3, mode4))
	{
	  error ("length mask must be an immediate");
	  return gen_reg_rtx (tmode);
	}
      if (optimize || target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1, op2, op3);
      if (! pat)
	return NULL_RTX;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_VEC_INIT_V2SI:
    case IX86_BUILTIN_VEC_INIT_V4HI:
    case IX86_BUILTIN_VEC_INIT_V8QI:
      return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);

    case IX86_BUILTIN_VEC_EXT_V2DF:
    case IX86_BUILTIN_VEC_EXT_V2DI:
    case IX86_BUILTIN_VEC_EXT_V4SF:
    case IX86_BUILTIN_VEC_EXT_V4SI:
    case IX86_BUILTIN_VEC_EXT_V8HI:
    case IX86_BUILTIN_VEC_EXT_V2SI:
    case IX86_BUILTIN_VEC_EXT_V4HI:
      return ix86_expand_vec_ext_builtin (exp, target);

    case IX86_BUILTIN_VEC_SET_V8HI:
    case IX86_BUILTIN_VEC_SET_V4HI:
      return ix86_expand_vec_set_builtin (exp);

    default:
      break;
    }

  for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
    if (d->code == fcode)
      {
	/* Compares are treated specially.  */
	if (d->icode == CODE_FOR_sse_maskcmpv4sf3
	    || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3
	    || d->icode == CODE_FOR_sse2_maskcmpv2df3
	    || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
	  return ix86_expand_sse_compare (d, exp, target);

	return ix86_expand_binop_builtin (d->icode, exp, target);
      }

  for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
    if (d->code == fcode)
      return ix86_expand_unop_builtin (d->icode, exp, target, 0);

  for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
    if (d->code == fcode)
      return ix86_expand_sse_comi (d, exp, target);

  gcc_unreachable ();
}

/* Returns a function decl for a vectorized version of the builtin function
   with builtin function code FN and the result vector type TYPE, or NULL_TREE
   if it is not available.  */

static tree
ix86_builtin_vectorized_function (enum built_in_function fn, tree type_out,
				  tree type_in)
{
  enum machine_mode in_mode, out_mode;
  int in_n, out_n;

  if (TREE_CODE (type_out) != VECTOR_TYPE
      || TREE_CODE (type_in) != VECTOR_TYPE)
    return NULL_TREE;

  out_mode = TYPE_MODE (TREE_TYPE (type_out));
  out_n = TYPE_VECTOR_SUBPARTS (type_out);
  in_mode = TYPE_MODE (TREE_TYPE (type_in));
  in_n = TYPE_VECTOR_SUBPARTS (type_in);

  switch (fn)
    {
    case BUILT_IN_SQRT:
      if (out_mode == DFmode && out_n == 2
	  && in_mode == DFmode && in_n == 2)
	return ix86_builtins[IX86_BUILTIN_SQRTPD];
      break;

    case BUILT_IN_SQRTF:
      if (out_mode == SFmode && out_n == 4
	  && in_mode == SFmode && in_n == 4)
	return ix86_builtins[IX86_BUILTIN_SQRTPS];
      break;

    case BUILT_IN_LRINTF:
      if (out_mode == SImode && out_n == 4
	  && in_mode == SFmode && in_n == 4)
	return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
      break;

    default:
      ;
    }

  return NULL_TREE;
}
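
/* Illustrative note (editor's sketch): given "double a[n], b[n];" and the
   loop "for (i = 0; i < n; i++) b[i] = sqrt (a[i]);", the vectorizer
   (-O2 -ftree-vectorize -msse2) asks this hook for a V2DF counterpart of
   BUILT_IN_SQRT, receives IX86_BUILTIN_SQRTPD, and processes two elements
   per sqrtpd.  */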

/* Returns a decl of a function that implements conversion of the
   input vector of type TYPE, or NULL_TREE if it is not available.  */

static tree
ix86_builtin_conversion (enum tree_code code, tree type)
{
  if (TREE_CODE (type) != VECTOR_TYPE)
    return NULL_TREE;

  switch (code)
    {
    case FLOAT_EXPR:
      switch (TYPE_MODE (type))
	{
	case V4SImode:
	  return ix86_builtins[IX86_BUILTIN_CVTDQ2PS];
	default:
	  return NULL_TREE;
	}

    case FIX_TRUNC_EXPR:
      switch (TYPE_MODE (type))
	{
	case V4SFmode:
	  return ix86_builtins[IX86_BUILTIN_CVTTPS2DQ];
	default:
	  return NULL_TREE;
	}

    default:
      return NULL_TREE;
    }
}

/* Store OPERAND to the memory after reload is completed.  This means
   that we can't easily use assign_stack_local.  */
rtx
ix86_force_to_memory (enum machine_mode mode, rtx operand)
{
  rtx result;

  gcc_assert (reload_completed);
  if (TARGET_RED_ZONE)
    {
      result = gen_rtx_MEM (mode,
			    gen_rtx_PLUS (Pmode,
					  stack_pointer_rtx,
					  GEN_INT (-RED_ZONE_SIZE)));
      emit_move_insn (result, operand);
    }
  else if (!TARGET_RED_ZONE && TARGET_64BIT)
    {
      switch (mode)
	{
	case HImode:
	case SImode:
	  operand = gen_lowpart (DImode, operand);
	  /* FALLTHRU */
	case DImode:
	  emit_insn (
		      gen_rtx_SET (VOIDmode,
				   gen_rtx_MEM (DImode,
						gen_rtx_PRE_DEC (DImode,
							stack_pointer_rtx)),
				   operand));
	  break;
	default:
	  gcc_unreachable ();
	}
      result = gen_rtx_MEM (mode, stack_pointer_rtx);
    }
  else
    {
      switch (mode)
	{
	case DImode:
	  {
	    rtx operands[2];
	    split_di (&operand, 1, operands, operands + 1);
	    emit_insn (
			gen_rtx_SET (VOIDmode,
				     gen_rtx_MEM (SImode,
						  gen_rtx_PRE_DEC (Pmode,
							stack_pointer_rtx)),
				     operands[1]));
	    emit_insn (
			gen_rtx_SET (VOIDmode,
				     gen_rtx_MEM (SImode,
						  gen_rtx_PRE_DEC (Pmode,
							stack_pointer_rtx)),
				     operands[0]));
	  }
	  break;
	case HImode:
	  /* Store HImodes as SImodes.  */
	  operand = gen_lowpart (SImode, operand);
	  /* FALLTHRU */
	case SImode:
	  emit_insn (
		      gen_rtx_SET (VOIDmode,
				   gen_rtx_MEM (GET_MODE (operand),
						gen_rtx_PRE_DEC (SImode,
							stack_pointer_rtx)),
				   operand));
	  break;
	default:
	  gcc_unreachable ();
	}
      result = gen_rtx_MEM (mode, stack_pointer_rtx);
    }
  return result;
}
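
/* Illustrative note (editor's sketch): on 64-bit targets the ABI red zone
   lets the value be stored at a negative offset from %rsp with no pointer
   adjustment; the other branches push the operand instead (splitting
   DImode into two SImode pushes in 32-bit mode) and leave the
   deallocation to ix86_free_from_memory below.  */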

/* Free operand from the memory.  */
void
ix86_free_from_memory (enum machine_mode mode)
{
  if (!TARGET_RED_ZONE)
    {
      int size;

      if (mode == DImode || TARGET_64BIT)
	size = 8;
      else
	size = 4;
      /* Use LEA to deallocate stack space.  In peephole2 it will be converted
	 to pop or add instruction if registers are available.  */
      emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
			      gen_rtx_PLUS (Pmode, stack_pointer_rtx,
					    GEN_INT (size))));
    }
}

/* Put float CONST_DOUBLE in the constant pool instead of fp regs.
   QImode must go into class Q_REGS.
   Narrow ALL_REGS to GENERAL_REGS.  This supports allowing movsf and
   movdf to do mem-to-mem moves through integer regs.  */
enum reg_class
ix86_preferred_reload_class (rtx x, enum reg_class class)
{
  enum machine_mode mode = GET_MODE (x);

  /* We're only allowed to return a subclass of CLASS.  Many of the
     following checks fail for NO_REGS, so eliminate that early.  */
  if (class == NO_REGS)
    return NO_REGS;

  /* All classes can load zeros.  */
  if (x == CONST0_RTX (mode))
    return class;

  /* Force constants into memory if we are loading a (nonzero) constant into
     an MMX or SSE register.  This is because there are no MMX/SSE instructions
     to load from a constant.  */
  if (CONSTANT_P (x)
      && (MAYBE_MMX_CLASS_P (class) || MAYBE_SSE_CLASS_P (class)))
    return NO_REGS;

  /* Prefer SSE regs only, if we can use them for math.  */
  if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
    return SSE_CLASS_P (class) ? class : NO_REGS;

  /* Floating-point constants need more complex checks.  */
  if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
    {
      /* General regs can load everything.  */
      if (reg_class_subset_p (class, GENERAL_REGS))
	return class;

      /* Floats can load 0 and 1 plus some others.  Note that we eliminated
	 zero above.  We only want to wind up preferring 80387 registers if
	 we plan on doing computation with them.  */
      if (TARGET_80387
	  && standard_80387_constant_p (x))
	{
	  /* Limit class to non-sse.  */
	  if (class == FLOAT_SSE_REGS)
	    return FLOAT_REGS;
	  if (class == FP_TOP_SSE_REGS)
	    return FP_TOP_REG;
	  if (class == FP_SECOND_SSE_REGS)
	    return FP_SECOND_REG;
	  if (class == FLOAT_INT_REGS || class == FLOAT_REGS)
	    return class;
	}

      return NO_REGS;
    }

  /* Generally when we see PLUS here, it's the function invariant
     (plus soft-fp const_int).  Which can only be computed into general
     regs.  */
  if (GET_CODE (x) == PLUS)
    return reg_class_subset_p (class, GENERAL_REGS) ? class : NO_REGS;

  /* QImode constants are easy to load, but non-constant QImode data
     must go into Q_REGS.  */
  if (GET_MODE (x) == QImode && !CONSTANT_P (x))
    {
      if (reg_class_subset_p (class, Q_REGS))
	return class;
      if (reg_class_subset_p (Q_REGS, class))
	return Q_REGS;
      return NO_REGS;
    }

  return class;
}
18538 /* Discourage putting floating-point values in SSE registers unless
18539 SSE math is being used, and likewise for the 387 registers. */
18541 ix86_preferred_output_reload_class (rtx x, enum reg_class class)
18543 enum machine_mode mode = GET_MODE (x);
18545 /* Restrict the output reload class to the register bank that we are doing
18546 math on. If we would like not to return a subset of CLASS, reject this
18547 alternative: if reload cannot do this, it will still use its choice. */
18549 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
18550 return MAYBE_SSE_CLASS_P (class) ? SSE_REGS : NO_REGS;
18552 if (TARGET_80387 && SCALAR_FLOAT_MODE_P (mode))
18554 if (class == FP_TOP_SSE_REGS)
18556 else if (class == FP_SECOND_SSE_REGS)
18557 return FP_SECOND_REG;
18559 return FLOAT_CLASS_P (class) ? class : NO_REGS;
18565 /* If we are copying between general and FP registers, we need a memory
18566 location. The same is true for SSE and MMX registers.
18568 The macro can't work reliably when one of the CLASSES is a class containing
18569 registers from multiple units (SSE, MMX, integer). We avoid this by never
18570 combining those units in a single alternative in the machine description.
18571 Ensure that this constraint holds to avoid unexpected surprises.
18573 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
18574 enforce these sanity checks. */
18577 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
18578 enum machine_mode mode, int strict)
18580 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
18581 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
18582 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
18583 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
18584 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
18585 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
18587 gcc_assert (!strict);
18591 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
18594 /* ??? This is a lie. We do have moves between mmx/general, and between
18595 mmx/sse2. But by saying we need secondary memory we discourage the
18596 register allocator from using the mmx registers unless needed. */
18597 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
18600 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
18602 /* SSE1 doesn't have any direct moves from other classes. */
18606 /* If the target says that inter-unit moves are more expensive
18607 than moving through memory, then don't generate them. */
18608 if (!TARGET_INTER_UNIT_MOVES)
18611 /* Between SSE and general, we have moves no larger than word size. */
18612 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
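/* Example of the checks above (a sketch): on ia32, a DImode move between
   GENERAL_REGS and SSE_REGS reports that secondary memory is needed,
   since 8 bytes exceeds UNITS_PER_WORD there; any MMX <-> integer pair is
   reported the same way on purpose, to steer the allocator away from the
   MMX registers.  */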
18619 /* Return true if the registers in CLASS cannot represent the change from
18620 modes FROM to TO. */
18623 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
18624 enum reg_class class)
18629 /* x87 registers can't do subreg at all, as all values are reformatted
18630 to extended precision. */
18631 if (MAYBE_FLOAT_CLASS_P (class))
18634 if (MAYBE_SSE_CLASS_P (class) || MAYBE_MMX_CLASS_P (class))
18636 /* Vector registers do not support QI or HImode loads. If we don't
18637 disallow a change to these modes, reload will assume it's ok to
18638 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
18639 the vec_dupv4hi pattern. */
18640 if (GET_MODE_SIZE (from) < 4)
18643 /* Vector registers do not support subreg with nonzero offsets, which
18644 are otherwise valid for integer registers. Since we can't see
18645 whether we have a nonzero offset from here, prohibit all
18646 nonparadoxical subregs changing size. */
18647 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
18654 /* Return the cost of moving data from a register in class CLASS1 to
18655 one in class CLASS2.
18657 It is not required that the cost always equal 2 when FROM is the same as TO;
18658 on some machines it is expensive to move between registers if they are not
18659 general registers. */
18662 ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
18663 enum reg_class class2)
18665 /* In case we require secondary memory, compute the cost of the store
18666 followed by the load. In order to avoid bad register allocation choices,
18667 we need this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
18669 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
18673 cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
18674 MEMORY_MOVE_COST (mode, class1, 1));
18675 cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
18676 MEMORY_MOVE_COST (mode, class2, 1));
18678 /* When copying from a general-purpose register we may emit multiple
18679 stores followed by a single load, causing a memory size mismatch stall.
18680 Count this as an arbitrarily high cost of 20. */
18681 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
18684 /* In the case of FP/MMX moves, the registers actually overlap, and we
18685 have to switch modes in order to treat them differently. */
18686 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
18687 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
18693 /* Moves between SSE/MMX and integer unit are expensive. */
18694 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
18695 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
18696 return ix86_cost->mmxsse_to_integer;
18697 if (MAYBE_FLOAT_CLASS_P (class1))
18698 return ix86_cost->fp_move;
18699 if (MAYBE_SSE_CLASS_P (class1))
18700 return ix86_cost->sse_move;
18701 if (MAYBE_MMX_CLASS_P (class1))
18702 return ix86_cost->mmx_move;
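/* Worked example (a sketch): a DImode copy between GENERAL_REGS and
   SSE_REGS on ia32 takes the secondary-memory path above, so its cost is
   the MAX (store, load) of each class summed, plus the extra 20 because
   the two-word general-register side needs more hard registers than the
   single SSE register.  */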
18706 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
18709 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
18711 /* Flags, and only flags, can hold CCmode values. */
18712 if (CC_REGNO_P (regno))
18713 return GET_MODE_CLASS (mode) == MODE_CC;
18714 if (GET_MODE_CLASS (mode) == MODE_CC
18715 || GET_MODE_CLASS (mode) == MODE_RANDOM
18716 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
18718 if (FP_REGNO_P (regno))
18719 return VALID_FP_MODE_P (mode);
18720 if (SSE_REGNO_P (regno))
18722 /* We implement the move patterns for all vector modes into and
18723 out of SSE registers, even when no operation instructions
18725 return (VALID_SSE_REG_MODE (mode)
18726 || VALID_SSE2_REG_MODE (mode)
18727 || VALID_MMX_REG_MODE (mode)
18728 || VALID_MMX_REG_MODE_3DNOW (mode));
18730 if (MMX_REGNO_P (regno))
18732 /* We implement the move patterns for 3DNOW modes even in MMX mode,
18733 so if the register is available at all, then we can move data of
18734 the given mode into or out of it. */
18735 return (VALID_MMX_REG_MODE (mode)
18736 || VALID_MMX_REG_MODE_3DNOW (mode));
18739 if (mode == QImode)
18741 /* Take care with QImode values - they can be in non-QI regs,
18742 but then they do cause partial register stalls. */
18743 if (regno < 4 || TARGET_64BIT)
18745 if (!TARGET_PARTIAL_REG_STALL)
18747 return reload_in_progress || reload_completed;
18749 /* We handle both integers and floats in the general purpose registers. */
18750 else if (VALID_INT_MODE_P (mode))
18752 else if (VALID_FP_MODE_P (mode))
18754 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
18755 on to use that value in smaller contexts, this can easily force a
18756 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
18757 supporting DImode, allow it. */
18758 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
18764 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
18765 tieable integer mode. */
18768 ix86_tieable_integer_mode_p (enum machine_mode mode)
18777 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
18780 return TARGET_64BIT;
18787 /* Return true if MODE1 is accessible in a register that can hold MODE2
18788 without copying. That is, all register classes that can hold MODE2
18789 can also hold MODE1. */
18792 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
18794 if (mode1 == mode2)
18797 if (ix86_tieable_integer_mode_p (mode1)
18798 && ix86_tieable_integer_mode_p (mode2))
18801 /* MODE2 being XFmode implies fp stack or general regs, which means we
18802 can tie any smaller floating point modes to it. Note that we do not
18803 tie this with TFmode. */
18804 if (mode2 == XFmode)
18805 return mode1 == SFmode || mode1 == DFmode;
18807 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
18808 that we can tie it with SFmode. */
18809 if (mode2 == DFmode)
18810 return mode1 == SFmode;
18812 /* If MODE2 is only appropriate for an SSE register, then tie with
18813 any other mode acceptable to SSE registers. */
18814 if (GET_MODE_SIZE (mode2) == 16
18815 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
18816 return (GET_MODE_SIZE (mode1) == 16
18817 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
18819 /* If MODE2 is appropriate for an MMX register, then tie
18820 with any other mode acceptable to MMX registers. */
18821 if (GET_MODE_SIZE (mode2) == 8
18822 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
18823 return (GET_MODE_SIZE (mode1) == 8
18824 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
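/* A few concrete outcomes of the rules above (a sketch):
     ix86_modes_tieable_p (SFmode, XFmode)   -> true
     ix86_modes_tieable_p (SFmode, DFmode)   -> true
     ix86_modes_tieable_p (V4SFmode, TImode) -> true when SSE is enabled,
   both being 16-byte modes acceptable to the SSE registers.  */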
18829 /* Return the cost of moving data of mode M between a
18830 register and memory. A value of 2 is the default; this cost is
18831 relative to those in `REGISTER_MOVE_COST'.
18833 If moving between registers and memory is more expensive than
18834 between two registers, you should define this macro to express the
18837 Model also increased moving costs of QImode registers in non
18841 ix86_memory_move_cost (enum machine_mode mode, enum reg_class class, int in)
18843 if (FLOAT_CLASS_P (class))
18860 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
18862 if (SSE_CLASS_P (class))
18865 switch (GET_MODE_SIZE (mode))
18879 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
18881 if (MMX_CLASS_P (class))
18884 switch (GET_MODE_SIZE (mode))
18895 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
18897 switch (GET_MODE_SIZE (mode))
18901 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
18902 : ix86_cost->movzbl_load);
18904 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
18905 : ix86_cost->int_store[0] + 4);
18908 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
18910 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
18911 if (mode == TFmode)
18913 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
18914 * (((int) GET_MODE_SIZE (mode)
18915 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
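/* Worked example (a sketch): DImode in GENERAL_REGS on ia32 falls through
   to the default case above and costs 2 * int_load[2] (or int_store[2]),
   since the 8-byte value takes two 4-byte word moves.  */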
18919 /* Compute a (partial) cost for rtx X. Return true if the complete
18920 cost has been computed, and false if subexpressions should be
18921 scanned. In either case, *TOTAL contains the cost result. */
18924 ix86_rtx_costs (rtx x, int code, int outer_code, int *total)
18926 enum machine_mode mode = GET_MODE (x);
18934 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
18936 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
18938 else if (flag_pic && SYMBOLIC_CONST (x)
18940 || (GET_CODE (x) != LABEL_REF
18941 && (GET_CODE (x) != SYMBOL_REF
18942 || !SYMBOL_REF_LOCAL_P (x)))))
18949 if (mode == VOIDmode)
18952 switch (standard_80387_constant_p (x))
18957 default: /* Other constants */
18962 /* Start with (MEM (SYMBOL_REF)), since that's where
18963 it'll probably end up. Add a penalty for size. */
18964 *total = (COSTS_N_INSNS (1)
18965 + (flag_pic != 0 && !TARGET_64BIT)
18966 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
18972 /* The zero extension is often completely free on x86_64, so make
18973 it as cheap as possible. */
18974 if (TARGET_64BIT && mode == DImode
18975 && GET_MODE (XEXP (x, 0)) == SImode)
18977 else if (TARGET_ZERO_EXTEND_WITH_AND)
18978 *total = ix86_cost->add;
18980 *total = ix86_cost->movzx;
18984 *total = ix86_cost->movsx;
18988 if (CONST_INT_P (XEXP (x, 1))
18989 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
18991 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
18994 *total = ix86_cost->add;
18997 if ((value == 2 || value == 3)
18998 && ix86_cost->lea <= ix86_cost->shift_const)
19000 *total = ix86_cost->lea;
19010 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
19012 if (CONST_INT_P (XEXP (x, 1)))
19014 if (INTVAL (XEXP (x, 1)) > 32)
19015 *total = ix86_cost->shift_const + COSTS_N_INSNS (2);
19017 *total = ix86_cost->shift_const * 2;
19021 if (GET_CODE (XEXP (x, 1)) == AND)
19022 *total = ix86_cost->shift_var * 2;
19024 *total = ix86_cost->shift_var * 6 + COSTS_N_INSNS (2);
19029 if (CONST_INT_P (XEXP (x, 1)))
19030 *total = ix86_cost->shift_const;
19032 *total = ix86_cost->shift_var;
19037 if (FLOAT_MODE_P (mode))
19039 *total = ix86_cost->fmul;
19044 rtx op0 = XEXP (x, 0);
19045 rtx op1 = XEXP (x, 1);
19047 if (CONST_INT_P (XEXP (x, 1)))
19049 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
19050 for (nbits = 0; value != 0; value &= value - 1)
19054 /* This is arbitrary. */
19057 /* Compute costs correctly for widening multiplication. */
19058 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op1) == ZERO_EXTEND)
19059 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
19060 == GET_MODE_SIZE (mode))
19062 int is_mulwiden = 0;
19063 enum machine_mode inner_mode = GET_MODE (op0);
19065 if (GET_CODE (op0) == GET_CODE (op1))
19066 is_mulwiden = 1, op1 = XEXP (op1, 0);
19067 else if (CONST_INT_P (op1))
19069 if (GET_CODE (op0) == SIGN_EXTEND)
19070 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
19073 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
19077 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
19080 *total = (ix86_cost->mult_init[MODE_INDEX (mode)]
19081 + nbits * ix86_cost->mult_bit
19082 + rtx_cost (op0, outer_code) + rtx_cost (op1, outer_code));
19091 if (FLOAT_MODE_P (mode))
19092 *total = ix86_cost->fdiv;
19094 *total = ix86_cost->divide[MODE_INDEX (mode)];
19098 if (FLOAT_MODE_P (mode))
19099 *total = ix86_cost->fadd;
19100 else if (GET_MODE_CLASS (mode) == MODE_INT
19101 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
19103 if (GET_CODE (XEXP (x, 0)) == PLUS
19104 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
19105 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
19106 && CONSTANT_P (XEXP (x, 1)))
19108 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
19109 if (val == 2 || val == 4 || val == 8)
19111 *total = ix86_cost->lea;
19112 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
19113 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
19115 *total += rtx_cost (XEXP (x, 1), outer_code);
19119 else if (GET_CODE (XEXP (x, 0)) == MULT
19120 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
19122 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
19123 if (val == 2 || val == 4 || val == 8)
19125 *total = ix86_cost->lea;
19126 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
19127 *total += rtx_cost (XEXP (x, 1), outer_code);
19131 else if (GET_CODE (XEXP (x, 0)) == PLUS)
19133 *total = ix86_cost->lea;
19134 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
19135 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
19136 *total += rtx_cost (XEXP (x, 1), outer_code);
19143 if (FLOAT_MODE_P (mode))
19145 *total = ix86_cost->fadd;
19153 if (!TARGET_64BIT && mode == DImode)
19155 *total = (ix86_cost->add * 2
19156 + (rtx_cost (XEXP (x, 0), outer_code)
19157 << (GET_MODE (XEXP (x, 0)) != DImode))
19158 + (rtx_cost (XEXP (x, 1), outer_code)
19159 << (GET_MODE (XEXP (x, 1)) != DImode)));
19165 if (FLOAT_MODE_P (mode))
19167 *total = ix86_cost->fchs;
19173 if (!TARGET_64BIT && mode == DImode)
19174 *total = ix86_cost->add * 2;
19176 *total = ix86_cost->add;
19180 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
19181 && XEXP (XEXP (x, 0), 1) == const1_rtx
19182 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
19183 && XEXP (x, 1) == const0_rtx)
19185 /* This kind of construct is implemented using test[bwl].
19186 Treat it as if we had an AND. */
19187 *total = (ix86_cost->add
19188 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code)
19189 + rtx_cost (const1_rtx, outer_code));
19195 if (!TARGET_SSE_MATH
19197 || (mode == DFmode && !TARGET_SSE2))
19202 if (FLOAT_MODE_P (mode))
19203 *total = ix86_cost->fabs;
19207 if (FLOAT_MODE_P (mode))
19208 *total = ix86_cost->fsqrt;
19212 if (XINT (x, 1) == UNSPEC_TP)
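/* Worked example for the PLUS handling in ix86_rtx_costs above (a
   sketch): (plus (mult (reg) (const_int 4)) (reg)) is priced as a single
   ix86_cost->lea plus the operand costs, matching the one
   "leal (%base,%index,4), %dst" instruction it becomes.  */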
19223 static int current_machopic_label_num;
19225 /* Given a symbol name and its associated stub, write out the
19226 definition of the stub. */
19229 machopic_output_stub (FILE *file, const char *symb, const char *stub)
19231 unsigned int length;
19232 char *binder_name, *symbol_name, lazy_ptr_name[32];
19233 int label = ++current_machopic_label_num;
19235 /* For 64-bit we shouldn't get here. */
19236 gcc_assert (!TARGET_64BIT);
19238 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
19239 symb = (*targetm.strip_name_encoding) (symb);
19241 length = strlen (stub);
19242 binder_name = alloca (length + 32);
19243 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
19245 length = strlen (symb);
19246 symbol_name = alloca (length + 32);
19247 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
19249 sprintf (lazy_ptr_name, "L%d$lz", label);
19252 switch_to_section (darwin_sections[machopic_picsymbol_stub_section]);
19254 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
19256 fprintf (file, "%s:\n", stub);
19257 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
19261 fprintf (file, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label, label);
19262 fprintf (file, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
19263 fprintf (file, "\tjmp\t*%%edx\n");
19266 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
19268 fprintf (file, "%s:\n", binder_name);
19272 fprintf (file, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
19273 fprintf (file, "\tpushl\t%%eax\n");
19276 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
19278 fprintf (file, "\tjmp\tdyld_stub_binding_helper\n");
19280 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
19281 fprintf (file, "%s:\n", lazy_ptr_name);
19282 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
19283 fprintf (file, "\t.long %s\n", binder_name);
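/* For reference, the PIC flavor of the stub printed above comes out
   roughly like this (symbol and label spellings are illustrative):

     L_foo$stub:
             .indirect_symbol _foo
             call    LPC$1
     LPC$1:  popl    %eax
             movl    L1$lz-LPC$1(%eax),%edx
             jmp     *%edx
     L_foo$stub_binder:
             lea     L1$lz-LPC$1(%eax),%eax
             pushl   %eax
             jmp     dyld_stub_binding_helper
     L1$lz:
             .indirect_symbol _foo
             .long   L_foo$stub_binder  */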
19287 darwin_x86_file_end (void)
19289 darwin_file_end ();
19292 #endif /* TARGET_MACHO */
19294 /* Order the registers for the register allocator. */
19297 x86_order_regs_for_local_alloc (void)
19302 /* First allocate the local general purpose registers. */
19303 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
19304 if (GENERAL_REGNO_P (i) && call_used_regs[i])
19305 reg_alloc_order [pos++] = i;
19307 /* Global general purpose registers. */
19308 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
19309 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
19310 reg_alloc_order [pos++] = i;
19312 /* x87 registers come first in case we are doing FP math
19314 if (!TARGET_SSE_MATH)
19315 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
19316 reg_alloc_order [pos++] = i;
19318 /* SSE registers. */
19319 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
19320 reg_alloc_order [pos++] = i;
19321 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
19322 reg_alloc_order [pos++] = i;
19324 /* x87 registers. */
19325 if (TARGET_SSE_MATH)
19326 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
19327 reg_alloc_order [pos++] = i;
19329 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
19330 reg_alloc_order [pos++] = i;
19332 /* Initialize the rest of the array, as we do not allocate some registers
19334 while (pos < FIRST_PSEUDO_REGISTER)
19335 reg_alloc_order [pos++] = 0;
19338 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
19339 struct attribute_spec.handler. */
19341 ix86_handle_struct_attribute (tree *node, tree name,
19342 tree args ATTRIBUTE_UNUSED,
19343 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
19346 if (DECL_P (*node))
19348 if (TREE_CODE (*node) == TYPE_DECL)
19349 type = &TREE_TYPE (*node);
19354 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
19355 || TREE_CODE (*type) == UNION_TYPE)))
19357 warning (OPT_Wattributes, "%qs attribute ignored",
19358 IDENTIFIER_POINTER (name));
19359 *no_add_attrs = true;
19362 else if ((is_attribute_p ("ms_struct", name)
19363 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
19364 || ((is_attribute_p ("gcc_struct", name)
19365 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
19367 warning (OPT_Wattributes, "%qs incompatible attribute ignored",
19368 IDENTIFIER_POINTER (name));
19369 *no_add_attrs = true;
19376 ix86_ms_bitfield_layout_p (tree record_type)
19378 return (TARGET_MS_BITFIELD_LAYOUT &&
19379 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
19380 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
19383 /* Returns an expression indicating where the this parameter is
19384 located on entry to the FUNCTION. */
19387 x86_this_parameter (tree function)
19389 tree type = TREE_TYPE (function);
19393 int n = aggregate_value_p (TREE_TYPE (type), type) != 0;
19394 return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
19397 if (ix86_function_regparm (type, function) > 0)
19401 parm = TYPE_ARG_TYPES (type);
19402 /* Figure out whether or not the function has a variable number of
19404 for (; parm; parm = TREE_CHAIN (parm))
19405 if (TREE_VALUE (parm) == void_type_node)
19407 /* If not, the this parameter is in the first argument. */
19411 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
19413 return gen_rtx_REG (SImode, regno);
19417 if (aggregate_value_p (TREE_TYPE (type), type))
19418 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
19420 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
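/* Summary of the cases above (a sketch): 64-bit passes "this" in the
   first integer parameter register (or the second, when a hidden
   aggregate-return pointer occupies the first); 32-bit regparm functions
   use %eax, fastcall functions use %ecx, and the plain stack convention
   finds it at 4(%esp), or 8(%esp) below an aggregate-return pointer.  */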
19423 /* Determine whether x86_output_mi_thunk can succeed. */
19426 x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED,
19427 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
19428 HOST_WIDE_INT vcall_offset, tree function)
19430 /* 64-bit can handle anything. */
19434 /* For 32-bit, everything's fine if we have one free register. */
19435 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
19438 /* Need a free register for vcall_offset. */
19442 /* Need a free register for GOT references. */
19443 if (flag_pic && !(*targetm.binds_local_p) (function))
19446 /* Otherwise ok. */
19450 /* Output the assembler code for a thunk function. THUNK_DECL is the
19451 declaration for the thunk function itself, FUNCTION is the decl for
19452 the target function. DELTA is an immediate constant offset to be
19453 added to THIS. If VCALL_OFFSET is nonzero, the word at
19454 *(*this + vcall_offset) should be added to THIS. */
19457 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
19458 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
19459 HOST_WIDE_INT vcall_offset, tree function)
19462 rtx this = x86_this_parameter (function);
19465 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
19466 pull it in now and let DELTA benefit. */
19469 else if (vcall_offset)
19471 /* Put the this parameter into %eax. */
19473 xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
19474 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
19477 this_reg = NULL_RTX;
19479 /* Adjust the this parameter by a fixed constant. */
19482 xops[0] = GEN_INT (delta);
19483 xops[1] = this_reg ? this_reg : this;
19486 if (!x86_64_general_operand (xops[0], DImode))
19488 tmp = gen_rtx_REG (DImode, R10_REG);
19490 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
19494 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
19497 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
19500 /* Adjust the this parameter by a value stored in the vtable. */
19504 tmp = gen_rtx_REG (DImode, R10_REG);
19507 int tmp_regno = 2 /* ECX */;
19508 if (lookup_attribute ("fastcall",
19509 TYPE_ATTRIBUTES (TREE_TYPE (function))))
19510 tmp_regno = 0 /* EAX */;
19511 tmp = gen_rtx_REG (SImode, tmp_regno);
19514 xops[0] = gen_rtx_MEM (Pmode, this_reg);
19517 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
19519 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
19521 /* Adjust the this parameter. */
19522 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
19523 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
19525 rtx tmp2 = gen_rtx_REG (DImode, R11_REG);
19526 xops[0] = GEN_INT (vcall_offset);
19528 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
19529 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
19531 xops[1] = this_reg;
19533 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
19535 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
19538 /* If necessary, drop THIS back to its stack slot. */
19539 if (this_reg && this_reg != this)
19541 xops[0] = this_reg;
19543 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
19546 xops[0] = XEXP (DECL_RTL (function), 0);
19549 if (!flag_pic || (*targetm.binds_local_p) (function))
19550 output_asm_insn ("jmp\t%P0", xops);
19553 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
19554 tmp = gen_rtx_CONST (Pmode, tmp);
19555 tmp = gen_rtx_MEM (QImode, tmp);
19557 output_asm_insn ("jmp\t%A0", xops);
19562 if (!flag_pic || (*targetm.binds_local_p) (function))
19563 output_asm_insn ("jmp\t%P0", xops);
19568 rtx sym_ref = XEXP (DECL_RTL (function), 0);
19569 tmp = (gen_rtx_SYMBOL_REF
19571 machopic_indirection_name (sym_ref, /*stub_p=*/true)));
19572 tmp = gen_rtx_MEM (QImode, tmp);
19574 output_asm_insn ("jmp\t%0", xops);
19577 #endif /* TARGET_MACHO */
19579 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
19580 output_set_got (tmp, NULL_RTX);
19583 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
19584 output_asm_insn ("jmp\t{*}%1", xops);
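/* Conceptually, the thunk emitted above behaves like this C sketch
   (DELTA, VCALL_OFFSET and FUNCTION stand for the parameters; not part
   of the original source):

     ret_type thunk (void *this, ...)
     {
       this = (char *) this + DELTA;
       if (VCALL_OFFSET)
         this = (char *) this + *(long *) (*(char **) this + VCALL_OFFSET);
       goto FUNCTION;                -- tail jump, caller's frame reused
     }  */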
19590 x86_file_start (void)
19592 default_file_start ();
19594 darwin_file_start ();
19596 if (X86_FILE_START_VERSION_DIRECTIVE)
19597 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
19598 if (X86_FILE_START_FLTUSED)
19599 fputs ("\t.global\t__fltused\n", asm_out_file);
19600 if (ix86_asm_dialect == ASM_INTEL)
19601 fputs ("\t.intel_syntax\n", asm_out_file);
19605 x86_field_alignment (tree field, int computed)
19607 enum machine_mode mode;
19608 tree type = TREE_TYPE (field);
19610 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
19612 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
19613 ? get_inner_array_type (type) : type);
19614 if (mode == DFmode || mode == DCmode
19615 || GET_MODE_CLASS (mode) == MODE_INT
19616 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
19617 return MIN (32, computed);
19621 /* Output assembler code to FILE to increment profiler label # LABELNO
19622 for profiling a function entry. */
19624 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
19629 #ifndef NO_PROFILE_COUNTERS
19630 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
19632 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
19636 #ifndef NO_PROFILE_COUNTERS
19637 fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno);
19639 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
19643 #ifndef NO_PROFILE_COUNTERS
19644 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
19645 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
19647 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
19651 #ifndef NO_PROFILE_COUNTERS
19652 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
19653 PROFILE_COUNT_REGISTER);
19655 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
19659 /* We don't have exact information about the insn sizes, but we may safely
19660 assume that we are informed about all 1-byte insns and memory
19661 address sizes. This is enough to eliminate unnecessary padding in
19665 min_insn_size (rtx insn)
19669 if (!INSN_P (insn) || !active_insn_p (insn))
19672 /* Discard alignments we've emitted, and jump instructions. */
19673 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
19674 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
19677 && (GET_CODE (PATTERN (insn)) == ADDR_VEC
19678 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
19681 /* Important case - calls are always 5 bytes.
19682 It is common to have many calls in a row. */
19684 && symbolic_reference_mentioned_p (PATTERN (insn))
19685 && !SIBLING_CALL_P (insn))
19687 if (get_attr_length (insn) <= 1)
19690 /* For normal instructions we may rely on the sizes of addresses
19691 and the presence of a symbol to require 4 bytes of encoding.
19692 This is not the case for jumps, where references are PC relative. */
19693 if (!JUMP_P (insn))
19695 l = get_attr_length_address (insn);
19696 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
19705 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
19709 ix86_avoid_jump_misspredicts (void)
19711 rtx insn, start = get_insns ();
19712 int nbytes = 0, njumps = 0;
19715 /* Look for all minimal intervals of instructions containing 4 jumps.
19716 The intervals are bounded by START and INSN. NBYTES is the total
19717 size of instructions in the interval including INSN and not including
19718 START. When NBYTES is smaller than 16, it is possible
19719 that START and INSN end up in the same 16-byte page.
19721 The smallest offset in the page at which INSN can start is the case where
19722 START ends at offset 0. The offset of INSN is then NBYTES - sizeof (INSN).
19723 We add a p2align to the 16-byte window with maxskip 17 - NBYTES + sizeof (INSN).
19725 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
19728 nbytes += min_insn_size (insn);
19730 fprintf(dump_file, "Insn %i estimated to %i bytes\n",
19731 INSN_UID (insn), min_insn_size (insn));
19733 && GET_CODE (PATTERN (insn)) != ADDR_VEC
19734 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
19742 start = NEXT_INSN (start);
19743 if ((JUMP_P (start)
19744 && GET_CODE (PATTERN (start)) != ADDR_VEC
19745 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
19747 njumps--, isjump = 1;
19750 nbytes -= min_insn_size (start);
19752 gcc_assert (njumps >= 0);
19754 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
19755 INSN_UID (start), INSN_UID (insn), nbytes);
19757 if (njumps == 3 && isjump && nbytes < 16)
19759 int padsize = 15 - nbytes + min_insn_size (insn);
19762 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
19763 INSN_UID (insn), padsize);
19764 emit_insn_before (gen_align (GEN_INT (padsize)), insn);
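/* Example (a sketch): if four jumps fall inside an interval of
   NBYTES < 16 bytes, the code above pads by 15 - NBYTES + sizeof (INSN)
   bytes in front of the last jump, so it can no longer share a 16-byte
   block with the other three.  */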
19769 /* AMD Athlon works faster
19770 when RET is not the destination of a conditional jump or directly preceded
19771 by another jump instruction. We avoid the penalty by inserting a NOP just
19772 before the RET instructions in such cases. */
19774 ix86_pad_returns (void)
19779 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
19781 basic_block bb = e->src;
19782 rtx ret = BB_END (bb);
19784 bool replace = false;
19786 if (!JUMP_P (ret) || GET_CODE (PATTERN (ret)) != RETURN
19787 || !maybe_hot_bb_p (bb))
19789 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
19790 if (active_insn_p (prev) || LABEL_P (prev))
19792 if (prev && LABEL_P (prev))
19797 FOR_EACH_EDGE (e, ei, bb->preds)
19798 if (EDGE_FREQUENCY (e) && e->src->index >= 0
19799 && !(e->flags & EDGE_FALLTHRU))
19804 prev = prev_active_insn (ret);
19806 && ((JUMP_P (prev) && any_condjump_p (prev))
19809 /* Empty functions get a branch mispredict even when the jump destination
19810 is not visible to us. */
19811 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
19816 emit_insn_before (gen_return_internal_long (), ret);
19822 /* Implement machine-specific optimizations. We implement padding of returns
19823 for K8 CPUs and a pass to avoid 4 jumps in a single 16-byte window. */
19827 if (TARGET_PAD_RETURNS && optimize && !optimize_size)
19828 ix86_pad_returns ();
19829 if (TARGET_FOUR_JUMP_LIMIT && optimize && !optimize_size)
19830 ix86_avoid_jump_misspredicts ();
19833 /* Return nonzero when a QImode register that must be represented via a REX prefix
19836 x86_extended_QIreg_mentioned_p (rtx insn)
19839 extract_insn_cached (insn);
19840 for (i = 0; i < recog_data.n_operands; i++)
19841 if (REG_P (recog_data.operand[i])
19842 && REGNO (recog_data.operand[i]) >= 4)
19847 /* Return nonzero when P points to a register encoded via a REX prefix.
19848 Called via for_each_rtx. */
19850 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
19852 unsigned int regno;
19855 regno = REGNO (*p);
19856 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
19859 /* Return true when INSN mentions a register that must be encoded using a REX
19862 x86_extended_reg_mentioned_p (rtx insn)
19864 return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
19867 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
19868 optabs would emit if we didn't have TFmode patterns. */
19871 x86_emit_floatuns (rtx operands[2])
19873 rtx neglab, donelab, i0, i1, f0, in, out;
19874 enum machine_mode mode, inmode;
19876 inmode = GET_MODE (operands[1]);
19877 gcc_assert (inmode == SImode || inmode == DImode);
19880 in = force_reg (inmode, operands[1]);
19881 mode = GET_MODE (out);
19882 neglab = gen_label_rtx ();
19883 donelab = gen_label_rtx ();
19884 f0 = gen_reg_rtx (mode);
19886 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
19888 expand_float (out, in, 0);
19890 emit_jump_insn (gen_jump (donelab));
19893 emit_label (neglab);
19895 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
19897 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
19899 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
19901 expand_float (f0, i0, 0);
19903 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
19905 emit_label (donelab);
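/* The expansion above is equivalent to this C sketch (DImode case shown,
   SImode is analogous; not part of the original source):

     double floatuns (unsigned long u)
     {
       if ((long) u >= 0)
         return (double) (long) u;     -- signed conversion suffices
       long i = (u >> 1) | (u & 1);    -- halve, keeping a sticky bit
       double f = (double) i;
       return f + f;                   -- double back, rounding only once
     }  */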
19908 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
19909 with all elements equal to VAR. Return true if successful. */
19912 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
19913 rtx target, rtx val)
19915 enum machine_mode smode, wsmode, wvmode;
19930 val = force_reg (GET_MODE_INNER (mode), val);
19931 x = gen_rtx_VEC_DUPLICATE (mode, val);
19932 emit_insn (gen_rtx_SET (VOIDmode, target, x));
19938 if (TARGET_SSE || TARGET_3DNOW_A)
19940 val = gen_lowpart (SImode, val);
19941 x = gen_rtx_TRUNCATE (HImode, val);
19942 x = gen_rtx_VEC_DUPLICATE (mode, x);
19943 emit_insn (gen_rtx_SET (VOIDmode, target, x));
19965 /* Extend HImode to SImode using a paradoxical SUBREG. */
19966 tmp1 = gen_reg_rtx (SImode);
19967 emit_move_insn (tmp1, gen_lowpart (SImode, val));
19968 /* Insert the SImode value as low element of V4SImode vector. */
19969 tmp2 = gen_reg_rtx (V4SImode);
19970 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
19971 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
19972 CONST0_RTX (V4SImode),
19974 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
19975 /* Cast the V4SImode vector back to a V8HImode vector. */
19976 tmp1 = gen_reg_rtx (V8HImode);
19977 emit_move_insn (tmp1, gen_lowpart (V8HImode, tmp2));
19978 /* Duplicate the low short through the whole low SImode word. */
19979 emit_insn (gen_sse2_punpcklwd (tmp1, tmp1, tmp1));
19980 /* Cast the V8HImode vector back to a V4SImode vector. */
19981 tmp2 = gen_reg_rtx (V4SImode);
19982 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
19983 /* Replicate the low element of the V4SImode vector. */
19984 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
19985 /* Cast the V4SImode vector back to V8HImode, and store in target. */
19986 emit_move_insn (target, gen_lowpart (V8HImode, tmp2));
19997 /* Extend QImode to SImode using a paradoxical SUBREG. */
19998 tmp1 = gen_reg_rtx (SImode);
19999 emit_move_insn (tmp1, gen_lowpart (SImode, val));
20000 /* Insert the SImode value as low element of V4SImode vector. */
20001 tmp2 = gen_reg_rtx (V4SImode);
20002 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
20003 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
20004 CONST0_RTX (V4SImode),
20006 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
20007 /* Cast the V4SImode vector back to a V16QImode vector. */
20008 tmp1 = gen_reg_rtx (V16QImode);
20009 emit_move_insn (tmp1, gen_lowpart (V16QImode, tmp2));
20010 /* Duplicate the low byte through the whole low SImode word. */
20011 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
20012 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
20013 /* Cast the V16QImode vector back to a V4SImode vector. */
20014 tmp2 = gen_reg_rtx (V4SImode);
20015 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
20016 /* Replicate the low element of the V4SImode vector. */
20017 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
20018 /* Cast the V4SImode vector back to V16QImode, and store in target. */
20019 emit_move_insn (target, gen_lowpart (V16QImode, tmp2));
20027 /* Replicate the value once into the next wider mode and recurse. */
20028 val = convert_modes (wsmode, smode, val, true);
20029 x = expand_simple_binop (wsmode, ASHIFT, val,
20030 GEN_INT (GET_MODE_BITSIZE (smode)),
20031 NULL_RTX, 1, OPTAB_LIB_WIDEN);
20032 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
20034 x = gen_reg_rtx (wvmode);
20035 if (!ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val))
20036 gcc_unreachable ();
20037 emit_move_insn (target, gen_lowpart (mode, x));
20045 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
20046 whose ONE_VAR element is VAR, and other elements are zero. Return true
20050 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
20051 rtx target, rtx var, int one_var)
20053 enum machine_mode vsimode;
20069 var = force_reg (GET_MODE_INNER (mode), var);
20070 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
20071 emit_insn (gen_rtx_SET (VOIDmode, target, x));
20076 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
20077 new_target = gen_reg_rtx (mode);
20079 new_target = target;
20080 var = force_reg (GET_MODE_INNER (mode), var);
20081 x = gen_rtx_VEC_DUPLICATE (mode, var);
20082 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
20083 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
20086 /* We need to shuffle the value to the correct position, so
20087 create a new pseudo to store the intermediate result. */
20089 /* With SSE2, we can use the integer shuffle insns. */
20090 if (mode != V4SFmode && TARGET_SSE2)
20092 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
20094 GEN_INT (one_var == 1 ? 0 : 1),
20095 GEN_INT (one_var == 2 ? 0 : 1),
20096 GEN_INT (one_var == 3 ? 0 : 1)));
20097 if (target != new_target)
20098 emit_move_insn (target, new_target);
20102 /* Otherwise convert the intermediate result to V4SFmode and
20103 use the SSE1 shuffle instructions. */
20104 if (mode != V4SFmode)
20106 tmp = gen_reg_rtx (V4SFmode);
20107 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
20112 emit_insn (gen_sse_shufps_1 (tmp, tmp, tmp,
20114 GEN_INT (one_var == 1 ? 0 : 1),
20115 GEN_INT (one_var == 2 ? 0+4 : 1+4),
20116 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
20118 if (mode != V4SFmode)
20119 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
20120 else if (tmp != target)
20121 emit_move_insn (target, tmp);
20123 else if (target != new_target)
20124 emit_move_insn (target, new_target);
20129 vsimode = V4SImode;
20135 vsimode = V2SImode;
20141 /* Zero extend the variable element to SImode and recurse. */
20142 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
20144 x = gen_reg_rtx (vsimode);
20145 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
20147 gcc_unreachable ();
20149 emit_move_insn (target, gen_lowpart (mode, x));
20157 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
20158 consisting of the values in VALS. It is known that all elements
20159 except ONE_VAR are constants. Return true if successful. */
20162 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
20163 rtx target, rtx vals, int one_var)
20165 rtx var = XVECEXP (vals, 0, one_var);
20166 enum machine_mode wmode;
20169 const_vec = copy_rtx (vals);
20170 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
20171 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
20179 /* For the two element vectors, it's just as easy to use
20180 the general case. */
20196 /* There's no way to set one QImode entry easily. Combine
20197 the variable value with its adjacent constant value, and
20198 promote to an HImode set. */
20199 x = XVECEXP (vals, 0, one_var ^ 1);
20202 var = convert_modes (HImode, QImode, var, true);
20203 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
20204 NULL_RTX, 1, OPTAB_LIB_WIDEN);
20205 x = GEN_INT (INTVAL (x) & 0xff);
20209 var = convert_modes (HImode, QImode, var, true);
20210 x = gen_int_mode (INTVAL (x) << 8, HImode);
20212 if (x != const0_rtx)
20213 var = expand_simple_binop (HImode, IOR, var, x, var,
20214 1, OPTAB_LIB_WIDEN);
20216 x = gen_reg_rtx (wmode);
20217 emit_move_insn (x, gen_lowpart (wmode, const_vec));
20218 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
20220 emit_move_insn (target, gen_lowpart (mode, x));
20227 emit_move_insn (target, const_vec);
20228 ix86_expand_vector_set (mmx_ok, target, var, one_var);
20232 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
20233 all values variable, and none identical. */
20236 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
20237 rtx target, rtx vals)
20239 enum machine_mode half_mode = GET_MODE_INNER (mode);
20240 rtx op0 = NULL, op1 = NULL;
20241 bool use_vec_concat = false;
20247 if (!mmx_ok && !TARGET_SSE)
20253 /* For the two element vectors, we always implement VEC_CONCAT. */
20254 op0 = XVECEXP (vals, 0, 0);
20255 op1 = XVECEXP (vals, 0, 1);
20256 use_vec_concat = true;
20260 half_mode = V2SFmode;
20263 half_mode = V2SImode;
20269 /* For V4SF and V4SI, we implement a concat of two V2 vectors.
20270 Recurse to load the two halves. */
20272 op0 = gen_reg_rtx (half_mode);
20273 v = gen_rtvec (2, XVECEXP (vals, 0, 0), XVECEXP (vals, 0, 1));
20274 ix86_expand_vector_init (false, op0, gen_rtx_PARALLEL (half_mode, v));
20276 op1 = gen_reg_rtx (half_mode);
20277 v = gen_rtvec (2, XVECEXP (vals, 0, 2), XVECEXP (vals, 0, 3));
20278 ix86_expand_vector_init (false, op1, gen_rtx_PARALLEL (half_mode, v));
20280 use_vec_concat = true;
20291 gcc_unreachable ();
20294 if (use_vec_concat)
20296 if (!register_operand (op0, half_mode))
20297 op0 = force_reg (half_mode, op0);
20298 if (!register_operand (op1, half_mode))
20299 op1 = force_reg (half_mode, op1);
20301 emit_insn (gen_rtx_SET (VOIDmode, target,
20302 gen_rtx_VEC_CONCAT (mode, op0, op1)));
20306 int i, j, n_elts, n_words, n_elt_per_word;
20307 enum machine_mode inner_mode;
20308 rtx words[4], shift;
20310 inner_mode = GET_MODE_INNER (mode);
20311 n_elts = GET_MODE_NUNITS (mode);
20312 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
20313 n_elt_per_word = n_elts / n_words;
20314 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
20316 for (i = 0; i < n_words; ++i)
20318 rtx word = NULL_RTX;
20320 for (j = 0; j < n_elt_per_word; ++j)
20322 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
20323 elt = convert_modes (word_mode, inner_mode, elt, true);
20329 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
20330 word, 1, OPTAB_LIB_WIDEN);
20331 word = expand_simple_binop (word_mode, IOR, word, elt,
20332 word, 1, OPTAB_LIB_WIDEN);
20340 emit_move_insn (target, gen_lowpart (mode, words[0]));
20341 else if (n_words == 2)
20343 rtx tmp = gen_reg_rtx (mode);
20344 emit_insn (gen_rtx_CLOBBER (VOIDmode, tmp));
20345 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
20346 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
20347 emit_move_insn (target, tmp);
20349 else if (n_words == 4)
20351 rtx tmp = gen_reg_rtx (V4SImode);
20352 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
20353 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
20354 emit_move_insn (target, gen_lowpart (mode, tmp));
20357 gcc_unreachable ();
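/* Worked example for the word-building branch above (a sketch): a
   V8HImode init on a 32-bit target gives n_words == 4, so each pair of
   HImode elements is shifted and IORed into one SImode word, and the four
   words are then assembled by recursing as a V4SImode init.  */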
20361 /* Initialize vector TARGET via VALS. Suppress the use of MMX
20362 instructions unless MMX_OK is true. */
20365 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
20367 enum machine_mode mode = GET_MODE (target);
20368 enum machine_mode inner_mode = GET_MODE_INNER (mode);
20369 int n_elts = GET_MODE_NUNITS (mode);
20370 int n_var = 0, one_var = -1;
20371 bool all_same = true, all_const_zero = true;
20375 for (i = 0; i < n_elts; ++i)
20377 x = XVECEXP (vals, 0, i);
20378 if (!CONSTANT_P (x))
20379 n_var++, one_var = i;
20380 else if (x != CONST0_RTX (inner_mode))
20381 all_const_zero = false;
20382 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
20386 /* Constants are best loaded from the constant pool. */
20389 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
20393 /* If all values are identical, broadcast the value. */
20395 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
20396 XVECEXP (vals, 0, 0)))
20399 /* Values where only one field is non-constant are best loaded from
20400 the pool and overwritten via move later. */
20404 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
20405 XVECEXP (vals, 0, one_var),
20409 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
20413 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
20417 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
20419 enum machine_mode mode = GET_MODE (target);
20420 enum machine_mode inner_mode = GET_MODE_INNER (mode);
20421 bool use_vec_merge = false;
20430 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
20431 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
20433 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
20435 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
20436 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
20446 /* For the two element vectors, we implement a VEC_CONCAT with
20447 the extraction of the other element. */
20449 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
20450 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
20453 op0 = val, op1 = tmp;
20455 op0 = tmp, op1 = val;
20457 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
20458 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
20466 use_vec_merge = true;
20470 /* tmp = target = A B C D */
20471 tmp = copy_to_reg (target);
20472 /* target = A A B B */
20473 emit_insn (gen_sse_unpcklps (target, target, target));
20474 /* target = X A B B */
20475 ix86_expand_vector_set (false, target, val, 0);
20476 /* target = A X C D */
20477 emit_insn (gen_sse_shufps_1 (target, target, tmp,
20478 GEN_INT (1), GEN_INT (0),
20479 GEN_INT (2+4), GEN_INT (3+4)));
20483 /* tmp = target = A B C D */
20484 tmp = copy_to_reg (target);
20485 /* tmp = X B C D */
20486 ix86_expand_vector_set (false, tmp, val, 0);
20487 /* target = A B X D */
20488 emit_insn (gen_sse_shufps_1 (target, target, tmp,
20489 GEN_INT (0), GEN_INT (1),
20490 GEN_INT (0+4), GEN_INT (3+4)));
20494 /* tmp = target = A B C D */
20495 tmp = copy_to_reg (target);
20496 /* tmp = X B C D */
20497 ix86_expand_vector_set (false, tmp, val, 0);
20498 /* target = A B X D */
20499 emit_insn (gen_sse_shufps_1 (target, target, tmp,
20500 GEN_INT (0), GEN_INT (1),
20501 GEN_INT (2+4), GEN_INT (0+4)));
20505 gcc_unreachable ();
20510 /* Element 0 handled by vec_merge below. */
20513 use_vec_merge = true;
20519 /* With SSE2, use integer shuffles to swap element 0 and ELT,
20520 store into element 0, then shuffle them back. */
20524 order[0] = GEN_INT (elt);
20525 order[1] = const1_rtx;
20526 order[2] = const2_rtx;
20527 order[3] = GEN_INT (3);
20528 order[elt] = const0_rtx;
20530 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
20531 order[1], order[2], order[3]));
20533 ix86_expand_vector_set (false, target, val, 0);
20535 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
20536 order[1], order[2], order[3]));
20540 /* For SSE1, we have to reuse the V4SF code. */
20541 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
20542 gen_lowpart (SFmode, val), elt);
20547 use_vec_merge = TARGET_SSE2;
20550 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
20561 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
20562 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
20563 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
20567 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
20569 emit_move_insn (mem, target);
20571 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
20572 emit_move_insn (tmp, val);
20574 emit_move_insn (target, mem);
20579 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
20581 enum machine_mode mode = GET_MODE (vec);
20582 enum machine_mode inner_mode = GET_MODE_INNER (mode);
20583 bool use_vec_extr = false;
20596 use_vec_extr = true;
20608 tmp = gen_reg_rtx (mode);
20609 emit_insn (gen_sse_shufps_1 (tmp, vec, vec,
20610 GEN_INT (elt), GEN_INT (elt),
20611 GEN_INT (elt+4), GEN_INT (elt+4)));
20615 tmp = gen_reg_rtx (mode);
20616 emit_insn (gen_sse_unpckhps (tmp, vec, vec));
20620 gcc_unreachable ();
20623 use_vec_extr = true;
20638 tmp = gen_reg_rtx (mode);
20639 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
20640 GEN_INT (elt), GEN_INT (elt),
20641 GEN_INT (elt), GEN_INT (elt)));
20645 tmp = gen_reg_rtx (mode);
20646 emit_insn (gen_sse2_punpckhdq (tmp, vec, vec));
20650 gcc_unreachable ();
20653 use_vec_extr = true;
20658 /* For SSE1, we have to reuse the V4SF code. */
20659 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
20660 gen_lowpart (V4SFmode, vec), elt);
20666 use_vec_extr = TARGET_SSE2;
20669 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
20674 /* ??? Could extract the appropriate HImode element and shift. */
20681 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
20682 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
20684 /* Let the rtl optimizers know about the zero extension performed. */
20685 if (inner_mode == HImode)
20687 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
20688 target = gen_lowpart (SImode, target);
20691 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
20695 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
20697 emit_move_insn (mem, vec);
20699 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
20700 emit_move_insn (target, tmp);
20704 /* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
20705 pattern to reduce; DEST is the destination; IN is the input vector. */
20708 ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
20710 rtx tmp1, tmp2, tmp3;
20712 tmp1 = gen_reg_rtx (V4SFmode);
20713 tmp2 = gen_reg_rtx (V4SFmode);
20714 tmp3 = gen_reg_rtx (V4SFmode);
20716 emit_insn (gen_sse_movhlps (tmp1, in, in));
20717 emit_insn (fn (tmp2, tmp1, in));
20719 emit_insn (gen_sse_shufps_1 (tmp3, tmp2, tmp2,
20720 GEN_INT (1), GEN_INT (1),
20721 GEN_INT (1+4), GEN_INT (1+4)));
20722 emit_insn (fn (dest, tmp2, tmp3));
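/* E.g. with FN generating addv4sf and IN = { a0, a1, a2, a3 }, the
   three steps above compute (a sketch):
     tmp2 = { a0+a2, a1+a3, ... }      -- movhlps + add
     dest = { (a0+a2)+(a1+a3), ... }   -- shufps + add
   leaving the full horizontal sum in element 0 of DEST.  */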
20725 /* Target hook for scalar_mode_supported_p. */
20727 ix86_scalar_mode_supported_p (enum machine_mode mode)
20729 if (DECIMAL_FLOAT_MODE_P (mode))
20732 return default_scalar_mode_supported_p (mode);
20735 /* Implements target hook vector_mode_supported_p. */
20737 ix86_vector_mode_supported_p (enum machine_mode mode)
20739 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
20741 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
20743 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
20745 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
20750 /* Worker function for TARGET_MD_ASM_CLOBBERS.
20752 We do this in the new i386 backend to maintain source compatibility
20753 with the old cc0-based compiler. */
20756 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
20757 tree inputs ATTRIBUTE_UNUSED,
20760 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
20762 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
20767 /* Implements the target hook targetm.asm.encode_section_info. This
20768 is not used by NetWare. */
20770 static void ATTRIBUTE_UNUSED
20771 ix86_encode_section_info (tree decl, rtx rtl, int first)
20773 default_encode_section_info (decl, rtl, first);
20775 if (TREE_CODE (decl) == VAR_DECL
20776 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
20777 && ix86_in_large_data_p (decl))
20778 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
20781 /* Worker function for REVERSE_CONDITION. */
20784 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
20786 return (mode != CCFPmode && mode != CCFPUmode
20787 ? reverse_condition (code)
20788 : reverse_condition_maybe_unordered (code));
20791 /* Output code to perform an x87 FP register move, from OPERANDS[1]
20795 output_387_reg_move (rtx insn, rtx *operands)
20797 if (REG_P (operands[1])
20798 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
20800 if (REGNO (operands[0]) == FIRST_STACK_REG)
20801 return output_387_ffreep (operands, 0);
20802 return "fstp\t%y0";
20804 if (STACK_TOP_P (operands[0]))
20805 return "fld%z1\t%y1";
20809 /* Output code to perform a conditional jump to LABEL, if the C2 flag in the
20810 FP status register is set. */
20813 ix86_emit_fp_unordered_jump (rtx label)
20815 rtx reg = gen_reg_rtx (HImode);
20818 emit_insn (gen_x86_fnstsw_1 (reg));
20820 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_size))
20822 emit_insn (gen_x86_sahf_1 (reg));
20824 temp = gen_rtx_REG (CCmode, FLAGS_REG);
20825 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
20829 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
20831 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
20832 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
20835 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
20836 gen_rtx_LABEL_REF (VOIDmode, label),
20838 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
20840 emit_jump_insn (temp);
20841 predict_jump (REG_BR_PROB_BASE * 10 / 100);
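/* The emitted sequence is roughly (a sketch):

     fnstsw  %ax
     sahf                 -- when sahf is available and wanted
     jp      label        -- C2 surfaces as PF after sahf

   or, without sahf:

     fnstsw  %ax
     testb   $4, %ah      -- bit 2 of AH is the C2 flag
     jne     label  */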
20844 /* Output code to perform a log1p XFmode calculation. */
20846 void ix86_emit_i387_log1p (rtx op0, rtx op1)
20848 rtx label1 = gen_label_rtx ();
20849 rtx label2 = gen_label_rtx ();
20851 rtx tmp = gen_reg_rtx (XFmode);
20852 rtx tmp2 = gen_reg_rtx (XFmode);
20854 emit_insn (gen_absxf2 (tmp, op1));
20855 emit_insn (gen_cmpxf (tmp,
20856 CONST_DOUBLE_FROM_REAL_VALUE (
20857 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
20859 emit_jump_insn (gen_bge (label1));
20861 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
20862 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
20863 emit_jump (label2);
20865 emit_label (label1);
20866 emit_move_insn (tmp, CONST1_RTX (XFmode));
20867 emit_insn (gen_addxf3 (tmp, op1, tmp));
20868 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
20869 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
20871 emit_label (label2);
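/* In formula form (a sketch): for |x| below 1 - sqrt(2)/2 ~ 0.2929 this
   computes log1p (x) = ln (2) * log2 (x + 1) via fyl2xp1, which keeps
   precision for small x; otherwise it falls back to
   log (1 + x) = ln (2) * log2 (1 + x) via fyl2x.  */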
20874 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
20876 static void ATTRIBUTE_UNUSED
20877 i386_solaris_elf_named_section (const char *name, unsigned int flags,
20880 /* With Binutils 2.15, the "@unwind" marker must be specified on
20881 every occurrence of the ".eh_frame" section, not just the first
20884 && strcmp (name, ".eh_frame") == 0)
20886 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
20887 flags & SECTION_WRITE ? "aw" : "a");
20890 default_elf_asm_named_section (name, flags, decl);
20893 /* Return the mangling of TYPE if it is an extended fundamental type. */
20895 static const char *
20896 ix86_mangle_fundamental_type (tree type)
20898 switch (TYPE_MODE (type))
20901 /* __float128 is "g". */
20904 /* "long double" or __float80 is "e". */
20911 /* For 32-bit code we can save PIC register setup by using
20912 the hidden function __stack_chk_fail_local instead of calling
20913 __stack_chk_fail directly. 64-bit code doesn't need to set up any PIC
20914 register, so it is better to call __stack_chk_fail directly. */
20917 ix86_stack_protect_fail (void)
20919 return TARGET_64BIT
20920 ? default_external_stack_protect_fail ()
20921 : default_hidden_stack_protect_fail ();
/* Select a format to encode pointers in exception handling data.  CODE
   is 0 for data, 1 for code labels, 2 for function pointers.  GLOBAL is
   true if the symbol may be affected by dynamic relocations.

   ??? All x86 object file formats are capable of representing this.
   After all, the relocation needed is the same as for the call insn.
   Whether or not a particular assembler allows us to enter such, I
   guess we'll have to see.  */
int
asm_preferred_eh_data_format (int code, int global)
{
  if (flag_pic)
    {
      int type = DW_EH_PE_sdata8;
      if (!TARGET_64BIT
          || ix86_cmodel == CM_SMALL_PIC
          || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
        type = DW_EH_PE_sdata4;
      return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
    }
  if (ix86_cmodel == CM_SMALL
      || (ix86_cmodel == CM_MEDIUM && code))
    return DW_EH_PE_udata4;
  return DW_EH_PE_absptr;
}
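
/* Worked examples (illustrative): for 32-bit PIC code a global symbol
   gets DW_EH_PE_indirect | DW_EH_PE_pcrel | DW_EH_PE_sdata4, i.e.
   0x80 | 0x10 | 0x0b = 0x9b, the encoding commonly seen in .eh_frame
   dumps; non-PIC code outside the small/medium code models simply
   uses DW_EH_PE_absptr (0x00).  */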
/* Expand copysign from SIGN to the positive value ABS_VALUE
   storing in RESULT.  If MASK is non-null, it shall be a mask to mask out
   the sign bit.  */
static void
ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
{
  enum machine_mode mode = GET_MODE (sign);
  rtx sgn = gen_reg_rtx (mode);
  if (mask == NULL_RTX)
    {
      mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), false);
      if (!VECTOR_MODE_P (mode))
        {
          /* We need to generate a scalar mode mask in this case.  */
          rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
          tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
          mask = gen_reg_rtx (mode);
          emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
        }
    }
  else
    mask = gen_rtx_NOT (mode, mask);

  emit_insn (gen_rtx_SET (VOIDmode, sgn,
                          gen_rtx_AND (mode, mask, sign)));
  emit_insn (gen_rtx_SET (VOIDmode, result,
                          gen_rtx_IOR (mode, abs_value, sgn)));
}
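
/* Scalar sketch of the bit trick above (illustration, not GCC code);
   the sign is extracted with AND and merged with OR, exactly what the
   emitted ANDPS/ORPS (or ANDPD/ORPD) pair does:

     uint64_t sign_mask = 1ull << 63;
     uint64_t r = to_bits (abs_value) | (to_bits (sign) & sign_mask);
     result = from_bits (r);

   to_bits/from_bits stand for a bit-level view of the double, e.g.
   via memcpy or a union.  */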
/* Expand fabs (OP0) and return a new rtx that holds the result.  The
   mask for masking out the sign-bit is stored in *SMASK, if that is
   non-null.  */
static rtx
ix86_expand_sse_fabs (rtx op0, rtx *smask)
{
  enum machine_mode mode = GET_MODE (op0);
  rtx xa, mask;

  xa = gen_reg_rtx (mode);
  mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), true);
  if (!VECTOR_MODE_P (mode))
    {
      /* We need to generate a scalar mode mask in this case.  */
      rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
      tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
      mask = gen_reg_rtx (mode);
      emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
    }
  emit_insn (gen_rtx_SET (VOIDmode, xa,
                          gen_rtx_AND (mode, op0, mask)));

  if (smask)
    *smask = mask;

  return xa;
}
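
/* The corresponding scalar identity (illustration only): fabs just
   clears the sign bit, here for a double,

     uint64_t r = to_bits (x) & ~(1ull << 63);

   which is what the emitted AND with the inverted sign-bit mask
   computes.  */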
/* Expands a comparison of OP0 with OP1 using comparison code CODE,
   swapping the operands if SWAP_OPERANDS is true.  The expanded
   code is a forward jump to a newly created label in case the
   comparison is true.  The generated label rtx is returned.  */
static rtx
ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
                                  bool swap_operands)
{
  rtx label, tmp;

  if (swap_operands)
    {
      tmp = op0;
      op0 = op1;
      op1 = tmp;
    }

  label = gen_label_rtx ();
  tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
  emit_insn (gen_rtx_SET (VOIDmode, tmp,
                          gen_rtx_COMPARE (CCFPUmode, op0, op1)));
  tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
  tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
                              gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
  tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
  JUMP_LABEL (tmp) = label;

  return label;
}
/* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
   using comparison code CODE.  Operands are swapped for the comparison if
   SWAP_OPERANDS is true.  Returns a rtx for the generated mask.  */
static rtx
ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
                              bool swap_operands)
{
  enum machine_mode mode = GET_MODE (op0);
  rtx mask = gen_reg_rtx (mode);

  if (swap_operands)
    {
      rtx tmp = op0;
      op0 = op1;
      op1 = tmp;
    }

  if (mode == DFmode)
    emit_insn (gen_sse2_maskcmpdf3 (mask, op0, op1,
                                    gen_rtx_fmt_ee (code, mode, op0, op1)));
  else
    emit_insn (gen_sse_maskcmpsf3 (mask, op0, op1,
                                   gen_rtx_fmt_ee (code, mode, op0, op1)));

  return mask;
}
/* Generate and return a rtx of mode MODE for 2**n where n is the number
   of bits of the mantissa of MODE, which must be one of DFmode or SFmode.  */
static rtx
ix86_gen_TWO52 (enum machine_mode mode)
{
  REAL_VALUE_TYPE TWO52r;
  rtx TWO52;

  real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
  TWO52 = const_double_from_real_value (TWO52r, mode);
  TWO52 = force_reg (mode, TWO52);

  return TWO52;
}
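
/* Why 2**52 (2**23 for SFmode)?  A double carries 52 explicit
   mantissa bits, so once a nonnegative x < 2**52 is added to 2**52
   there is no room left for fraction bits and the FPU must round to
   an integer; subtracting 2**52 again leaves x rounded in the current
   rounding mode.  Worked example (illustration only):

     double t = 3.7 + 4503599627370496.0;    t == 4503599627370500.0
     double r = t - 4503599627370496.0;      r == 4.0

   under round-to-nearest.  The expanders below all rely on this
   trick, guarded by the isless (xa, TWO52) test, since values at or
   above 2**52 are already integral.  */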
/* Expand SSE sequence for computing lround from OP1 storing
   into OP0.  */
void
ix86_expand_lround (rtx op0, rtx op1)
{
  /* C code for the stuff we're doing below:
       tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
       op0 = (long)tmp
   */
  enum machine_mode mode = GET_MODE (op1);
  const struct real_format *fmt;
  REAL_VALUE_TYPE pred_half, half_minus_pred_half;
  rtx adj;

  /* load nextafter (0.5, 0.0) */
  fmt = REAL_MODE_FORMAT (mode);
  real_2expN (&half_minus_pred_half, -(fmt->p) - 1);
  REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);

  /* adj = copysign (0.5, op1) */
  adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
  ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);

  /* adj = op1 + adj */
  adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);

  /* op0 = (imode)adj */
  expand_fix (op0, adj, 0);
}
/* Expand SSE2 sequence for computing lfloor or lceil from OP1 storing
   into OP0.  */
void
ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
{
  /* C code for the stuff we're doing below (for do_floor):
        xi = (long)op1;
        xi -= (double)xi > op1 ? 1 : 0;
        return xi;
   */
  enum machine_mode fmode = GET_MODE (op1);
  enum machine_mode imode = GET_MODE (op0);
  rtx ireg, freg, label, tmp;

  /* reg = (long)op1 */
  ireg = gen_reg_rtx (imode);
  expand_fix (ireg, op1, 0);

  /* freg = (double)reg */
  freg = gen_reg_rtx (fmode);
  expand_float (freg, ireg, 0);

  /* ireg = (freg > op1) ? ireg - 1 : ireg */
  label = ix86_expand_sse_compare_and_jump (UNLE,
                                            freg, op1, !do_floor);
  tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
                             ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
  emit_move_insn (ireg, tmp);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (op0, ireg);
}
/* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
   result in OPERAND0.  */
void
ix86_expand_rint (rtx operand0, rtx operand1)
{
  /* C code for the stuff we're doing below:
        xa = fabs (operand1);
        if (!isless (xa, 2**52))
          return operand1;
        xa = xa + 2**52 - 2**52;
        return copysign (xa, operand1);
   */
  enum machine_mode mode = GET_MODE (operand0);
  rtx res, xa, label, TWO52, mask;

  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  /* xa = abs (operand1) */
  xa = ix86_expand_sse_fabs (res, &mask);

  /* if (!isless (xa, TWO52)) goto label; */
  TWO52 = ix86_gen_TWO52 (mode);
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
  xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);

  ix86_sse_copysign_to_positive (res, xa, res, mask);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
}
/* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
   into OPERAND0.  Sequence that works without relying on DImode truncation
   via cvttsd2siq that is only available on 64bit targets.  */
void
ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
{
  /* C code for the stuff we expand below.
        double xa = fabs (x), x2;
        if (!isless (xa, TWO52))
          return x;
        xa = xa + TWO52 - TWO52;
        x2 = copysign (xa, x);
     Compensate.  Floor:
        if (x2 > x)
          x2 -= 1;
     Compensate.  Ceil:
        if (x2 < x)
          x2 -= -1;
        return x2;
   */
  enum machine_mode mode = GET_MODE (operand0);
  rtx xa, TWO52, tmp, label, one, res, mask;

  TWO52 = ix86_gen_TWO52 (mode);

  /* Temporary for holding the result, initialized to the input
     operand to ease control flow.  */
  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  /* xa = abs (operand1) */
  xa = ix86_expand_sse_fabs (res, &mask);

  /* if (!isless (xa, TWO52)) goto label; */
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  /* xa = xa + TWO52 - TWO52; */
  xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
  xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);

  /* xa = copysign (xa, operand1) */
  ix86_sse_copysign_to_positive (xa, xa, res, mask);

  /* generate 1.0 or -1.0 */
  one = force_reg (mode,
                   const_double_from_real_value (do_floor
                                                 ? dconst1 : dconstm1, mode));

  /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
  tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
  emit_insn (gen_rtx_SET (VOIDmode, tmp,
                          gen_rtx_AND (mode, one, tmp)));
  /* We always need to subtract here to preserve signed zero.  */
  tmp = expand_simple_binop (mode, MINUS,
                             xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
  emit_move_insn (res, tmp);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
}
/* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
   into OPERAND0.  */
void
ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
{
  /* C code for the stuff we expand below.
        double xa = fabs (x), x2;
        if (!isless (xa, TWO52))
          return x;
        x2 = (double)(long)x;
     Compensate.  Floor:
        if (x2 > x)
          x2 -= 1;
     Compensate.  Ceil:
        if (x2 < x)
          x2 += 1;
        if (HONOR_SIGNED_ZEROS (mode))
          return copysign (x2, x);
        return x2;
   */
  enum machine_mode mode = GET_MODE (operand0);
  rtx xa, xi, TWO52, tmp, label, one, res, mask;

  TWO52 = ix86_gen_TWO52 (mode);

  /* Temporary for holding the result, initialized to the input
     operand to ease control flow.  */
  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  /* xa = abs (operand1) */
  xa = ix86_expand_sse_fabs (res, &mask);

  /* if (!isless (xa, TWO52)) goto label; */
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  /* xa = (double)(long)x */
  xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
  expand_fix (xi, res, 0);
  expand_float (xa, xi, 0);

  /* generate 1.0 */
  one = force_reg (mode, const_double_from_real_value (dconst1, mode));

  /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
  tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
  emit_insn (gen_rtx_SET (VOIDmode, tmp,
                          gen_rtx_AND (mode, one, tmp)));
  tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
                             xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
  emit_move_insn (res, tmp);

  if (HONOR_SIGNED_ZEROS (mode))
    ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
}
/* Expand SSE sequence for computing round from OPERAND1 storing
   into OPERAND0.  Sequence that works without relying on DImode truncation
   via cvttsd2siq that is only available on 64bit targets.  */
void
ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
{
  /* C code for the stuff we expand below.
        double xa = fabs (x), xa2, x2;
        if (!isless (xa, TWO52))
          return x;
     Using the absolute value and copying back sign makes
     -0.0 -> -0.0 correct.
        xa2 = xa + TWO52 - TWO52;
     Compensate.
        dxa = xa2 - xa;
        if (dxa <= -0.5)
          xa2 += 1;
        else if (dxa > 0.5)
          xa2 -= 1;
        x2 = copysign (xa2, x);
        return x2;
   */
  enum machine_mode mode = GET_MODE (operand0);
  rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;

  TWO52 = ix86_gen_TWO52 (mode);

  /* Temporary for holding the result, initialized to the input
     operand to ease control flow.  */
  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  /* xa = abs (operand1) */
  xa = ix86_expand_sse_fabs (res, &mask);

  /* if (!isless (xa, TWO52)) goto label; */
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  /* xa2 = xa + TWO52 - TWO52; */
  xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
  xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);

  /* dxa = xa2 - xa; */
  dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);

  /* generate 0.5, 1.0 and -0.5 */
  half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
  one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
  mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
                               0, OPTAB_DIRECT);

  /* Compensate.  */
  tmp = gen_reg_rtx (mode);
  /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
  tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
  emit_insn (gen_rtx_SET (VOIDmode, tmp,
                          gen_rtx_AND (mode, one, tmp)));
  xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
  /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
  tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
  emit_insn (gen_rtx_SET (VOIDmode, tmp,
                          gen_rtx_AND (mode, one, tmp)));
  xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);

  /* res = copysign (xa2, operand1) */
  ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
}
/* Expand SSE sequence for computing trunc from OPERAND1 storing
   into OPERAND0.  */
void
ix86_expand_trunc (rtx operand0, rtx operand1)
{
  /* C code for SSE variant we expand below.
        double xa = fabs (x), x2;
        if (!isless (xa, TWO52))
          return x;
        x2 = (double)(long)x;
        if (HONOR_SIGNED_ZEROS (mode))
          return copysign (x2, x);
        return x2;
   */
  enum machine_mode mode = GET_MODE (operand0);
  rtx xa, xi, TWO52, label, res, mask;

  TWO52 = ix86_gen_TWO52 (mode);

  /* Temporary for holding the result, initialized to the input
     operand to ease control flow.  */
  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  /* xa = abs (operand1) */
  xa = ix86_expand_sse_fabs (res, &mask);

  /* if (!isless (xa, TWO52)) goto label; */
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  /* x = (double)(long)x */
  xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
  expand_fix (xi, res, 0);
  expand_float (res, xi, 0);

  if (HONOR_SIGNED_ZEROS (mode))
    ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
}
/* Expand SSE sequence for computing trunc from OPERAND1 storing
   into OPERAND0.  Sequence that works without relying on DImode truncation
   via cvttsd2siq that is only available on 64bit targets.  */
void
ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
{
  enum machine_mode mode = GET_MODE (operand0);
  rtx xa, mask, TWO52, label, one, res, smask, tmp;

  /* C code for SSE variant we expand below.
        double xa = fabs (x), x2;
        if (!isless (xa, TWO52))
          return x;
        xa2 = xa + TWO52 - TWO52;
     Compensate:
        if (xa2 > xa)
          xa2 -= 1.0;
        x2 = copysign (xa2, x);
        return x2;
   */

  TWO52 = ix86_gen_TWO52 (mode);

  /* Temporary for holding the result, initialized to the input
     operand to ease control flow.  */
  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  /* xa = abs (operand1) */
  xa = ix86_expand_sse_fabs (res, &smask);

  /* if (!isless (xa, TWO52)) goto label; */
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  /* res = xa + TWO52 - TWO52; */
  tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
  tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
  emit_move_insn (res, tmp);

  /* generate 1.0 */
  one = force_reg (mode, const_double_from_real_value (dconst1, mode));

  /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
  mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
  emit_insn (gen_rtx_SET (VOIDmode, mask,
                          gen_rtx_AND (mode, mask, one)));
  tmp = expand_simple_binop (mode, MINUS,
                             res, mask, NULL_RTX, 0, OPTAB_DIRECT);
  emit_move_insn (res, tmp);

  /* res = copysign (res, operand1) */
  ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
}
/* Expand SSE sequence for computing round from OPERAND1 storing
   into OPERAND0.  */
void
ix86_expand_round (rtx operand0, rtx operand1)
{
  /* C code for the stuff we're doing below:
        double xa = fabs (x);
        if (!isless (xa, TWO52))
          return x;
        xa = (double)(long)(xa + nextafter (0.5, 0.0));
        return copysign (xa, x);
   */
  enum machine_mode mode = GET_MODE (operand0);
  rtx res, TWO52, xa, label, xi, half, mask;
  const struct real_format *fmt;
  REAL_VALUE_TYPE pred_half, half_minus_pred_half;

  /* Temporary for holding the result, initialized to the input
     operand to ease control flow.  */
  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  TWO52 = ix86_gen_TWO52 (mode);
  xa = ix86_expand_sse_fabs (res, &mask);
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  /* load nextafter (0.5, 0.0) */
  fmt = REAL_MODE_FORMAT (mode);
  real_2expN (&half_minus_pred_half, -(fmt->p) - 1);
  REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);

  /* xa = xa + 0.5 */
  half = force_reg (mode, const_double_from_real_value (pred_half, mode));
  xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);

  /* xa = (double)(int64_t)xa */
  xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
  expand_fix (xi, xa, 0);
  expand_float (xa, xi, 0);

  /* res = copysign (xa, operand1) */
  ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
}
/* Table of valid machine attributes.  */
static const struct attribute_spec ix86_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
  /* Stdcall attribute says callee is responsible for popping arguments
     if they are not variable.  */
  { "stdcall",   0, 0, false, true,  true,  ix86_handle_cconv_attribute },
  /* Fastcall attribute says callee is responsible for popping arguments
     if they are not variable.  */
  { "fastcall",  0, 0, false, true,  true,  ix86_handle_cconv_attribute },
  /* Cdecl attribute says the callee is a normal C declaration */
  { "cdecl",     0, 0, false, true,  true,  ix86_handle_cconv_attribute },
  /* Regparm attribute specifies how many integer arguments are to be
     passed in registers.  */
  { "regparm",   1, 1, false, true,  true,  ix86_handle_cconv_attribute },
  /* Sseregparm attribute says we are using x86_64 calling conventions
     for FP arguments.  */
  { "sseregparm", 0, 0, false, true, true,  ix86_handle_cconv_attribute },
  /* force_align_arg_pointer says this function realigns the stack at entry.  */
  { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
    false, true,  true, ix86_handle_cconv_attribute },
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
  { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
  { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
  { "shared",    0, 0, true,  false, false, ix86_handle_shared_attribute },
#endif
  { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
  { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
#ifdef SUBTARGET_ATTRIBUTE_TABLE
  SUBTARGET_ATTRIBUTE_TABLE,
#endif
  { NULL,        0, 0, false, false, false, NULL }
};
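
/* Example uses of the attributes above (illustration only):

     int __attribute__ ((stdcall)) f (int a, int b);
         the callee pops its own arguments, as in the Win32 API
     int __attribute__ ((regparm (3))) g (int a, int b, int c);
         a, b and c are passed in EAX, EDX and ECX instead of the stack
     struct s { char c; int i; } __attribute__ ((ms_struct));
         lay the struct out following the MSVC rules  */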
/* Initialize the GCC target structure.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
# undef TARGET_MERGE_DECL_ATTRIBUTES
# define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
#endif

#undef TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS ix86_init_builtins
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN ix86_expand_builtin

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION ix86_builtin_vectorized_function
#undef TARGET_VECTORIZE_BUILTIN_CONVERSION
#define TARGET_VECTORIZE_BUILTIN_CONVERSION ix86_builtin_conversion

#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue

#undef TARGET_ENCODE_SECTION_INFO
#ifndef SUBTARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
#else
#define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
#endif

#undef TARGET_ASM_OPEN_PAREN
#define TARGET_ASM_OPEN_PAREN ""
#undef TARGET_ASM_CLOSE_PAREN
#define TARGET_ASM_CLOSE_PAREN ""

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
#ifdef ASM_QUAD
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
#endif

#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
  ia32_multipass_dfa_lookahead

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall

#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif
#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
#define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_rtx_true

#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address

#undef TARGET_MS_BITFIELD_LAYOUT_P
#define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p

#if TARGET_MACHO
#undef TARGET_BINDS_LOCAL_P
#define TARGET_BINDS_LOCAL_P darwin_binds_local_p
#endif

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START x86_file_start

#undef TARGET_DEFAULT_TARGET_FLAGS
#define TARGET_DEFAULT_TARGET_FLAGS \
  (TARGET_DEFAULT \
   | TARGET_64BIT_DEFAULT \
   | TARGET_SUBTARGET_DEFAULT \
   | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)

#undef TARGET_HANDLE_OPTION
#define TARGET_HANDLE_OPTION ix86_handle_option

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS ix86_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST ix86_address_cost

#undef TARGET_FIXED_CONDITION_CODE_REGS
#define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
#undef TARGET_CC_MODES_COMPATIBLE
#define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list

#undef TARGET_MD_ASM_CLOBBERS
#define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers

#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
#undef TARGET_INTERNAL_ARG_POINTER
#define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
#undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
#define TARGET_DWARF_HANDLE_FRAME_UNSPEC ix86_dwarf_handle_frame_unspec

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p

#ifdef HAVE_AS_TLS
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
#endif

#ifdef SUBTARGET_INSERT_ATTRIBUTES
#undef TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
#endif

#undef TARGET_MANGLE_FUNDAMENTAL_TYPE
#define TARGET_MANGLE_FUNDAMENTAL_TYPE ix86_mangle_fundamental_type

#undef TARGET_STACK_PROTECT_FAIL
#define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE ix86_function_value

struct gcc_target targetm = TARGET_INITIALIZER;

#include "gt-i386.h"