1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to
19 the Free Software Foundation, 51 Franklin Street, Fifth Floor,
20 Boston, MA 02110-1301, USA. */
24 #include "coretypes.h"
30 #include "hard-reg-set.h"
32 #include "insn-config.h"
33 #include "conditions.h"
35 #include "insn-codes.h"
36 #include "insn-attr.h"
44 #include "basic-block.h"
47 #include "target-def.h"
48 #include "langhooks.h"
50 #include "tree-gimple.h"
52 #include "tm-constrs.h"
55 #ifndef CHECK_STACK_LIMIT /* supply a default only when nothing earlier defined it */
56 #define CHECK_STACK_LIMIT (-1)
59 /* Return index of given mode in mult and division cost tables (QImode, HImode, SImode, DImode map to 0, 1, 2, 3, the order in which those tables list their entries). */
60 #define MODE_INDEX(mode) \
61 ((mode) == QImode ? 0 \
62 : (mode) == HImode ? 1 \
63 : (mode) == SImode ? 2 \
64 : (mode) == DImode ? 3 \
67 /* Processor costs (relative to an add) */
68 /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes, so COSTS_N_BYTES (2), the size of an add, equals COSTS_N_INSNS (1). */
69 #define COSTS_N_BYTES(N) ((N) * 2)
71 #define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}} /* filler string-operation entry: libcall as the leading algorithm and as the only {-1, ...} table row */
74 struct processor_costs size_cost = { /* costs for tuning for size; COSTS_N_BYTES entries are byte counts (an add is 2 bytes) */
75 COSTS_N_BYTES (2), /* cost of an add instruction */
76 COSTS_N_BYTES (3), /* cost of a lea instruction */
77 COSTS_N_BYTES (2), /* variable shift costs */
78 COSTS_N_BYTES (3), /* constant shift costs */
79 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
80 COSTS_N_BYTES (3), /* HI */
81 COSTS_N_BYTES (3), /* SI */
82 COSTS_N_BYTES (3), /* DI */
83 COSTS_N_BYTES (5)}, /* other */
84 0, /* cost of multiply per each bit set */
85 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
86 COSTS_N_BYTES (3), /* HI */
87 COSTS_N_BYTES (3), /* SI */
88 COSTS_N_BYTES (3), /* DI */
89 COSTS_N_BYTES (5)}, /* other */
90 COSTS_N_BYTES (3), /* cost of movsx */
91 COSTS_N_BYTES (3), /* cost of movzx */
94 2, /* cost for loading QImode using movzbl */
95 {2, 2, 2}, /* cost of loading integer registers
96 in QImode, HImode and SImode.
97 Relative to reg-reg move (2). */
98 {2, 2, 2}, /* cost of storing integer registers */
99 2, /* cost of reg,reg fld/fst */
100 {2, 2, 2}, /* cost of loading fp registers
101 in SFmode, DFmode and XFmode */
102 {2, 2, 2}, /* cost of storing fp registers
103 in SFmode, DFmode and XFmode */
104 3, /* cost of moving MMX register */
105 {3, 3}, /* cost of loading MMX registers
106 in SImode and DImode */
107 {3, 3}, /* cost of storing MMX registers
108 in SImode and DImode */
109 3, /* cost of moving SSE register */
110 {3, 3, 3}, /* cost of loading SSE registers
111 in SImode, DImode and TImode */
112 {3, 3, 3}, /* cost of storing SSE registers
113 in SImode, DImode and TImode */
114 3, /* MMX or SSE register to integer */
115 0, /* size of prefetch block */
116 0, /* number of parallel prefetches */
118 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
119 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
120 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
121 COSTS_N_BYTES (2), /* cost of FABS instruction. */
122 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
123 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
124 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}, /* NOTE(review): presumably the memcpy strategy tables, with the memset ones below -- confirm against struct processor_costs */
125 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
126 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
127 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}}
130 /* Processor costs (relative to an add) */
132 struct processor_costs i386_cost = { /* 386 specific costs; COSTS_N_INSNS entries are relative to an add */
133 COSTS_N_INSNS (1), /* cost of an add instruction */
134 COSTS_N_INSNS (1), /* cost of a lea instruction */
135 COSTS_N_INSNS (3), /* variable shift costs */
136 COSTS_N_INSNS (2), /* constant shift costs */
137 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
138 COSTS_N_INSNS (6), /* HI */
139 COSTS_N_INSNS (6), /* SI */
140 COSTS_N_INSNS (6), /* DI */
141 COSTS_N_INSNS (6)}, /* other */
142 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
143 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
144 COSTS_N_INSNS (23), /* HI */
145 COSTS_N_INSNS (23), /* SI */
146 COSTS_N_INSNS (23), /* DI */
147 COSTS_N_INSNS (23)}, /* other */
148 COSTS_N_INSNS (3), /* cost of movsx */
149 COSTS_N_INSNS (2), /* cost of movzx */
150 15, /* "large" insn */
152 4, /* cost for loading QImode using movzbl */
153 {2, 4, 2}, /* cost of loading integer registers
154 in QImode, HImode and SImode.
155 Relative to reg-reg move (2). */
156 {2, 4, 2}, /* cost of storing integer registers */
157 2, /* cost of reg,reg fld/fst */
158 {8, 8, 8}, /* cost of loading fp registers
159 in SFmode, DFmode and XFmode */
160 {8, 8, 8}, /* cost of storing fp registers
161 in SFmode, DFmode and XFmode */
162 2, /* cost of moving MMX register */
163 {4, 8}, /* cost of loading MMX registers
164 in SImode and DImode */
165 {4, 8}, /* cost of storing MMX registers
166 in SImode and DImode */
167 2, /* cost of moving SSE register */
168 {4, 8, 16}, /* cost of loading SSE registers
169 in SImode, DImode and TImode */
170 {4, 8, 16}, /* cost of storing SSE registers
171 in SImode, DImode and TImode */
172 3, /* MMX or SSE register to integer */
173 0, /* size of prefetch block */
174 0, /* number of parallel prefetches */
176 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
177 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
178 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
179 COSTS_N_INSNS (22), /* cost of FABS instruction. */
180 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
181 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
182 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}, /* NOTE(review): presumably the memcpy strategy tables, with the memset ones below -- confirm against struct processor_costs */
183 DUMMY_STRINGOP_ALGS},
184 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
185 DUMMY_STRINGOP_ALGS},
189 struct processor_costs i486_cost = { /* 486 specific costs; COSTS_N_INSNS entries are relative to an add */
190 COSTS_N_INSNS (1), /* cost of an add instruction */
191 COSTS_N_INSNS (1), /* cost of a lea instruction */
192 COSTS_N_INSNS (3), /* variable shift costs */
193 COSTS_N_INSNS (2), /* constant shift costs */
194 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
195 COSTS_N_INSNS (12), /* HI */
196 COSTS_N_INSNS (12), /* SI */
197 COSTS_N_INSNS (12), /* DI */
198 COSTS_N_INSNS (12)}, /* other */
199 1, /* cost of multiply per each bit set; NOTE(review): a raw 1 here, whereas i386_cost writes COSTS_N_INSNS (1) in this slot -- confirm the unscaled value is intended */
200 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
201 COSTS_N_INSNS (40), /* HI */
202 COSTS_N_INSNS (40), /* SI */
203 COSTS_N_INSNS (40), /* DI */
204 COSTS_N_INSNS (40)}, /* other */
205 COSTS_N_INSNS (3), /* cost of movsx */
206 COSTS_N_INSNS (2), /* cost of movzx */
207 15, /* "large" insn */
209 4, /* cost for loading QImode using movzbl */
210 {2, 4, 2}, /* cost of loading integer registers
211 in QImode, HImode and SImode.
212 Relative to reg-reg move (2). */
213 {2, 4, 2}, /* cost of storing integer registers */
214 2, /* cost of reg,reg fld/fst */
215 {8, 8, 8}, /* cost of loading fp registers
216 in SFmode, DFmode and XFmode */
217 {8, 8, 8}, /* cost of storing fp registers
218 in SFmode, DFmode and XFmode */
219 2, /* cost of moving MMX register */
220 {4, 8}, /* cost of loading MMX registers
221 in SImode and DImode */
222 {4, 8}, /* cost of storing MMX registers
223 in SImode and DImode */
224 2, /* cost of moving SSE register */
225 {4, 8, 16}, /* cost of loading SSE registers
226 in SImode, DImode and TImode */
227 {4, 8, 16}, /* cost of storing SSE registers
228 in SImode, DImode and TImode */
229 3, /* MMX or SSE register to integer */
230 0, /* size of prefetch block */
231 0, /* number of parallel prefetches */
233 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
234 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
235 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
236 COSTS_N_INSNS (3), /* cost of FABS instruction. */
237 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
238 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
239 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}}, /* NOTE(review): presumably the memcpy strategy tables, with the memset ones below -- confirm against struct processor_costs */
240 DUMMY_STRINGOP_ALGS},
241 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
246 struct processor_costs pentium_cost = { /* Pentium cost table (per its name); same slot order as i386_cost */
247 COSTS_N_INSNS (1), /* cost of an add instruction */
248 COSTS_N_INSNS (1), /* cost of a lea instruction */
249 COSTS_N_INSNS (4), /* variable shift costs */
250 COSTS_N_INSNS (1), /* constant shift costs */
251 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
252 COSTS_N_INSNS (11), /* HI */
253 COSTS_N_INSNS (11), /* SI */
254 COSTS_N_INSNS (11), /* DI */
255 COSTS_N_INSNS (11)}, /* other */
256 0, /* cost of multiply per each bit set */
257 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
258 COSTS_N_INSNS (25), /* HI */
259 COSTS_N_INSNS (25), /* SI */
260 COSTS_N_INSNS (25), /* DI */
261 COSTS_N_INSNS (25)}, /* other */
262 COSTS_N_INSNS (3), /* cost of movsx */
263 COSTS_N_INSNS (2), /* cost of movzx */
264 8, /* "large" insn */
266 6, /* cost for loading QImode using movzbl */
267 {2, 4, 2}, /* cost of loading integer registers
268 in QImode, HImode and SImode.
269 Relative to reg-reg move (2). */
270 {2, 4, 2}, /* cost of storing integer registers */
271 2, /* cost of reg,reg fld/fst */
272 {2, 2, 6}, /* cost of loading fp registers
273 in SFmode, DFmode and XFmode */
274 {4, 4, 6}, /* cost of storing fp registers
275 in SFmode, DFmode and XFmode */
276 8, /* cost of moving MMX register */
277 {8, 8}, /* cost of loading MMX registers
278 in SImode and DImode */
279 {8, 8}, /* cost of storing MMX registers
280 in SImode and DImode */
281 2, /* cost of moving SSE register */
282 {4, 8, 16}, /* cost of loading SSE registers
283 in SImode, DImode and TImode */
284 {4, 8, 16}, /* cost of storing SSE registers
285 in SImode, DImode and TImode */
286 3, /* MMX or SSE register to integer */
287 0, /* size of prefetch block */
288 0, /* number of parallel prefetches */
290 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
291 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
292 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
293 COSTS_N_INSNS (1), /* cost of FABS instruction. */
294 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
295 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
296 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}}, /* NOTE(review): presumably the memcpy strategy tables, with the memset ones below -- confirm against struct processor_costs */
297 DUMMY_STRINGOP_ALGS},
298 {{libcall, {{-1, rep_prefix_4_byte}}},
303 struct processor_costs pentiumpro_cost = { /* PentiumPro specific costs; same slot order as i386_cost */
304 COSTS_N_INSNS (1), /* cost of an add instruction */
305 COSTS_N_INSNS (1), /* cost of a lea instruction */
306 COSTS_N_INSNS (1), /* variable shift costs */
307 COSTS_N_INSNS (1), /* constant shift costs */
308 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
309 COSTS_N_INSNS (4), /* HI */
310 COSTS_N_INSNS (4), /* SI */
311 COSTS_N_INSNS (4), /* DI */
312 COSTS_N_INSNS (4)}, /* other */
313 0, /* cost of multiply per each bit set */
314 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
315 COSTS_N_INSNS (17), /* HI */
316 COSTS_N_INSNS (17), /* SI */
317 COSTS_N_INSNS (17), /* DI */
318 COSTS_N_INSNS (17)}, /* other */
319 COSTS_N_INSNS (1), /* cost of movsx */
320 COSTS_N_INSNS (1), /* cost of movzx */
321 8, /* "large" insn */
323 2, /* cost for loading QImode using movzbl */
324 {4, 4, 4}, /* cost of loading integer registers
325 in QImode, HImode and SImode.
326 Relative to reg-reg move (2). */
327 {2, 2, 2}, /* cost of storing integer registers */
328 2, /* cost of reg,reg fld/fst */
329 {2, 2, 6}, /* cost of loading fp registers
330 in SFmode, DFmode and XFmode */
331 {4, 4, 6}, /* cost of storing fp registers
332 in SFmode, DFmode and XFmode */
333 2, /* cost of moving MMX register */
334 {2, 2}, /* cost of loading MMX registers
335 in SImode and DImode */
336 {2, 2}, /* cost of storing MMX registers
337 in SImode and DImode */
338 2, /* cost of moving SSE register */
339 {2, 2, 8}, /* cost of loading SSE registers
340 in SImode, DImode and TImode */
341 {2, 2, 8}, /* cost of storing SSE registers
342 in SImode, DImode and TImode */
343 3, /* MMX or SSE register to integer */
344 32, /* size of prefetch block */
345 6, /* number of parallel prefetches */
347 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
348 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
349 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
350 COSTS_N_INSNS (2), /* cost of FABS instruction. */
351 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
352 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
353 /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes (we ensure
354 the alignment). For small blocks an inline loop is still a noticeable win; for bigger
355 blocks either rep movsl or rep movsb is the way to go. Rep movsb apparently has a
356 more expensive startup time in the CPU, but after 4K the difference is down in the noise.
358 {{rep_prefix_4_byte, {{128, loop}, {1024, unrolled_loop},
359 {8192, rep_prefix_4_byte}, {-1, rep_prefix_1_byte}}},
360 DUMMY_STRINGOP_ALGS},
361 {{rep_prefix_4_byte, {{1024, unrolled_loop},
362 {8192, rep_prefix_4_byte}, {-1, libcall}}},
367 struct processor_costs geode_cost = { /* Geode cost table (per its name); same slot order as i386_cost */
368 COSTS_N_INSNS (1), /* cost of an add instruction */
369 COSTS_N_INSNS (1), /* cost of a lea instruction */
370 COSTS_N_INSNS (2), /* variable shift costs */
371 COSTS_N_INSNS (1), /* constant shift costs */
372 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
373 COSTS_N_INSNS (4), /* HI */
374 COSTS_N_INSNS (7), /* SI */
375 COSTS_N_INSNS (7), /* DI */
376 COSTS_N_INSNS (7)}, /* other */
377 0, /* cost of multiply per each bit set */
378 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
379 COSTS_N_INSNS (23), /* HI */
380 COSTS_N_INSNS (39), /* SI */
381 COSTS_N_INSNS (39), /* DI */
382 COSTS_N_INSNS (39)}, /* other */
383 COSTS_N_INSNS (1), /* cost of movsx */
384 COSTS_N_INSNS (1), /* cost of movzx */
385 8, /* "large" insn */
387 1, /* cost for loading QImode using movzbl */
388 {1, 1, 1}, /* cost of loading integer registers
389 in QImode, HImode and SImode.
390 Relative to reg-reg move (2). */
391 {1, 1, 1}, /* cost of storing integer registers */
392 1, /* cost of reg,reg fld/fst */
393 {1, 1, 1}, /* cost of loading fp registers
394 in SFmode, DFmode and XFmode */
395 {4, 6, 6}, /* cost of storing fp registers
396 in SFmode, DFmode and XFmode */
398 1, /* cost of moving MMX register */
399 {1, 1}, /* cost of loading MMX registers
400 in SImode and DImode */
401 {1, 1}, /* cost of storing MMX registers
402 in SImode and DImode */
403 1, /* cost of moving SSE register */
404 {1, 1, 1}, /* cost of loading SSE registers
405 in SImode, DImode and TImode */
406 {1, 1, 1}, /* cost of storing SSE registers
407 in SImode, DImode and TImode */
408 1, /* MMX or SSE register to integer */
409 32, /* size of prefetch block */
410 1, /* number of parallel prefetches */
412 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
413 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
414 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
415 COSTS_N_INSNS (1), /* cost of FABS instruction. */
416 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
417 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
418 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}}, /* NOTE(review): presumably the memcpy strategy tables, with the memset ones below -- confirm against struct processor_costs */
419 DUMMY_STRINGOP_ALGS},
420 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
425 struct processor_costs k6_cost = { /* K6 cost table (per its name); same slot order as i386_cost */
426 COSTS_N_INSNS (1), /* cost of an add instruction */
427 COSTS_N_INSNS (2), /* cost of a lea instruction */
428 COSTS_N_INSNS (1), /* variable shift costs */
429 COSTS_N_INSNS (1), /* constant shift costs */
430 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
431 COSTS_N_INSNS (3), /* HI */
432 COSTS_N_INSNS (3), /* SI */
433 COSTS_N_INSNS (3), /* DI */
434 COSTS_N_INSNS (3)}, /* other */
435 0, /* cost of multiply per each bit set */
436 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
437 COSTS_N_INSNS (18), /* HI */
438 COSTS_N_INSNS (18), /* SI */
439 COSTS_N_INSNS (18), /* DI */
440 COSTS_N_INSNS (18)}, /* other */
441 COSTS_N_INSNS (2), /* cost of movsx */
442 COSTS_N_INSNS (2), /* cost of movzx */
443 8, /* "large" insn */
445 3, /* cost for loading QImode using movzbl */
446 {4, 5, 4}, /* cost of loading integer registers
447 in QImode, HImode and SImode.
448 Relative to reg-reg move (2). */
449 {2, 3, 2}, /* cost of storing integer registers */
450 4, /* cost of reg,reg fld/fst */
451 {6, 6, 6}, /* cost of loading fp registers
452 in SFmode, DFmode and XFmode */
453 {4, 4, 4}, /* cost of storing fp registers
454 in SFmode, DFmode and XFmode */
455 2, /* cost of moving MMX register */
456 {2, 2}, /* cost of loading MMX registers
457 in SImode and DImode */
458 {2, 2}, /* cost of storing MMX registers
459 in SImode and DImode */
460 2, /* cost of moving SSE register */
461 {2, 2, 8}, /* cost of loading SSE registers
462 in SImode, DImode and TImode */
463 {2, 2, 8}, /* cost of storing SSE registers
464 in SImode, DImode and TImode */
465 6, /* MMX or SSE register to integer */
466 32, /* size of prefetch block */
467 1, /* number of parallel prefetches */
469 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
470 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
471 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
472 COSTS_N_INSNS (2), /* cost of FABS instruction. */
473 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
474 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
475 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}}, /* NOTE(review): presumably the memcpy strategy tables, with the memset ones below -- confirm against struct processor_costs */
476 DUMMY_STRINGOP_ALGS},
477 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
482 struct processor_costs athlon_cost = { /* Athlon specific costs; same slot order as i386_cost */
483 COSTS_N_INSNS (1), /* cost of an add instruction */
484 COSTS_N_INSNS (2), /* cost of a lea instruction */
485 COSTS_N_INSNS (1), /* variable shift costs */
486 COSTS_N_INSNS (1), /* constant shift costs */
487 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
488 COSTS_N_INSNS (5), /* HI */
489 COSTS_N_INSNS (5), /* SI */
490 COSTS_N_INSNS (5), /* DI */
491 COSTS_N_INSNS (5)}, /* other */
492 0, /* cost of multiply per each bit set */
493 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
494 COSTS_N_INSNS (26), /* HI */
495 COSTS_N_INSNS (42), /* SI */
496 COSTS_N_INSNS (74), /* DI */
497 COSTS_N_INSNS (74)}, /* other */
498 COSTS_N_INSNS (1), /* cost of movsx */
499 COSTS_N_INSNS (1), /* cost of movzx */
500 8, /* "large" insn */
502 4, /* cost for loading QImode using movzbl */
503 {3, 4, 3}, /* cost of loading integer registers
504 in QImode, HImode and SImode.
505 Relative to reg-reg move (2). */
506 {3, 4, 3}, /* cost of storing integer registers */
507 4, /* cost of reg,reg fld/fst */
508 {4, 4, 12}, /* cost of loading fp registers
509 in SFmode, DFmode and XFmode */
510 {6, 6, 8}, /* cost of storing fp registers
511 in SFmode, DFmode and XFmode */
512 2, /* cost of moving MMX register */
513 {4, 4}, /* cost of loading MMX registers
514 in SImode and DImode */
515 {4, 4}, /* cost of storing MMX registers
516 in SImode and DImode */
517 2, /* cost of moving SSE register */
518 {4, 4, 6}, /* cost of loading SSE registers
519 in SImode, DImode and TImode */
520 {4, 4, 5}, /* cost of storing SSE registers
521 in SImode, DImode and TImode */
522 5, /* MMX or SSE register to integer */
523 64, /* size of prefetch block */
524 6, /* number of parallel prefetches */
526 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
527 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
528 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
529 COSTS_N_INSNS (2), /* cost of FABS instruction. */
530 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
531 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
532 /* For some reason, Athlon deals better with REP prefix (relative to loops)
533 compared to K8. Alignment becomes important after 8 bytes for memcpy and
534 128 bytes for memset. */
535 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}}, /* NOTE(review): presumably the memcpy strategy tables, with the memset ones below -- confirm against struct processor_costs */
536 DUMMY_STRINGOP_ALGS},
537 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
542 struct processor_costs k8_cost = { /* K8 specific costs; same slot order as i386_cost */
543 COSTS_N_INSNS (1), /* cost of an add instruction */
544 COSTS_N_INSNS (2), /* cost of a lea instruction */
545 COSTS_N_INSNS (1), /* variable shift costs */
546 COSTS_N_INSNS (1), /* constant shift costs */
547 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
548 COSTS_N_INSNS (4), /* HI */
549 COSTS_N_INSNS (3), /* SI */
550 COSTS_N_INSNS (4), /* DI */
551 COSTS_N_INSNS (5)}, /* other */
552 0, /* cost of multiply per each bit set */
553 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
554 COSTS_N_INSNS (26), /* HI */
555 COSTS_N_INSNS (42), /* SI */
556 COSTS_N_INSNS (74), /* DI */
557 COSTS_N_INSNS (74)}, /* other */
558 COSTS_N_INSNS (1), /* cost of movsx */
559 COSTS_N_INSNS (1), /* cost of movzx */
560 8, /* "large" insn */
562 4, /* cost for loading QImode using movzbl */
563 {3, 4, 3}, /* cost of loading integer registers
564 in QImode, HImode and SImode.
565 Relative to reg-reg move (2). */
566 {3, 4, 3}, /* cost of storing integer registers */
567 4, /* cost of reg,reg fld/fst */
568 {4, 4, 12}, /* cost of loading fp registers
569 in SFmode, DFmode and XFmode */
570 {6, 6, 8}, /* cost of storing fp registers
571 in SFmode, DFmode and XFmode */
572 2, /* cost of moving MMX register */
573 {3, 3}, /* cost of loading MMX registers
574 in SImode and DImode */
575 {4, 4}, /* cost of storing MMX registers
576 in SImode and DImode */
577 2, /* cost of moving SSE register */
578 {4, 3, 6}, /* cost of loading SSE registers
579 in SImode, DImode and TImode */
580 {4, 4, 5}, /* cost of storing SSE registers
581 in SImode, DImode and TImode */
582 5, /* MMX or SSE register to integer */
583 64, /* size of prefetch block */
584 /* New AMD processors never drop prefetches; if they cannot be performed
585 immediately, they are queued. We set number of simultaneous prefetches
586 to a large constant to reflect this (it probably is not a good idea not
587 to limit number of prefetches at all, as their execution also takes some
589 100, /* number of parallel prefetches */
591 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
592 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
593 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
594 COSTS_N_INSNS (2), /* cost of FABS instruction. */
595 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
596 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
597 /* K8 has optimized REP instruction for medium sized blocks, but for very small
598 blocks it is better to use a loop. For large blocks, libcall can do
599 non-temporal accesses and beat inline considerably. */
600 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}}, /* NOTE(review): presumably the memcpy strategy tables, with the memset ones below -- confirm against struct processor_costs */
601 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
602 {{libcall, {{8, loop}, {24, unrolled_loop},
603 {2048, rep_prefix_4_byte}, {-1, libcall}}},
604 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}}
607 struct processor_costs amdfam10_cost = { /* AMDFAM10 specific costs; same slot order as i386_cost */
608 COSTS_N_INSNS (1), /* cost of an add instruction */
609 COSTS_N_INSNS (2), /* cost of a lea instruction */
610 COSTS_N_INSNS (1), /* variable shift costs */
611 COSTS_N_INSNS (1), /* constant shift costs */
612 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
613 COSTS_N_INSNS (4), /* HI */
614 COSTS_N_INSNS (3), /* SI */
615 COSTS_N_INSNS (4), /* DI */
616 COSTS_N_INSNS (5)}, /* other */
617 0, /* cost of multiply per each bit set */
618 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
619 COSTS_N_INSNS (35), /* HI */
620 COSTS_N_INSNS (51), /* SI */
621 COSTS_N_INSNS (83), /* DI */
622 COSTS_N_INSNS (83)}, /* other */
623 COSTS_N_INSNS (1), /* cost of movsx */
624 COSTS_N_INSNS (1), /* cost of movzx */
625 8, /* "large" insn */
627 4, /* cost for loading QImode using movzbl */
628 {3, 4, 3}, /* cost of loading integer registers
629 in QImode, HImode and SImode.
630 Relative to reg-reg move (2). */
631 {3, 4, 3}, /* cost of storing integer registers */
632 4, /* cost of reg,reg fld/fst */
633 {4, 4, 12}, /* cost of loading fp registers
634 in SFmode, DFmode and XFmode */
635 {6, 6, 8}, /* cost of storing fp registers
636 in SFmode, DFmode and XFmode */
637 2, /* cost of moving MMX register */
638 {3, 3}, /* cost of loading MMX registers
639 in SImode and DImode */
640 {4, 4}, /* cost of storing MMX registers
641 in SImode and DImode */
642 2, /* cost of moving SSE register */
643 {4, 4, 3}, /* cost of loading SSE registers
644 in SImode, DImode and TImode */
645 {4, 4, 5}, /* cost of storing SSE registers
646 in SImode, DImode and TImode */
647 3, /* MMX or SSE register to integer */
649 MOVD reg64, xmmreg Double FSTORE 4
650 MOVD reg32, xmmreg Double FSTORE 4
652 MOVD reg64, xmmreg Double FADD 3
654 MOVD reg32, xmmreg Double FADD 3
656 64, /* size of prefetch block */
657 /* New AMD processors never drop prefetches; if they cannot be performed
658 immediately, they are queued. We set number of simultaneous prefetches
659 to a large constant to reflect this (it probably is not a good idea not
660 to limit number of prefetches at all, as their execution also takes some
662 100, /* number of parallel prefetches */
664 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
665 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
666 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
667 COSTS_N_INSNS (2), /* cost of FABS instruction. */
668 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
669 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
671 /* AMDFAM10 has optimized REP instruction for medium sized blocks, but for
672 very small blocks it is better to use a loop. For large blocks, libcall can
673 do non-temporal accesses and beat inline considerably. */
674 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}}, /* NOTE(review): presumably the memcpy strategy tables, with the memset ones below -- confirm against struct processor_costs */
675 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
676 {{libcall, {{8, loop}, {24, unrolled_loop},
677 {2048, rep_prefix_4_byte}, {-1, libcall}}},
678 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}}
682 struct processor_costs pentium4_cost = { /* Pentium 4 cost table (per its name); same slot order as i386_cost */
683 COSTS_N_INSNS (1), /* cost of an add instruction */
684 COSTS_N_INSNS (3), /* cost of a lea instruction */
685 COSTS_N_INSNS (4), /* variable shift costs */
686 COSTS_N_INSNS (4), /* constant shift costs */
687 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
688 COSTS_N_INSNS (15), /* HI */
689 COSTS_N_INSNS (15), /* SI */
690 COSTS_N_INSNS (15), /* DI */
691 COSTS_N_INSNS (15)}, /* other */
692 0, /* cost of multiply per each bit set */
693 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
694 COSTS_N_INSNS (56), /* HI */
695 COSTS_N_INSNS (56), /* SI */
696 COSTS_N_INSNS (56), /* DI */
697 COSTS_N_INSNS (56)}, /* other */
698 COSTS_N_INSNS (1), /* cost of movsx */
699 COSTS_N_INSNS (1), /* cost of movzx */
700 16, /* "large" insn */
702 2, /* cost for loading QImode using movzbl */
703 {4, 5, 4}, /* cost of loading integer registers
704 in QImode, HImode and SImode.
705 Relative to reg-reg move (2). */
706 {2, 3, 2}, /* cost of storing integer registers */
707 2, /* cost of reg,reg fld/fst */
708 {2, 2, 6}, /* cost of loading fp registers
709 in SFmode, DFmode and XFmode */
710 {4, 4, 6}, /* cost of storing fp registers
711 in SFmode, DFmode and XFmode */
712 2, /* cost of moving MMX register */
713 {2, 2}, /* cost of loading MMX registers
714 in SImode and DImode */
715 {2, 2}, /* cost of storing MMX registers
716 in SImode and DImode */
717 12, /* cost of moving SSE register */
718 {12, 12, 12}, /* cost of loading SSE registers
719 in SImode, DImode and TImode */
720 {2, 2, 8}, /* cost of storing SSE registers
721 in SImode, DImode and TImode */
722 10, /* MMX or SSE register to integer */
723 64, /* size of prefetch block */
724 6, /* number of parallel prefetches */
726 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
727 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
728 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
729 COSTS_N_INSNS (2), /* cost of FABS instruction. */
730 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
731 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
732 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}}, /* NOTE(review): presumably the memcpy strategy tables, with the memset ones below -- confirm against struct processor_costs */
733 DUMMY_STRINGOP_ALGS},
734 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
736 DUMMY_STRINGOP_ALGS},
740 struct processor_costs nocona_cost = { /* Nocona cost table (per its name); same slot order as i386_cost */
741 COSTS_N_INSNS (1), /* cost of an add instruction */
742 COSTS_N_INSNS (1), /* cost of a lea instruction */
743 COSTS_N_INSNS (1), /* variable shift costs */
744 COSTS_N_INSNS (1), /* constant shift costs */
745 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
746 COSTS_N_INSNS (10), /* HI */
747 COSTS_N_INSNS (10), /* SI */
748 COSTS_N_INSNS (10), /* DI */
749 COSTS_N_INSNS (10)}, /* other */
750 0, /* cost of multiply per each bit set */
751 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
752 COSTS_N_INSNS (66), /* HI */
753 COSTS_N_INSNS (66), /* SI */
754 COSTS_N_INSNS (66), /* DI */
755 COSTS_N_INSNS (66)}, /* other */
756 COSTS_N_INSNS (1), /* cost of movsx */
757 COSTS_N_INSNS (1), /* cost of movzx */
758 16, /* "large" insn */
760 4, /* cost for loading QImode using movzbl */
761 {4, 4, 4}, /* cost of loading integer registers
762 in QImode, HImode and SImode.
763 Relative to reg-reg move (2). */
764 {4, 4, 4}, /* cost of storing integer registers */
765 3, /* cost of reg,reg fld/fst */
766 {12, 12, 12}, /* cost of loading fp registers
767 in SFmode, DFmode and XFmode */
768 {4, 4, 4}, /* cost of storing fp registers
769 in SFmode, DFmode and XFmode */
770 6, /* cost of moving MMX register */
771 {12, 12}, /* cost of loading MMX registers
772 in SImode and DImode */
773 {12, 12}, /* cost of storing MMX registers
774 in SImode and DImode */
775 6, /* cost of moving SSE register */
776 {12, 12, 12}, /* cost of loading SSE registers
777 in SImode, DImode and TImode */
778 {12, 12, 12}, /* cost of storing SSE registers
779 in SImode, DImode and TImode */
780 8, /* MMX or SSE register to integer */
781 128, /* size of prefetch block */
782 8, /* number of parallel prefetches */
784 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
785 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
786 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
787 COSTS_N_INSNS (3), /* cost of FABS instruction. */
788 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
789 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
790 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}}, /* NOTE(review): presumably the memcpy strategy tables, with the memset ones below -- confirm against struct processor_costs */
791 {libcall, {{32, loop}, {20000, rep_prefix_8_byte},
792 {100000, unrolled_loop}, {-1, libcall}}}},
793 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
795 {libcall, {{24, loop}, {64, unrolled_loop},
796 {8192, rep_prefix_8_byte}, {-1, libcall}}}}
800 struct processor_costs core2_cost = {
801 COSTS_N_INSNS (1), /* cost of an add instruction */
802 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
803 COSTS_N_INSNS (1), /* variable shift costs */
804 COSTS_N_INSNS (1), /* constant shift costs */
805 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
806 COSTS_N_INSNS (3), /* HI */
807 COSTS_N_INSNS (3), /* SI */
808 COSTS_N_INSNS (3), /* DI */
809 COSTS_N_INSNS (3)}, /* other */
810 0, /* cost of multiply per each bit set */
811 {COSTS_N_INSNS (22), /* cost of a divide/mod for QI */
812 COSTS_N_INSNS (22), /* HI */
813 COSTS_N_INSNS (22), /* SI */
814 COSTS_N_INSNS (22), /* DI */
815 COSTS_N_INSNS (22)}, /* other */
816 COSTS_N_INSNS (1), /* cost of movsx */
817 COSTS_N_INSNS (1), /* cost of movzx */
818 8, /* "large" insn */
820 2, /* cost for loading QImode using movzbl */
821 {6, 6, 6}, /* cost of loading integer registers
822 in QImode, HImode and SImode.
823 Relative to reg-reg move (2). */
824 {4, 4, 4}, /* cost of storing integer registers */
825 2, /* cost of reg,reg fld/fst */
826 {6, 6, 6}, /* cost of loading fp registers
827 in SFmode, DFmode and XFmode */
828 {4, 4, 4}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
829 2, /* cost of moving MMX register */
830 {6, 6}, /* cost of loading MMX registers
831 in SImode and DImode */
832 {4, 4}, /* cost of storing MMX registers
833 in SImode and DImode */
834 2, /* cost of moving SSE register */
835 {6, 6, 6}, /* cost of loading SSE registers
836 in SImode, DImode and TImode */
837 {4, 4, 4}, /* cost of storing SSE registers
838 in SImode, DImode and TImode */
839 2, /* MMX or SSE register to integer */
840 128, /* size of prefetch block */
841 8, /* number of parallel prefetches */
843 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
844 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
845 COSTS_N_INSNS (32), /* cost of FDIV instruction. */
846 COSTS_N_INSNS (1), /* cost of FABS instruction. */
847 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
848 COSTS_N_INSNS (58), /* cost of FSQRT instruction. */
849 {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
850 {libcall, {{32, loop}, {64, rep_prefix_4_byte},
851 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
852 {{libcall, {{8, loop}, {15, unrolled_loop},
853 {2048, rep_prefix_4_byte}, {-1, libcall}}},
854 {libcall, {{24, loop}, {32, unrolled_loop},
855 {8192, rep_prefix_8_byte}, {-1, libcall}}}}
858 /* Generic64 should produce code tuned for Nocona and K8. */
860 struct processor_costs generic64_cost = {
861 COSTS_N_INSNS (1), /* cost of an add instruction */
862 /* On all chips taken into consideration lea is 2 cycles and more. With
863 this cost however our current implementation of synth_mult results in
864 use of unnecessary temporary registers causing regression on several
865 SPECfp benchmarks. */
866 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
867 COSTS_N_INSNS (1), /* variable shift costs */
868 COSTS_N_INSNS (1), /* constant shift costs */
869 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
870 COSTS_N_INSNS (4), /* HI */
871 COSTS_N_INSNS (3), /* SI */
872 COSTS_N_INSNS (4), /* DI */
873 COSTS_N_INSNS (2)}, /* other */
874 0, /* cost of multiply per each bit set */
875 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
876 COSTS_N_INSNS (26), /* HI */
877 COSTS_N_INSNS (42), /* SI */
878 COSTS_N_INSNS (74), /* DI */
879 COSTS_N_INSNS (74)}, /* other */
880 COSTS_N_INSNS (1), /* cost of movsx */
881 COSTS_N_INSNS (1), /* cost of movzx */
882 8, /* "large" insn */
884 4, /* cost for loading QImode using movzbl */
885 {4, 4, 4}, /* cost of loading integer registers
886 in QImode, HImode and SImode.
887 Relative to reg-reg move (2). */
888 {4, 4, 4}, /* cost of storing integer registers */
889 4, /* cost of reg,reg fld/fst */
890 {12, 12, 12}, /* cost of loading fp registers
891 in SFmode, DFmode and XFmode */
892 {6, 6, 8}, /* cost of storing fp registers
893 in SFmode, DFmode and XFmode */
894 2, /* cost of moving MMX register */
895 {8, 8}, /* cost of loading MMX registers
896 in SImode and DImode */
897 {8, 8}, /* cost of storing MMX registers
898 in SImode and DImode */
899 2, /* cost of moving SSE register */
900 {8, 8, 8}, /* cost of loading SSE registers
901 in SImode, DImode and TImode */
902 {8, 8, 8}, /* cost of storing SSE registers
903 in SImode, DImode and TImode */
904 5, /* MMX or SSE register to integer */
905 64, /* size of prefetch block */
906 6, /* number of parallel prefetches */
907 /* Benchmarks show large regressions on the K8 sixtrack benchmark when this value
908 is increased to the perhaps more appropriate value of 5. */
910 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
911 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
912 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
913 COSTS_N_INSNS (8), /* cost of FABS instruction. */
914 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
915 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
916 {DUMMY_STRINGOP_ALGS,
917 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
918 {DUMMY_STRINGOP_ALGS,
919 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}}
922 /* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona and K8. */
924 struct processor_costs generic32_cost = {
925 COSTS_N_INSNS (1), /* cost of an add instruction */
926 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
927 COSTS_N_INSNS (1), /* variable shift costs */
928 COSTS_N_INSNS (1), /* constant shift costs */
929 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
930 COSTS_N_INSNS (4), /* HI */
931 COSTS_N_INSNS (3), /* SI */
932 COSTS_N_INSNS (4), /* DI */
933 COSTS_N_INSNS (2)}, /* other */
934 0, /* cost of multiply per each bit set */
935 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
936 COSTS_N_INSNS (26), /* HI */
937 COSTS_N_INSNS (42), /* SI */
938 COSTS_N_INSNS (74), /* DI */
939 COSTS_N_INSNS (74)}, /* other */
940 COSTS_N_INSNS (1), /* cost of movsx */
941 COSTS_N_INSNS (1), /* cost of movzx */
942 8, /* "large" insn */
944 4, /* cost for loading QImode using movzbl */
945 {4, 4, 4}, /* cost of loading integer registers
946 in QImode, HImode and SImode.
947 Relative to reg-reg move (2). */
948 {4, 4, 4}, /* cost of storing integer registers */
949 4, /* cost of reg,reg fld/fst */
950 {12, 12, 12}, /* cost of loading fp registers
951 in SFmode, DFmode and XFmode */
952 {6, 6, 8}, /* cost of storing fp registers
953 in SFmode, DFmode and XFmode */
954 2, /* cost of moving MMX register */
955 {8, 8}, /* cost of loading MMX registers
956 in SImode and DImode */
957 {8, 8}, /* cost of storing MMX registers
958 in SImode and DImode */
959 2, /* cost of moving SSE register */
960 {8, 8, 8}, /* cost of loading SSE registers
961 in SImode, DImode and TImode */
962 {8, 8, 8}, /* cost of storing SSE registers
963 in SImode, DImode and TImode */
964 5, /* MMX or SSE register to integer */
965 64, /* size of prefetch block */
966 6, /* number of parallel prefetches */
968 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
969 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
970 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
971 COSTS_N_INSNS (8), /* cost of FABS instruction. */
972 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
973 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
974 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
975 DUMMY_STRINGOP_ALGS},
976 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
977 DUMMY_STRINGOP_ALGS},
/* Pointer to the processor cost table currently selected; statically
   initialised to the Pentium table.  NOTE(review): presumably repointed
   during option processing (processor_target_table carries one cost
   pointer per processor) -- confirm in override_options.  */
980 const struct processor_costs *ix86_cost = &pentium_cost;
982 /* Processor feature/optimization bitmasks.
   Each basic m_<CPU> macro is the single bit (1 << PROCESSOR_<CPU>);
   the composite masks (m_K6_GEODE, m_ATHLON_K8, m_ATHLON_K8_AMDFAM10,
   m_GENERIC) are ORs of those bits.  */
983 #define m_386 (1<<PROCESSOR_I386)
984 #define m_486 (1<<PROCESSOR_I486)
985 #define m_PENT (1<<PROCESSOR_PENTIUM)
986 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
987 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
988 #define m_NOCONA (1<<PROCESSOR_NOCONA)
989 #define m_CORE2 (1<<PROCESSOR_CORE2)
991 #define m_GEODE (1<<PROCESSOR_GEODE)
992 #define m_K6 (1<<PROCESSOR_K6)
993 #define m_K6_GEODE (m_K6 | m_GEODE)
994 #define m_K8 (1<<PROCESSOR_K8)
995 #define m_ATHLON (1<<PROCESSOR_ATHLON)
996 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
997 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
998 #define m_ATHLON_K8_AMDFAM10 (m_K8 | m_ATHLON | m_AMDFAM10)
1000 #define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
1001 #define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
1003 /* Generic instruction choice should be the common subset of supported CPUs
1004 (PPro/PENT4/NOCONA/CORE2/Athlon/K8). m_GENERIC covers both generic tunings. */
1005 #define m_GENERIC (m_GENERIC32 | m_GENERIC64)
1007 /* Feature tests against the various tunings. */
1008 unsigned int ix86_tune_features[X86_TUNE_LAST] = {
1009 /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
1010 negatively, so enabling for Generic64 seems like good code size
1011 tradeoff. We can't enable it for 32bit generic because it does not
1012 work well with PPro base chips. */
1013 m_386 | m_K6_GEODE | m_ATHLON_K8_AMDFAM10 | m_CORE2 | m_GENERIC64,
1015 /* X86_TUNE_PUSH_MEMORY */
1016 m_386 | m_K6_GEODE | m_ATHLON_K8_AMDFAM10 | m_PENT4
1017 | m_NOCONA | m_CORE2 | m_GENERIC,
1019 /* X86_TUNE_ZERO_EXTEND_WITH_AND */
1022 /* X86_TUNE_USE_BIT_TEST */
1025 /* X86_TUNE_UNROLL_STRLEN */
1026 m_486 | m_PENT | m_PPRO | m_ATHLON_K8_AMDFAM10 | m_K6 | m_CORE2 | m_GENERIC,
1028 /* X86_TUNE_DEEP_BRANCH_PREDICTION */
1029 m_PPRO | m_K6_GEODE | m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_GENERIC,
1031 /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
1032 on simulation result. But after P4 was made, no performance benefit
1033 was observed with branch hints. It also increases the code size.
1034 As a result, icc never generates branch hints. */
1037 /* X86_TUNE_DOUBLE_WITH_ADD */
1040 /* X86_TUNE_USE_SAHF */
1041 m_PPRO | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_PENT4
1042 | m_NOCONA | m_CORE2 | m_GENERIC,
1044 /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
1045 partial dependencies. */
1046 m_ATHLON_K8_AMDFAM10 | m_PPRO | m_PENT4 | m_NOCONA
1047 | m_CORE2 | m_GENERIC | m_GEODE /* m_386 | m_K6 */,
1049 /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
1050 register stalls on the Generic32 compilation setting as well. However,
1051 in the current implementation partial register stalls are not eliminated
1052 very well - they can be introduced via subregs synthesized by combine
1053 and can happen in caller/callee saving sequences. Because this option
1054 pays back little on PPro based chips and is in conflict with partial reg
1055 dependencies used by Athlon/P4 based chips, it is better to leave it off
1056 for generic32 for now. */
1059 /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
1060 m_CORE2 | m_GENERIC,
1062 /* X86_TUNE_USE_HIMODE_FIOP */
1063 m_386 | m_486 | m_K6_GEODE,
1065 /* X86_TUNE_USE_SIMODE_FIOP */
1066 ~(m_PPRO | m_ATHLON_K8_AMDFAM10 | m_PENT | m_CORE2 | m_GENERIC),
1068 /* X86_TUNE_USE_MOV0 */
1071 /* X86_TUNE_USE_CLTD */
1072 ~(m_PENT | m_K6 | m_CORE2 | m_GENERIC),
1074 /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */
1077 /* X86_TUNE_SPLIT_LONG_MOVES */
1080 /* X86_TUNE_READ_MODIFY_WRITE */
1083 /* X86_TUNE_READ_MODIFY */
1086 /* X86_TUNE_PROMOTE_QIMODE */
1087 m_K6_GEODE | m_PENT | m_386 | m_486 | m_ATHLON_K8_AMDFAM10 | m_CORE2
1088 | m_GENERIC /* | m_PENT4 ? */,
1090 /* X86_TUNE_FAST_PREFIX */
1091 ~(m_PENT | m_486 | m_386),
1093 /* X86_TUNE_SINGLE_STRINGOP */
1094 m_386 | m_PENT4 | m_NOCONA,
1096 /* X86_TUNE_QIMODE_MATH */
1099 /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
1100 register stalls. Just like X86_TUNE_PARTIAL_REG_STALL this option
1101 might be considered for Generic32 if our scheme for avoiding partial
1102 stalls was more effective. */
1105 /* X86_TUNE_PROMOTE_QI_REGS */
1108 /* X86_TUNE_PROMOTE_HI_REGS */
1111 /* X86_TUNE_ADD_ESP_4: Enable if add/sub is preferred over 1/2 push/pop. */
1112 m_ATHLON_K8_AMDFAM10 | m_K6_GEODE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1114 /* X86_TUNE_ADD_ESP_8 */
1115 m_ATHLON_K8_AMDFAM10 | m_PPRO | m_K6_GEODE | m_386
1116 | m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1118 /* X86_TUNE_SUB_ESP_4 */
1119 m_ATHLON_K8_AMDFAM10 | m_PPRO | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1121 /* X86_TUNE_SUB_ESP_8 */
1122 m_ATHLON_K8_AMDFAM10 | m_PPRO | m_386 | m_486
1123 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1125 /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
1126 for DFmode copies */
1127 ~(m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
1128 | m_GENERIC | m_GEODE),
1130 /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
1131 m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1133 /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
1134 conflict here between PPro/Pentium4 based chips that treat 128bit
1135 SSE registers as single units and K8 based chips that divide SSE
1136 registers into two 64bit halves. This knob promotes all store destinations
1137 to be 128bit to allow register renaming on 128bit SSE units, but usually
1138 results in one extra microop on 64bit SSE units. Experimental results
1139 show that disabling this option on P4 brings over 20% SPECfp regression,
1140 while enabling it on K8 brings roughly 2.4% regression that can be partly
1141 masked by careful scheduling of moves. */
1142 m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC | m_AMDFAM10,
1144 /* X86_TUNE_SSE_UNALIGNED_MOVE_OPTIMAL */
1147 /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
1148 are resolved on SSE register parts instead of whole registers, so we may
1149 maintain just lower part of scalar values in proper format leaving the
1150 upper part undefined. */
1153 /* X86_TUNE_SSE_TYPELESS_STORES */
1154 m_ATHLON_K8_AMDFAM10,
1156 /* X86_TUNE_SSE_LOAD0_BY_PXOR */
1157 m_PPRO | m_PENT4 | m_NOCONA,
1159 /* X86_TUNE_MEMORY_MISMATCH_STALL */
1160 m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1162 /* X86_TUNE_PROLOGUE_USING_MOVE */
1163 m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC,
1165 /* X86_TUNE_EPILOGUE_USING_MOVE */
1166 m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC,
1168 /* X86_TUNE_SHIFT1 */
1171 /* X86_TUNE_USE_FFREEP */
1172 m_ATHLON_K8_AMDFAM10,
1174 /* X86_TUNE_INTER_UNIT_MOVES */
1175 ~(m_ATHLON_K8_AMDFAM10 | m_GENERIC),
1177 /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
1178 than 4 branch instructions in the 16 byte window. */
1179 m_PPRO | m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1181 /* X86_TUNE_SCHEDULE */
1182 m_PPRO | m_ATHLON_K8_AMDFAM10 | m_K6_GEODE | m_PENT | m_CORE2 | m_GENERIC,
1184 /* X86_TUNE_USE_BT */
1185 m_ATHLON_K8_AMDFAM10,
1187 /* X86_TUNE_USE_INCDEC */
1188 ~(m_PENT4 | m_NOCONA | m_GENERIC),
1190 /* X86_TUNE_PAD_RETURNS */
1191 m_ATHLON_K8_AMDFAM10 | m_CORE2 | m_GENERIC,
1193 /* X86_TUNE_EXT_80387_CONSTANTS */
1194 m_K6_GEODE | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC,
1196 /* X86_TUNE_SHORTEN_X87_SSE */
1199 /* X86_TUNE_AVOID_VECTOR_DECODE */
1202 /* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have same latency for HImode
1203 and SImode multiply, but 386 and 486 do HImode multiply faster. */
1206 /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of 32-bit constant and memory is
1207 vector path on AMD machines. */
1208 m_K8 | m_GENERIC64 | m_AMDFAM10,
1210 /* X86_TUNE_SLOW_IMUL_IMM8: Imul of 8-bit constant is vector path on AMD
1212 m_K8 | m_GENERIC64 | m_AMDFAM10,
1214 /* X86_TUNE_MOVE_M1_VIA_OR: On pentiums, it is faster to load -1 via OR
1218 /* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is,
1219 but one byte longer. */
1222 /* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector decoded with memory
1223 operand that cannot be represented using a modRM byte. The XOR
1224 replacement is long decoded, so this split helps here as well. */
1228 /* Feature tests against the various architecture variations. */
1229 unsigned int ix86_arch_features[X86_ARCH_LAST] = {
1230 /* X86_ARCH_CMOVE */
1231 m_PPRO | m_GEODE | m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA,
1233 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
1236 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
1239 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
1242 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
/* Processor mask (OR of m_* bits) covering Athlon/K8/AMDFAM10, Pentium 4,
   Nocona, PPro, Core 2 and the generic tunings.  NOTE(review): presumably
   the set of tunings for which accumulating outgoing arguments is enabled
   by default -- the consumer is outside this chunk; confirm.  */
1246 static const unsigned int x86_accumulate_outgoing_args
1247 = m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC;
/* Processor mask (OR of m_* bits) covering Pentium, PPro, Athlon/K8/AMDFAM10,
   Pentium 4, Nocona, Core 2 and the generic targets.  NOTE(review):
   presumably the architectures on which the "fancy math" 387 instructions
   are always assumed available -- the consumer is outside this chunk;
   confirm.  */
1249 static const unsigned int x86_arch_always_fancy_math_387
1250 = m_PENT | m_PPRO | m_ATHLON_K8_AMDFAM10 | m_PENT4
1251 | m_NOCONA | m_CORE2 | m_GENERIC;
1253 static enum stringop_alg stringop_alg = no_stringop;
1255 /* In case the average insn count for single function invocation is
1256 lower than this constant, emit fast (but longer) prologue and
1258 #define FAST_PROLOGUE_INSN_COUNT 20
1260 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
1261 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
1262 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
1263 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
1265 /* Array of the smallest class containing reg number REGNO, indexed by
1266 REGNO. Used by REGNO_REG_CLASS in i386.h. */
1268 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
1270 /* ax, dx, cx, bx */
1271 AREG, DREG, CREG, BREG,
1272 /* si, di, bp, sp */
1273 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
1275 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
1276 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
1279 /* flags, fpsr, fpcr, frame */
1280 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
1281 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1283 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
1285 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1286 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1287 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1291 /* The "default" register map used in 32bit mode. */
1293 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
1295 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
1296 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
1297 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1298 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
1299 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
1300 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1301 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1304 static int const x86_64_int_parameter_registers[6] =
1306 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
1307 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
1310 static int const x86_64_ms_abi_int_parameter_registers[4] =
1312 2 /*RCX*/, 1 /*RDX*/,
1313 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
1316 static int const x86_64_int_return_registers[4] =
1318 0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
1321 /* The "default" register map used in 64bit mode. */
1322 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
1324 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
1325 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
1326 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1327 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
1328 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
1329 8,9,10,11,12,13,14,15, /* extended integer registers */
1330 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
1333 /* Define the register numbers to be used in Dwarf debugging information.
1334 The SVR4 reference port C compiler uses the following register numbers
1335 in its Dwarf output code:
1336 0 for %eax (gcc regno = 0)
1337 1 for %ecx (gcc regno = 2)
1338 2 for %edx (gcc regno = 1)
1339 3 for %ebx (gcc regno = 3)
1340 4 for %esp (gcc regno = 7)
1341 5 for %ebp (gcc regno = 6)
1342 6 for %esi (gcc regno = 4)
1343 7 for %edi (gcc regno = 5)
1344 The following three DWARF register numbers are never generated by
1345 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
1346 believes these numbers have these meanings.
1347 8 for %eip (no gcc equivalent)
1348 9 for %eflags (gcc regno = 17)
1349 10 for %trapno (no gcc equivalent)
1350 It is not at all clear how we should number the FP stack registers
1351 for the x86 architecture. If the version of SDB on x86/svr4 were
1352 a bit less brain dead with respect to floating-point then we would
1353 have a precedent to follow with respect to DWARF register numbers
1354 for x86 FP registers, but the SDB on x86/svr4 is so completely
1355 broken with respect to FP registers that it is hardly worth thinking
1356 of it as something to strive for compatibility with.
1357 The version of x86/svr4 SDB I have at the moment does (partially)
1358 seem to believe that DWARF register number 11 is associated with
1359 the x86 register %st(0), but that's about all. Higher DWARF
1360 register numbers don't seem to be associated with anything in
1361 particular, and even for DWARF regno 11, SDB only seems to under-
1362 stand that it should say that a variable lives in %st(0) (when
1363 asked via an `=' command) if we said it was in DWARF regno 11,
1364 but SDB still prints garbage when asked for the value of the
1365 variable in question (via a `/' command).
1366 (Also note that the labels SDB prints for various FP stack regs
1367 when doing an `x' command are all wrong.)
1368 Note that these problems generally don't affect the native SVR4
1369 C compiler because it doesn't allow the use of -O with -g and
1370 because when it is *not* optimizing, it allocates a memory
1371 location for each floating-point variable, and the memory
1372 location is what gets described in the DWARF AT_location
1373 attribute for the variable in question.
1374 Regardless of the severe mental illness of the x86/svr4 SDB, we
1375 do something sensible here and we use the following DWARF
1376 register numbers. Note that these are all stack-top-relative
1378 11 for %st(0) (gcc regno = 8)
1379 12 for %st(1) (gcc regno = 9)
1380 13 for %st(2) (gcc regno = 10)
1381 14 for %st(3) (gcc regno = 11)
1382 15 for %st(4) (gcc regno = 12)
1383 16 for %st(5) (gcc regno = 13)
1384 17 for %st(6) (gcc regno = 14)
1385 18 for %st(7) (gcc regno = 15)
1387 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
1389 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
1390 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
1391 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1392 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
1393 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
1394 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1395 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1398 /* Test and compare insns in i386.md store the information needed to
1399 generate branch and scc insns here. */
1401 rtx ix86_compare_op0 = NULL_RTX;
1402 rtx ix86_compare_op1 = NULL_RTX;
1403 rtx ix86_compare_emitted = NULL_RTX;
1405 /* Size of the register save area. */
1406 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
1408 /* Define the structure for the machine field in struct function. */
1410 struct stack_local_entry GTY(())
1412 unsigned short mode;
1415 struct stack_local_entry *next;
1418 /* Structure describing stack frame layout.
1419 Stack grows downward:
1425 saved frame pointer if frame_pointer_needed
1426 <- HARD_FRAME_POINTER
1431 [va_arg registers] (
1432 > to_allocate <- FRAME_POINTER
1442 HOST_WIDE_INT frame;
1444 int outgoing_arguments_size;
1447 HOST_WIDE_INT to_allocate;
1448 /* The offsets relative to ARG_POINTER. */
1449 HOST_WIDE_INT frame_pointer_offset;
1450 HOST_WIDE_INT hard_frame_pointer_offset;
1451 HOST_WIDE_INT stack_pointer_offset;
1453 /* When save_regs_using_mov is set, emit prologue using
1454 move instead of push instructions. */
1455 bool save_regs_using_mov;
1458 /* Code model option. */
1459 enum cmodel ix86_cmodel;
/* Assembler dialect; statically initialised to ASM_ATT.  NOTE(review):
   presumably overridable from the command line -- confirm.  */
1461 enum asm_dialect ix86_asm_dialect = ASM_ATT;
/* TLS dialect; statically initialised to TLS_DIALECT_GNU.  NOTE(review):
   presumably overridable from the command line -- confirm.  */
1463 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
1465 /* Which unit we are generating floating point math for. */
1466 enum fpmath_unit ix86_fpmath;
1468 /* Which cpu are we scheduling for. */
1469 enum processor_type ix86_tune;
1471 /* Which instruction set architecture to use. */
1472 enum processor_type ix86_arch;
1474 /* true if sse prefetch instruction is not NOOP. */
1475 int x86_prefetch_sse;
1477 /* ix86_regparm_string as a number */
1478 static int ix86_regparm;
1480 /* -mstackrealign option */
1481 extern int ix86_force_align_arg_pointer;
1482 static const char ix86_force_align_arg_pointer_string[] = "force_align_arg_pointer";
1484 /* Preferred alignment for stack boundary in bits. */
1485 unsigned int ix86_preferred_stack_boundary;
1487 /* Values 1-5: see jump.c */
1488 int ix86_branch_cost;
1490 /* Variables which are this size or smaller are put in the data/bss
1491 or ldata/lbss sections. */
1493 int ix86_section_threshold = 65536;
1495 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
1496 char internal_label_prefix[16];
1497 int internal_label_prefix_len;
1499 /* Register class used for passing a given 64bit part of the argument.
1500 These represent classes as documented by the psABI, with the exception
1501 of the SSESF and SSEDF classes, which are basically the SSE class except
1502 that gcc uses SFmode or DFmode moves instead of DImode to avoid reformatting penalties.
1504 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
1505 whenever possible (upper half does contain padding). */
1506 enum x86_64_reg_class
1509 X86_64_INTEGER_CLASS,
1510 X86_64_INTEGERSI_CLASS,
1517 X86_64_COMPLEX_X87_CLASS,
1520 static const char * const x86_64_reg_class_name[] =
1522 "no", "integer", "integerSI", "sse", "sseSF", "sseDF",
1523 "sseup", "x87", "x87up", "cplx87", "no"
1526 #define MAX_CLASSES 4
1528 /* Table of constants used by fldpi, fldln2, etc.... */
1529 static REAL_VALUE_TYPE ext_80387_constants_table [5];
1530 static bool ext_80387_constants_init = 0;
1533 static struct machine_function * ix86_init_machine_status (void);
1534 static rtx ix86_function_value (tree, tree, bool);
1535 static int ix86_function_regparm (tree, tree);
1536 static void ix86_compute_frame_layout (struct ix86_frame *);
1537 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
1541 /* The svr4 ABI for the i386 says that records and unions are returned
1543 #ifndef DEFAULT_PCC_STRUCT_RETURN
1544 #define DEFAULT_PCC_STRUCT_RETURN 1
1547 /* Implement TARGET_HANDLE_OPTION. */
1550 ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
1557 target_flags &= ~MASK_3DNOW_A;
1558 target_flags_explicit |= MASK_3DNOW_A;
1565 target_flags &= ~(MASK_3DNOW | MASK_3DNOW_A);
1566 target_flags_explicit |= MASK_3DNOW | MASK_3DNOW_A;
1573 target_flags &= ~(MASK_SSE2 | MASK_SSE3 | MASK_SSSE3
1574 | MASK_SSE4_1 | MASK_SSE4A);
1575 target_flags_explicit |= (MASK_SSE2 | MASK_SSE3 | MASK_SSSE3
1576 | MASK_SSE4_1 | MASK_SSE4A);
1583 target_flags &= ~(MASK_SSE3 | MASK_SSSE3 | MASK_SSE4_1
1585 target_flags_explicit |= (MASK_SSE3 | MASK_SSSE3
1586 | MASK_SSE4_1 | MASK_SSE4A);
1593 target_flags &= ~(MASK_SSSE3 | MASK_SSE4_1 | MASK_SSE4A);
1594 target_flags_explicit |= (MASK_SSSE3 | MASK_SSE4_1
1602 target_flags &= ~(MASK_SSE4_1 | MASK_SSE4A);
1603 target_flags_explicit |= MASK_SSE4_1 | MASK_SSE4A;
1610 target_flags &= ~MASK_SSE4A;
1611 target_flags_explicit |= MASK_SSE4A;
1618 target_flags &= ~MASK_SSE4_1;
1619 target_flags_explicit |= MASK_SSE4_1;
1628 /* Sometimes certain combinations of command options do not make
1629 sense on a particular target machine. You can define a macro
1630 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1631 defined, is executed once just after all the command options have
1634 Don't use this macro to turn on various extra optimizations for
1635 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1638 override_options (void)
1641 int ix86_tune_defaulted = 0;
1642 unsigned int ix86_arch_mask, ix86_tune_mask;
1644 /* Comes from final.c -- no real reason to change it. */
1645 #define MAX_CODE_ALIGN 16
1649 const struct processor_costs *cost; /* Processor costs */
1650 const int target_enable; /* Target flags to enable. */
1651 const int target_disable; /* Target flags to disable. */
1652 const int align_loop; /* Default alignments. */
1653 const int align_loop_max_skip;
1654 const int align_jump;
1655 const int align_jump_max_skip;
1656 const int align_func;
1658 const processor_target_table[PROCESSOR_max] =
1660 {&i386_cost, 0, 0, 4, 3, 4, 3, 4},
1661 {&i486_cost, 0, 0, 16, 15, 16, 15, 16},
1662 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
1663 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
1664 {&geode_cost, 0, 0, 0, 0, 0, 0, 0},
1665 {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
1666 {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
1667 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
1668 {&k8_cost, 0, 0, 16, 7, 16, 7, 16},
1669 {&nocona_cost, 0, 0, 0, 0, 0, 0, 0},
1670 {&core2_cost, 0, 0, 16, 7, 16, 7, 16},
1671 {&generic32_cost, 0, 0, 16, 7, 16, 7, 16},
1672 {&generic64_cost, 0, 0, 16, 7, 16, 7, 16},
1673 {&amdfam10_cost, 0, 0, 32, 24, 32, 7, 32}
1676 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
1679 const char *const name; /* processor name or nickname. */
1680 const enum processor_type processor;
1681 const enum pta_flags
1687 PTA_PREFETCH_SSE = 1 << 4,
1689 PTA_3DNOW_A = 1 << 6,
1693 PTA_POPCNT = 1 << 10,
1695 PTA_SSE4A = 1 << 12,
1696 PTA_NO_SAHF = 1 << 13,
1697 PTA_SSE4_1 = 1 << 14
1700 const processor_alias_table[] =
1702 {"i386", PROCESSOR_I386, 0},
1703 {"i486", PROCESSOR_I486, 0},
1704 {"i586", PROCESSOR_PENTIUM, 0},
1705 {"pentium", PROCESSOR_PENTIUM, 0},
1706 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
1707 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
1708 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1709 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1710 {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_PREFETCH_SSE | PTA_SSE},
1711 {"i686", PROCESSOR_PENTIUMPRO, 0},
1712 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
1713 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
1714 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1715 {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1716 {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE | PTA_SSE2},
1717 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1718 | PTA_MMX | PTA_PREFETCH_SSE},
1719 {"pentium4m", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1720 | PTA_MMX | PTA_PREFETCH_SSE},
1721 {"prescott", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3
1722 | PTA_MMX | PTA_PREFETCH_SSE},
1723 {"nocona", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_64BIT
1724 | PTA_MMX | PTA_PREFETCH_SSE
1725 | PTA_CX16 | PTA_NO_SAHF},
1726 {"core2", PROCESSOR_CORE2, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_SSSE3
1727 | PTA_64BIT | PTA_MMX
1728 | PTA_PREFETCH_SSE | PTA_CX16},
1729 {"geode", PROCESSOR_GEODE, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1731 {"k6", PROCESSOR_K6, PTA_MMX},
1732 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1733 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1734 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1736 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
1737 | PTA_3DNOW | PTA_3DNOW_A},
1738 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1739 | PTA_3DNOW_A | PTA_SSE},
1740 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1741 | PTA_3DNOW_A | PTA_SSE},
1742 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1743 | PTA_3DNOW_A | PTA_SSE},
1744 {"x86-64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_64BIT
1745 | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF},
1746 {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1747 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
1749 {"k8-sse3", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1750 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
1751 | PTA_SSE3 | PTA_NO_SAHF},
1752 {"opteron", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1753 | PTA_64BIT | PTA_3DNOW_A | PTA_SSE
1754 | PTA_SSE2 | PTA_NO_SAHF},
1755 {"opteron-sse3", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1756 | PTA_64BIT | PTA_3DNOW_A | PTA_SSE
1757 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
1758 {"athlon64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1759 | PTA_64BIT | PTA_3DNOW_A | PTA_SSE
1760 | PTA_SSE2 | PTA_NO_SAHF},
1761 {"athlon64-sse3", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1762 | PTA_64BIT | PTA_3DNOW_A | PTA_SSE
1763 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
1764 {"athlon-fx", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1765 | PTA_64BIT | PTA_3DNOW_A | PTA_SSE
1766 | PTA_SSE2 | PTA_NO_SAHF},
1767 {"amdfam10", PROCESSOR_AMDFAM10, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1768 | PTA_64BIT | PTA_3DNOW_A | PTA_SSE
1769 | PTA_SSE2 | PTA_SSE3 | PTA_POPCNT
1770 | PTA_ABM | PTA_SSE4A | PTA_CX16},
1771 {"barcelona", PROCESSOR_AMDFAM10, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1772 | PTA_64BIT | PTA_3DNOW_A | PTA_SSE
1773 | PTA_SSE2 | PTA_SSE3 | PTA_POPCNT
1774 | PTA_ABM | PTA_SSE4A | PTA_CX16},
1775 {"generic32", PROCESSOR_GENERIC32, 0 /* flags are only used for -march switch. */ },
1776 {"generic64", PROCESSOR_GENERIC64, PTA_64BIT /* flags are only used for -march switch. */ },
1779 int const pta_size = ARRAY_SIZE (processor_alias_table);
1781 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1782 SUBTARGET_OVERRIDE_OPTIONS;
1785 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
1786 SUBSUBTARGET_OVERRIDE_OPTIONS;
1789 /* -fPIC is the default for x86_64. */
1790 if (TARGET_MACHO && TARGET_64BIT)
1793 /* Set the default values for switches whose default depends on TARGET_64BIT
1794 in case they weren't overwritten by command line options. */
1797 /* Mach-O doesn't support omitting the frame pointer for now. */
1798 if (flag_omit_frame_pointer == 2)
1799 flag_omit_frame_pointer = (TARGET_MACHO ? 0 : 1);
1800 if (flag_asynchronous_unwind_tables == 2)
1801 flag_asynchronous_unwind_tables = 1;
1802 if (flag_pcc_struct_return == 2)
1803 flag_pcc_struct_return = 0;
1807 if (flag_omit_frame_pointer == 2)
1808 flag_omit_frame_pointer = 0;
1809 if (flag_asynchronous_unwind_tables == 2)
1810 flag_asynchronous_unwind_tables = 0;
1811 if (flag_pcc_struct_return == 2)
1812 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
1815 /* Need to check -mtune=generic first. */
1816 if (ix86_tune_string)
1818 if (!strcmp (ix86_tune_string, "generic")
1819 || !strcmp (ix86_tune_string, "i686")
1820 /* As special support for cross compilers we read -mtune=native
1821 as -mtune=generic. With native compilers we won't see the
1822 -mtune=native, as it was changed by the driver. */
1823 || !strcmp (ix86_tune_string, "native"))
1826 ix86_tune_string = "generic64";
1828 ix86_tune_string = "generic32";
1830 else if (!strncmp (ix86_tune_string, "generic", 7))
1831 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
1835 if (ix86_arch_string)
1836 ix86_tune_string = ix86_arch_string;
1837 if (!ix86_tune_string)
1839 ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT];
1840 ix86_tune_defaulted = 1;
1843 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
1844 need to use a sensible tune option. */
1845 if (!strcmp (ix86_tune_string, "generic")
1846 || !strcmp (ix86_tune_string, "x86-64")
1847 || !strcmp (ix86_tune_string, "i686"))
1850 ix86_tune_string = "generic64";
1852 ix86_tune_string = "generic32";
1855 if (ix86_stringop_string)
1857 if (!strcmp (ix86_stringop_string, "rep_byte"))
1858 stringop_alg = rep_prefix_1_byte;
1859 else if (!strcmp (ix86_stringop_string, "libcall"))
1860 stringop_alg = libcall;
1861 else if (!strcmp (ix86_stringop_string, "rep_4byte"))
1862 stringop_alg = rep_prefix_4_byte;
1863 else if (!strcmp (ix86_stringop_string, "rep_8byte"))
1864 stringop_alg = rep_prefix_8_byte;
1865 else if (!strcmp (ix86_stringop_string, "byte_loop"))
1866 stringop_alg = loop_1_byte;
1867 else if (!strcmp (ix86_stringop_string, "loop"))
1868 stringop_alg = loop;
1869 else if (!strcmp (ix86_stringop_string, "unrolled_loop"))
1870 stringop_alg = unrolled_loop;
1872 error ("bad value (%s) for -mstringop-strategy= switch", ix86_stringop_string);
1874 if (!strcmp (ix86_tune_string, "x86-64"))
1875 warning (OPT_Wdeprecated, "-mtune=x86-64 is deprecated. Use -mtune=k8 or "
1876 "-mtune=generic instead as appropriate.");
1878 if (!ix86_arch_string)
1879 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
1880 if (!strcmp (ix86_arch_string, "generic"))
1881 error ("generic CPU can be used only for -mtune= switch");
1882 if (!strncmp (ix86_arch_string, "generic", 7))
1883 error ("bad value (%s) for -march= switch", ix86_arch_string);
1885 if (ix86_cmodel_string != 0)
1887 if (!strcmp (ix86_cmodel_string, "small"))
1888 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1889 else if (!strcmp (ix86_cmodel_string, "medium"))
1890 ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
1891 else if (!strcmp (ix86_cmodel_string, "large"))
1892 ix86_cmodel = flag_pic ? CM_LARGE_PIC : CM_LARGE;
1894 error ("code model %s does not support PIC mode", ix86_cmodel_string);
1895 else if (!strcmp (ix86_cmodel_string, "32"))
1896 ix86_cmodel = CM_32;
1897 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
1898 ix86_cmodel = CM_KERNEL;
1900 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
1904 /* For TARGET_64BIT_MS_ABI, force pic on, in order to enable the
1905 use of rip-relative addressing. This eliminates fixups that
1906 would otherwise be needed if this object is to be placed in a
1907 DLL, and is essentially just as efficient as direct addressing. */
1908 if (TARGET_64BIT_MS_ABI)
1909 ix86_cmodel = CM_SMALL_PIC, flag_pic = 1;
1910 else if (TARGET_64BIT)
1911 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1913 ix86_cmodel = CM_32;
1915 if (ix86_asm_string != 0)
1918 && !strcmp (ix86_asm_string, "intel"))
1919 ix86_asm_dialect = ASM_INTEL;
1920 else if (!strcmp (ix86_asm_string, "att"))
1921 ix86_asm_dialect = ASM_ATT;
1923 error ("bad value (%s) for -masm= switch", ix86_asm_string);
1925 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
1926 error ("code model %qs not supported in the %s bit mode",
1927 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
1928 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
1929 sorry ("%i-bit mode not compiled in",
1930 (target_flags & MASK_64BIT) ? 64 : 32);
1932 for (i = 0; i < pta_size; i++)
1933 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
1935 ix86_arch = processor_alias_table[i].processor;
1936 /* Default cpu tuning to the architecture. */
1937 ix86_tune = ix86_arch;
1938 if (processor_alias_table[i].flags & PTA_MMX
1939 && !(target_flags_explicit & MASK_MMX))
1940 target_flags |= MASK_MMX;
1941 if (processor_alias_table[i].flags & PTA_3DNOW
1942 && !(target_flags_explicit & MASK_3DNOW))
1943 target_flags |= MASK_3DNOW;
1944 if (processor_alias_table[i].flags & PTA_3DNOW_A
1945 && !(target_flags_explicit & MASK_3DNOW_A))
1946 target_flags |= MASK_3DNOW_A;
1947 if (processor_alias_table[i].flags & PTA_SSE
1948 && !(target_flags_explicit & MASK_SSE))
1949 target_flags |= MASK_SSE;
1950 if (processor_alias_table[i].flags & PTA_SSE2
1951 && !(target_flags_explicit & MASK_SSE2))
1952 target_flags |= MASK_SSE2;
1953 if (processor_alias_table[i].flags & PTA_SSE3
1954 && !(target_flags_explicit & MASK_SSE3))
1955 target_flags |= MASK_SSE3;
1956 if (processor_alias_table[i].flags & PTA_SSSE3
1957 && !(target_flags_explicit & MASK_SSSE3))
1958 target_flags |= MASK_SSSE3;
1959 if (processor_alias_table[i].flags & PTA_SSE4_1
1960 && !(target_flags_explicit & MASK_SSE4_1))
1961 target_flags |= MASK_SSE4_1;
1962 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1963 x86_prefetch_sse = true;
1964 if (processor_alias_table[i].flags & PTA_CX16)
1965 x86_cmpxchg16b = true;
1966 if (processor_alias_table[i].flags & PTA_POPCNT
1967 && !(target_flags_explicit & MASK_POPCNT))
1968 target_flags |= MASK_POPCNT;
1969 if (processor_alias_table[i].flags & PTA_ABM
1970 && !(target_flags_explicit & MASK_ABM))
1971 target_flags |= MASK_ABM;
1972 if (processor_alias_table[i].flags & PTA_SSE4A
1973 && !(target_flags_explicit & MASK_SSE4A))
1974 target_flags |= MASK_SSE4A;
1975 if (!(TARGET_64BIT && (processor_alias_table[i].flags & PTA_NO_SAHF)))
1977 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1978 error ("CPU you selected does not support x86-64 "
1984 error ("bad value (%s) for -march= switch", ix86_arch_string);
1986 ix86_arch_mask = 1u << ix86_arch;
1987 for (i = 0; i < X86_ARCH_LAST; ++i)
1988 ix86_arch_features[i] &= ix86_arch_mask;
1990 for (i = 0; i < pta_size; i++)
1991 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
1993 ix86_tune = processor_alias_table[i].processor;
1994 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1996 if (ix86_tune_defaulted)
1998 ix86_tune_string = "x86-64";
1999 for (i = 0; i < pta_size; i++)
2000 if (! strcmp (ix86_tune_string,
2001 processor_alias_table[i].name))
2003 ix86_tune = processor_alias_table[i].processor;
2006 error ("CPU you selected does not support x86-64 "
2009 /* Intel CPUs have always interpreted SSE prefetch instructions as
2010 NOPs; so, we can enable SSE prefetch instructions even when
2011 -mtune (rather than -march) points us to a processor that has them.
2012 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
2013 higher processors. */
2014 if (TARGET_CMOVE && (processor_alias_table[i].flags & PTA_PREFETCH_SSE))
2015 x86_prefetch_sse = true;
2019 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
2021 ix86_tune_mask = 1u << ix86_tune;
2022 for (i = 0; i < X86_TUNE_LAST; ++i)
2023 ix86_tune_features[i] &= ix86_tune_mask;
2026 ix86_cost = &size_cost;
2028 ix86_cost = processor_target_table[ix86_tune].cost;
2029 target_flags |= processor_target_table[ix86_tune].target_enable;
2030 target_flags &= ~processor_target_table[ix86_tune].target_disable;
2032 /* Arrange to set up i386_stack_locals for all functions. */
2033 init_machine_status = ix86_init_machine_status;
2035 /* Validate -mregparm= value. */
2036 if (ix86_regparm_string)
2039 warning (0, "-mregparm is ignored in 64-bit mode");
2040 i = atoi (ix86_regparm_string);
2041 if (i < 0 || i > REGPARM_MAX)
2042 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
2047 ix86_regparm = REGPARM_MAX;
2049 /* If the user has provided any of the -malign-* options,
2050 warn and use that value only if -falign-* is not set.
2051 Remove this code in GCC 3.2 or later. */
2052 if (ix86_align_loops_string)
2054 warning (0, "-malign-loops is obsolete, use -falign-loops");
2055 if (align_loops == 0)
2057 i = atoi (ix86_align_loops_string);
2058 if (i < 0 || i > MAX_CODE_ALIGN)
2059 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2061 align_loops = 1 << i;
2065 if (ix86_align_jumps_string)
2067 warning (0, "-malign-jumps is obsolete, use -falign-jumps");
2068 if (align_jumps == 0)
2070 i = atoi (ix86_align_jumps_string);
2071 if (i < 0 || i > MAX_CODE_ALIGN)
2072 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2074 align_jumps = 1 << i;
2078 if (ix86_align_funcs_string)
2080 warning (0, "-malign-functions is obsolete, use -falign-functions");
2081 if (align_functions == 0)
2083 i = atoi (ix86_align_funcs_string);
2084 if (i < 0 || i > MAX_CODE_ALIGN)
2085 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2087 align_functions = 1 << i;
2091 /* Default align_* from the processor table. */
2092 if (align_loops == 0)
2094 align_loops = processor_target_table[ix86_tune].align_loop;
2095 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
2097 if (align_jumps == 0)
2099 align_jumps = processor_target_table[ix86_tune].align_jump;
2100 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
2102 if (align_functions == 0)
2104 align_functions = processor_target_table[ix86_tune].align_func;
2107 /* Validate -mbranch-cost= value, or provide default. */
2108 ix86_branch_cost = ix86_cost->branch_cost;
2109 if (ix86_branch_cost_string)
2111 i = atoi (ix86_branch_cost_string);
2113 error ("-mbranch-cost=%d is not between 0 and 5", i);
2115 ix86_branch_cost = i;
2117 if (ix86_section_threshold_string)
2119 i = atoi (ix86_section_threshold_string);
2121 error ("-mlarge-data-threshold=%d is negative", i);
2123 ix86_section_threshold = i;
2126 if (ix86_tls_dialect_string)
2128 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
2129 ix86_tls_dialect = TLS_DIALECT_GNU;
2130 else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
2131 ix86_tls_dialect = TLS_DIALECT_GNU2;
2132 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
2133 ix86_tls_dialect = TLS_DIALECT_SUN;
2135 error ("bad value (%s) for -mtls-dialect= switch",
2136 ix86_tls_dialect_string);
2139 if (ix87_precision_string)
2141 i = atoi (ix87_precision_string);
2142 if (i != 32 && i != 64 && i != 80)
2143 error ("pc%d is not valid precision setting (32, 64 or 80)", i);
2146 /* Keep nonleaf frame pointers. */
2147 if (flag_omit_frame_pointer)
2148 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
2149 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
2150 flag_omit_frame_pointer = 1;
2152 /* If we're doing fast math, we don't care about comparison order
2153 wrt NaNs. This lets us use a shorter comparison sequence. */
2154 if (flag_finite_math_only)
2155 target_flags &= ~MASK_IEEE_FP;
2157 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
2158 since the insns won't need emulation. */
2159 if (x86_arch_always_fancy_math_387 & ix86_arch_mask)
2160 target_flags &= ~MASK_NO_FANCY_MATH_387;
2162 /* Likewise, if the target doesn't have a 387, or we've specified
2163 software floating point, don't use 387 inline intrinsics. */
2165 target_flags |= MASK_NO_FANCY_MATH_387;
2167 /* Turn on SSSE3 builtins for -msse4.1. */
2169 target_flags |= MASK_SSSE3;
2171 /* Turn on SSE3 builtins for -mssse3. */
2173 target_flags |= MASK_SSE3;
2175 /* Turn on SSE3 builtins for -msse4a. */
2177 target_flags |= MASK_SSE3;
2179 /* Turn on SSE2 builtins for -msse3. */
2181 target_flags |= MASK_SSE2;
2183 /* Turn on SSE builtins for -msse2. */
2185 target_flags |= MASK_SSE;
2187 /* Turn on MMX builtins for -msse. */
2190 target_flags |= MASK_MMX & ~target_flags_explicit;
2191 x86_prefetch_sse = true;
2194 /* Turn on MMX builtins for 3Dnow. */
2196 target_flags |= MASK_MMX;
2198 /* Turn on POPCNT builtins for -mabm. */
2200 target_flags |= MASK_POPCNT;
2205 warning (0, "-mrtd is ignored in 64bit mode");
2207 /* Enable by default the SSE and MMX builtins. Do allow the user to
2208 explicitly disable any of these. In particular, disabling SSE and
2209 MMX for kernel code is extremely useful. */
2211 |= ((MASK_SSE2 | MASK_SSE | MASK_MMX | TARGET_SUBTARGET64_DEFAULT)
2212 & ~target_flags_explicit);
2216 /* i386 ABI does not specify red zone. It still makes sense to use it
2217 when the programmer takes care to protect the stack from being destroyed. */
2218 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
2219 target_flags |= MASK_NO_RED_ZONE;
2222 /* Validate -mpreferred-stack-boundary= value, or provide default.
2223 The default of 128 bits is for Pentium III's SSE __m128. We can't
2224 change it because of optimize_size. Otherwise, we can't mix object
2225 files compiled with -Os and -On. */
2226 ix86_preferred_stack_boundary = 128;
2227 if (ix86_preferred_stack_boundary_string)
2229 i = atoi (ix86_preferred_stack_boundary_string);
2230 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
2231 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
2232 TARGET_64BIT ? 4 : 2);
2234 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
2237 /* Accept -msseregparm only if at least SSE support is enabled. */
2238 if (TARGET_SSEREGPARM
2240 error ("-msseregparm used without SSE enabled");
2242 ix86_fpmath = TARGET_FPMATH_DEFAULT;
2243 if (ix86_fpmath_string != 0)
2245 if (! strcmp (ix86_fpmath_string, "387"))
2246 ix86_fpmath = FPMATH_387;
2247 else if (! strcmp (ix86_fpmath_string, "sse"))
2251 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2252 ix86_fpmath = FPMATH_387;
2255 ix86_fpmath = FPMATH_SSE;
2257 else if (! strcmp (ix86_fpmath_string, "387,sse")
2258 || ! strcmp (ix86_fpmath_string, "sse,387"))
2262 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2263 ix86_fpmath = FPMATH_387;
2265 else if (!TARGET_80387)
2267 warning (0, "387 instruction set disabled, using SSE arithmetics");
2268 ix86_fpmath = FPMATH_SSE;
2271 ix86_fpmath = FPMATH_SSE | FPMATH_387;
2274 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
2277 /* If the i387 is disabled, then do not return values in it. */
2279 target_flags &= ~MASK_FLOAT_RETURNS;
2281 if ((x86_accumulate_outgoing_args & ix86_tune_mask)
2282 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
2284 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
2286 /* ??? Unwind info is not correct around the CFG unless either a frame
2287 pointer is present or M_A_O_A is set. Fixing this requires rewriting
2288 unwind info generation to be aware of the CFG and propagating states
2290 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
2291 || flag_exceptions || flag_non_call_exceptions)
2292 && flag_omit_frame_pointer
2293 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
2295 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
2296 warning (0, "unwind tables currently require either a frame pointer "
2297 "or -maccumulate-outgoing-args for correctness");
2298 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
2301 /* For sane SSE instruction set generation we need fcomi instruction.
2302 It is safe to enable all CMOVE instructions. */
2306 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
2309 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
2310 p = strchr (internal_label_prefix, 'X');
2311 internal_label_prefix_len = p - internal_label_prefix;
2315 /* When scheduling description is not available, disable scheduler pass
2316 so it won't slow down the compilation and make x87 code slower. */
2317 if (!TARGET_SCHEDULE)
2318 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
2320 if (!PARAM_SET_P (PARAM_SIMULTANEOUS_PREFETCHES))
2321 set_param_value ("simultaneous-prefetches",
2322 ix86_cost->simultaneous_prefetches);
2323 if (!PARAM_SET_P (PARAM_L1_CACHE_LINE_SIZE))
2324 set_param_value ("l1-cache-line-size", ix86_cost->prefetch_block);
2327 /* Return true if this goes in large data/bss.
   EXP is the tree node being classified.  The tests below look at the
   code model, at an explicit section name on a VAR_DECL, and at the
   size of the type of EXP compared with ix86_section_threshold. */
2330 ix86_in_large_data_p (tree exp)
/* Guard on the code model: anything other than CM_MEDIUM or
   CM_MEDIUM_PIC is dealt with by this first test. */
2332 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
2335 /* Functions are never large data. */
2336 if (TREE_CODE (exp) == FUNCTION_DECL)
/* A variable that carries an explicit section name is classified by
   that name alone; only .ldata and .lbss are compared against. */
2339 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
2341 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
2342 if (strcmp (section, ".ldata") == 0
2343 || strcmp (section, ".lbss") == 0)
/* Size in bytes of the type of EXP; ix86_section_threshold is the
   value override_options parses from -mlarge-data-threshold=. */
2349 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
2351 /* If this is an incomplete type with size 0, then we can't put it
2352 in data because it might be too big when completed. */
2353 if (!size || size > ix86_section_threshold)
2360 /* Switch to the appropriate section for output of DECL.
2361 DECL is either a `VAR_DECL' node or a constant of some sort.
2362 RELOC indicates whether forming the initial value of DECL requires
2363 link-time relocations.
   ALIGN is not examined here; it is only forwarded to
   default_elf_select_section. */
2365 static section * x86_64_elf_select_section (tree, int, unsigned HOST_WIDE_INT)
2369 x86_64_elf_select_section (tree decl, int reloc,
2370 unsigned HOST_WIDE_INT align)
/* The special handling applies only under the medium code models and
   only to objects that ix86_in_large_data_p accepts. */
2372 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2373 && ix86_in_large_data_p (decl))
/* SNAME is the section name picked by the switch below; FLAGS starts
   as SECTION_WRITE and gets SECTION_BSS added for the bss case. */
2375 const char *sname = NULL;
2376 unsigned int flags = SECTION_WRITE;
2377 switch (categorize_decl_for_section (decl, reloc))
2382 case SECCAT_DATA_REL:
2383 sname = ".ldata.rel";
2385 case SECCAT_DATA_REL_LOCAL:
2386 sname = ".ldata.rel.local";
2388 case SECCAT_DATA_REL_RO:
2389 sname = ".ldata.rel.ro";
2391 case SECCAT_DATA_REL_RO_LOCAL:
2392 sname = ".ldata.rel.ro.local";
2396 flags |= SECTION_BSS;
2399 case SECCAT_RODATA_MERGE_STR:
2400 case SECCAT_RODATA_MERGE_STR_INIT:
2401 case SECCAT_RODATA_MERGE_CONST:
2405 case SECCAT_SRODATA:
2412 /* We don't split these for medium model. Place them into
2413 default sections and hope for best. */
2418 /* We might get called with string constants, but get_named_section
2419 doesn't like them as they are not DECLs. Also, we need to set
2420 flags in that case. */
2422 return get_section (sname, flags, NULL);
2423 return get_named_section (decl, sname, reloc);
/* Everything not handled above uses the generic ELF selection. */
2426 return default_elf_select_section (decl, reloc, align);
2429 /* Build up a unique section name, expressed as a
2430 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
2431 RELOC indicates whether the initial value of DECL requires
2432 link-time relocations.
   Only large-data objects under the medium code models are named here;
   the final call hands the remaining cases to default_unique_section. */
2434 static void ATTRIBUTE_UNUSED
2435 x86_64_elf_unique_section (tree decl, int reloc)
2437 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2438 && ix86_in_large_data_p (decl))
2440 const char *prefix = NULL;
2441 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
2442 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
/* Each category picks a name prefix: a .gnu.linkonce.l* form when
   ONE_ONLY is set, otherwise the plain .ldata. / .lbss. / .lrodata.
   form. */
2444 switch (categorize_decl_for_section (decl, reloc))
2447 case SECCAT_DATA_REL:
2448 case SECCAT_DATA_REL_LOCAL:
2449 case SECCAT_DATA_REL_RO:
2450 case SECCAT_DATA_REL_RO_LOCAL:
2451 prefix = one_only ? ".gnu.linkonce.ld." : ".ldata.";
2454 prefix = one_only ? ".gnu.linkonce.lb." : ".lbss.";
2457 case SECCAT_RODATA_MERGE_STR:
2458 case SECCAT_RODATA_MERGE_STR_INIT:
2459 case SECCAT_RODATA_MERGE_CONST:
2460 prefix = one_only ? ".gnu.linkonce.lr." : ".lrodata.";
2462 case SECCAT_SRODATA:
2469 /* We don't split these for medium model. Place them into
2470 default sections and hope for best. */
/* The section name is PREFIX followed by the assembler name of DECL
   after targetm.strip_name_encoding has been applied to it. */
2478 plen = strlen (prefix);
2480 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
2481 name = targetm.strip_name_encoding (name);
2482 nlen = strlen (name);
/* The buffer has one extra byte for the terminating NUL, which the
   second memcpy copies together with NAME (nlen + 1 bytes). */
2484 string = alloca (nlen + plen + 1);
2485 memcpy (string, prefix, plen);
2486 memcpy (string + plen, name, nlen + 1);
2488 DECL_SECTION_NAME (decl) = build_string (nlen + plen, string);
/* Generic naming for everything else. */
2492 default_unique_section (decl, reloc);
2495 #ifdef COMMON_ASM_OP
2496 /* This says how to output assembler code to declare an
2497 uninitialized external linkage data object.
2499 For medium model x86-64 we need to use the .largecomm directive for large objects. */
2502 x86_elf_aligned_common (FILE *file,
2503 const char *name, unsigned HOST_WIDE_INT size,
/* Under the medium code models, an object whose SIZE exceeds
   ix86_section_threshold is announced with .largecomm. */
2506 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2507 && size > (unsigned int)ix86_section_threshold)
2508 fprintf (file, ".largecomm\t");
2510 fprintf (file, "%s", COMMON_ASM_OP);
/* After the directive come the symbol name, the size, and the alignment
   divided by BITS_PER_UNIT. */
2511 assemble_name (file, name);
2512 fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
2513 size, align / BITS_PER_UNIT);
2517 /* Utility function for targets to use in implementing
2518 ASM_OUTPUT_ALIGNED_BSS.
   Writes to FILE: switches section, emits the alignment, declares or
   labels NAME, and reserves SIZE bytes.
   NOTE(review): DECL is marked ATTRIBUTE_UNUSED although it is passed
   to get_named_section below - confirm the marker is still wanted. */
2521 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
2522 const char *name, unsigned HOST_WIDE_INT size,
/* Objects above ix86_section_threshold under the medium code models
   are placed in the .lbss section. */
2525 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2526 && size > (unsigned int)ix86_section_threshold)
2527 switch_to_section (get_named_section (decl, ".lbss", 0));
2529 switch_to_section (bss_section);
/* ASM_OUTPUT_ALIGN is given the base-2 logarithm of the alignment
   after division by BITS_PER_UNIT. */
2530 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
2531 #ifdef ASM_DECLARE_OBJECT_NAME
2532 last_assemble_variable_decl = decl;
2533 ASM_DECLARE_OBJECT_NAME (file, name, decl);
2535 /* Standard thing is just output label for the object. */
2536 ASM_OUTPUT_LABEL (file, name);
2537 #endif /* ASM_DECLARE_OBJECT_NAME */
/* A SIZE of zero still reserves one byte. */
2538 ASM_OUTPUT_SKIP (file, size ? size : 1);
/* Establish target-specific defaults for several flag_* variables.
   SIZE is marked ATTRIBUTE_UNUSED.  The value 2 stored below acts as a
   marker that override_options later resolves with its == 2 tests. */
2542 optimization_options (int level, int size ATTRIBUTE_UNUSED)
2544 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
2545 make the problem with not enough registers even worse. */
2546 #ifdef INSN_SCHEDULING
2548 flag_schedule_insns = 0;
2552 /* The Darwin libraries never set errno, so we might as well
2553 avoid calling them when that's the only reason we would. */
2554 flag_errno_math = 0;
2556 /* The default values of these switches depend on the TARGET_64BIT
2557 that is not known at this moment. Mark these values with 2 and
2558 let the user override them. In case there is no command line option
2559 specifying them, we will set the defaults in override_options. */
2561 flag_omit_frame_pointer = 2;
2562 flag_pcc_struct_return = 2;
2563 flag_asynchronous_unwind_tables = 2;
/* Optional hook for subtargets to adjust the defaults further. */
2564 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
2565 SUBTARGET_OPTIMIZATION_OPTIONS;
2569 /* Decide whether we can make a sibling call to a function. DECL is the
2570 declaration of the function being targeted by the call and EXP is the
2571 CALL_EXPR representing the call.
   DECL is null for an indirect call; see the !decl tests below. */
2574 ix86_function_ok_for_sibcall (tree decl, tree exp)
2579 /* If we are generating position-independent code, we cannot sibcall
2580 optimize any indirect call, or a direct call to a global function,
2581 as the PLT requires %ebx be live. */
2582 if (!TARGET_64BIT && flag_pic && (!decl || !targetm.binds_local_p (decl)))
/* FUNC becomes the type of the called function: start from the type of
   the function expression of the call and, when that is a pointer type,
   use the type it points to. */
2589 func = TREE_TYPE (CALL_EXPR_FN (exp));
2590 if (POINTER_TYPE_P (func))
2591 func = TREE_TYPE (func);
2594 /* Check that the return value locations are the same. Like
2595 if we are returning floats on the 80387 register stack, we cannot
2596 make a sibcall from a function that doesn't return a float to a
2597 function that does or, conversely, from a function that does return
2598 a float to a function that doesn't; the necessary stack adjustment
2599 would not be executed. This is also the place we notice
2600 differences in the return value ABI. Note that it is ok for one
2601 of the functions to have void return type as long as the return
2602 value of the other is passed in a register. */
/* A is computed from the type of the call expression, B from the
   result declaration of the current function (cfun->decl). */
2603 a = ix86_function_value (TREE_TYPE (exp), func, false);
2604 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
2606 if (STACK_REG_P (a) || STACK_REG_P (b))
2608 if (!rtx_equal_p (a, b))
2611 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
2613 else if (!rtx_equal_p (a, b))
2616 /* If this call is indirect, we'll need to be able to use a call-clobbered
2617 register for the address of the target function. Make sure that all
2618 such registers are not used for passing parameters. */
2619 if (!decl && !TARGET_64BIT)
2623 /* We're looking at the CALL_EXPR, we need the type of the function. */
2624 type = CALL_EXPR_FN (exp); /* pointer expression */
2625 type = TREE_TYPE (type); /* pointer type */
2626 type = TREE_TYPE (type); /* function type */
2628 if (ix86_function_regparm (type, NULL) >= 3)
2630 /* ??? Need to count the actual number of registers to be used,
2631 not the possible number of registers. Fix later. */
2636 /* Dllimport'd functions are also called indirectly. */
2637 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
2638 && decl && DECL_DLLIMPORT_P (decl)
2639 && ix86_function_regparm (TREE_TYPE (decl), NULL) >= 3)
2642 /* If we force-aligned the stack, then sibcalling would unalign the
2643 stack, which may break the called function. */
2644 if (cfun->machine->force_align_arg_pointer)
2647 /* Otherwise okay. That also includes certain types of indirect calls. */
2651 /* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm"
2652 calling convention attributes;
2653 arguments as in struct attribute_spec.handler.
   *NODE is the tree the attribute is applied to, NAME the attribute
   identifier, and ARGS its argument list (only read for regparm).
   *NO_ADD_ATTRS is set to true on the paths that warn. */
2656 ix86_handle_cconv_attribute (tree *node, tree name,
2658 int flags ATTRIBUTE_UNUSED,
/* The attribute is accepted only on function types, method types,
   field declarations and type declarations. */
2661 if (TREE_CODE (*node) != FUNCTION_TYPE
2662 && TREE_CODE (*node) != METHOD_TYPE
2663 && TREE_CODE (*node) != FIELD_DECL
2664 && TREE_CODE (*node) != TYPE_DECL)
2666 warning (OPT_Wattributes, "%qs attribute only applies to functions",
2667 IDENTIFIER_POINTER (name));
2668 *no_add_attrs = true;
2672 /* Can combine regparm with all attributes but fastcall. */
2673 if (is_attribute_p ("regparm", name))
2677 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2679 error ("fastcall and regparm attributes are not compatible");
/* The regparm argument must be an integer constant no larger than
   REGPARM_MAX. */
2682 cst = TREE_VALUE (args);
2683 if (TREE_CODE (cst) != INTEGER_CST)
2685 warning (OPT_Wattributes,
2686 "%qs attribute requires an integer constant argument",
2687 IDENTIFIER_POINTER (name));
2688 *no_add_attrs = true;
2690 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
2692 warning (OPT_Wattributes, "argument to %qs attribute larger than %d",
2693 IDENTIFIER_POINTER (name), REGPARM_MAX);
2694 *no_add_attrs = true;
/* NOTE(review): compare_tree_int is used as a truth value in the test
   below, so any CST different from REGPARM_MAX-1 (smaller as well as
   larger) satisfies it, whereas the check above tests > 0.  The message
   says limited to, so > 0 was presumably intended - confirm. */
2698 && lookup_attribute (ix86_force_align_arg_pointer_string,
2699 TYPE_ATTRIBUTES (*node))
2700 && compare_tree_int (cst, REGPARM_MAX-1))
2702 error ("%s functions limited to %d register parameters",
2703 ix86_force_align_arg_pointer_string, REGPARM_MAX-1);
2711 /* Do not warn when emulating the MS ABI. */
2712 if (!TARGET_64BIT_MS_ABI)
2713 warning (OPT_Wattributes, "%qs attribute ignored",
2714 IDENTIFIER_POINTER (name));
2715 *no_add_attrs = true;
2719 /* Can combine fastcall with sseregparm. The checks below diagnose cdecl, stdcall and regparm as incompatible with fastcall. */
2720 if (is_attribute_p ("fastcall", name))
2722 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
2724 error ("fastcall and cdecl attributes are not compatible");
2726 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
2728 error ("fastcall and stdcall attributes are not compatible");
2730 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
2732 error ("fastcall and regparm attributes are not compatible");
2736 /* Can combine stdcall with fastcall (redundant), regparm and
2738 else if (is_attribute_p ("stdcall", name))
2740 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
2742 error ("stdcall and cdecl attributes are not compatible");
2744 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2746 error ("stdcall and fastcall attributes are not compatible");
2750 /* Can combine cdecl with regparm and sseregparm. */
2751 else if (is_attribute_p ("cdecl", name))
2753 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
2755 error ("stdcall and cdecl attributes are not compatible");
2757 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2759 error ("fastcall and cdecl attributes are not compatible");
2763 /* Can combine sseregparm with all attributes. */
2768 /* Return 0 if the attributes for two types are incompatible, 1 if they
2769 are compatible, and 2 if they are nearly compatible (which causes a
2770 warning to be generated).
   Each test below has the form !a != !b, so it compares whether an
   attribute is present on TYPE1 and TYPE2, not the attribute nodes. */
2773 ix86_comp_type_attributes (tree type1, tree type2)
2775 /* Check for mismatch of non-default calling convention. */
/* RTDSTR is cdecl when TARGET_RTD is set and stdcall otherwise. */
2776 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
/* Only the tree code of TYPE1 is examined here. */
2778 if (TREE_CODE (type1) != FUNCTION_TYPE)
2781 /* Check for mismatched fastcall/regparm types. */
2782 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
2783 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
2784 || (ix86_function_regparm (type1, NULL)
2785 != ix86_function_regparm (type2, NULL)))
2788 /* Check for mismatched sseregparm types. */
2789 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
2790 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
2793 /* Check for mismatched return types (cdecl vs stdcall). */
2794 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
2795 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
2801 /* Return the regparm value for a function with the indicated TYPE and DECL.
2802 DECL may be NULL when calling function indirectly
2803 or considering a libcall. */
2806 ix86_function_regparm (tree type, tree decl)
/* Start from ix86_regparm, the value that override_options sets. */
2809 int regparm = ix86_regparm;
/* An explicit regparm attribute on TYPE supplies the value directly. */
2814 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
2816 return TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
2818 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
2821 /* Use register calling convention for local functions when possible. */
2822 if (decl && TREE_CODE (decl) == FUNCTION_DECL
2823 && flag_unit_at_a_time && !profile_flag)
2825 struct cgraph_local_info *i = cgraph_local_info (decl);
2828 int local_regparm, globals = 0, regno;
2831 /* Make sure no regparm register is taken by a
2832 global register variable. */
2833 for (local_regparm = 0; local_regparm < 3; local_regparm++)
2834 if (global_regs[local_regparm])
2837 /* We can't use regparm(3) for nested functions as these use
2838 static chain pointer in third argument. */
2839 if (local_regparm == 3
2840 && decl_function_context (decl)
2841 && !DECL_NO_STATIC_CHAIN (decl))
2844 /* If the function realigns its stackpointer, the prologue will
2845 clobber %ecx. If we've already generated code for the callee,
2846 the callee DECL_STRUCT_FUNCTION is gone, so we fall back to
2847 scanning the attributes for the self-realigning property. */
2848 f = DECL_STRUCT_FUNCTION (decl);
2849 if (local_regparm == 3
2850 && (f ? !!f->machine->force_align_arg_pointer
2851 : !!lookup_attribute (ix86_force_align_arg_pointer_string,
2852 TYPE_ATTRIBUTES (TREE_TYPE (decl)))))
2855 /* Each global register variable increases register pressure,
2856 so the more global reg vars there are, the smaller regparm
2857 optimization use, unless requested by the user explicitly. */
/* Registers 0 to 5 of global_regs are scanned; the expression that
   follows is local_regparm minus GLOBALS, clamped at zero. */
2858 for (regno = 0; regno < 6; regno++)
2859 if (global_regs[regno])
2862 = globals < local_regparm ? local_regparm - globals : 0;
/* The locally computed value can only raise REGPARM. */
2864 if (local_regparm > regparm)
2865 regparm = local_regparm;
2872 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
2873 DFmode (2) arguments in SSE registers for a function with the
2874 indicated TYPE and DECL. DECL may be NULL when calling function
2875 indirectly or considering a libcall. Otherwise return 0. */
/* This query asserts !TARGET_64BIT.  Two things can enable SSE argument
   passing.  The first is an explicit request: TARGET_SSEREGPARM, or a
   "sseregparm" attribute on a non-null TYPE.  Under that request an
   error naming DECL or TYPE may be reported; per the message text this
   is the "without SSE/SSE2 enabled" case (NOTE(review): the exact
   guarding condition is on lines not visible here).  The second is the
   local-function path, tried only when DECL is non-null,
   TARGET_SSE_MATH is set, flag_unit_at_a_time is on and profile_flag
   is off; it consults cgraph_local_info and yields 2 with TARGET_SSE2
   and 1 otherwise.  */
2878 ix86_function_sseregparm (tree type, tree decl)
2880 gcc_assert (!TARGET_64BIT);
2882 /* Use SSE registers to pass SFmode and DFmode arguments if requested
2883 by the sseregparm attribute. */
2884 if (TARGET_SSEREGPARM
2885 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
2890 error ("Calling %qD with attribute sseregparm without "
2891 "SSE/SSE2 enabled", decl);
2893 error ("Calling %qT with attribute sseregparm without "
2894 "SSE/SSE2 enabled", type);
2901 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
2902 (and DFmode for SSE2) arguments in SSE registers. */
2903 if (decl && TARGET_SSE_MATH && flag_unit_at_a_time && !profile_flag)
2905 struct cgraph_local_info *i = cgraph_local_info (decl);
2907 return TARGET_SSE2 ? 2 : 1;
2913 /* Return true if EAX is live at the start of the function. Used by
2914 ix86_expand_prologue to determine if we need special help before
2915 calling allocate_stack_worker. */
2918 ix86_eax_live_at_start_p (void)
2920 /* Cheat. Don't bother working forward from ix86_function_regparm
2921 to the function type to whether an actual argument is located in
2922 eax. Instead just look at cfg info, which is still close enough
2923 to correct at this point. This gives false positives for broken
2924 functions that might use uninitialized data that happens to be
2925 allocated in eax, but who cares? */
/* Hard register number 0 is the one tested, in the global_live_at_end
   set recorded on the entry block.  */
2926 return REGNO_REG_SET_P (ENTRY_BLOCK_PTR->il.rtl->global_live_at_end, 0);
2929 /* Return true if TYPE has a variable argument list. */
/* The TYPE_ARG_TYPES chain of TYPE is followed through TREE_CHAIN until
   there is no successor; the answer is whether the TREE_VALUE of the
   node reached is something other than void_type_node.
   NOTE(review): the loop body (presumably advancing T to N) and any
   handling of a null argument list are on lines not visible here.  */
2932 type_has_variadic_args_p (tree type)
2934 tree n, t = TYPE_ARG_TYPES (type);
2939 while ((n = TREE_CHAIN (t)) != NULL)
2942 return TREE_VALUE (t) != void_type_node;
2945 /* Value is the number of bytes of arguments automatically
2946 popped when returning from a subroutine call.
2947 FUNDECL is the declaration node of the function (as a tree),
2948 FUNTYPE is the data type of the function (as a tree),
2949 or for a library call it is an identifier node for the subroutine name.
2950 SIZE is the number of bytes of arguments passed on the stack.
2952 On the 80386, the RTD insn may be used to pop them if the number
2953 of args is fixed, but if the number is variable then the caller
2954 must pop them all. RTD can't be used for library calls now
2955 because the library is compiled with the Unix compiler.
2956 Use of RTD is a selectable option, since it is incompatible with
2957 standard Unix calling sequences. If the option is not selected,
2958 the caller must always pop the args.
2960 The attribute stdcall is equivalent to RTD on a per module basis. */
/* Summary of the visible decision inputs: RTD is in effect when
   TARGET_RTD is set and FUNDECL is either null or not an
   IDENTIFIER_NODE.  The "cdecl", "stdcall" and "fastcall" attributes of
   FUNTYPE are consulted, and RTD is only honoured for functions that
   type_has_variadic_args_p reports as non-variadic.  Separately, when
   the return value is an aggregate returned in memory and
   KEEP_AGGREGATE_RETURN_POINTER is false, the function may return
   GET_MODE_SIZE (Pmode), i.e. the size of one pointer; the regparm
   count from ix86_function_regparm is read just before that.
   NOTE(review): the return statements for the other cases are on lines
   not visible here.  */
2963 ix86_return_pops_args (tree fundecl, tree funtype, int size)
2967 /* None of the 64-bit ABIs pop arguments. */
2971 rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
2973 /* Cdecl functions override -mrtd, and never pop the stack. */
2974 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype)))
2976 /* Stdcall and fastcall functions will pop the stack if not
2978 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
2979 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
2982 if (rtd && ! type_has_variadic_args_p (funtype))
2986 /* Lose any fake structure return argument if it is passed on the stack. */
2987 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
2988 && !KEEP_AGGREGATE_RETURN_POINTER)
2990 int nregs = ix86_function_regparm (funtype, fundecl);
2992 return GET_MODE_SIZE (Pmode);
2998 /* Argument support functions. */
3000 /* Return true when register may be used to pass function parameters. */
/* Several alternative answers are visible.  Two of them accept any
   REGNO below REGPARM_MAX: one additionally accepts any SSE register
   that is not fixed, the other accepts MMX and SSE registers only
   within the first MMX_REGPARM_MAX / SSE_REGPARM_MAX registers of
   their class, each gated by TARGET_MMX / TARGET_SSE.  The last part
   accepts register 0 (RAX, the hidden va_arg argument) unless
   TARGET_64BIT_MS_ABI is set, and then searches the first REGPARM_MAX
   entries of the integer parameter-register table, using the MS ABI
   table when TARGET_64BIT_MS_ABI is set.
   NOTE(review): the conditions that select among these paths
   (presumably 32-bit versus 64-bit, and a target variant) are on lines
   not visible here.  */
3002 ix86_function_arg_regno_p (int regno)
3005 const int *parm_regs;
3010 return (regno < REGPARM_MAX
3011 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
3013 return (regno < REGPARM_MAX
3014 || (TARGET_MMX && MMX_REGNO_P (regno)
3015 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
3016 || (TARGET_SSE && SSE_REGNO_P (regno)
3017 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
3022 if (SSE_REGNO_P (regno) && TARGET_SSE)
3027 if (TARGET_SSE && SSE_REGNO_P (regno)
3028 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
3032 /* RAX is used as hidden argument to va_arg functions. */
3033 if (!TARGET_64BIT_MS_ABI && regno == 0)
3036 if (TARGET_64BIT_MS_ABI)
3037 parm_regs = x86_64_ms_abi_int_parameter_registers;
3039 parm_regs = x86_64_int_parameter_registers;
3040 for (i = 0; i < REGPARM_MAX; i++)
3041 if (regno == parm_regs[i])
3046 /* Return true if we do not know how to pass TYPE solely in registers. */
/* The generic must_pass_in_stack_var_size_or_pad test is tried first.
   Otherwise the answer is true only on non-64-bit targets for a TImode
   argument that has a type and whose type is not a VECTOR_TYPE.  */
3049 ix86_must_pass_in_stack (enum machine_mode mode, tree type)
3051 if (must_pass_in_stack_var_size_or_pad (mode, type))
3054 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
3055 The layout_type routine is crafty and tries to trick us into passing
3056 currently unsupported vector types on the stack by using TImode. */
3057 return (!TARGET_64BIT && mode == TImode
3058 && type && TREE_CODE (type) != VECTOR_TYPE);
3061 /* Initialize a variable CUM of type CUMULATIVE_ARGS
3062 for a call to a function whose data type is FNTYPE.
3063 For a library call, FNTYPE is 0. */
/* CUM is zero-filled first, then given its defaults: nregs from
   ix86_regparm, sse_nregs and mmx_nregs from SSE_REGPARM_MAX and
   MMX_REGPARM_MAX, and both warn_sse and warn_mmx enabled.  When FNTYPE
   is given, maybe_vaarg is set if FNTYPE has no argument type list or
   type_has_variadic_args_p says it is variadic.  Further down, nregs is
   refined from the "fastcall" attribute or ix86_function_regparm, and
   float_in_sse is taken from ix86_function_sseregparm.
   NOTE(review): the conditions guarding those later steps, and the
   maybe_vaarg value used when FNTYPE is 0, are on lines not visible
   here.  */
3066 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
3067 tree fntype, /* tree ptr for function type */
3068 rtx libname, /* SYMBOL_REF of library name or 0 */
3071 memset (cum, 0, sizeof (*cum));
3073 /* Set up the number of registers to use for passing arguments. */
3074 cum->nregs = ix86_regparm;
3076 cum->sse_nregs = SSE_REGPARM_MAX;
3078 cum->mmx_nregs = MMX_REGPARM_MAX;
3079 cum->warn_sse = true;
3080 cum->warn_mmx = true;
3081 cum->maybe_vaarg = (fntype
3082 ? (!TYPE_ARG_TYPES (fntype)
3083 || type_has_variadic_args_p (fntype))
3088 /* If there are variable arguments, then we won't pass anything
3089 in registers in 32-bit mode. */
3090 if (cum->maybe_vaarg)
3100 /* Use ecx and edx registers if function has fastcall attribute,
3101 else look for regparm information. */
3104 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
3110 cum->nregs = ix86_function_regparm (fntype, fndecl);
3113 /* Set up the number of SSE registers used for passing SFmode
3114 and DFmode arguments. Warn for mismatching ABI. */
3115 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl);
3119 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
3120 But in the case of vector types, it is some vector mode.
3122 When we have only some of our vector isa extensions enabled, then there
3123 are some modes for which vector_mode_supported_p is false. For these
3124 modes, the generic vector support in gcc will choose some non-vector mode
3125 in order to implement the type. By computing the natural mode, we'll
3126 select the proper ABI location for the operand and not depend on whatever
3127 the middle-end decides to do with these vector types. */
/* The search only runs for a VECTOR_TYPE whose TYPE_MODE is not already
   a vector mode, whose size is 8 or 16 bytes and which has more than
   one subpart.  It starts at MIN_MODE_VECTOR_FLOAT for REAL_TYPE
   elements and MIN_MODE_VECTOR_INT otherwise, and steps through wider
   modes looking for one whose unit count equals the number of subparts
   and whose inner mode equals the element mode.
   NOTE(review): what is returned on a match and on an exhausted search
   is on lines not visible here.  */
3129 static enum machine_mode
3130 type_natural_mode (tree type)
3132 enum machine_mode mode = TYPE_MODE (type);
3134 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
3136 HOST_WIDE_INT size = int_size_in_bytes (type);
3137 if ((size == 8 || size == 16)
3138 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
3139 && TYPE_VECTOR_SUBPARTS (type) > 1)
3141 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
3143 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
3144 mode = MIN_MODE_VECTOR_FLOAT;
3146 mode = MIN_MODE_VECTOR_INT;
3148 /* Get the mode which has this inner mode and number of units. */
3149 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
3150 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
3151 && GET_MODE_INNER (mode) == innermode)
3161 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
3162 this may not agree with the mode that the type system has chosen for the
3163 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
3164 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
/* In the BLKmode case the result is a PARALLEL in ORIG_MODE with a
   single element: an EXPR_LIST pairing a MODE register REGNO with
   offset const0_rtx.  */
3167 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
3172 if (orig_mode != BLKmode)
3173 tmp = gen_rtx_REG (orig_mode, regno);
3176 tmp = gen_rtx_REG (mode, regno);
3177 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
3178 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
3184 /* x86-64 register passing implementation. See x86-64 ABI for details. The
3185 goal of this code is to classify each 8 bytes of an incoming argument by
3186 register class and to assign registers accordingly. */
3188 /* Return the union class of CLASS1 and CLASS2.
3189 See the x86-64 PS ABI for details. */
/* The rules are tried in the order written, so the first one that
   matches decides the result.  Rule #4 has a special case ahead of the
   general one: INTEGERSI merged with SSESF stays INTEGERSI instead of
   becoming INTEGER.  Anything left after the X87-family test is
   SSE.  */
3191 static enum x86_64_reg_class
3192 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
3194 /* Rule #1: If both classes are equal, this is the resulting class. */
3195 if (class1 == class2)
3198 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
3200 if (class1 == X86_64_NO_CLASS)
3202 if (class2 == X86_64_NO_CLASS)
3205 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
3206 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
3207 return X86_64_MEMORY_CLASS;
3209 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
3210 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
3211 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
3212 return X86_64_INTEGERSI_CLASS;
3213 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
3214 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
3215 return X86_64_INTEGER_CLASS;
3217 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
3219 if (class1 == X86_64_X87_CLASS
3220 || class1 == X86_64_X87UP_CLASS
3221 || class1 == X86_64_COMPLEX_X87_CLASS
3222 || class2 == X86_64_X87_CLASS
3223 || class2 == X86_64_X87UP_CLASS
3224 || class2 == X86_64_COMPLEX_X87_CLASS)
3225 return X86_64_MEMORY_CLASS;
3227 /* Rule #6: Otherwise class SSE is used. */
3228 return X86_64_SSE_CLASS;
3231 /* Classify the argument of type TYPE and mode MODE.
3232 CLASSES will be filled by the register class used to pass each word
3233 of the operand. The number of words is returned. In case the parameter
3234 should be passed in memory, 0 is returned. As a special case for zero
3235 sized containers, classes[0] will be NO_CLASS and 1 is returned.
3237 BIT_OFFSET is used internally for handling records and specifies the
3238 offset in bits, taken modulo 256 to avoid overflow cases.
Aggregates are classified by recursing over their fields or elements and
combining the per-word results with merge_classes; other types are
classified directly from MODE.
3240 See the x86-64 PS ABI for details.
3244 classify_argument (enum machine_mode mode, tree type,
3245 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
3247 HOST_WIDE_INT bytes =
3248 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3249 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3251 /* Variable sized entities are always passed/returned in memory. */
3255 if (mode != VOIDmode
3256 && targetm.calls.must_pass_in_stack (mode, type))
3259 if (type && AGGREGATE_TYPE_P (type))
3263 enum x86_64_reg_class subclasses[MAX_CLASSES];
3265 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
3269 for (i = 0; i < words; i++)
3270 classes[i] = X86_64_NO_CLASS;
3272 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
3273 signal memory class, so handle it as special case. */
3276 classes[0] = X86_64_NO_CLASS;
3280 /* Classify each field of record and merge classes. */
3281 switch (TREE_CODE (type))
3284 /* And now merge the fields of structure. */
3285 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3287 if (TREE_CODE (field) == FIELD_DECL)
3291 if (TREE_TYPE (field) == error_mark_node)
3294 /* Bitfields are always classified as integer. Handle them
3295 early, since later code would consider them to be
3296 misaligned integers. */
3297 if (DECL_BIT_FIELD (field))
3299 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
3300 i < ((int_bit_position (field) + (bit_offset % 64))
3301 + tree_low_cst (DECL_SIZE (field), 0)
3304 merge_classes (X86_64_INTEGER_CLASS,
3309 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3310 TREE_TYPE (field), subclasses,
3311 (int_bit_position (field)
3312 + bit_offset) % 256);
3315 for (i = 0; i < num; i++)
3318 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
3320 merge_classes (subclasses[i], classes[i + pos]);
3328 /* Arrays are handled as small records. */
3331 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
3332 TREE_TYPE (type), subclasses, bit_offset);
3336 /* The partial classes are now full classes. */
3337 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
3338 subclasses[0] = X86_64_SSE_CLASS;
3339 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
3340 subclasses[0] = X86_64_INTEGER_CLASS;
/* Repeat the element's classification cyclically across every word of
   the array.  */
3342 for (i = 0; i < words; i++)
3343 classes[i] = subclasses[i % num];
3348 case QUAL_UNION_TYPE:
3349 /* Unions are similar to RECORD_TYPE but offset is always 0.
3351 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3353 if (TREE_CODE (field) == FIELD_DECL)
3357 if (TREE_TYPE (field) == error_mark_node)
3360 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3361 TREE_TYPE (field), subclasses,
3365 for (i = 0; i < num; i++)
3366 classes[i] = merge_classes (subclasses[i], classes[i]);
3375 /* Final merger cleanup. */
3376 for (i = 0; i < words; i++)
3378 /* If one class is MEMORY, everything should be passed in
3380 if (classes[i] == X86_64_MEMORY_CLASS)
3383 /* The X86_64_SSEUP_CLASS should be always preceded by
3384 X86_64_SSE_CLASS. */
3385 if (classes[i] == X86_64_SSEUP_CLASS
3386 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
3387 classes[i] = X86_64_SSE_CLASS;
3389 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
/* NOTE(review): a stray X87UP word is rewritten to SSE class, the same
   replacement used for a stray SSEUP just above -- confirm this is the
   intended class.  */
3390 if (classes[i] == X86_64_X87UP_CLASS
3391 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
3392 classes[i] = X86_64_SSE_CLASS;
3397 /* Compute alignment needed. We align all types to natural boundaries with
3398 exception of XFmode that is aligned to 64bits. */
3399 if (mode != VOIDmode && mode != BLKmode)
3401 int mode_alignment = GET_MODE_BITSIZE (mode);
3404 mode_alignment = 128;
3405 else if (mode == XCmode)
3406 mode_alignment = 256;
3407 if (COMPLEX_MODE_P (mode))
3408 mode_alignment /= 2;
3409 /* Misaligned fields are always returned in memory. */
3410 if (bit_offset % mode_alignment)
3414 /* for V1xx modes, just use the base mode */
3415 if (VECTOR_MODE_P (mode)
3416 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
3417 mode = GET_MODE_INNER (mode);
3419 /* Classification of atomic types. */
3424 classes[0] = X86_64_SSE_CLASS;
3427 classes[0] = X86_64_SSE_CLASS;
3428 classes[1] = X86_64_SSEUP_CLASS;
3437 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3438 classes[0] = X86_64_INTEGERSI_CLASS;
3440 classes[0] = X86_64_INTEGER_CLASS;
3444 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
3449 if (!(bit_offset % 64))
3450 classes[0] = X86_64_SSESF_CLASS;
3452 classes[0] = X86_64_SSE_CLASS;
3455 classes[0] = X86_64_SSEDF_CLASS;
3458 classes[0] = X86_64_X87_CLASS;
3459 classes[1] = X86_64_X87UP_CLASS;
3462 classes[0] = X86_64_SSE_CLASS;
3463 classes[1] = X86_64_SSEUP_CLASS;
3466 classes[0] = X86_64_SSE_CLASS;
3469 classes[0] = X86_64_SSEDF_CLASS;
3470 classes[1] = X86_64_SSEDF_CLASS;
3473 classes[0] = X86_64_COMPLEX_X87_CLASS;
3476 /* This mode is larger than 16 bytes. */
3484 classes[0] = X86_64_SSE_CLASS;
3485 classes[1] = X86_64_SSEUP_CLASS;
3491 classes[0] = X86_64_SSE_CLASS;
3497 gcc_assert (VECTOR_MODE_P (mode));
3502 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
3504 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3505 classes[0] = X86_64_INTEGERSI_CLASS;
3507 classes[0] = X86_64_INTEGER_CLASS;
3508 classes[1] = X86_64_INTEGER_CLASS;
3509 return 1 + (bytes > 8);
3513 /* Examine the argument and store the number of registers required in each
3514 class through INT_NREGS and SSE_NREGS. Return 0 iff parameter should be
passed in memory. */
/* The argument is classified with classify_argument at bit offset 0 and
   the resulting classes are walked from the last word to the first.
   A COMPLEX_X87 word makes the result 2 when IN_RETURN is nonzero and 0
   otherwise.
   NOTE(review): the per-class actions for the other cases (counter
   updates and early returns) are on lines not visible here.  */
3516 examine_argument (enum machine_mode mode, tree type, int in_return,
3517 int *int_nregs, int *sse_nregs)
3519 enum x86_64_reg_class class[MAX_CLASSES];
3520 int n = classify_argument (mode, type, class, 0);
3526 for (n--; n >= 0; n--)
3529 case X86_64_INTEGER_CLASS:
3530 case X86_64_INTEGERSI_CLASS:
3533 case X86_64_SSE_CLASS:
3534 case X86_64_SSESF_CLASS:
3535 case X86_64_SSEDF_CLASS:
3538 case X86_64_NO_CLASS:
3539 case X86_64_SSEUP_CLASS:
3541 case X86_64_X87_CLASS:
3542 case X86_64_X87UP_CLASS:
3546 case X86_64_COMPLEX_X87_CLASS:
3547 return in_return ? 2 : 0;
3548 case X86_64_MEMORY_CLASS:
3554 /* Construct container for the argument used by GCC interface. See
3555 FUNCTION_ARG for the detailed description. */
/* MODE is the natural mode of the value and ORIG_MODE the mode chosen
   by the type system; both are forwarded to gen_reg_or_parallel for
   single SSE-class values.  IN_RETURN is nonzero when a return value is
   being described, which selects the diagnostics below.  NINTREGS and
   NSSEREGS are the numbers of integer and SSE registers available; the
   container is not built from registers when the argument needs more
   than that.  INTREG points at the hard register numbers to use for
   integer words and SSE_REGNO is the index of the first SSE register to
   use.

   Simple shapes (one word, SSE+SSEUP, X87+X87UP, two consecutive
   integer registers) become a single REG; everything else becomes a
   PARALLEL assembled from the EXP array.  */
3558 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
3559 tree type, int in_return, int nintregs, int nsseregs,
3560 const int *intreg, int sse_regno)
3562 /* The following variables hold the static issued_error state. */
/* Being static, each flag lets its diagnostic be issued at most once.  */
3563 static bool issued_sse_arg_error;
3564 static bool issued_sse_ret_error;
3565 static bool issued_x87_ret_error;
3567 enum machine_mode tmpmode;
3569 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3570 enum x86_64_reg_class class[MAX_CLASSES];
3574 int needed_sseregs, needed_intregs;
3575 rtx exp[MAX_CLASSES];
3578 n = classify_argument (mode, type, class, 0);
3581 if (!examine_argument (mode, type, in_return, &needed_intregs,
3584 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
3587 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
3588 some less clueful developer tries to use floating-point anyway. */
3589 if (needed_sseregs && !TARGET_SSE)
3593 if (!issued_sse_ret_error)
3595 error ("SSE register return with SSE disabled");
3596 issued_sse_ret_error = true;
3599 else if (!issued_sse_arg_error)
3601 error ("SSE register argument with SSE disabled");
3602 issued_sse_arg_error = true;
3607 /* Likewise, error if the ABI requires us to return values in the
3608 x87 registers and the user specified -mno-80387. */
3609 if (!TARGET_80387 && in_return)
3610 for (i = 0; i < n; i++)
3611 if (class[i] == X86_64_X87_CLASS
3612 || class[i] == X86_64_X87UP_CLASS
3613 || class[i] == X86_64_COMPLEX_X87_CLASS)
3615 if (!issued_x87_ret_error)
3617 error ("x87 register return with x87 disabled");
3618 issued_x87_ret_error = true;
3623 /* First construct simple cases. Avoid SCmode, since we want to use
3624 single register to pass this type. */
3625 if (n == 1 && mode != SCmode)
3628 case X86_64_INTEGER_CLASS:
3629 case X86_64_INTEGERSI_CLASS:
3630 return gen_rtx_REG (mode, intreg[0]);
3631 case X86_64_SSE_CLASS:
3632 case X86_64_SSESF_CLASS:
3633 case X86_64_SSEDF_CLASS:
3634 return gen_reg_or_parallel (mode, orig_mode, SSE_REGNO (sse_regno));
3635 case X86_64_X87_CLASS:
3636 case X86_64_COMPLEX_X87_CLASS:
3637 return gen_rtx_REG (mode, FIRST_STACK_REG);
3638 case X86_64_NO_CLASS:
3639 /* Zero sized array, struct or class. */
3644 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS
3646 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
3649 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
3650 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
/* Two integer words can share one REG only when the two hard registers
   are consecutive.  */
3651 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
3652 && class[1] == X86_64_INTEGER_CLASS
3653 && (mode == CDImode || mode == TImode || mode == TFmode)
3654 && intreg[0] + 1 == intreg[1])
3655 return gen_rtx_REG (mode, intreg[0]);
3657 /* Otherwise figure out the entries of the PARALLEL. */
3658 for (i = 0; i < n; i++)
3662 case X86_64_NO_CLASS:
3664 case X86_64_INTEGER_CLASS:
3665 case X86_64_INTEGERSI_CLASS:
3666 /* Merge TImodes on aligned occasions here too. */
/* A trailing partial word gets the integer mode matching the bytes that
   remain.  */
3667 if (i * 8 + 8 > bytes)
3668 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
3669 else if (class[i] == X86_64_INTEGERSI_CLASS)
3673 /* We've requested 24 bytes we don't have mode for. Use DImode. */
3674 if (tmpmode == BLKmode)
3676 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3677 gen_rtx_REG (tmpmode, *intreg),
3681 case X86_64_SSESF_CLASS:
3682 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3683 gen_rtx_REG (SFmode,
3684 SSE_REGNO (sse_regno)),
3688 case X86_64_SSEDF_CLASS:
3689 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3690 gen_rtx_REG (DFmode,
3691 SSE_REGNO (sse_regno)),
3695 case X86_64_SSE_CLASS:
3696 if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
3700 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3701 gen_rtx_REG (tmpmode,
3702 SSE_REGNO (sse_regno)),
3704 if (tmpmode == TImode)
3713 /* Empty aligned struct, union or class. */
3717 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
3718 for (i = 0; i < nexps; i++)
3719 XVECEXP (ret, 0, i) = exp [i];
3723 /* Update the data in CUM to advance over an argument of mode MODE
3724 and data type TYPE. (TYPE is null for libcalls where that information
3725 may not be available.) */
/* Three register files are tracked with the same pattern: consume
   registers, then test whether the remaining count has dropped to zero
   or below.  The general-register case consumes WORDS registers.  The
   SSE and MMX cases apply only when TYPE is null or not an aggregate,
   and consume exactly one register each while adding WORDS to the
   matching word counter.  cum->float_in_sse is compared against 2 and 1
   along the way.
   NOTE(review): the mode dispatch that selects among these cases, and
   the action taken once a register file is exhausted, are on lines not
   visible here.  */
3728 function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3729 tree type, HOST_WIDE_INT bytes, HOST_WIDE_INT words)
3745 cum->words += words;
3746 cum->nregs -= words;
3747 cum->regno += words;
3749 if (cum->nregs <= 0)
3757 if (cum->float_in_sse < 2)
3760 if (cum->float_in_sse < 1)
3771 if (!type || !AGGREGATE_TYPE_P (type))
3773 cum->sse_words += words;
3774 cum->sse_nregs -= 1;
3775 cum->sse_regno += 1;
3776 if (cum->sse_nregs <= 0)
3788 if (!type || !AGGREGATE_TYPE_P (type))
3790 cum->mmx_words += words;
3791 cum->mmx_nregs -= 1;
3792 cum->mmx_regno += 1;
3793 if (cum->mmx_nregs <= 0)
/* Advance CUM over one argument; function_arg_advance calls this when
   TARGET_64BIT is set and TARGET_64BIT_MS_ABI is not.  If
   examine_argument returns 0 (argument passed in memory) only
   cum->words grows.  Otherwise, when the integer and SSE registers the
   argument needs are all still available, the register counts and
   register numbers in CUM are moved forward by those amounts.  The
   trailing cum->words update presumably covers the case where the
   registers do not suffice.  */
3804 function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3805 tree type, HOST_WIDE_INT words)
3807 int int_nregs, sse_nregs;
3809 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
3810 cum->words += words;
3811 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
3813 cum->nregs -= int_nregs;
3814 cum->sse_nregs -= sse_nregs;
3815 cum->regno += int_nregs;
3816 cum->sse_regno += sse_nregs;
3819 cum->words += words;
/* Advance CUM over one argument; function_arg_advance calls this when
   TARGET_64BIT_MS_ABI is set.  BYTES must be 1, 2, 4 or 8 (asserted),
   since anything else is expected to have been passed indirectly.
   NOTE(review): only the cum->words update is visible here; any
   register bookkeeping is on lines not shown.  */
3823 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
3824 HOST_WIDE_INT words)
3826 /* Otherwise, this should be passed indirect. */
3827 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
3829 cum->words += words;
/* Advance CUM past an argument of mode MODE and type TYPE.  The size is
   taken from TYPE for BLKmode and from MODE otherwise, and rounded up
   to whole words.  MODE may be replaced by type_natural_mode (TYPE)
   (NOTE(review): the guarding condition is on a line not visible
   here).  The work is then handed to the MS 64-bit, 64-bit or 32-bit
   helper according to TARGET_64BIT_MS_ABI and TARGET_64BIT.  NAMED is
   unused.  */
3838 function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3839 tree type, int named ATTRIBUTE_UNUSED)
3841 HOST_WIDE_INT bytes, words;
3843 if (mode == BLKmode)
3844 bytes = int_size_in_bytes (type);
3846 bytes = GET_MODE_SIZE (mode);
3847 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3850 mode = type_natural_mode (type);
3852 if (TARGET_64BIT_MS_ABI)
3853 function_arg_advance_ms_64 (cum, bytes, words);
3854 else if (TARGET_64BIT)
3855 function_arg_advance_64 (cum, mode, type, words);
3857 function_arg_advance_32 (cum, mode, type, bytes, words);
3860 /* Define where to put the arguments to a function.
3861 Value is zero to push the argument on the stack,
3862 or a hard register in which to store the argument.
3864 MODE is the argument's machine mode.
3865 TYPE is the data type of the argument (as a tree).
3866 This is null for libcalls where that information may
3868 CUM is a variable of type CUMULATIVE_ARGS which gives info about
3869 the preceding args and about the function being called.
3870 NAMED is nonzero if this argument is a named parameter
3871 (otherwise it is an extra parameter matching an ellipsis). */
/* This is the helper function_arg uses when neither TARGET_64BIT_MS_ABI
   nor TARGET_64BIT is set.  MODE is the (possibly vector) natural mode,
   ORIG_MODE the mode originally supplied, and BYTES / WORDS the
   argument size.  An argument that fits in the remaining cum->nregs
   general registers is returned as a REG; BLKmode and DImode are
   special-cased, and a comment notes that fastcall starts at ECX rather
   than EAX.  Non-aggregate arguments (or ones with no TYPE) destined
   for SSE or MMX go to the next register of that file through
   gen_reg_or_parallel.  When the corresponding ISA is disabled a
   warning is issued, gated by the static WARNEDSSE / WARNEDMMX flags
   and by cum->warn_sse / cum->warn_mmx.
   NOTE(review): the mode dispatch and the fall-through result are on
   lines not visible here.  */
3874 function_arg_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3875 enum machine_mode orig_mode, tree type,
3876 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
3878 static bool warnedsse, warnedmmx;
3880 /* Avoid the AL settings for the Unix64 ABI. */
3881 if (mode == VOIDmode)
3897 if (words <= cum->nregs)
3899 int regno = cum->regno;
3901 /* Fastcall allocates the first two DWORD (SImode) or
3902 smaller arguments to ECX and EDX. */
3905 if (mode == BLKmode || mode == DImode)
3908 /* ECX not EAX is the first allocated register. */
3912 return gen_rtx_REG (mode, regno);
3917 if (cum->float_in_sse < 2)
3920 if (cum->float_in_sse < 1)
3930 if (!type || !AGGREGATE_TYPE_P (type))
3932 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
3935 warning (0, "SSE vector argument without SSE enabled "
3939 return gen_reg_or_parallel (mode, orig_mode,
3940 cum->sse_regno + FIRST_SSE_REG);
3948 if (!type || !AGGREGATE_TYPE_P (type))
3950 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
3953 warning (0, "MMX vector argument without MMX enabled "
3957 return gen_reg_or_parallel (mode, orig_mode,
3958 cum->mmx_regno + FIRST_MMX_REG);
/* Helper function_arg uses when TARGET_64BIT is set and
   TARGET_64BIT_MS_ABI is not.  A VOIDmode request produces the constant
   for the hidden AL argument, computed from cum->maybe_vaarg and the
   SSE register count.  Every other argument is described by
   construct_container, with IN_RETURN 0, the integer register count
   left in CUM, and the slice of x86_64_int_parameter_registers that
   starts at cum->regno.  */
3967 function_arg_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3968 enum machine_mode orig_mode, tree type)
3970 /* Handle a hidden AL argument containing number of registers
3971 for varargs x86-64 functions. */
3972 if (mode == VOIDmode)
3973 return GEN_INT (cum->maybe_vaarg
3974 ? (cum->sse_nregs < 0
3979 return construct_container (mode, orig_mode, type, 0, cum->nregs,
3981 &x86_64_int_parameter_registers [cum->regno],
/* Helper function_arg uses when TARGET_64BIT_MS_ABI is set.  VOIDmode
   and an exhausted register count (cum->nregs == 0) are handled first.
   Otherwise the register is taken from
   x86_64_ms_abi_int_parameter_registers at index cum->regno.  With
   TARGET_SSE, an SFmode or DFmode argument uses the SSE register with
   the same index instead; the visible code also builds a two-element
   PARALLEL naming both the SSE and the integer register, which the
   comment attributes to unnamed floating parameters.  All other results
   go through gen_reg_or_parallel.
   NOTE(review): the tests on NAMED that choose between the SSE-only and
   the dual-register form are on lines not visible here.  */
3986 function_arg_ms_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3987 enum machine_mode orig_mode, int named)
3991 /* Avoid the AL settings for the Unix64 ABI. */
3992 if (mode == VOIDmode)
3995 /* If we've run out of registers, it goes on the stack. */
3996 if (cum->nregs == 0)
3999 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
4001 /* Only floating point modes are passed in anything but integer regs. */
4002 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
4005 regno = cum->regno + FIRST_SSE_REG;
4010 /* Unnamed floating parameters are passed in both the
4011 SSE and integer registers. */
4012 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
4013 t2 = gen_rtx_REG (mode, regno);
4014 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
4015 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
4016 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
4020 return gen_reg_or_parallel (mode, orig_mode, regno);
/* Decide where an argument of mode OMODE and type TYPE is passed.  The
   size is taken from TYPE for BLKmode and from the mode otherwise, and
   rounded up to whole words.  For a VECTOR_TYPE the working mode
   becomes type_natural_mode (TYPE), while OMODE is still forwarded as
   the original mode.  The decision is then made by the MS 64-bit,
   64-bit or 32-bit helper according to TARGET_64BIT_MS_ABI and
   TARGET_64BIT; only the MS helper receives NAMED.  */
4024 function_arg (CUMULATIVE_ARGS *cum, enum machine_mode omode,
4025 tree type, int named)
4027 enum machine_mode mode = omode;
4028 HOST_WIDE_INT bytes, words;
4030 if (mode == BLKmode)
4031 bytes = int_size_in_bytes (type);
4033 bytes = GET_MODE_SIZE (mode);
4034 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4036 /* To simplify the code below, represent vector types with a vector mode
4037 even if MMX/SSE are not active. */
4038 if (type && TREE_CODE (type) == VECTOR_TYPE)
4039 mode = type_natural_mode (type);
4041 if (TARGET_64BIT_MS_ABI)
4042 return function_arg_ms_64 (cum, mode, omode, named);
4043 else if (TARGET_64BIT)
4044 return function_arg_64 (cum, mode, omode, type);
4046 return function_arg_32 (cum, mode, omode, type, bytes, words);
4049 /* A C expression that indicates when an argument must be passed by
4050 reference. If nonzero for an argument, a copy of that argument is
4051 made in memory and a pointer to the argument is passed instead of
4052 the argument itself. The pointer is passed in whatever way is
4053 appropriate for passing a pointer to that type. */
/* Under TARGET_64BIT_MS_ABI: arrays go by reference; aggregates go by
   reference unless their size in bytes is an exact power of two from 1
   to 8; and a mode wider than 8 bytes is tested as well.  Otherwise, on
   TARGET_64BIT, a TYPE whose int_size_in_bytes is -1 is tested.
   NOTE(review): MODE is marked ATTRIBUTE_UNUSED in the signature but is
   read in the MS ABI branch below; the results of the individual tests
   are on lines not visible here.  */
4056 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
4057 enum machine_mode mode ATTRIBUTE_UNUSED,
4058 tree type, bool named ATTRIBUTE_UNUSED)
4060 if (TARGET_64BIT_MS_ABI)
4064 /* Arrays are passed by reference. */
4065 if (TREE_CODE (type) == ARRAY_TYPE)
4068 if (AGGREGATE_TYPE_P (type))
4070 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
4071 are passed by reference. */
4072 int el2 = exact_log2 (int_size_in_bytes (type));
4073 return !(el2 >= 0 && el2 <= 3);
4077 /* __m128 is passed by reference. */
4078 /* ??? How to handle complex? For now treat them as structs,
4079 and pass them by reference if they're too large. */
4080 if (GET_MODE_SIZE (mode) > 8)
4083 else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
4089 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
4090 ABI. Only called if TARGET_SSE. */
/* Two quick tests come first: one on an SSE register mode whose
   alignment is either not user-specified or above 128 bits, and one on
   a TYPE_ALIGN below 128.  Aggregates are then searched recursively:
   each FIELD_DECL's type for the record and union kinds, and the
   element type for arrays.
   NOTE(review): the value returned by each test is on lines not visible
   here.  */
4092 contains_128bit_aligned_vector_p (tree type)
4094 enum machine_mode mode = TYPE_MODE (type);
4095 if (SSE_REG_MODE_P (mode)
4096 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
4098 if (TYPE_ALIGN (type) < 128)
4101 if (AGGREGATE_TYPE_P (type))
4103 /* Walk the aggregates recursively. */
4104 switch (TREE_CODE (type))
4108 case QUAL_UNION_TYPE:
4112 /* Walk all the structure fields. */
4113 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
4115 if (TREE_CODE (field) == FIELD_DECL
4116 && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
4123 /* Just for use if some languages pass arrays by value. */
4124 if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
4135 /* Gives the alignment boundary, in bits, of an argument with the
4136 specified mode and type. */
/* The starting value is TYPE_ALIGN (TYPE) or GET_MODE_ALIGNMENT (MODE),
   raised to PARM_BOUNDARY if smaller.  It is then lowered back to
   PARM_BOUNDARY in several cases, two of which are visible: MODE is not
   an SSE register mode, or TYPE does not satisfy
   contains_128bit_aligned_vector_p.
   NOTE(review): the conditions that choose between TYPE and MODE, and
   those that guard the lowering cases, are on lines not visible
   here.  */
4139 ix86_function_arg_boundary (enum machine_mode mode, tree type)
4143 align = TYPE_ALIGN (type);
4145 align = GET_MODE_ALIGNMENT (mode);
4146 if (align < PARM_BOUNDARY)
4147 align = PARM_BOUNDARY;
4150 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
4151 make an exception for SSE modes since these require 128bit
4154 The handling here differs from field_alignment. ICC aligns MMX
4155 arguments to 4 byte boundaries, while structure fields are aligned
4156 to 8 byte boundaries. */
4158 align = PARM_BOUNDARY;
4161 if (!SSE_REG_MODE_P (mode))
4162 align = PARM_BOUNDARY;
4166 if (!contains_128bit_aligned_vector_p (type))
4167 align = PARM_BOUNDARY;
4175 /* Return true if REGNO is a possible register number of function value. */
/* For FIRST_FLOAT_REG the answer is TARGET_FLOAT_RETURNS_IN_80387, with
   TARGET_64BIT_MS_ABI tested first; a later case tests TARGET_MACHO and
   TARGET_64BIT.
   NOTE(review): the remaining case labels and their results are on
   lines not visible here.  */
4178 ix86_function_value_regno_p (int regno)
4185 case FIRST_FLOAT_REG:
4186 if (TARGET_64BIT_MS_ABI)
4188 return TARGET_FLOAT_RETURNS_IN_80387;
4194 if (TARGET_MACHO || TARGET_64BIT)
4202 /* Define how to find the value returned by a function.
4203 VALTYPE is the data type of the value (as a tree).
4204 If the precise function being called is known, FUNC is its FUNCTION_DECL;
4205 otherwise, FUNC is 0. */
/* This is the helper ix86_function_value_1 uses when neither
   TARGET_64BIT_MS_ABI nor TARGET_64BIT is set.  MODE selects the
   register and ORIG_MODE is the mode of the REG returned.  FNTYPE and
   FN are the function type and declaration; either may be null.  They
   are consulted only for SFmode and DFmode values, where
   ix86_function_sseregparm may redirect the result to the first SSE
   register.  */
4208 function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
4209 tree fntype, tree fn)
4213 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
4214 we normally prevent this case when mmx is not available. However
4215 some ABIs may require the result to be returned like DImode. */
4216 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
4217 regno = TARGET_MMX ? FIRST_MMX_REG : 0;
4219 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
4220 we prevent this case when sse is not available. However some ABIs
4221 may require the result to be returned like integer TImode. */
4222 else if (mode == TImode
4223 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
4224 regno = TARGET_SSE ? FIRST_SSE_REG : 0;
4226 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
4227 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
4228 regno = FIRST_FLOAT_REG;
4230 /* Most things go in %eax. */
4233 /* Override FP return register with %xmm0 for local functions when
4234 SSE math is enabled or for functions with sseregparm attribute. */
4235 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
4237 int sse_level = ix86_function_sseregparm (fntype, fn);
4238 if ((sse_level >= 1 && mode == SFmode)
4239 || (sse_level == 2 && mode == DFmode))
4240 regno = FIRST_SSE_REG;
4243 return gen_rtx_REG (orig_mode, regno);
4247 function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
4252 /* Handle libcalls, which don't provide a type node. */
4253 if (valtype == NULL)
4265 return gen_rtx_REG (mode, FIRST_SSE_REG);
4268 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
4272 return gen_rtx_REG (mode, 0);
4276 ret = construct_container (mode, orig_mode, valtype, 1,
4277 REGPARM_MAX, SSE_REGPARM_MAX,
4278 x86_64_int_return_registers, 0);
4280 /* For zero-sized structures, construct_container returns NULL, but we
4281 need to keep the rest of the compiler happy by returning a meaningful value. */
4283 ret = gen_rtx_REG (orig_mode, 0);
4289 function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode)
4291 unsigned int regno = 0;
4295 if (mode == SFmode || mode == DFmode)
4296 regno = FIRST_SSE_REG;
4297 else if (VECTOR_MODE_P (mode) || GET_MODE_SIZE (mode) == 16)
4298 regno = FIRST_SSE_REG;
4301 return gen_rtx_REG (orig_mode, regno);
4305 ix86_function_value_1 (tree valtype, tree fntype_or_decl,
4306 enum machine_mode orig_mode, enum machine_mode mode)
4311 if (fntype_or_decl && DECL_P (fntype_or_decl))
4312 fn = fntype_or_decl;
4313 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
4315 if (TARGET_64BIT_MS_ABI)
4316 return function_value_ms_64 (orig_mode, mode);
4317 else if (TARGET_64BIT)
4318 return function_value_64 (orig_mode, mode, valtype);
4320 return function_value_32 (orig_mode, mode, fntype, fn);
4324 ix86_function_value (tree valtype, tree fntype_or_decl,
4325 bool outgoing ATTRIBUTE_UNUSED)
4327 enum machine_mode mode, orig_mode;
4329 orig_mode = TYPE_MODE (valtype);
4330 mode = type_natural_mode (valtype);
4331 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
4335 ix86_libcall_value (enum machine_mode mode)
4337 return ix86_function_value_1 (NULL, NULL, mode, mode);
4340 /* Return true iff type is returned in memory. */
4343 return_in_memory_32 (tree type, enum machine_mode mode)
4347 if (mode == BLKmode)
4350 size = int_size_in_bytes (type);
4352 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
4355 if (VECTOR_MODE_P (mode) || mode == TImode)
4357 /* User-created vectors small enough to fit in EAX. */
4361 /* MMX/3dNow values are returned in MM0,
4362 except when it doesn't exist. */
4364 return (TARGET_MMX ? 0 : 1);
4366 /* SSE values are returned in XMM0, except when it doesn't exist. */
4368 return (TARGET_SSE ? 0 : 1);
4383 return_in_memory_64 (tree type, enum machine_mode mode)
4385 int needed_intregs, needed_sseregs;
4386 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
4390 return_in_memory_ms_64 (tree type, enum machine_mode mode)
4392 HOST_WIDE_INT size = int_size_in_bytes (type);
4394 /* __m128 and friends are returned in xmm0. */
4395 if (size == 16 && VECTOR_MODE_P (mode))
4398 /* Otherwise, the size must be exactly in [1248]. */
4399 return (size != 1 && size != 2 && size != 4 && size != 8);
4403 ix86_return_in_memory (tree type)
4405 enum machine_mode mode = type_natural_mode (type);
4407 if (TARGET_64BIT_MS_ABI)
4408 return return_in_memory_ms_64 (type, mode);
4409 else if (TARGET_64BIT)
4410 return return_in_memory_64 (type, mode);
4412 return return_in_memory_32 (type, mode);
4415 /* Return true iff TYPE is returned in memory. This version is used
4416 on Solaris 10. It is similar to the generic ix86_return_in_memory,
4417 but differs notably in that when MMX is available, 8-byte vectors
4418 are returned in memory, rather than in MMX registers. */
4421 ix86_sol10_return_in_memory (tree type)
4424 enum machine_mode mode = type_natural_mode (type);
4427 return return_in_memory_64 (type, mode);
4429 if (mode == BLKmode)
4432 size = int_size_in_bytes (type);
4434 if (VECTOR_MODE_P (mode))
4436 /* Return in memory only if MMX registers *are* available. This
4437 seems backwards, but it is consistent with the existing
4444 else if (mode == TImode)
4446 else if (mode == XFmode)
4452 /* When returning SSE vector types, we have a choice of either
4453 (1) being abi incompatible with a -march switch, or
4454 (2) generating an error.
4455 Given no good solution, I think the safest thing is one warning.
4456 The user won't be able to use -Werror, but....
4458 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
4459 called in response to actually generating a caller or callee that
4460 uses such a type. As opposed to RETURN_IN_MEMORY, which is called
4461 via aggregate_value_p for general type probing from tree-ssa. */
4464 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
4466 static bool warnedsse, warnedmmx;
4468 if (!TARGET_64BIT && type)
4470 /* Look at the return type of the function, not the function type. */
4471 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
4473 if (!TARGET_SSE && !warnedsse)
4476 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
4479 warning (0, "SSE vector return without SSE enabled "
4484 if (!TARGET_MMX && !warnedmmx)
4486 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
4489 warning (0, "MMX vector return without MMX enabled "
4499 /* Create the va_list data type. */
4502 ix86_build_builtin_va_list (void)
4504 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
4506 /* For i386 we use plain pointer to argument area. */
4507 if (!TARGET_64BIT || TARGET_64BIT_MS_ABI)
4508 return build_pointer_type (char_type_node);
4510 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
4511 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
4513 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
4514 unsigned_type_node);
4515 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
4516 unsigned_type_node);
4517 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
4519 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
4522 va_list_gpr_counter_field = f_gpr;
4523 va_list_fpr_counter_field = f_fpr;
4525 DECL_FIELD_CONTEXT (f_gpr) = record;
4526 DECL_FIELD_CONTEXT (f_fpr) = record;
4527 DECL_FIELD_CONTEXT (f_ovf) = record;
4528 DECL_FIELD_CONTEXT (f_sav) = record;
4530 TREE_CHAIN (record) = type_decl;
4531 TYPE_NAME (record) = type_decl;
4532 TYPE_FIELDS (record) = f_gpr;
4533 TREE_CHAIN (f_gpr) = f_fpr;
4534 TREE_CHAIN (f_fpr) = f_ovf;
4535 TREE_CHAIN (f_ovf) = f_sav;
4537 layout_type (record);
4539 /* The correct type is an array type of one element. */
4540 return build_array_type (record, build_index_type (size_zero_node));
4543 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
4546 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
4556 if (! cfun->va_list_gpr_size && ! cfun->va_list_fpr_size)
4559 /* Indicate to allocate space on the stack for varargs save area. */
4560 ix86_save_varrargs_registers = 1;
4561 cfun->stack_alignment_needed = 128;
4563 save_area = frame_pointer_rtx;
4564 set = get_varargs_alias_set ();
4566 for (i = cum->regno;
4568 && i < cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
4571 mem = gen_rtx_MEM (Pmode,
4572 plus_constant (save_area, i * UNITS_PER_WORD));
4573 MEM_NOTRAP_P (mem) = 1;
4574 set_mem_alias_set (mem, set);
4575 emit_move_insn (mem, gen_rtx_REG (Pmode,
4576 x86_64_int_parameter_registers[i]));
4579 if (cum->sse_nregs && cfun->va_list_fpr_size)
4581 /* Now emit code to save SSE registers. The AX parameter contains number
4582 of SSE parameter registers used to call this function. We use
4583 sse_prologue_save insn template that produces computed jump across
4584 SSE saves. We need some preparation work to get this working. */
4586 label = gen_label_rtx ();
4587 label_ref = gen_rtx_LABEL_REF (Pmode, label);
4589 /* Compute address to jump to :
4590 label - 5*eax + nnamed_sse_arguments*5 */
4591 tmp_reg = gen_reg_rtx (Pmode);
4592 nsse_reg = gen_reg_rtx (Pmode);
4593 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
4594 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
4595 gen_rtx_MULT (Pmode, nsse_reg,
4600 gen_rtx_CONST (DImode,
4601 gen_rtx_PLUS (DImode,
4603 GEN_INT (cum->sse_regno * 4))));
4605 emit_move_insn (nsse_reg, label_ref);
4606 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
4608 /* Compute address of memory block we save into. We always use pointer
4609 pointing 127 bytes after first byte to store - this is needed to keep
4610 instruction size limited by 4 bytes. */
4611 tmp_reg = gen_reg_rtx (Pmode);
4612 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
4613 plus_constant (save_area,
4614 8 * REGPARM_MAX + 127)));
4615 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
4616 MEM_NOTRAP_P (mem) = 1;
4617 set_mem_alias_set (mem, set);
4618 set_mem_align (mem, BITS_PER_WORD);
4620 /* And finally do the dirty job! */
4621 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
4622 GEN_INT (cum->sse_regno), label));
4627 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
4629 int set = get_varargs_alias_set ();
4632 for (i = cum->regno; i < REGPARM_MAX; i++)
4636 mem = gen_rtx_MEM (Pmode,
4637 plus_constant (virtual_incoming_args_rtx,
4638 i * UNITS_PER_WORD));
4639 MEM_NOTRAP_P (mem) = 1;
4640 set_mem_alias_set (mem, set);
4642 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
4643 emit_move_insn (mem, reg);
4648 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4649 tree type, int *pretend_size ATTRIBUTE_UNUSED,
4652 CUMULATIVE_ARGS next_cum;
4656 /* This argument doesn't appear to be used anymore. Which is good,
4657 because the old code here didn't suppress rtl generation. */
4658 gcc_assert (!no_rtl);
4663 fntype = TREE_TYPE (current_function_decl);
4664 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
4665 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
4666 != void_type_node));
4668 /* For varargs, we do not want to skip the dummy va_dcl argument.
4669 For stdargs, we do want to skip the last named argument. */
4672 function_arg_advance (&next_cum, mode, type, 1);
4674 if (TARGET_64BIT_MS_ABI)
4675 setup_incoming_varargs_ms_64 (&next_cum);
4677 setup_incoming_varargs_64 (&next_cum);
4680 /* Implement va_start. */
4683 ix86_va_start (tree valist, rtx nextarg)
4685 HOST_WIDE_INT words, n_gpr, n_fpr;
4686 tree f_gpr, f_fpr, f_ovf, f_sav;
4687 tree gpr, fpr, ovf, sav, t;
4690 /* Only 64bit target needs something special. */
4691 if (!TARGET_64BIT || TARGET_64BIT_MS_ABI)
4693 std_expand_builtin_va_start (valist, nextarg);
4697 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4698 f_fpr = TREE_CHAIN (f_gpr);
4699 f_ovf = TREE_CHAIN (f_fpr);
4700 f_sav = TREE_CHAIN (f_ovf);
4702 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
4703 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
4704 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
4705 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
4706 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
4708 /* Count number of gp and fp argument registers used. */
4709 words = current_function_args_info.words;
4710 n_gpr = current_function_args_info.regno;
4711 n_fpr = current_function_args_info.sse_regno;
4713 if (cfun->va_list_gpr_size)
4715 type = TREE_TYPE (gpr);
4716 t = build2 (GIMPLE_MODIFY_STMT, type, gpr,
4717 build_int_cst (type, n_gpr * 8));
4718 TREE_SIDE_EFFECTS (t) = 1;
4719 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4722 if (cfun->va_list_fpr_size)
4724 type = TREE_TYPE (fpr);
4725 t = build2 (GIMPLE_MODIFY_STMT, type, fpr,
4726 build_int_cst (type, n_fpr * 16 + 8*REGPARM_MAX));
4727 TREE_SIDE_EFFECTS (t) = 1;
4728 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4731 /* Find the overflow area. */
4732 type = TREE_TYPE (ovf);
4733 t = make_tree (type, virtual_incoming_args_rtx);
4735 t = build2 (PLUS_EXPR, type, t,
4736 build_int_cst (type, words * UNITS_PER_WORD));
4737 t = build2 (GIMPLE_MODIFY_STMT, type, ovf, t);
4738 TREE_SIDE_EFFECTS (t) = 1;
4739 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4741 if (cfun->va_list_gpr_size || cfun->va_list_fpr_size)
4743 /* Find the register save area.
4744 The function prologue saves it right above the stack frame. */
4745 type = TREE_TYPE (sav);
4746 t = make_tree (type, frame_pointer_rtx);
4747 t = build2 (GIMPLE_MODIFY_STMT, type, sav, t);
4748 TREE_SIDE_EFFECTS (t) = 1;
4749 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4753 /* Implement va_arg. */
4756 ix86_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p)
4758 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
4759 tree f_gpr, f_fpr, f_ovf, f_sav;
4760 tree gpr, fpr, ovf, sav, t;
4762 tree lab_false, lab_over = NULL_TREE;
4767 enum machine_mode nat_mode;
4769 /* Only 64bit target needs something special. */
4770 if (!TARGET_64BIT || TARGET_64BIT_MS_ABI)
4771 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
4773 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4774 f_fpr = TREE_CHAIN (f_gpr);
4775 f_ovf = TREE_CHAIN (f_fpr);
4776 f_sav = TREE_CHAIN (f_ovf);
4778 valist = build_va_arg_indirect_ref (valist);
4779 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
4780 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
4781 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
4782 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
4784 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
4786 type = build_pointer_type (type);
4787 size = int_size_in_bytes (type);
4788 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4790 nat_mode = type_natural_mode (type);
4791 container = construct_container (nat_mode, TYPE_MODE (type), type, 0,
4792 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
4794 /* Pull the value out of the saved registers. */
4796 addr = create_tmp_var (ptr_type_node, "addr");
4797 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
4801 int needed_intregs, needed_sseregs;
4803 tree int_addr, sse_addr;
4805 lab_false = create_artificial_label ();
4806 lab_over = create_artificial_label ();
4808 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
4810 need_temp = (!REG_P (container)
4811 && ((needed_intregs && TYPE_ALIGN (type) > 64)
4812 || TYPE_ALIGN (type) > 128));
4814 /* If we are passing a structure, verify that it occupies a consecutive block
4815 in the register save area. If not, we need to do moves. */
4816 if (!need_temp && !REG_P (container))
4818 /* Verify that all registers are strictly consecutive */
4819 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
4823 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
4825 rtx slot = XVECEXP (container, 0, i);
4826 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
4827 || INTVAL (XEXP (slot, 1)) != i * 16)
4835 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
4837 rtx slot = XVECEXP (container, 0, i);
4838 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
4839 || INTVAL (XEXP (slot, 1)) != i * 8)
4851 int_addr = create_tmp_var (ptr_type_node, "int_addr");
4852 DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
4853 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
4854 DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();
4857 /* First ensure that we fit completely in registers. */
4860 t = build_int_cst (TREE_TYPE (gpr),
4861 (REGPARM_MAX - needed_intregs + 1) * 8);
4862 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
4863 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
4864 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
4865 gimplify_and_add (t, pre_p);
4869 t = build_int_cst (TREE_TYPE (fpr),
4870 (SSE_REGPARM_MAX - needed_sseregs + 1) * 16
4872 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
4873 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
4874 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
4875 gimplify_and_add (t, pre_p);
4878 /* Compute index to start of area used for integer regs. */
4881 /* int_addr = gpr + sav; */
4882 t = fold_convert (ptr_type_node, gpr);
4883 t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
4884 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, int_addr, t);
4885 gimplify_and_add (t, pre_p);
4889 /* sse_addr = fpr + sav; */
4890 t = fold_convert (ptr_type_node, fpr);
4891 t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
4892 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, sse_addr, t);
4893 gimplify_and_add (t, pre_p);
4898 tree temp = create_tmp_var (type, "va_arg_tmp");
4901 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
4902 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, addr, t);
4903 gimplify_and_add (t, pre_p);
4905 for (i = 0; i < XVECLEN (container, 0); i++)
4907 rtx slot = XVECEXP (container, 0, i);
4908 rtx reg = XEXP (slot, 0);
4909 enum machine_mode mode = GET_MODE (reg);
4910 tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
4911 tree addr_type = build_pointer_type (piece_type);
4914 tree dest_addr, dest;
4916 if (SSE_REGNO_P (REGNO (reg)))
4918 src_addr = sse_addr;
4919 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
4923 src_addr = int_addr;
4924 src_offset = REGNO (reg) * 8;
4926 src_addr = fold_convert (addr_type, src_addr);
4927 src_addr = fold_build2 (PLUS_EXPR, addr_type, src_addr,
4928 size_int (src_offset));
4929 src = build_va_arg_indirect_ref (src_addr);
4931 dest_addr = fold_convert (addr_type, addr);
4932 dest_addr = fold_build2 (PLUS_EXPR, addr_type, dest_addr,
4933 size_int (INTVAL (XEXP (slot, 1))));
4934 dest = build_va_arg_indirect_ref (dest_addr);
4936 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, dest, src);
4937 gimplify_and_add (t, pre_p);
4943 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
4944 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
4945 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (gpr), gpr, t);
4946 gimplify_and_add (t, pre_p);
4950 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
4951 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
4952 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (fpr), fpr, t);
4953 gimplify_and_add (t, pre_p);
4956 t = build1 (GOTO_EXPR, void_type_node, lab_over);
4957 gimplify_and_add (t, pre_p);
4959 t = build1 (LABEL_EXPR, void_type_node, lab_false);
4960 append_to_statement_list (t, pre_p);
4963 /* ... otherwise out of the overflow area. */
4965 /* Care for on-stack alignment if needed. */
4966 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64
4967 || integer_zerop (TYPE_SIZE (type)))
4971 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
4972 t = build2 (PLUS_EXPR, TREE_TYPE (ovf), ovf,
4973 build_int_cst (TREE_TYPE (ovf), align - 1));
4974 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
4975 build_int_cst (TREE_TYPE (t), -align));
4977 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
4979 t2 = build2 (GIMPLE_MODIFY_STMT, void_type_node, addr, t);
4980 gimplify_and_add (t2, pre_p);
4982 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
4983 build_int_cst (TREE_TYPE (t), rsize * UNITS_PER_WORD));
4984 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (ovf), ovf, t);
4985 gimplify_and_add (t, pre_p);
4989 t = build1 (LABEL_EXPR, void_type_node, lab_over);
4990 append_to_statement_list (t, pre_p);
4993 ptrtype = build_pointer_type (type);
4994 addr = fold_convert (ptrtype, addr);
4997 addr = build_va_arg_indirect_ref (addr);
4998 return build_va_arg_indirect_ref (addr);
5001 /* Return nonzero if OPNUM's MEM should be matched
5002 in movabs* patterns. */
5005 ix86_check_movabs (rtx insn, int opnum)
5009 set = PATTERN (insn);
5010 if (GET_CODE (set) == PARALLEL)
5011 set = XVECEXP (set, 0, 0);
5012 gcc_assert (GET_CODE (set) == SET);
5013 mem = XEXP (set, opnum);
5014 while (GET_CODE (mem) == SUBREG)
5015 mem = SUBREG_REG (mem);
5016 gcc_assert (MEM_P (mem));
5017 return (volatile_ok || !MEM_VOLATILE_P (mem));
5020 /* Initialize the table of extra 80387 mathematical constants. */
5023 init_ext_80387_constants (void)
5025 static const char * cst[5] =
5027 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
5028 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
5029 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
5030 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
5031 "3.1415926535897932385128089594061862044", /* 4: fldpi */
5035 for (i = 0; i < 5; i++)
5037 real_from_string (&ext_80387_constants_table[i], cst[i]);
5038 /* Ensure each constant is rounded to XFmode precision. */
5039 real_convert (&ext_80387_constants_table[i],
5040 XFmode, &ext_80387_constants_table[i]);
5043 ext_80387_constants_init = 1;
5046 /* Return true if the constant is something that can be loaded with
5047 a special instruction. */
5050 standard_80387_constant_p (rtx x)
5052 enum machine_mode mode = GET_MODE (x);
5056 if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
5059 if (x == CONST0_RTX (mode))
5061 if (x == CONST1_RTX (mode))
5064 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
5066 /* For XFmode constants, try to find a special 80387 instruction when
5067 optimizing for size or on those CPUs that benefit from them. */
5069 && (optimize_size || TARGET_EXT_80387_CONSTANTS))
5073 if (! ext_80387_constants_init)
5074 init_ext_80387_constants ();
5076 for (i = 0; i < 5; i++)
5077 if (real_identical (&r, &ext_80387_constants_table[i]))
5081 /* Load of the constant -0.0 or -1.0 will be split as
5082 fldz;fchs or fld1;fchs sequence. */
5083 if (real_isnegzero (&r))
5085 if (real_identical (&r, &dconstm1))
5091 /* Return the opcode of the special instruction to be used to load
5095 standard_80387_constant_opcode (rtx x)
5097 switch (standard_80387_constant_p (x))
5121 /* Return the CONST_DOUBLE representing the 80387 constant that is
5122 loaded by the specified special instruction. The argument IDX
5123 matches the return value from standard_80387_constant_p. */
5126 standard_80387_constant_rtx (int idx)
5130 if (! ext_80387_constants_init)
5131 init_ext_80387_constants ();
5147 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
5151 /* Return 1 if mode is a valid mode for sse. */
5153 standard_sse_mode_p (enum machine_mode mode)
5170 /* Return 1 if X is FP constant we can load to SSE register w/o using memory.
5173 standard_sse_constant_p (rtx x)
5175 enum machine_mode mode = GET_MODE (x);
5177 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
5179 if (vector_all_ones_operand (x, mode)
5180 && standard_sse_mode_p (mode))
5181 return TARGET_SSE2 ? 2 : -1;
5186 /* Return the opcode of the special instruction to be used to load
5190 standard_sse_constant_opcode (rtx insn, rtx x)
5192 switch (standard_sse_constant_p (x))
5195 if (get_attr_mode (insn) == MODE_V4SF)
5196 return "xorps\t%0, %0";
5197 else if (get_attr_mode (insn) == MODE_V2DF)
5198 return "xorpd\t%0, %0";
5200 return "pxor\t%0, %0";
5202 return "pcmpeqd\t%0, %0";
5207 /* Returns 1 if OP contains a symbol reference */
5210 symbolic_reference_mentioned_p (rtx op)
5215 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
5218 fmt = GET_RTX_FORMAT (GET_CODE (op));
5219 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
5225 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
5226 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
5230 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
5237 /* Return 1 if it is appropriate to emit `ret' instructions in the
5238 body of a function. Do this only if the epilogue is simple, needing a
5239 couple of insns. Prior to reloading, we can't tell how many registers
5240 must be saved, so return 0 then. Return 0 if there is no frame
5241 marker to de-allocate. */
5244 ix86_can_use_return_insn_p (void)
5246 struct ix86_frame frame;
5248 if (! reload_completed || frame_pointer_needed)
5251 /* Don't allow 32768 or more bytes of pop, since that's more than we can do
5252 with one instruction. */
5253 if (current_function_pops_args
5254 && current_function_args_size >= 32768)
5257 ix86_compute_frame_layout (&frame);
5258 return frame.to_allocate == 0 && frame.nregs == 0;
5261 /* Value should be nonzero if functions must have frame pointers.
5262 Zero means the frame pointer need not be set up (and parms may
5263 be accessed via the stack pointer) in functions that seem suitable. */
5266 ix86_frame_pointer_required (void)
5268 /* If we accessed previous frames, then the generated code expects
5269 to be able to access the saved ebp value in our frame. */
5270 if (cfun->machine->accesses_prev_frame)
5273 /* Several x86 os'es need a frame pointer for other reasons,
5274 usually pertaining to setjmp. */
5275 if (SUBTARGET_FRAME_POINTER_REQUIRED)
5278 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
5279 the frame pointer by default. Turn it back on now if we've not
5280 got a leaf function. */
5281 if (TARGET_OMIT_LEAF_FRAME_POINTER
5282 && (!current_function_is_leaf
5283 || ix86_current_function_calls_tls_descriptor))
5286 if (current_function_profile)
5292 /* Record that the current function accesses previous call frames. */
5295 ix86_setup_frame_addresses (void)
5297 cfun->machine->accesses_prev_frame = 1;
5300 #if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
5301 # define USE_HIDDEN_LINKONCE 1
5303 # define USE_HIDDEN_LINKONCE 0
5306 static int pic_labels_used;
5308 /* Fills in the label name that should be used for a pc thunk for
5309 the given register. */
5312 get_pc_thunk_name (char name[32], unsigned int regno)
5314 gcc_assert (!TARGET_64BIT);
5316 if (USE_HIDDEN_LINKONCE)
5317 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
5319 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
5323 /* This function generates code for -fpic that loads %ebx with
5324 the return address of the caller and then returns. */
5327 ix86_file_end (void)
5332 for (regno = 0; regno < 8; ++regno)
5336 if (! ((pic_labels_used >> regno) & 1))
5339 get_pc_thunk_name (name, regno);
5344 switch_to_section (darwin_sections[text_coal_section]);
5345 fputs ("\t.weak_definition\t", asm_out_file);
5346 assemble_name (asm_out_file, name);
5347 fputs ("\n\t.private_extern\t", asm_out_file);
5348 assemble_name (asm_out_file, name);
5349 fputs ("\n", asm_out_file);
5350 ASM_OUTPUT_LABEL (asm_out_file, name);
5354 if (USE_HIDDEN_LINKONCE)
5358 decl = build_decl (FUNCTION_DECL, get_identifier (name),
5360 TREE_PUBLIC (decl) = 1;
5361 TREE_STATIC (decl) = 1;
5362 DECL_ONE_ONLY (decl) = 1;
5364 (*targetm.asm_out.unique_section) (decl, 0);
5365 switch_to_section (get_named_section (decl, NULL, 0));
5367 (*targetm.asm_out.globalize_label) (asm_out_file, name);
5368 fputs ("\t.hidden\t", asm_out_file);
5369 assemble_name (asm_out_file, name);
5370 fputc ('\n', asm_out_file);
5371 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
5375 switch_to_section (text_section);
5376 ASM_OUTPUT_LABEL (asm_out_file, name);
5379 xops[0] = gen_rtx_REG (SImode, regno);
5380 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
5381 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
5382 output_asm_insn ("ret", xops);
5385 if (NEED_INDICATE_EXEC_STACK)
5386 file_end_indicate_exec_stack ();
5389 /* Emit code for the SET_GOT patterns. */
5392 output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
5398 if (TARGET_VXWORKS_RTP && flag_pic)
5400 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
5401 xops[2] = gen_rtx_MEM (Pmode,
5402 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
5403 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
5405 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
5406 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
5407 an unadorned address. */
5408 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
5409 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
5410 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
5414 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
5416 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
5418 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
5421 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
5423 output_asm_insn ("call\t%a2", xops);
5426 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
5427 is what will be referenced by the Mach-O PIC subsystem. */
5429 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
5432 (*targetm.asm_out.internal_label) (asm_out_file, "L",
5433 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
5436 output_asm_insn ("pop{l}\t%0", xops);
5441 get_pc_thunk_name (name, REGNO (dest));
5442 pic_labels_used |= 1 << REGNO (dest);
5444 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
5445 xops[2] = gen_rtx_MEM (QImode, xops[2]);
5446 output_asm_insn ("call\t%X2", xops);
5447 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
5448 is what will be referenced by the Mach-O PIC subsystem. */
5451 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
5453 targetm.asm_out.internal_label (asm_out_file, "L",
5454 CODE_LABEL_NUMBER (label));
5461 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
5462 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
5464 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
5469 /* Generate a "push" pattern for input ARG. */
5474 return gen_rtx_SET (VOIDmode,
5476 gen_rtx_PRE_DEC (Pmode,
5477 stack_pointer_rtx)),
5481 /* Return >= 0 if there is an unused call-clobbered register available
5482 for the entire function. */
5485 ix86_select_alt_pic_regnum (void)
5487 if (current_function_is_leaf && !current_function_profile
5488 && !ix86_current_function_calls_tls_descriptor)
5491 for (i = 2; i >= 0; --i)
5492 if (!regs_ever_live[i])
5496 return INVALID_REGNUM;
5499 /* Return 1 if we need to save REGNO. */
5501 ix86_save_reg (unsigned int regno, int maybe_eh_return)
5503 if (pic_offset_table_rtx
5504 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
5505 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
5506 || current_function_profile
5507 || current_function_calls_eh_return
5508 || current_function_uses_const_pool))
5510 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
5515 if (current_function_calls_eh_return && maybe_eh_return)
5520 unsigned test = EH_RETURN_DATA_REGNO (i);
5521 if (test == INVALID_REGNUM)
5528 if (cfun->machine->force_align_arg_pointer
5529 && regno == REGNO (cfun->machine->force_align_arg_pointer))
5532 return (regs_ever_live[regno]
5533 && !call_used_regs[regno]
5534 && !fixed_regs[regno]
5535 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
5538 /* Return number of registers to be saved on the stack. */
5541 ix86_nsaved_regs (void)
5546 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
5547 if (ix86_save_reg (regno, true))
5552 /* Return the offset between two registers, one to be eliminated, and the other
5553 its replacement, at the start of a routine. */
5556 ix86_initial_elimination_offset (int from, int to)
5558 struct ix86_frame frame;
5559 ix86_compute_frame_layout (&frame);
5561 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
5562 return frame.hard_frame_pointer_offset;
5563 else if (from == FRAME_POINTER_REGNUM
5564 && to == HARD_FRAME_POINTER_REGNUM)
5565 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
5568 gcc_assert (to == STACK_POINTER_REGNUM);
5570 if (from == ARG_POINTER_REGNUM)
5571 return frame.stack_pointer_offset;
5573 gcc_assert (from == FRAME_POINTER_REGNUM);
5574 return frame.stack_pointer_offset - frame.frame_pointer_offset;
5578 /* Fill in the ix86_frame structure FRAME with the stack frame layout of the function currently being compiled. */
5581 ix86_compute_frame_layout (struct ix86_frame *frame)
5583 HOST_WIDE_INT total_size;
5584 unsigned int stack_alignment_needed;
5585 HOST_WIDE_INT offset;
5586 unsigned int preferred_alignment;
5587 HOST_WIDE_INT size = get_frame_size ();
5589 frame->nregs = ix86_nsaved_regs ();
/* Both alignments are converted from bits to bytes.  */
5592 stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
5593 preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
5595 /* During reload iteration the number of registers saved can change.
5596 Recompute the value as needed. Do not recompute when the number of registers
5597 didn't change as reload does multiple calls to the function and does not
5598 expect the decision to change within single iteration. */
5600 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
5602 int count = frame->nregs;
5604 cfun->machine->use_fast_prologue_epilogue_nregs = count;
5605 /* The fast prologue uses move instead of push to save registers. This
5606 is significantly longer, but also executes faster as modern hardware
5607 can execute the moves in parallel, but can't do that for push/pop.
5609 Be careful about choosing what prologue to emit: When function takes
5610 many instructions to execute we may use slow version as well as in
5611 case function is known to be outside hot spot (this is known with
5612 feedback only). Weight the size of function by number of registers
5613 to save as it is cheap to use one or two push instructions but very
5614 slow to use many of them. */
5616 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
5617 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
5618 || (flag_branch_probabilities
5619 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
5620 cfun->machine->use_fast_prologue_epilogue = false;
5622 cfun->machine->use_fast_prologue_epilogue
5623 = !expensive_function_p (count);
5625 if (TARGET_PROLOGUE_USING_MOVE
5626 && cfun->machine->use_fast_prologue_epilogue)
5627 frame->save_regs_using_mov = true;
5629 frame->save_regs_using_mov = false;
5632 /* Skip return address and saved base pointer. */
5633 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
5635 frame->hard_frame_pointer_offset = offset;
5637 /* Do some sanity checking of stack_alignment_needed and
5638 preferred_alignment, since the i386 port is the only one using those
5639 features, which may break easily. */
5641 gcc_assert (!size || stack_alignment_needed);
5642 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
5643 gcc_assert (preferred_alignment <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
5644 gcc_assert (stack_alignment_needed
5645 <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
/* Never align the frame to less than the basic stack boundary.  */
5647 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
5648 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
5650 /* Register save area */
5651 offset += frame->nregs * UNITS_PER_WORD;
5654 if (ix86_save_varrargs_registers)
5656 offset += X86_64_VARARGS_SIZE;
5657 frame->va_arg_size = X86_64_VARARGS_SIZE;
5660 frame->va_arg_size = 0;
5662 /* Align start of frame for local function. */
5663 frame->padding1 = ((offset + stack_alignment_needed - 1)
5664 & -stack_alignment_needed) - offset;
5666 offset += frame->padding1;
5668 /* Frame pointer points here. */
5669 frame->frame_pointer_offset = offset;
5673 /* Add outgoing arguments area. Can be skipped if we eliminated
5674 all the function calls as dead code.
5675 Skipping is however impossible when function calls alloca. Alloca
5676 expander assumes that last current_function_outgoing_args_size
5677 of stack frame are unused. */
5678 if (ACCUMULATE_OUTGOING_ARGS
5679 && (!current_function_is_leaf || current_function_calls_alloca
5680 || ix86_current_function_calls_tls_descriptor))
5682 offset += current_function_outgoing_args_size;
5683 frame->outgoing_arguments_size = current_function_outgoing_args_size;
5686 frame->outgoing_arguments_size = 0;
5688 /* Align stack boundary. Only needed if we're calling another function
5690 if (!current_function_is_leaf || current_function_calls_alloca
5691 || ix86_current_function_calls_tls_descriptor)
5692 frame->padding2 = ((offset + preferred_alignment - 1)
5693 & -preferred_alignment) - offset;
5695 frame->padding2 = 0;
5697 offset += frame->padding2;
5699 /* We've reached end of stack frame. */
5700 frame->stack_pointer_offset = offset;
5702 /* Size prologue needs to allocate. */
5703 frame->to_allocate =
5704 (size + frame->padding1 + frame->padding2
5705 + frame->outgoing_arguments_size + frame->va_arg_size);
/* Moves are not used when there is nothing to allocate and at most one
   register to save, nor for 64-bit frames of 0x80000000 bytes or more.  */
5707 if ((!frame->to_allocate && frame->nregs <= 1)
5708 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
5709 frame->save_regs_using_mov = false;
/* The red zone is used only by leaf functions that leave the stack pointer
   unchanged and make no TLS descriptor call; its size is capped at
   RED_ZONE_SIZE - RED_ZONE_RESERVE and subtracted from the allocation.  */
5711 if (TARGET_RED_ZONE && current_function_sp_is_unchanging
5712 && current_function_is_leaf
5713 && !ix86_current_function_calls_tls_descriptor)
5715 frame->red_zone_size = frame->to_allocate;
5716 if (frame->save_regs_using_mov)
5717 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
5718 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
5719 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
5722 frame->red_zone_size = 0;
5723 frame->to_allocate -= frame->red_zone_size;
5724 frame->stack_pointer_offset -= frame->red_zone_size;
/* Dump of the computed layout to stderr.  NOTE(review): presumably guarded
   by a debugging conditional that is not visible here -- confirm.  */
5726 fprintf (stderr, "\n");
5727 fprintf (stderr, "nregs: %ld\n", (long)frame->nregs);
5728 fprintf (stderr, "size: %ld\n", (long)size);
5729 fprintf (stderr, "alignment1: %ld\n", (long)stack_alignment_needed);
5730 fprintf (stderr, "padding1: %ld\n", (long)frame->padding1);
5731 fprintf (stderr, "va_arg: %ld\n", (long)frame->va_arg_size);
5732 fprintf (stderr, "padding2: %ld\n", (long)frame->padding2);
5733 fprintf (stderr, "to_allocate: %ld\n", (long)frame->to_allocate);
5734 fprintf (stderr, "red_zone_size: %ld\n", (long)frame->red_zone_size);
5735 fprintf (stderr, "frame_pointer_offset: %ld\n", (long)frame->frame_pointer_offset);
5736 fprintf (stderr, "hard_frame_pointer_offset: %ld\n",
5737 (long)frame->hard_frame_pointer_offset);
5738 fprintf (stderr, "stack_pointer_offset: %ld\n", (long)frame->stack_pointer_offset);
5739 fprintf (stderr, "current_function_is_leaf: %ld\n", (long)current_function_is_leaf);
5740 fprintf (stderr, "current_function_calls_alloca: %ld\n", (long)current_function_calls_alloca);
5741 fprintf (stderr, "x86_current_function_calls_tls_descriptor: %ld\n", (long)ix86_current_function_calls_tls_descriptor);
5745 /* Emit code to save registers in the prologue.  Registers are pushed
   walking from the highest hard register number down to 0, and every push
   is marked frame related.  */
5748 ix86_emit_save_regs (void)
5753 for (regno = FIRST_PSEUDO_REGISTER; regno-- > 0; )
5754 if (ix86_save_reg (regno, true))
5756 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
5757 RTX_FRAME_RELATED_P (insn) = 1;
5761 /* Emit code to save registers using MOV insns. First register
5762 is stored at POINTER + OFFSET; OFFSET advances by UNITS_PER_WORD for
   each further register saved, and every store is marked frame related.  */
5764 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
5769 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5770 if (ix86_save_reg (regno, true))
5772 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
5774 gen_rtx_REG (Pmode, regno));
5775 RTX_FRAME_RELATED_P (insn) = 1;
5776 offset += UNITS_PER_WORD;
5780 /* Expand prologue or epilogue stack adjustment.
5781 The pattern exists to put a dependency on all ebp-based memory accesses.
5782 STYLE should be negative if instructions should be marked as frame related,
5783 zero if %r11 register is live and cannot be freely used and positive
5787 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
5792 insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
5793 else if (x86_64_immediate_operand (offset, DImode))
5794 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
5798 /* r11 is used by indirect sibcall return as well, set before the
5799 epilogue and used after the epilogue. ATM indirect sibcall
5800 shouldn't be used together with huge frame sizes in one
5801 function because of the frame_size check in sibcall.c. */
5803 r11 = gen_rtx_REG (DImode, R11_REG);
5804 insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
5806 RTX_FRAME_RELATED_P (insn) = 1;
5807 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
5811 RTX_FRAME_RELATED_P (insn) = 1;
5814 /* Handle the TARGET_INTERNAL_ARG_POINTER hook.  Stack realignment is
   requested for a file-scope main when FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN
   is set, when ix86_force_align_arg_pointer is set, or when the function
   type carries the attribute named by ix86_force_align_arg_pointer_string.
   In that case hard register 2 is recorded in
   cfun->machine->force_align_arg_pointer and a copy of it in a new register
   is returned, except for nested functions, which get
   virtual_incoming_args_rtx (with a warning or an error).  Otherwise
   virtual_incoming_args_rtx is returned.  */
5817 ix86_internal_arg_pointer (void)
5819 bool has_force_align_arg_pointer =
5820 (0 != lookup_attribute (ix86_force_align_arg_pointer_string,
5821 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))));
5822 if ((FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN
5823 && DECL_NAME (current_function_decl)
5824 && MAIN_NAME_P (DECL_NAME (current_function_decl))
5825 && DECL_FILE_SCOPE_P (current_function_decl))
5826 || ix86_force_align_arg_pointer
5827 || has_force_align_arg_pointer)
5829 /* Nested functions can't realign the stack due to a register
5831 if (DECL_CONTEXT (current_function_decl)
5832 && TREE_CODE (DECL_CONTEXT (current_function_decl)) == FUNCTION_DECL)
5834 if (ix86_force_align_arg_pointer)
5835 warning (0, "-mstackrealign ignored for nested functions");
5836 if (has_force_align_arg_pointer)
5837 error ("%s not supported for nested functions",
5838 ix86_force_align_arg_pointer_string);
5839 return virtual_incoming_args_rtx;
5841 cfun->machine->force_align_arg_pointer = gen_rtx_REG (Pmode, 2);
5842 return copy_to_reg (cfun->machine->force_align_arg_pointer);
5845 return virtual_incoming_args_rtx;
5848 /* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook.
5849 This is called from dwarf2out.c to emit call frame instructions
5850 for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs.
   PATTERN is a SET whose source must be an UNSPEC: UNSPEC_REG_SAVE is
   forwarded to dwarf2out_reg_save_reg and UNSPEC_DEF_CFA to
   dwarf2out_def_cfa, both tagged with LABEL.  */
5852 ix86_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
5854 rtx unspec = SET_SRC (pattern);
5855 gcc_assert (GET_CODE (unspec) == UNSPEC);
5859 case UNSPEC_REG_SAVE:
5860 dwarf2out_reg_save_reg (label, XVECEXP (unspec, 0, 0),
5861 SET_DEST (pattern));
5863 case UNSPEC_DEF_CFA:
5864 dwarf2out_def_cfa (label, REGNO (SET_DEST (pattern)),
5865 INTVAL (XVECEXP (unspec, 0, 0)));
5872 /* Expand the prologue into a bunch of separate insns. */
5875 ix86_expand_prologue (void)
5879 struct ix86_frame frame;
5880 HOST_WIDE_INT allocate;
5882 ix86_compute_frame_layout (&frame);
/* Stack realignment sequence, emitted only when
   ix86_internal_arg_pointer recorded a force_align_arg_pointer register.  */
5884 if (cfun->machine->force_align_arg_pointer)
5888 /* Grab the argument pointer. */
5889 x = plus_constant (stack_pointer_rtx, 4);
5890 y = cfun->machine->force_align_arg_pointer;
5891 insn = emit_insn (gen_rtx_SET (VOIDmode, y, x));
5892 RTX_FRAME_RELATED_P (insn) = 1;
5894 /* The unwind info consists of two parts: install the fafp as the cfa,
5895 and record the fafp as the "save register" of the stack pointer.
5896 The latter is there in order that the unwinder can see where it
5897 should restore the stack pointer across the and insn. */
5898 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx), UNSPEC_DEF_CFA);
5899 x = gen_rtx_SET (VOIDmode, y, x);
5900 RTX_FRAME_RELATED_P (x) = 1;
5901 y = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, stack_pointer_rtx),
5903 y = gen_rtx_SET (VOIDmode, cfun->machine->force_align_arg_pointer, y);
5904 RTX_FRAME_RELATED_P (y) = 1;
5905 x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y));
5906 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
5907 REG_NOTES (insn) = x;
5909 /* Align the stack. */
5910 emit_insn (gen_andsi3 (stack_pointer_rtx, stack_pointer_rtx,
5913 /* And here we cheat like madmen with the unwind info. We force the
5914 cfa register back to sp+4, which is exactly what it was at the
5915 start of the function. Re-pushing the return address results in
5916 the return at the same spot relative to the cfa, and thus is
5917 correct wrt the unwind info. */
5918 x = cfun->machine->force_align_arg_pointer;
5919 x = gen_frame_mem (Pmode, plus_constant (x, -4));
5920 insn = emit_insn (gen_push (x));
5921 RTX_FRAME_RELATED_P (insn) = 1;
5924 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, x), UNSPEC_DEF_CFA);
5925 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
5926 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
5927 REG_NOTES (insn) = x;
5930 /* Note: AT&T enter does NOT have reversed args. Enter is probably
5931 slower on all targets. Also sdb doesn't like it. */
5933 if (frame_pointer_needed)
5935 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
5936 RTX_FRAME_RELATED_P (insn) = 1;
5938 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
5939 RTX_FRAME_RELATED_P (insn) = 1;
5942 allocate = frame.to_allocate;
/* Registers are either pushed now, or room for them is added to the
   allocation so that they can be stored with moves.  */
5944 if (!frame.save_regs_using_mov)
5945 ix86_emit_save_regs ();
5947 allocate += frame.nregs * UNITS_PER_WORD;
5949 /* When using red zone we may start register saving before allocating
5950 the stack frame saving one cycle of the prologue. */
5951 if (TARGET_RED_ZONE && frame.save_regs_using_mov)
5952 ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
5953 : stack_pointer_rtx,
5954 -frame.nregs * UNITS_PER_WORD);
5958 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
5959 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5960 GEN_INT (-allocate), -1);
5963 /* Only valid for Win32. */
5964 rtx eax = gen_rtx_REG (Pmode, 0);
5968 gcc_assert (!TARGET_64BIT || TARGET_64BIT_MS_ABI);
5970 if (TARGET_64BIT_MS_ABI)
5973 eax_live = ix86_eax_live_at_start_p ();
5977 emit_insn (gen_push (eax));
5978 allocate -= UNITS_PER_WORD;
5981 emit_move_insn (eax, GEN_INT (allocate));
5984 insn = gen_allocate_stack_worker_64 (eax);
5986 insn = gen_allocate_stack_worker_32 (eax);
5987 insn = emit_insn (insn);
5988 RTX_FRAME_RELATED_P (insn) = 1;
5989 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
5990 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
5991 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
5992 t, REG_NOTES (insn));
5996 if (frame_pointer_needed)
5997 t = plus_constant (hard_frame_pointer_rtx,
6000 - frame.nregs * UNITS_PER_WORD);
6002 t = plus_constant (stack_pointer_rtx, allocate);
6003 emit_move_insn (eax, gen_rtx_MEM (Pmode, t));
/* Without a red zone the moves can only be emitted once the frame has
   been allocated.  */
6007 if (frame.save_regs_using_mov && !TARGET_RED_ZONE)
6009 if (!frame_pointer_needed || !frame.to_allocate)
6010 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
6012 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
6013 -frame.nregs * UNITS_PER_WORD);
/* Decide whether the PIC register has to be loaded, possibly renumbering
   it to the alternative register chosen by ix86_select_alt_pic_regnum.  */
6016 pic_reg_used = false;
6017 if (pic_offset_table_rtx
6018 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
6019 || current_function_profile))
6021 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
6023 if (alt_pic_reg_used != INVALID_REGNUM)
6024 REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
6026 pic_reg_used = true;
6033 if (ix86_cmodel == CM_LARGE_PIC)
6035 rtx tmp_reg = gen_rtx_REG (DImode,
6036 FIRST_REX_INT_REG + 3 /* R11 */);
6037 rtx label = gen_label_rtx ();
6039 LABEL_PRESERVE_P (label) = 1;
6040 gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
6041 insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx, label));
6042 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
6043 insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
6044 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
6045 insn = emit_insn (gen_adddi3 (pic_offset_table_rtx,
6046 pic_offset_table_rtx, tmp_reg));
6049 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
6052 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
6054 /* Even with accurate pre-reload life analysis, we can wind up
6055 deleting all references to the pic register after reload.
6056 Consider if cross-jumping unifies two sides of a branch
6057 controlled by a comparison vs the only read from a global.
6058 In which case, allow the set_got to be deleted, though we're
6059 too late to do anything about the ebx save in the prologue. */
6060 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
6063 /* Prevent function calls from being scheduled before the call to mcount.
6064 In the pic_reg_used case, make sure that the got load isn't deleted. */
6065 if (current_function_profile)
6066 emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
6069 /* Emit code to restore saved registers using MOV insns. First register
6070 is restored from POINTER + OFFSET.  MAYBE_EH_RETURN is passed on to
   ix86_save_reg to select the registers to restore; OFFSET advances by
   UNITS_PER_WORD after each register.  */
6072 ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
6073 int maybe_eh_return)
6076 rtx base_address = gen_rtx_MEM (Pmode, pointer);
6078 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
6079 if (ix86_save_reg (regno, maybe_eh_return))
6081 /* Ensure that adjust_address won't be forced to produce pointer
6082 out of range allowed by x86-64 instruction set. */
6083 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
6087 r11 = gen_rtx_REG (DImode, R11_REG);
6088 emit_move_insn (r11, GEN_INT (offset));
6089 emit_insn (gen_adddi3 (r11, r11, pointer));
6090 base_address = gen_rtx_MEM (Pmode, r11);
6093 emit_move_insn (gen_rtx_REG (Pmode, regno),
6094 adjust_address (base_address, Pmode, offset));
6095 offset += UNITS_PER_WORD;
6099 /* Restore function stack, frame, and registers.  STYLE is handed to
   pro_epilogue_adjust_stack for the stack adjustments; STYLE == 2 is also
   passed as the maybe_eh_return flag when registers are restored with
   moves.  */
6102 ix86_expand_epilogue (int style)
6105 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
6106 struct ix86_frame frame;
6107 HOST_WIDE_INT offset;
6109 ix86_compute_frame_layout (&frame);
6111 /* Calculate start of saved registers relative to ebp. Special care
6112 must be taken for the normal return case of a function using
6113 eh_return: the eax and edx registers are marked as saved, but not
6114 restored along this path. */
6115 offset = frame.nregs;
6116 if (current_function_calls_eh_return && style != 2)
6118 offset *= -UNITS_PER_WORD;
6120 /* If we're only restoring one register and sp is not valid then
6121 using a move instruction to restore the register since it's
6122 less work than reloading sp and popping the register.
6124 The default code results in stack adjustment using add/lea instruction,
6125 while this code results in LEAVE instruction (or discrete equivalent),
6126 so it is profitable in some other cases as well. Especially when there
6127 are no registers to restore. We also use this code when TARGET_USE_LEAVE
6128 and there is exactly one register to pop. This heuristic may need some
6129 tuning in future. */
6130 if ((!sp_valid && frame.nregs <= 1)
6131 || (TARGET_EPILOGUE_USING_MOVE
6132 && cfun->machine->use_fast_prologue_epilogue
6133 && (frame.nregs > 1 || frame.to_allocate))
6134 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
6135 || (frame_pointer_needed && TARGET_USE_LEAVE
6136 && cfun->machine->use_fast_prologue_epilogue
6137 && frame.nregs == 1)
6138 || current_function_calls_eh_return)
6140 /* Restore registers. We can use ebp or esp to address the memory
6141 locations. If both are available, default to ebp, since offsets
6142 are known to be small. Only exception is esp pointing directly to the
6143 end of block of saved registers, where we may simplify addressing
6146 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
6147 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
6148 frame.to_allocate, style == 2);
6150 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
6151 offset, style == 2);
6153 /* eh_return epilogues need %ecx added to the stack pointer. */
6156 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
6158 if (frame_pointer_needed)
6160 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
6161 tmp = plus_constant (tmp, UNITS_PER_WORD);
6162 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
6164 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
6165 emit_move_insn (hard_frame_pointer_rtx, tmp);
6167 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
6172 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
6173 tmp = plus_constant (tmp, (frame.to_allocate
6174 + frame.nregs * UNITS_PER_WORD));
6175 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
6178 else if (!frame_pointer_needed)
6179 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
6180 GEN_INT (frame.to_allocate
6181 + frame.nregs * UNITS_PER_WORD),
6183 /* If not an i386, mov & pop is faster than "leave". */
6184 else if (TARGET_USE_LEAVE || optimize_size
6185 || !cfun->machine->use_fast_prologue_epilogue)
6186 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
6189 pro_epilogue_adjust_stack (stack_pointer_rtx,
6190 hard_frame_pointer_rtx,
6193 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
6195 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
6200 /* First step is to deallocate the stack frame so that we can
6201 pop the registers. */
6204 gcc_assert (frame_pointer_needed);
6205 pro_epilogue_adjust_stack (stack_pointer_rtx,
6206 hard_frame_pointer_rtx,
6207 GEN_INT (offset), style);
6209 else if (frame.to_allocate)
6210 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
6211 GEN_INT (frame.to_allocate), style);
/* Pop the saved registers in ascending register-number order.  */
6213 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
6214 if (ix86_save_reg (regno, false))
6217 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
6219 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
6221 if (frame_pointer_needed)
6223 /* Leave results in shorter dependency chains on CPUs that are
6224 able to grok it fast. */
6225 if (TARGET_USE_LEAVE)
6226 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
6227 else if (TARGET_64BIT)
6228 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
6230 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
/* Undo the stack realignment: the stack pointer is recomputed from the
   register recorded in cfun->machine->force_align_arg_pointer.  */
6234 if (cfun->machine->force_align_arg_pointer)
6236 emit_insn (gen_addsi3 (stack_pointer_rtx,
6237 cfun->machine->force_align_arg_pointer,
6241 /* Sibcall epilogues don't want a return instruction. */
6245 if (current_function_pops_args && current_function_args_size)
6247 rtx popc = GEN_INT (current_function_pops_args);
6249 /* i386 can only pop 64K bytes. If asked to pop more, pop
6250 return address, do explicit add, and jump indirectly to the
6253 if (current_function_pops_args >= 65536)
6255 rtx ecx = gen_rtx_REG (SImode, 2);
6257 /* There is no "pascal" calling convention in any 64bit ABI. */
6258 gcc_assert (!TARGET_64BIT);
6260 emit_insn (gen_popsi1 (ecx));
6261 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
6262 emit_jump_insn (gen_return_indirect_internal (ecx));
6265 emit_jump_insn (gen_return_pop_internal (popc));
6268 emit_jump_insn (gen_return_internal ());
6271 /* Reset from the function's potential modifications: the register number
   of pic_offset_table_rtx is set back to REAL_PIC_OFFSET_TABLE_REGNUM.  */
6274 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
6275 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
6277 if (pic_offset_table_rtx)
6278 REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
6280 /* Mach-O doesn't support labels at the end of objects, so if
6281 it looks like we might want one, insert a NOP.
   NOTE(review): presumably this part is guarded by a Mach-O preprocessor
   conditional that is not visible here -- confirm.  */
6283 rtx insn = get_last_insn ();
6286 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
6287 insn = PREV_INSN (insn);
6291 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
6292 fputs ("\tnop\n", file);
6298 /* Extract the parts of an RTL expression that is a valid memory address
6299 for an instruction. Return 0 if the structure of the address is
6300 grossly off. Return -1 if the address contains ASHIFT, so it is not
6301 strictly valid, but still used for computing length of lea instruction.
   ADDR is the address to split and OUT is the ix86_address structure that
   receives the parts.  */
6304 ix86_decompose_address (rtx addr, struct ix86_address *out)
6306 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
6307 rtx base_reg, index_reg;
6308 HOST_WIDE_INT scale = 1;
6309 rtx scale_rtx = NULL_RTX;
6311 enum ix86_address_seg seg = SEG_DEFAULT;
6313 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
6315 else if (GET_CODE (addr) == PLUS)
6325 addends[n++] = XEXP (op, 1);
6328 while (GET_CODE (op) == PLUS);
6333 for (i = n; i >= 0; --i)
6336 switch (GET_CODE (op))
6341 index = XEXP (op, 0);
6342 scale_rtx = XEXP (op, 1);
/* A thread-pointer UNSPEC selects a segment override (%fs in 64-bit mode,
   %gs otherwise) when direct segment references are enabled.  */
6346 if (XINT (op, 1) == UNSPEC_TP
6347 && TARGET_TLS_DIRECT_SEG_REFS
6348 && seg == SEG_DEFAULT)
6349 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
6378 else if (GET_CODE (addr) == MULT)
6380 index = XEXP (addr, 0); /* index*scale */
6381 scale_rtx = XEXP (addr, 1);
6383 else if (GET_CODE (addr) == ASHIFT)
6387 /* We're called for lea too, which implements ashift on occasion. */
6388 index = XEXP (addr, 0);
6389 tmp = XEXP (addr, 1);
6390 if (!CONST_INT_P (tmp))
6392 scale = INTVAL (tmp);
6393 if ((unsigned HOST_WIDE_INT) scale > 3)
6399 disp = addr; /* displacement */
6401 /* Extract the integral value of scale. */
6404 if (!CONST_INT_P (scale_rtx))
6406 scale = INTVAL (scale_rtx);
/* Look through SUBREGs so that the checks below see the underlying
   registers.  */
6409 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
6410 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
6412 /* Allow arg pointer and stack pointer as index if there is no scaling. */
6413 if (base_reg && index_reg && scale == 1
6414 && (index_reg == arg_pointer_rtx
6415 || index_reg == frame_pointer_rtx
6416 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
6419 tmp = base, base = index, index = tmp;
6420 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
6423 /* Special case: %ebp cannot be encoded as a base without a displacement. */
6424 if ((base_reg == hard_frame_pointer_rtx
6425 || base_reg == frame_pointer_rtx
6426 || base_reg == arg_pointer_rtx) && !disp)
6429 /* Special case: on K6, [%esi] makes the instruction vector decoded.
6430 Avoid this by transforming to [%esi+0]. */
6431 if (ix86_tune == PROCESSOR_K6 && !optimize_size
6432 && base_reg && !index_reg && !disp
6434 && REGNO_REG_CLASS (REGNO (base_reg)) == SIREG)
6437 /* Special case: encode reg+reg instead of reg*2. */
6438 if (!base && index && scale && scale == 2)
6439 base = index, base_reg = index_reg, scale = 1;
6441 /* Special case: scaling cannot be encoded without base or displacement. */
6442 if (!base && !disp && index && scale != 1)
6454 /* Return cost of the memory address x.
6455 For i386, it is better to use a complex address than let gcc copy
6456 the address into a reg and make a new pseudo. But not if the address
6457 requires two regs - that would mean more pseudos with longer
6460 ix86_address_cost (rtx x)
6462 struct ix86_address parts;
6464 int ok = ix86_decompose_address (x, &parts);
6468 if (parts.base && GET_CODE (parts.base) == SUBREG)
6469 parts.base = SUBREG_REG (parts.base);
6470 if (parts.index && GET_CODE (parts.index) == SUBREG)
6471 parts.index = SUBREG_REG (parts.index);
6473 /* More complex memory references are better. */
6474 if (parts.disp && parts.disp != const0_rtx)
6476 if (parts.seg != SEG_DEFAULT)
6479 /* Attempt to minimize number of registers in the address. */
6481 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
6483 && (!REG_P (parts.index)
6484 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
6488 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
6490 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
6491 && parts.base != parts.index)
6494 /* AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
6495 since its predecode logic can't detect the length of instructions
6496 and it degenerates to vector decoded. Increase cost of such
6497 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
6498 to split such addresses or even refuse such addresses at all.
6500 Following addressing modes are affected:
6505 The first and last case may be avoidable by explicitly coding the zero in
6506 memory address, but I don't have AMD-K6 machine handy to check this
6510 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
6511 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
6512 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
6518 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
6519 this is used to form addresses to local data when -fPIC is in
6523 darwin_local_data_pic (rtx disp)
6525 if (GET_CODE (disp) == MINUS)
6527 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
6528 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
6529 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
6531 const char *sym_name = XSTR (XEXP (disp, 1), 0);
6532 if (! strcmp (sym_name, "<pic base>"))
6540 /* Determine if a given RTX is a valid constant. We already know this
6541 satisfies CONSTANT_P.  The decision is made per GET_CODE (X); whatever
   is not handled by a specific case is left to the move patterns.  */
6544 legitimate_constant_p (rtx x)
6546 switch (GET_CODE (x))
6551 if (GET_CODE (x) == PLUS)
6553 if (!CONST_INT_P (XEXP (x, 1)))
6558 if (TARGET_MACHO && darwin_local_data_pic (x))
6561 /* Only some unspecs are valid as "constants". */
6562 if (GET_CODE (x) == UNSPEC)
6563 switch (XINT (x, 1))
6568 return TARGET_64BIT;
6571 x = XVECEXP (x, 0, 0);
6572 return (GET_CODE (x) == SYMBOL_REF
6573 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
6575 x = XVECEXP (x, 0, 0);
6576 return (GET_CODE (x) == SYMBOL_REF
6577 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
6582 /* We must have drilled down to a symbol. */
6583 if (GET_CODE (x) == LABEL_REF)
6585 if (GET_CODE (x) != SYMBOL_REF)
6590 /* TLS symbols are never valid. */
6591 if (SYMBOL_REF_TLS_MODEL (x))
6594 /* DLLIMPORT symbols are never valid. */
6595 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
6596 && SYMBOL_REF_DLLIMPORT_P (x))
6601 if (GET_MODE (x) == TImode
6602 && x != CONST0_RTX (TImode)
6608 if (x == CONST0_RTX (GET_MODE (x)))
6616 /* Otherwise we handle everything else in the move patterns. */
6620 /* Determine if it's legal to put X into the constant pool. This
6621 is not possible for the address of thread-local symbols, which
6622 is checked above.  Apart from the codes singled out by the switch, X
   cannot be forced to memory exactly when legitimate_constant_p rejects
   it.  */
6625 ix86_cannot_force_const_mem (rtx x)
6627 /* We can always put integral constants and vectors in memory. */
6628 switch (GET_CODE (x))
6638 return !legitimate_constant_p (x);
6641 /* Determine if a given RTX is a valid constant address: X must satisfy
   CONSTANT_P and be accepted by legitimate_address_p for Pmode with the
   strict argument set to 1.  */
6644 constant_address_p (rtx x)
6646 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
6649 /* Nonzero if the constant value X is a legitimate general operand
6650 when generating PIC code. It is given that flag_pic is on and
6651 that X satisfies CONSTANT_P or is a CONST_DOUBLE.  Operands not decided
   by the UNSPEC checks are handed to legitimate_pic_address_disp_p.  */
6654 legitimate_pic_operand_p (rtx x)
6658 switch (GET_CODE (x))
/* Look inside the wrapper and skip a constant integer addend.  */
6661 inner = XEXP (x, 0);
6662 if (GET_CODE (inner) == PLUS
6663 && CONST_INT_P (XEXP (inner, 1)))
6664 inner = XEXP (inner, 0);
6666 /* Only some unspecs are valid as "constants". */
6667 if (GET_CODE (inner) == UNSPEC)
6668 switch (XINT (inner, 1))
6673 return TARGET_64BIT;
6675 x = XVECEXP (inner, 0, 0);
6676 return (GET_CODE (x) == SYMBOL_REF
6677 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
6685 return legitimate_pic_address_disp_p (x);
6692 /* Determine if a given CONST RTX is a valid memory displacement
6696 legitimate_pic_address_disp_p (rtx disp)
6700 /* In 64bit mode we can allow direct addresses of symbols and labels
6701 when they are not dynamic symbols. */
6704 rtx op0 = disp, op1;
6706 switch (GET_CODE (disp))
6712 if (GET_CODE (XEXP (disp, 0)) != PLUS)
6714 op0 = XEXP (XEXP (disp, 0), 0);
6715 op1 = XEXP (XEXP (disp, 0), 1);
/* The constant addend must lie in [-16MB, 16MB).  */
6716 if (!CONST_INT_P (op1)
6717 || INTVAL (op1) >= 16*1024*1024
6718 || INTVAL (op1) < -16*1024*1024)
6720 if (GET_CODE (op0) == LABEL_REF)
6722 if (GET_CODE (op0) != SYMBOL_REF)
6727 /* TLS references should always be enclosed in UNSPEC. */
6728 if (SYMBOL_REF_TLS_MODEL (op0))
6730 if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0)
6731 && ix86_cmodel != CM_LARGE_PIC)
6739 if (GET_CODE (disp) != CONST)
6741 disp = XEXP (disp, 0);
6745 /* It is unsafe to allow PLUS expressions; this limits the allowed distance
6746 of GOT tables. We should not need these anyway. */
6747 if (GET_CODE (disp) != UNSPEC
6748 || (XINT (disp, 1) != UNSPEC_GOTPCREL
6749 && XINT (disp, 1) != UNSPEC_GOTOFF
6750 && XINT (disp, 1) != UNSPEC_PLTOFF))
6753 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
6754 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
6760 if (GET_CODE (disp) == PLUS)
6762 if (!CONST_INT_P (XEXP (disp, 1)))
6764 disp = XEXP (disp, 0);
6768 if (TARGET_MACHO && darwin_local_data_pic (disp))
6771 if (GET_CODE (disp) != UNSPEC)
6774 switch (XINT (disp, 1))
6779 /* We need to check for both symbols and labels because VxWorks loads
6780 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
6782 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
6783 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
6785 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
6786 While the ABI also specifies a 32bit relocation, we don't produce it
6787 in the small PIC model at all. */
6788 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
6789 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
6791 return gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
6793 case UNSPEC_GOTTPOFF:
6794 case UNSPEC_GOTNTPOFF:
6795 case UNSPEC_INDNTPOFF:
6798 disp = XVECEXP (disp, 0, 0);
6799 return (GET_CODE (disp) == SYMBOL_REF
6800 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
6802 disp = XVECEXP (disp, 0, 0);
6803 return (GET_CODE (disp) == SYMBOL_REF
6804 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
6806 disp = XVECEXP (disp, 0, 0);
6807 return (GET_CODE (disp) == SYMBOL_REF
6808 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
6814 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
6815 memory address for an instruction. The MODE argument is the machine mode
6816 for the MEM expression that wants to use this address.
6818 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
6819 convert common non-canonical forms to canonical form so that they will
/* Check whether ADDR is a valid memory address.  ADDR is split with
   ix86_decompose_address and the resulting base, index, scale and
   displacement are validated in turn.  For each rejection a short
   explanation is stored in REASON; for the scale check the scale is
   also stored in REASON_RTX.  STRICT selects the REG_OK_FOR_..._STRICT_P
   register predicates instead of the ..._NONSTRICT_P ones.  MODE is
   marked ATTRIBUTE_UNUSED.
   NOTE(review): the return type, braces, labels and return statements
   are not visible in this listing, so how REASON is reported and what
   is returned cannot be confirmed from here.  */
6823 legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
6824 rtx addr, int strict)
6826 struct ix86_address parts;
6827 rtx base, index, disp;
6828 HOST_WIDE_INT scale;
6829 const char *reason = NULL;
6830 rtx reason_rtx = NULL_RTX;
6832 if (ix86_decompose_address (addr, &parts) <= 0)
6834 reason = "decomposition failed";
6839 index = parts.index;
6841 scale = parts.scale;
6843 /* Validate base register.
6845 Don't allow SUBREG's that span more than a word here. It can lead to spill
6846 failures when the base is one word out of a two word structure, which is
6847 represented internally as a DImode int. */
6856 else if (GET_CODE (base) == SUBREG
6857 && REG_P (SUBREG_REG (base))
6858 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
6860 reg = SUBREG_REG (base);
6863 reason = "base is not a register";
6867 if (GET_MODE (base) != Pmode)
6869 reason = "base is not in Pmode";
6873 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
6874 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
6876 reason = "base is not valid";
6881 /* Validate index register.
6883 Don't allow SUBREG's that span more than a word here -- same as above. */
6892 else if (GET_CODE (index) == SUBREG
6893 && REG_P (SUBREG_REG (index))
6894 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
6896 reg = SUBREG_REG (index);
6899 reason = "index is not a register";
6903 if (GET_MODE (index) != Pmode)
6905 reason = "index is not in Pmode";
6909 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
6910 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
6912 reason = "index is not valid";
6917 /* Validate scale factor. */
6920 reason_rtx = GEN_INT (scale);
6923 reason = "scale without index";
6927 if (scale != 2 && scale != 4 && scale != 8)
6929 reason = "scale is not a valid multiplier";
6934 /* Validate displacement. */
6939 if (GET_CODE (disp) == CONST
6940 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
6941 switch (XINT (XEXP (disp, 0), 1))
6943 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
6944 used. While the ABI also specifies 32bit relocations, we don't produce
6945 them at all and use IP relative instead. */
6948 gcc_assert (flag_pic);
6950 goto is_legitimate_pic;
6951 reason = "64bit address unspec";
6954 case UNSPEC_GOTPCREL:
6955 gcc_assert (flag_pic);
6956 goto is_legitimate_pic;
6958 case UNSPEC_GOTTPOFF:
6959 case UNSPEC_GOTNTPOFF:
6960 case UNSPEC_INDNTPOFF:
6966 reason = "invalid address unspec";
6970 else if (SYMBOLIC_CONST (disp)
6974 && MACHOPIC_INDIRECT
6975 && !machopic_operand_p (disp)
6981 if (TARGET_64BIT && (index || base))
6983 /* foo@dtpoff(%rX) is ok. */
6984 if (GET_CODE (disp) != CONST
6985 || GET_CODE (XEXP (disp, 0)) != PLUS
6986 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
6987 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
6988 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
6989 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
6991 reason = "non-constant pic memory reference";
6995 else if (! legitimate_pic_address_disp_p (disp))
6997 reason = "displacement is an invalid pic construct";
7001 /* This code used to verify that a symbolic pic displacement
7002 includes the pic_offset_table_rtx register.
7004 While this is a good idea, unfortunately these constructs may
7005 be created by "adds using lea" optimization for incorrect
7014 This code is nonsensical, but results in addressing
7015 GOT table with pic_offset_table_rtx base. We can't
7016 just refuse it easily, since it gets matched by
7017 "addsi3" pattern, that later gets split to lea in the
7018 case output register differs from input. While this
7019 can be handled by separate addsi pattern for this case
7020 that never results in lea, this seems to be easier and
7021 correct fix for crash to disable this test. */
7023 else if (GET_CODE (disp) != LABEL_REF
7024 && !CONST_INT_P (disp)
7025 && (GET_CODE (disp) != CONST
7026 || !legitimate_constant_p (disp))
7027 && (GET_CODE (disp) != SYMBOL_REF
7028 || !legitimate_constant_p (disp)))
7030 reason = "displacement is not constant";
7033 else if (TARGET_64BIT
7034 && !x86_64_immediate_operand (disp, VOIDmode))
7036 reason = "displacement is out of range";
7041 /* Everything looks valid. */
7048 /* Return a unique alias set for the GOT.  The set is kept in a
   function-local static that starts out as -1 and is assigned from
   new_alias_set.  NOTE(review): the test guarding that assignment and
   the return statement are not visible in this listing; presumably the
   set is allocated only on the first call -- confirm.  */
7050 static HOST_WIDE_INT
7051 ix86_GOT_alias_set (void)
7053 static HOST_WIDE_INT set = -1;
7055 set = new_alias_set ();
7059 /* Return a legitimate reference for ORIG (an address) using the
7060 register REG. If REG is 0, a new pseudo is generated.
7062 There are two types of references that must be handled:
7064 1. Global data references must load the address from the GOT, via
7065 the PIC reg. An insn is emitted to do this load, and the reg is
7068 2. Static data references, constant pool addresses, and code labels
7069 compute the address as an offset from the GOT, whose base is in
7070 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
7071 differentiate them from global data objects. The returned
7072 address is the PIC reg + an unspec constant.
7074 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
7075 reg also appears in the address.

   NOTE(review): the return type, local declarations, braces and several
   statements (including the return) are not visible in this listing, so
   the nesting of the branches below cannot be confirmed from here.  */
7078 legitimize_pic_address (rtx orig, rtx reg)
7085 if (TARGET_MACHO && !TARGET_64BIT)
7088 reg = gen_reg_rtx (Pmode);
7089 /* Use the generic Mach-O PIC machinery. */
7090 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
7094 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
7096 else if (TARGET_64BIT
7097 && ix86_cmodel != CM_SMALL_PIC
7098 && gotoff_operand (addr, Pmode))
7101 /* This symbol may be referenced via a displacement from the PIC
7102 base address (@GOTOFF). */
7104 if (reload_in_progress)
7105 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
7106 if (GET_CODE (addr) == CONST)
7107 addr = XEXP (addr, 0);
7108 if (GET_CODE (addr) == PLUS)
7110 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
7112 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
7115 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
7116 new = gen_rtx_CONST (Pmode, new);
7118 tmpreg = gen_reg_rtx (Pmode);
7121 emit_move_insn (tmpreg, new);
7125 new = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
7126 tmpreg, 1, OPTAB_DIRECT);
7129 else new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
7131 else if (!TARGET_64BIT && gotoff_operand (addr, Pmode))
7133 /* This symbol may be referenced via a displacement from the PIC
7134 base address (@GOTOFF). */
7136 if (reload_in_progress)
7137 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
7138 if (GET_CODE (addr) == CONST)
7139 addr = XEXP (addr, 0);
7140 if (GET_CODE (addr) == PLUS)
7142 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
7144 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
7147 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
7148 new = gen_rtx_CONST (Pmode, new);
7149 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
7153 emit_move_insn (reg, new);
7157 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
7158 /* We can't use @GOTOFF for text labels on VxWorks;
7159 see gotoff_operand. */
7160 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
7162 /* Given that we've already handled dllimport variables separately
7163 in legitimize_address, and all other variables should satisfy
7164 legitimate_pic_address_disp_p, we should never arrive here. */
7165 gcc_assert (!TARGET_64BIT_MS_ABI);
7167 if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
7169 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
7170 new = gen_rtx_CONST (Pmode, new);
7171 new = gen_const_mem (Pmode, new);
7172 set_mem_alias_set (new, ix86_GOT_alias_set ());
7175 reg = gen_reg_rtx (Pmode);
7176 /* Use gen_movsi directly; otherwise the address is loaded
7177 into a register for CSE. We don't want to CSE these addresses;
7178 instead we CSE addresses from the GOT table, so skip this.
   NOTE(review): the nearest visible guard above tests TARGET_64BIT and
   REG is created in Pmode, yet gen_movsi is the generator used here --
   confirm that this is intended.  */
7179 emit_insn (gen_movsi (reg, new));
7184 /* This symbol must be referenced via a load from the
7185 Global Offset Table (@GOT). */
7187 if (reload_in_progress)
7188 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
7189 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
7190 new = gen_rtx_CONST (Pmode, new);
7192 new = force_reg (Pmode, new);
7193 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
7194 new = gen_const_mem (Pmode, new);
7195 set_mem_alias_set (new, ix86_GOT_alias_set ());
7198 reg = gen_reg_rtx (Pmode);
7199 emit_move_insn (reg, new);
7205 if (CONST_INT_P (addr)
7206 && !x86_64_immediate_operand (addr, VOIDmode))
7210 emit_move_insn (reg, addr);
7214 new = force_reg (Pmode, addr);
7216 else if (GET_CODE (addr) == CONST)
7218 addr = XEXP (addr, 0);
7220 /* We must match stuff we generate before. Assume the only
7221 unspecs that can get here are ours. Not that we could do
7222 anything with them anyway.... */
7223 if (GET_CODE (addr) == UNSPEC
7224 || (GET_CODE (addr) == PLUS
7225 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
7227 gcc_assert (GET_CODE (addr) == PLUS);
7229 if (GET_CODE (addr) == PLUS)
7231 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
7233 /* Check first to see if this is a constant offset from a @GOTOFF
7234 symbol reference. */
7235 if (gotoff_operand (op0, Pmode)
7236 && CONST_INT_P (op1))
7240 if (reload_in_progress)
7241 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
7242 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
7244 new = gen_rtx_PLUS (Pmode, new, op1);
7245 new = gen_rtx_CONST (Pmode, new);
7246 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
7250 emit_move_insn (reg, new);
7256 if (INTVAL (op1) < -16*1024*1024
7257 || INTVAL (op1) >= 16*1024*1024)
7259 if (!x86_64_immediate_operand (op1, Pmode))
7260 op1 = force_reg (Pmode, op1);
7261 new = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
7267 base = legitimize_pic_address (XEXP (addr, 0), reg);
7268 new = legitimize_pic_address (XEXP (addr, 1),
7269 base == reg ? NULL_RTX : reg);
7271 if (CONST_INT_P (new))
7272 new = plus_constant (base, INTVAL (new));
7275 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
7277 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
7278 new = XEXP (new, 1);
7280 new = gen_rtx_PLUS (Pmode, base, new);
7288 /* Load the thread pointer. If TO_REG is true, force it into a register.
   The thread pointer is represented as an UNSPEC_TP unspec in Pmode; the
   register form is a fresh Pmode pseudo set from that unspec by an
   emitted SET insn.  NOTE(review): the test of TO_REG and the return
   statements are not visible in this listing.  */
7291 get_thread_pointer (int to_reg)
7295 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
7299 reg = gen_reg_rtx (Pmode);
7300 insn = gen_rtx_SET (VOIDmode, reg, tp);
7301 insn = emit_insn (insn);
7306 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
7307 false if we expect this to be used for a memory address and true if
7308 we expect to load the address into a register.
   X is the TLS symbol and MODEL its TLS access model; the visible cases
   are global-dynamic, local-dynamic, initial-exec and local-exec.  The
   dynamic models emit a tls_global_dynamic or tls_local_dynamic_base
   pattern (as a libcall block in the 64bit non-GNU2 case), while the
   exec models combine the thread pointer with a GOT-loaded or constant
   offset unspec.
   NOTE(review): the return type, the switch header, braces and most
   return statements are not visible in this listing.  */
7311 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
7313 rtx dest, base, off, pic, tp;
7318 case TLS_MODEL_GLOBAL_DYNAMIC:
7319 dest = gen_reg_rtx (Pmode);
7320 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
7322 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
7324 rtx rax = gen_rtx_REG (Pmode, 0), insns;
7327 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
7328 insns = get_insns ();
7331 CONST_OR_PURE_CALL_P (insns) = 1;
7332 emit_libcall_block (insns, dest, rax, x);
7334 else if (TARGET_64BIT && TARGET_GNU2_TLS)
7335 emit_insn (gen_tls_global_dynamic_64 (dest, x));
7337 emit_insn (gen_tls_global_dynamic_32 (dest, x));
7339 if (TARGET_GNU2_TLS)
7341 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
7343 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
7347 case TLS_MODEL_LOCAL_DYNAMIC:
7348 base = gen_reg_rtx (Pmode);
7349 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
7351 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
7353 rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
7356 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
7357 insns = get_insns ();
7360 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
7361 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
7362 CONST_OR_PURE_CALL_P (insns) = 1;
7363 emit_libcall_block (insns, base, rax, note);
7365 else if (TARGET_64BIT && TARGET_GNU2_TLS)
7366 emit_insn (gen_tls_local_dynamic_base_64 (base));
7368 emit_insn (gen_tls_local_dynamic_base_32 (base));
7370 if (TARGET_GNU2_TLS)
7372 rtx x = ix86_tls_module_base ();
7374 set_unique_reg_note (get_last_insn (), REG_EQUIV,
7375 gen_rtx_MINUS (Pmode, x, tp));
7378 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
7379 off = gen_rtx_CONST (Pmode, off);
7381 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
7383 if (TARGET_GNU2_TLS)
7385 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
7387 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
7392 case TLS_MODEL_INITIAL_EXEC:
7396 type = UNSPEC_GOTNTPOFF;
7400 if (reload_in_progress)
7401 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
7402 pic = pic_offset_table_rtx;
7403 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
7405 else if (!TARGET_ANY_GNU_TLS)
7407 pic = gen_reg_rtx (Pmode);
7408 emit_insn (gen_set_got (pic));
7409 type = UNSPEC_GOTTPOFF;
7414 type = UNSPEC_INDNTPOFF;
7417 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
7418 off = gen_rtx_CONST (Pmode, off);
7420 off = gen_rtx_PLUS (Pmode, pic, off);
7421 off = gen_const_mem (Pmode, off);
7422 set_mem_alias_set (off, ix86_GOT_alias_set ());
7424 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7426 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
7427 off = force_reg (Pmode, off);
7428 return gen_rtx_PLUS (Pmode, base, off);
7432 base = get_thread_pointer (true);
7433 dest = gen_reg_rtx (Pmode);
7434 emit_insn (gen_subsi3 (dest, base, off));
7438 case TLS_MODEL_LOCAL_EXEC:
7439 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
7440 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7441 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
7442 off = gen_rtx_CONST (Pmode, off);
7444 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7446 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
7447 return gen_rtx_PLUS (Pmode, base, off);
7451 base = get_thread_pointer (true);
7452 dest = gen_reg_rtx (Pmode);
7453 emit_insn (gen_subsi3 (dest, base, off));
7464 /* Create or return the unique __imp_DECL dllimport symbol corresponding
/* dllimport_map is the hash table in which get_dllimport_decl looks up
   DECL, keyed by htab_hash_pointer of DECL, with a slot inserted by
   htab_find_slot_with_hash.  The visible code allocates a tree_map
   entry for the slot and builds an artificial, ignored, external,
   read-only VAR_DECL of pointer type.  It forms a name by prepending a
   prefix to the stripped assembler name of DECL, wraps a local
   SYMBOL_REF for that name in a constant MEM placed in the GOT alias
   set, and installs the MEM as the DECL_RTL of the new decl.
   NOTE(review): the return type, the test for an already-filled slot,
   the prefix chosen for names starting with FASTCALL_PREFIX and the
   return statement are not visible in this listing.  */
7467 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
7468 htab_t dllimport_map;
7471 get_dllimport_decl (tree decl)
7473 struct tree_map *h, in;
7477 size_t namelen, prefixlen;
7483 dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);
7485 in.hash = htab_hash_pointer (decl);
7486 in.base.from = decl;
7487 loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
7492 *loc = h = ggc_alloc (sizeof (struct tree_map));
7494 h->base.from = decl;
7495 h->to = to = build_decl (VAR_DECL, NULL, ptr_type_node);
7496 DECL_ARTIFICIAL (to) = 1;
7497 DECL_IGNORED_P (to) = 1;
7498 DECL_EXTERNAL (to) = 1;
7499 TREE_READONLY (to) = 1;
7501 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
7502 name = targetm.strip_name_encoding (name);
7503 if (name[0] == FASTCALL_PREFIX)
7509 prefix = "*__imp__";
7511 namelen = strlen (name);
7512 prefixlen = strlen (prefix);
7513 imp_name = alloca (namelen + prefixlen + 1);
7514 memcpy (imp_name, prefix, prefixlen);
7515 memcpy (imp_name + prefixlen, name, namelen + 1);
7517 name = ggc_alloc_string (imp_name, namelen + prefixlen);
7518 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
7519 SET_SYMBOL_REF_DECL (rtl, to);
7520 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL;
7522 rtl = gen_const_mem (Pmode, rtl);
7523 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
7525 SET_DECL_RTL (to, rtl);
7530 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
7531 true if we require the result be a register.
   SYMBOL must have a SYMBOL_REF_DECL (this is asserted).  The result is
   built from the DECL_RTL of the decl returned by get_dllimport_decl,
   optionally forced into a Pmode register.  NOTE(review): the test of
   WANT_REG and the return statement are not visible in this listing.  */
7534 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
7539 gcc_assert (SYMBOL_REF_DECL (symbol));
7540 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol));
7542 x = DECL_RTL (imp_decl);
7544 x = force_reg (Pmode, x);
7548 /* Try machine-dependent ways of modifying an illegitimate address
7549 to be legitimate. If we find one, return the new, valid address.
7550 This macro is used in only one place: `memory_address' in explow.c.
7552 OLDX is the address as it was before break_out_memory_refs was called.
7553 In some cases it is useful to look at this to decide what needs to be done.
7555 MODE and WIN are passed so that this macro can use
7556 GO_IF_LEGITIMATE_ADDRESS.
7558 It is always safe for this macro to do nothing. It exists to recognize
7559 opportunities to optimize the output.
7561 For the 80386, we handle X+REG by loading X into a register R and
7562 using R+REG. R will go in a general reg and indexing will be used.
7563 However, if REG is a broken-out memory address or multiplication,
7564 nothing needs to be done because REG can certainly go in a general reg.
7566 When -fpic is used, special handling is needed for symbolic references.
7567 See comments by legitimize_pic_address in i386.c for details.

   In the visible code, TLS symbols (bare or with a constant offset) go
   to legitimize_tls_address, symbolic constants under flag_pic go to
   legitimize_pic_address, and dllimport symbols go to
   legitimize_dllimport_symbol.  The remaining code canonicalizes
   shifts into multiplies, reorders PLUS operands and re-tests the
   result with legitimate_address_p whenever CHANGED is set.
   NOTE(review): the return type, braces, the updates of CHANGED and
   the return statements are not visible in this listing.  */
7570 legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
7575 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
7577 return legitimize_tls_address (x, log, false);
7578 if (GET_CODE (x) == CONST
7579 && GET_CODE (XEXP (x, 0)) == PLUS
7580 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
7581 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
7583 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0), log, false);
7584 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
7587 if (flag_pic && SYMBOLIC_CONST (x))
7588 return legitimize_pic_address (x, 0);
7590 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
7592 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (x))
7593 return legitimize_dllimport_symbol (x, true);
7594 if (GET_CODE (x) == CONST
7595 && GET_CODE (XEXP (x, 0)) == PLUS
7596 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
7597 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (x, 0), 0)))
7599 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (x, 0), 0), true);
7600 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
7604 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
7605 if (GET_CODE (x) == ASHIFT
7606 && CONST_INT_P (XEXP (x, 1))
7607 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
7610 log = INTVAL (XEXP (x, 1));
7611 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
7612 GEN_INT (1 << log));
7615 if (GET_CODE (x) == PLUS)
7617 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
7619 if (GET_CODE (XEXP (x, 0)) == ASHIFT
7620 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
7621 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
7624 log = INTVAL (XEXP (XEXP (x, 0), 1));
7625 XEXP (x, 0) = gen_rtx_MULT (Pmode,
7626 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
7627 GEN_INT (1 << log));
7630 if (GET_CODE (XEXP (x, 1)) == ASHIFT
7631 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
7632 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
7635 log = INTVAL (XEXP (XEXP (x, 1), 1));
7636 XEXP (x, 1) = gen_rtx_MULT (Pmode,
7637 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
7638 GEN_INT (1 << log));
7641 /* Put multiply first if it isn't already. */
7642 if (GET_CODE (XEXP (x, 1)) == MULT)
7644 rtx tmp = XEXP (x, 0);
7645 XEXP (x, 0) = XEXP (x, 1);
7650 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
7651 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
7652 created by virtual register instantiation, register elimination, and
7653 similar optimizations. */
7654 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
7657 x = gen_rtx_PLUS (Pmode,
7658 gen_rtx_PLUS (Pmode, XEXP (x, 0),
7659 XEXP (XEXP (x, 1), 0)),
7660 XEXP (XEXP (x, 1), 1));
7664 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
7665 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
7666 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
7667 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
7668 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
7669 && CONSTANT_P (XEXP (x, 1)))
7672 rtx other = NULL_RTX;
7674 if (CONST_INT_P (XEXP (x, 1)))
7676 constant = XEXP (x, 1);
7677 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
7679 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
7681 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
7682 other = XEXP (x, 1);
7690 x = gen_rtx_PLUS (Pmode,
7691 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
7692 XEXP (XEXP (XEXP (x, 0), 1), 0)),
7693 plus_constant (other, INTVAL (constant)));
7697 if (changed && legitimate_address_p (mode, x, FALSE))
7700 if (GET_CODE (XEXP (x, 0)) == MULT)
7703 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
7706 if (GET_CODE (XEXP (x, 1)) == MULT)
7709 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
7713 && REG_P (XEXP (x, 1))
7714 && REG_P (XEXP (x, 0)))
7717 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
7720 x = legitimize_pic_address (x, 0);
7723 if (changed && legitimate_address_p (mode, x, FALSE))
7726 if (REG_P (XEXP (x, 0)))
7728 rtx temp = gen_reg_rtx (Pmode);
7729 rtx val = force_operand (XEXP (x, 1), temp);
7731 emit_move_insn (temp, val);
7737 else if (REG_P (XEXP (x, 1)))
7739 rtx temp = gen_reg_rtx (Pmode);
7740 rtx val = force_operand (XEXP (x, 0), temp);
7742 emit_move_insn (temp, val);
7752 /* Print an integer constant expression in assembler syntax. Addition
7753 and subtraction are the only arithmetic that may appear in these
7754 expressions. FILE is the stdio stream to write to, X is the rtx, and
7755 CODE is the operand print code from the output string.
   The function recurses on the operands of X and, for an UNSPEC with
   exactly one operand, prints that operand followed by a relocation
   suffix such as "@GOT" or "@GOTOFF".  Unsupported input is reported
   through output_operand_lossage.
   NOTE(review): the internal-label path passes asm_out_file rather than
   FILE to assemble_name -- confirm the two always denote the same
   stream.  The case labels, braces and break statements of both
   switches are not visible in this listing.  */
7758 output_pic_addr_const (FILE *file, rtx x, int code)
7762 switch (GET_CODE (x))
7765 gcc_assert (flag_pic);
7770 if (! TARGET_MACHO || TARGET_64BIT)
7771 output_addr_const (file, x);
7774 const char *name = XSTR (x, 0);
7776 /* Mark the decl as referenced so that cgraph will
7777 output the function. */
7778 if (SYMBOL_REF_DECL (x))
7779 mark_decl_referenced (SYMBOL_REF_DECL (x));
7782 if (MACHOPIC_INDIRECT
7783 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
7784 name = machopic_indirection_name (x, /*stub_p=*/true);
7786 assemble_name (file, name);
7788 if (!TARGET_MACHO && !TARGET_64BIT_MS_ABI
7789 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
7790 fputs ("@PLT", file);
7797 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
7798 assemble_name (asm_out_file, buf);
7802 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
7806 /* This used to output parentheses around the expression,
7807 but that does not work on the 386 (either ATT or BSD assembler). */
7808 output_pic_addr_const (file, XEXP (x, 0), code);
7812 if (GET_MODE (x) == VOIDmode)
7814 /* We can use %d if the number is <32 bits and positive. */
7815 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
7816 fprintf (file, "0x%lx%08lx",
7817 (unsigned long) CONST_DOUBLE_HIGH (x),
7818 (unsigned long) CONST_DOUBLE_LOW (x));
7820 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
7823 /* We can't handle floating point constants;
7824 PRINT_OPERAND must handle them. */
7825 output_operand_lossage ("floating constant misused");
7829 /* Some assemblers need integer constants to appear first. */
7830 if (CONST_INT_P (XEXP (x, 0)))
7832 output_pic_addr_const (file, XEXP (x, 0), code);
7834 output_pic_addr_const (file, XEXP (x, 1), code);
7838 gcc_assert (CONST_INT_P (XEXP (x, 1)));
7839 output_pic_addr_const (file, XEXP (x, 1), code);
7841 output_pic_addr_const (file, XEXP (x, 0), code);
7847 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
7848 output_pic_addr_const (file, XEXP (x, 0), code);
7850 output_pic_addr_const (file, XEXP (x, 1), code);
7852 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
7856 gcc_assert (XVECLEN (x, 0) == 1);
7857 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
7858 switch (XINT (x, 1))
7861 fputs ("@GOT", file);
7864 fputs ("@GOTOFF", file);
7867 fputs ("@PLTOFF", file);
7869 case UNSPEC_GOTPCREL:
7870 fputs ("@GOTPCREL(%rip)", file);
7872 case UNSPEC_GOTTPOFF:
7873 /* FIXME: This might be @TPOFF in Sun ld too. */
7874 fputs ("@GOTTPOFF", file);
7877 fputs ("@TPOFF", file);
7881 fputs ("@TPOFF", file);
7883 fputs ("@NTPOFF", file);
7886 fputs ("@DTPOFF", file);
7888 case UNSPEC_GOTNTPOFF:
7890 fputs ("@GOTTPOFF(%rip)", file);
7892 fputs ("@GOTNTPOFF", file);
7894 case UNSPEC_INDNTPOFF:
7895 fputs ("@INDNTPOFF", file);
7898 output_operand_lossage ("invalid UNSPEC as operand");
7904 output_operand_lossage ("invalid expression as operand");
7908 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
7909 We need to emit DTP-relative relocations.
   Writes ASM_LONG, the address constant X and an "@DTPOFF" suffix to
   FILE.  A ", 0" is appended on a path whose guarding test is not
   visible in this listing (presumably it depends on SIZE -- confirm).  */
7911 static void ATTRIBUTE_UNUSED
7912 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
7914 fputs (ASM_LONG, file);
7915 output_addr_const (file, x);
7916 fputs ("@DTPOFF", file);
7922 fputs (", 0", file);
7929 /* In the name of slightly smaller debug output, and to cater to
7930 general assembler lossage, recognize PIC+GOTOFF and turn it back
7931 into a direct symbol reference.
7933 On Darwin, this is necessary to avoid a crash, because Darwin
7934 has a different PIC label for each routine but the DWARF debugging
7935 information is not associated with any particular routine, so it's
7936 necessary to remove references to the PIC label from RTL stored by
7937 the DWARF output code.

   The visible code recognizes a CONST wrapping an UNSPEC_GOTPCREL, and
   the form PIC register (optionally plus a register, MULT or ASHIFT
   addend) plus a CONST.  That CONST must wrap an UNSPEC_GOT when ORIG_X
   is a MEM, or an UNSPEC_GOTOFF when it is not, optionally with a
   constant addend.  The constant and register addends are added back
   onto the recovered operand.
   NOTE(review): the return type, braces and the return statements for
   the non-matching cases are not visible in this listing.  */
7940 ix86_delegitimize_address (rtx orig_x)
7943 /* reg_addend is NULL or a multiple of some register. */
7944 rtx reg_addend = NULL_RTX;
7945 /* const_addend is NULL or a const_int. */
7946 rtx const_addend = NULL_RTX;
7947 /* This is the result, or NULL. */
7948 rtx result = NULL_RTX;
7955 if (GET_CODE (x) != CONST
7956 || GET_CODE (XEXP (x, 0)) != UNSPEC
7957 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
7960 return XVECEXP (XEXP (x, 0), 0, 0);
7963 if (GET_CODE (x) != PLUS
7964 || GET_CODE (XEXP (x, 1)) != CONST)
7967 if (REG_P (XEXP (x, 0))
7968 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
7969 /* %ebx + GOT/GOTOFF */
7971 else if (GET_CODE (XEXP (x, 0)) == PLUS)
7973 /* %ebx + %reg * scale + GOT/GOTOFF */
7974 reg_addend = XEXP (x, 0);
7975 if (REG_P (XEXP (reg_addend, 0))
7976 && REGNO (XEXP (reg_addend, 0)) == PIC_OFFSET_TABLE_REGNUM)
7977 reg_addend = XEXP (reg_addend, 1);
7978 else if (REG_P (XEXP (reg_addend, 1))
7979 && REGNO (XEXP (reg_addend, 1)) == PIC_OFFSET_TABLE_REGNUM)
7980 reg_addend = XEXP (reg_addend, 0);
7983 if (!REG_P (reg_addend)
7984 && GET_CODE (reg_addend) != MULT
7985 && GET_CODE (reg_addend) != ASHIFT)
7991 x = XEXP (XEXP (x, 1), 0);
7992 if (GET_CODE (x) == PLUS
7993 && CONST_INT_P (XEXP (x, 1)))
7995 const_addend = XEXP (x, 1);
7999 if (GET_CODE (x) == UNSPEC
8000 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x))
8001 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))))
8002 result = XVECEXP (x, 0, 0);
8004 if (TARGET_MACHO && darwin_local_data_pic (x)
8006 result = XEXP (x, 0);
8012 result = gen_rtx_PLUS (Pmode, result, const_addend);
8014 result = gen_rtx_PLUS (Pmode, reg_addend, result);
8018 /* If X is a machine specific address (i.e. a symbol or label being
8019 referenced as a displacement from the GOT implemented using an
8020 UNSPEC), then return the base term. Otherwise return X.
   Two paths are visible.  One unwraps a CONST, an optional CONST_INT or
   CONST_DOUBLE addend and an UNSPEC_GOTPCREL unspec; the other calls
   ix86_delegitimize_address.  On both paths the resulting term is
   tested for being a SYMBOL_REF or LABEL_REF.
   NOTE(review): the condition selecting between the two paths and the
   return statements are not visible in this listing.  */
8023 ix86_find_base_term (rtx x)
8029 if (GET_CODE (x) != CONST)
8032 if (GET_CODE (term) == PLUS
8033 && (CONST_INT_P (XEXP (term, 1))
8034 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
8035 term = XEXP (term, 0);
8036 if (GET_CODE (term) != UNSPEC
8037 || XINT (term, 1) != UNSPEC_GOTPCREL)
8040 term = XVECEXP (term, 0, 0);
8042 if (GET_CODE (term) != SYMBOL_REF
8043 && GET_CODE (term) != LABEL_REF)
8049 term = ix86_delegitimize_address (x);
8051 if (GET_CODE (term) != SYMBOL_REF
8052 && GET_CODE (term) != LABEL_REF)
/* Write to FILE the condition suffix selected for comparison CODE in
   flags mode MODE.  For CCFPmode and CCFPUmode, CODE is first mapped
   through ix86_fp_comparison_codes (asserting that no bypass or second
   code is needed) and ix86_fp_compare_code_to_integer.  reverse_condition
   is applied on a path presumably controlled by REVERSE.  FP selects
   alternative spellings for some suffixes, for example "nbe" instead
   of "a" and "u" instead of "p".
   NOTE(review): the remaining parameters, the switch over CODE, its case
   labels and most suffix assignments are not visible in this listing.  */
8059 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
8064 if (mode == CCFPmode || mode == CCFPUmode)
8066 enum rtx_code second_code, bypass_code;
8067 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
8068 gcc_assert (bypass_code == UNKNOWN && second_code == UNKNOWN);
8069 code = ix86_fp_compare_code_to_integer (code);
8073 code = reverse_condition (code);
8084 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
8088 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
8089 Those same assemblers have the same but opposite lossage on cmov. */
8090 gcc_assert (mode == CCmode);
8091 suffix = fp ? "nbe" : "a";
8111 gcc_assert (mode == CCmode);
8133 gcc_assert (mode == CCmode);
8134 suffix = fp ? "nb" : "ae";
8137 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
8141 gcc_assert (mode == CCmode);
8145 suffix = fp ? "u" : "p";
8148 suffix = fp ? "nu" : "np";
8153 fputs (suffix, file);
8156 /* Print the name of register X to FILE based on its machine mode and number.
8157 If CODE is 'w', pretend the mode is HImode.
8158 If CODE is 'b', pretend the mode is QImode.
8159 If CODE is 'k', pretend the mode is SImode.
8160 If CODE is 'q', pretend the mode is DImode.
8161 If CODE is 'h', pretend the reg is the 'high' byte register.
8162 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.
   The function asserts that X is not the argument pointer, frame
   pointer, flags, FPSR or FPCR register.  CODE is also set from
   GET_MODE_SIZE of the mode of X, apparently as the fallback of the
   chain of CODE tests.  REX integer registers are printed as r<N> with
   a b, w, d or empty suffix and are asserted to occur only under
   TARGET_64BIT.
   NOTE(review): the return type, braces, the values assigned in the
   CODE tests and the case labels are not visible in this listing.  */
8165 print_reg (rtx x, int code, FILE *file)
8167 gcc_assert (REGNO (x) != ARG_POINTER_REGNUM
8168 && REGNO (x) != FRAME_POINTER_REGNUM
8169 && REGNO (x) != FLAGS_REG
8170 && REGNO (x) != FPSR_REG
8171 && REGNO (x) != FPCR_REG);
8173 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
8176 if (code == 'w' || MMX_REG_P (x))
8178 else if (code == 'b')
8180 else if (code == 'k')
8182 else if (code == 'q')
8184 else if (code == 'y')
8186 else if (code == 'h')
8189 code = GET_MODE_SIZE (GET_MODE (x));
8191 /* Irritatingly, AMD extended registers use different naming convention
8192 from the normal registers. */
8193 if (REX_INT_REG_P (x))
8195 gcc_assert (TARGET_64BIT);
8199 error ("extended registers have no high halves");
8202 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
8205 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
8208 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
8211 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
8214 error ("unsupported operand size for extended register");
8222 if (STACK_TOP_P (x))
8224 fputs ("st(0)", file);
8231 if (! ANY_FP_REG_P (x))
8232 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
8237 fputs (hi_reg_name[REGNO (x)], file);
8240 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
8242 fputs (qi_reg_name[REGNO (x)], file);
8245 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
8247 fputs (qi_high_reg_name[REGNO (x)], file);
8254 /* Locate some local-dynamic symbol still in use by this function
8255 so that we can print its name in some tls_local_dynamic_base
/* Helper with the signature of a for_each_rtx callback; DATA is unused.
   When the rtx X is a SYMBOL_REF whose TLS model is local-dynamic, its
   name string is stored in cfun->machine->some_ld_name.
   NOTE(review): the declaration of X (presumably taken from *PX), the
   return type and the return statements are not visible in this
   listing.  */
8259 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
8263 if (GET_CODE (x) == SYMBOL_REF
8264 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
8266 cfun->machine->some_ld_name = XSTR (x, 0);
/* Return cfun->machine->some_ld_name.  If it is already set it is
   returned directly.  Otherwise the insns from get_insns are walked and
   for_each_rtx is run over each pattern with
   get_some_local_dynamic_name_1, returning the name once the walk
   reports a hit.
   NOTE(review): the return type, the first half of the per-insn test and
   what happens when no insn matches are not visible in this listing.  */
8274 get_some_local_dynamic_name (void)
8278 if (cfun->machine->some_ld_name)
8279 return cfun->machine->some_ld_name;
8281 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
8283 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
8284 return cfun->machine->some_ld_name;
8290 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
8291 C -- print opcode suffix for set/cmov insn.
8292 c -- like C, but print reversed condition
8293 F,f -- likewise, but for floating-point.
8294 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
8296 R -- print the prefix for register names.
8297 z -- print the opcode suffix for the size of the current operand.
8298 * -- print a star (in certain assembler syntax)
8299 A -- print an absolute memory reference.
8300 w -- print the operand as if it's a "word" (HImode) even if it isn't.
8301 s -- print a shift double count, followed by the assembler's argument
8303 b -- print the QImode name of the register for the indicated operand.
8304 %b0 would print %al if operands[0] is reg 0.
8305 w -- likewise, print the HImode name of the register.
8306 k -- likewise, print the SImode name of the register.
8307 q -- likewise, print the DImode name of the register.
8308 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
8309 y -- print "st(0)" instead of "st" as a register.
8310 D -- print condition for SSE cmp instruction.
8311 P -- if PIC, print an @PLT suffix.
8312 X -- don't print any sort of PIC '@' suffix for a symbol.
8313 & -- print some in-use local-dynamic symbol name.
8314 H -- print a memory address offset by 8; used for sse high-parts
/* Print operand X to FILE.  CODE is the operand modifier letter (zero
   when there is none); several branches below choose their output
   according to ASSEMBLER_DIALECT (AT&T versus Intel syntax).  */
8318 print_operand (FILE *file, rtx x, int code)
8325 if (ASSEMBLER_DIALECT == ASM_ATT)
8330 assemble_name (file, get_some_local_dynamic_name ());
8334 switch (ASSEMBLER_DIALECT)
8341 /* Intel syntax. For absolute addresses, registers should not
8342 be surrounded by braces. */
8346 PRINT_OPERAND (file, x, 0);
8356 PRINT_OPERAND (file, x, 0);
8361 if (ASSEMBLER_DIALECT == ASM_ATT)
8366 if (ASSEMBLER_DIALECT == ASM_ATT)
8371 if (ASSEMBLER_DIALECT == ASM_ATT)
8376 if (ASSEMBLER_DIALECT == ASM_ATT)
8381 if (ASSEMBLER_DIALECT == ASM_ATT)
8386 if (ASSEMBLER_DIALECT == ASM_ATT)
8391 /* 387 opcodes don't get size suffixes if the operands are
8393 if (STACK_REG_P (x))
8396 /* Likewise if using Intel opcodes. */
8397 if (ASSEMBLER_DIALECT == ASM_INTEL)
8400 /* This is the size of op from size of operand. */
8401 switch (GET_MODE_SIZE (GET_MODE (x)))
8410 #ifdef HAVE_GAS_FILDS_FISTS
8420 if (GET_MODE (x) == SFmode)
8435 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
8437 #ifdef GAS_MNEMONICS
8463 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
8465 PRINT_OPERAND (file, x, 0);
8471 /* Little bit of braindamage here. The SSE compare instructions
8472 use completely different names for the comparisons than the
8473 fp conditional moves. */
8474 switch (GET_CODE (x))
8489 fputs ("unord", file);
8493 fputs ("neq", file);
8497 fputs ("nlt", file);
8501 fputs ("nle", file);
8504 fputs ("ord", file);
8511 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8512 if (ASSEMBLER_DIALECT == ASM_ATT)
8514 switch (GET_MODE (x))
8516 case HImode: putc ('w', file); break;
8518 case SFmode: putc ('l', file); break;
8520 case DFmode: putc ('q', file); break;
8521 default: gcc_unreachable ();
8528 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
8531 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8532 if (ASSEMBLER_DIALECT == ASM_ATT)
8535 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
8538 /* Like above, but reverse condition */
8540 /* Check to see if argument to %c is really a constant
8541 and not a condition code which needs to be reversed. */
8542 if (!COMPARISON_P (x))
8544 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
8547 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
8550 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8551 if (ASSEMBLER_DIALECT == ASM_ATT)
8554 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
8558 /* It doesn't actually matter what mode we use here, as we're
8559 only going to use this for printing. */
8560 x = adjust_address_nv (x, DImode, 8);
8567 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
8570 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
8573 int pred_val = INTVAL (XEXP (x, 0));
/* Only a probability outside the 45%..55% band of REG_BR_PROB_BASE
   is considered for a hint.  */
8575 if (pred_val < REG_BR_PROB_BASE * 45 / 100
8576 || pred_val > REG_BR_PROB_BASE * 55 / 100)
8578 int taken = pred_val > REG_BR_PROB_BASE / 2;
8579 int cputaken = final_forward_branch_p (current_output_insn) == 0;
8581 /* Emit hints only in the case default branch prediction
8582 heuristics would fail. */
8583 if (taken != cputaken)
8585 /* We use 3e (DS) prefix for taken branches and
8586 2e (CS) prefix for not taken branches. */
8588 fputs ("ds ; ", file);
8590 fputs ("cs ; ", file);
8597 output_operand_lossage ("invalid operand code '%c'", code);
8602 print_reg (x, code, file);
8606 /* No `byte ptr' prefix for call instructions. */
8607 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
8610 switch (GET_MODE_SIZE (GET_MODE (x)))
8612 case 1: size = "BYTE"; break;
8613 case 2: size = "WORD"; break;
8614 case 4: size = "DWORD"; break;
8615 case 8: size = "QWORD"; break;
8616 case 12: size = "XWORD"; break;
8617 case 16: size = "XMMWORD"; break;
8622 /* Check for explicit size override (codes 'b', 'w' and 'k') */
8625 else if (code == 'w')
8627 else if (code == 'k')
8631 fputs (" PTR ", file);
8635 /* Avoid (%rip) for call operands. */
8636 if (CONSTANT_ADDRESS_P (x) && code == 'P'
8637 && !CONST_INT_P (x))
8638 output_addr_const (file, x);
8639 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
8640 output_operand_lossage ("invalid constraints for operand");
8645 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
/* An SFmode constant is converted to its target single-precision
   image and printed as an 8-digit hexadecimal number.  */
8650 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8651 REAL_VALUE_TO_TARGET_SINGLE (r, l);
8653 if (ASSEMBLER_DIALECT == ASM_ATT)
8655 fprintf (file, "0x%08lx", l);
8658 /* These float cases don't actually occur as immediate operands. */
8659 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
8663 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
8664 fprintf (file, "%s", dstr);
8667 else if (GET_CODE (x) == CONST_DOUBLE
8668 && GET_MODE (x) == XFmode)
8672 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
8673 fprintf (file, "%s", dstr);
8678 /* We have patterns that allow zero sets of memory, for instance.
8679 In 64-bit mode, we should probably support all 8-byte vectors,
8680 since we can in fact encode that into an immediate. */
8681 if (GET_CODE (x) == CONST_VECTOR)
8683 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
8689 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
8691 if (ASSEMBLER_DIALECT == ASM_ATT)
8694 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
8695 || GET_CODE (x) == LABEL_REF)
8697 if (ASSEMBLER_DIALECT == ASM_ATT)
8700 fputs ("OFFSET FLAT:", file);
8703 if (CONST_INT_P (x))
8704 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
8706 output_pic_addr_const (file, x, code);
8708 output_addr_const (file, x);
8712 /* Print a memory operand whose address is ADDR to FILE.  ADDR is
   decomposed with ix86_decompose_address; the resulting parts (index,
   scale, segment and, presumably on lines not visible here, base and
   displacement) drive the output, which differs between the AT&T and
   Intel dialects. */
8715 print_operand_address (FILE *file, rtx addr)
8717 struct ix86_address parts;
8718 rtx base, index, disp;
8720 int ok = ix86_decompose_address (addr, &parts);
8725 index = parts.index;
8727 scale = parts.scale;
8735 if (USER_LABEL_PREFIX[0] == 0)
8737 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
8743 if (!base && !index)
8745 /* Displacement only requires special attention. */
8747 if (CONST_INT_P (disp))
8749 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
8751 if (USER_LABEL_PREFIX[0] == 0)
8753 fputs ("ds:", file);
8755 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
8758 output_pic_addr_const (file, disp, 0);
8760 output_addr_const (file, disp);
8762 /* Use one byte shorter RIP relative addressing for 64bit mode. */
8765 if (GET_CODE (disp) == CONST
8766 && GET_CODE (XEXP (disp, 0)) == PLUS
8767 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
8768 disp = XEXP (XEXP (disp, 0), 0);
8769 if (GET_CODE (disp) == LABEL_REF
8770 || (GET_CODE (disp) == SYMBOL_REF
8771 && SYMBOL_REF_TLS_MODEL (disp) == 0))
8772 fputs ("(%rip)", file);
/* AT&T dialect: the displacement is printed first, then the base,
   the index and a comma-separated scale.  */
8777 if (ASSEMBLER_DIALECT == ASM_ATT)
8782 output_pic_addr_const (file, disp, 0);
8783 else if (GET_CODE (disp) == LABEL_REF)
8784 output_asm_label (disp);
8786 output_addr_const (file, disp);
8791 print_reg (base, 0, file);
8795 print_reg (index, 0, file);
8797 fprintf (file, ",%d", scale);
/* Presumably the non-AT&T (Intel) arm: the symbolic part, the base, a
   separately printed integer offset, then the index multiplied by the
   scale.  */
8803 rtx offset = NULL_RTX;
8807 /* Pull out the offset of a symbol; print any symbol itself. */
8808 if (GET_CODE (disp) == CONST
8809 && GET_CODE (XEXP (disp, 0)) == PLUS
8810 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
8812 offset = XEXP (XEXP (disp, 0), 1);
8813 disp = gen_rtx_CONST (VOIDmode,
8814 XEXP (XEXP (disp, 0), 0));
8818 output_pic_addr_const (file, disp, 0);
8819 else if (GET_CODE (disp) == LABEL_REF)
8820 output_asm_label (disp);
8821 else if (CONST_INT_P (disp))
8824 output_addr_const (file, disp);
8830 print_reg (base, 0, file);
8833 if (INTVAL (offset) >= 0)
8835 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
8839 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
8846 print_reg (index, 0, file);
8848 fprintf (file, "*%d", scale);
/* Print to FILE the address constant X when it is an UNSPEC: the first
   element of the UNSPEC vector is printed with output_addr_const and
   followed by the relocation suffix (@GOTTPOFF, @TPOFF, @NTPOFF,
   @DTPOFF, @GOTNTPOFF, @INDNTPOFF) selected by the UNSPEC number.
   NOTE(review): the return statements are not visible here; presumably
   a non-UNSPEC X is reported as not handled -- confirm.  */
8856 output_addr_const_extra (FILE *file, rtx x)
8860 if (GET_CODE (x) != UNSPEC)
8863 op = XVECEXP (x, 0, 0);
8864 switch (XINT (x, 1))
8866 case UNSPEC_GOTTPOFF:
8867 output_addr_const (file, op);
8868 /* FIXME: This might be @TPOFF in Sun ld. */
8869 fputs ("@GOTTPOFF", file);
8872 output_addr_const (file, op);
8873 fputs ("@TPOFF", file);
8876 output_addr_const (file, op);
8878 fputs ("@TPOFF", file);
8880 fputs ("@NTPOFF", file);
8883 output_addr_const (file, op);
8884 fputs ("@DTPOFF", file);
8886 case UNSPEC_GOTNTPOFF:
8887 output_addr_const (file, op);
8889 fputs ("@GOTTPOFF(%rip)", file);
8891 fputs ("@GOTNTPOFF", file);
8893 case UNSPEC_INDNTPOFF:
8894 output_addr_const (file, op);
8895 fputs ("@INDNTPOFF", file);
8905 /* Split one or more DImode RTL references into pairs of SImode
8906 references. The RTL can be REG, offsettable MEM, integer constant, or
8907 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
8908 split and "num" is its length. lo_half and hi_half are output arrays
8909 that parallel "operands". The low half is the SImode piece at byte
offset 0 and the high half the SImode piece at byte offset 4. An
operand whose mode is VOIDmode is treated as DImode when the subreg
is taken. */
8912 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
8916 rtx op = operands[num];
8918 /* simplify_subreg refuses to split volatile memory addresses,
8919 but we still have to handle it. */
8922 lo_half[num] = adjust_address (op, SImode, 0);
8923 hi_half[num] = adjust_address (op, SImode, 4);
8927 lo_half[num] = simplify_gen_subreg (SImode, op,
8928 GET_MODE (op) == VOIDmode
8929 ? DImode : GET_MODE (op), 0);
8930 hi_half[num] = simplify_gen_subreg (SImode, op,
8931 GET_MODE (op) == VOIDmode
8932 ? DImode : GET_MODE (op), 4);
8936 /* Split one or more TImode RTL references into pairs of DImode
8937 references. The RTL can be REG, offsettable MEM, integer constant, or
8938 CONST_DOUBLE. "operands" is a pointer to an array of TImode RTL to
8939 split and "num" is its length. lo_half and hi_half are output arrays
8940 that parallel "operands". The low half is the DImode piece at byte
offset 0 and the high half the DImode piece at byte offset 8. */
8943 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
8947 rtx op = operands[num];
8949 /* simplify_subreg refuses to split volatile memory addresses, but we
8950 still have to handle it. */
8953 lo_half[num] = adjust_address (op, DImode, 0);
8954 hi_half[num] = adjust_address (op, DImode, 8);
8958 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
8959 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
8964 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
8965 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
8966 is the expression of the binary operation. The output may either be
8967 emitted here, or returned to the caller, like all output_* functions.
8969 There is no guarantee that the operands are the same mode, as they
8970 might be within FLOAT or FLOAT_EXTEND expressions.

When any of operands[0], operands[1] or operands[2] is an SSE register,
a scalar SSE template (ss or sd suffix, chosen from the mode of
operands[0]) is appended instead of a 387 operand template. INSN is
consulted for REG_DEAD notes to choose the popping forms. */
8972 #ifndef SYSV386_COMPAT
8973 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
8974 wants to fix the assemblers because that causes incompatibility
8975 with gcc. No-one wants to fix gcc because that causes
8976 incompatibility with assemblers... You can use the option of
8977 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
8978 #define SYSV386_COMPAT 1
8982 output_387_binary_op (rtx insn, rtx *operands)
/* Static scratch buffer in which the template is assembled (see the
   strcat calls below).  NOTE(review): presumably this buffer is what
   gets returned, so the result would only be valid until the next
   call -- confirm.  */
8984 static char buf[30];
8987 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
8989 #ifdef ENABLE_CHECKING
8990 /* Even if we do not want to check the inputs, this documents input
8991 constraints. Which helps in understanding the following code. */
8992 if (STACK_REG_P (operands[0])
8993 && ((REG_P (operands[1])
8994 && REGNO (operands[0]) == REGNO (operands[1])
8995 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
8996 || (REG_P (operands[2])
8997 && REGNO (operands[0]) == REGNO (operands[2])
8998 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
8999 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
9002 gcc_assert (is_sse);
9005 switch (GET_CODE (operands[3]))
9008 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
9009 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
9017 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
9018 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
9026 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
9027 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
9035 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
9036 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
9050 if (GET_MODE (operands[0]) == SFmode)
9051 strcat (buf, "ss\t{%2, %0|%0, %2}");
9053 strcat (buf, "sd\t{%2, %0|%0, %2}");
9058 switch (GET_CODE (operands[3]))
9062 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
9064 rtx temp = operands[2];
9065 operands[2] = operands[1];
9069 /* know operands[0] == operands[1]. */
9071 if (MEM_P (operands[2]))
9077 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
9079 if (STACK_TOP_P (operands[0]))
9080 /* How is it that we are storing to a dead operand[2]?
9081 Well, presumably operands[1] is dead too. We can't
9082 store the result to st(0) as st(0) gets popped on this
9083 instruction. Instead store to operands[2] (which I
9084 think has to be st(1)). st(1) will be popped later.
9085 gcc <= 2.8.1 didn't have this check and generated
9086 assembly code that the Unixware assembler rejected. */
9087 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
9089 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
9093 if (STACK_TOP_P (operands[0]))
9094 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
9096 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
9101 if (MEM_P (operands[1]))
9107 if (MEM_P (operands[2]))
9113 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
9116 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
9117 derived assemblers, confusingly reverse the direction of
9118 the operation for fsub{r} and fdiv{r} when the
9119 destination register is not st(0). The Intel assembler
9120 doesn't have this brain damage. Read !SYSV386_COMPAT to
9121 figure out what the hardware really does. */
9122 if (STACK_TOP_P (operands[0]))
9123 p = "{p\t%0, %2|rp\t%2, %0}";
9125 p = "{rp\t%2, %0|p\t%0, %2}";
9127 if (STACK_TOP_P (operands[0]))
9128 /* As above for fmul/fadd, we can't store to st(0). */
9129 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
9131 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
9136 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
9139 if (STACK_TOP_P (operands[0]))
9140 p = "{rp\t%0, %1|p\t%1, %0}";
9142 p = "{p\t%1, %0|rp\t%0, %1}";
9144 if (STACK_TOP_P (operands[0]))
9145 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
9147 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
9152 if (STACK_TOP_P (operands[0]))
9154 if (STACK_TOP_P (operands[1]))
9155 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
9157 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
9160 else if (STACK_TOP_P (operands[1]))
9163 p = "{\t%1, %0|r\t%0, %1}";
9165 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
9171 p = "{r\t%2, %0|\t%0, %2}";
9173 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
9186 /* Return needed mode for ENTITY at INSN in optimize_mode_switching
pass.  Asm statements yield I387_CW_UNINITIALIZED; for an insn that is
recognized, its i387_cw attribute is read and compared against the
I387_CW_TRUNC, I387_CW_FLOOR, I387_CW_CEIL and I387_CW_MASK_PM modes. */
9189 ix86_mode_needed (int entity, rtx insn)
9191 enum attr_i387_cw mode;
9193 /* The mode UNINITIALIZED is used to store control word after a
9194 function call or ASM pattern. The mode ANY specifies that function
9195 has no requirements on the control word and makes no changes in the
9196 bits we are interested in. */
9199 || (NONJUMP_INSN_P (insn)
9200 && (asm_noperands (PATTERN (insn)) >= 0
9201 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
9202 return I387_CW_UNINITIALIZED;
9204 if (recog_memoized (insn) < 0)
9207 mode = get_attr_i387_cw (insn);
9212 if (mode == I387_CW_TRUNC)
9217 if (mode == I387_CW_FLOOR)
9222 if (mode == I387_CW_CEIL)
9227 if (mode == I387_CW_MASK_PM)
9238 /* Output code to initialize control word copies used by trunc?f?i and
9239 rounding patterns. CURRENT_MODE is set to current control word,
9240 while NEW_MODE is set to new control word.

NOTE(review): the visible signature takes a single MODE argument;
CURRENT_MODE and NEW_MODE above do not name parameters and look stale.
The body stores the live control word with fnstcw into the
SLOT_CW_STORED stack slot, copies it to a HImode pseudo, adjusts the
rounding or precision-mask bits there, and saves the result in the
stack slot chosen for the requested mode. */
9243 emit_i387_cw_initialization (int mode)
9245 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
9250 rtx reg = gen_reg_rtx (HImode);
9252 emit_insn (gen_x86_fnstcw_1 (stored_mode));
9253 emit_move_insn (reg, copy_rtx (stored_mode));
/* Two code sequences follow.  NOTE(review): the constants 0xc, 0x4 and
   0x8 used with movsi_insv_1 further down equal the 0x0c00, 0x0400 and
   0x0800 masks of the first sequence shifted right by 8, so the second
   sequence presumably writes the second byte of REG directly --
   confirm.  */
9255 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL || optimize_size)
9260 /* round toward zero (truncate) */
9261 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
9262 slot = SLOT_CW_TRUNC;
9266 /* round down toward -oo */
9267 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
9268 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
9269 slot = SLOT_CW_FLOOR;
9273 /* round up toward +oo */
9274 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
9275 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
9276 slot = SLOT_CW_CEIL;
9279 case I387_CW_MASK_PM:
9280 /* mask precision exception for nearbyint() */
9281 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
9282 slot = SLOT_CW_MASK_PM;
9294 /* round toward zero (truncate) */
9295 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
9296 slot = SLOT_CW_TRUNC;
9300 /* round down toward -oo */
9301 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
9302 slot = SLOT_CW_FLOOR;
9306 /* round up toward +oo */
9307 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
9308 slot = SLOT_CW_CEIL;
9311 case I387_CW_MASK_PM:
9312 /* mask precision exception for nearbyint() */
9313 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
9314 slot = SLOT_CW_MASK_PM;
9322 gcc_assert (slot < MAX_386_STACK_LOCALS);
/* Save the modified control word in the stack slot for this mode.  */
9324 new_mode = assign_386_stack_local (HImode, slot);
9325 emit_move_insn (new_mode, reg);
9328 /* Output code for INSN to convert a float to a signed int. OPERANDS
9329 are the insn operands. The output may be [HSD]Imode and the input
9330 operand may be [SDX]Fmode. FISTTP selects the fisttp based sequence
(presumably when nonzero; the guarding test is not visible here). The
source, operands[1], must be the 387 stack top and the destination,
operands[0], must be a memory operand. */
9333 output_fix_trunc (rtx insn, rtx *operands, int fisttp)
9335 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
9336 int dimode_p = GET_MODE (operands[0]) == DImode;
9337 int round_mode = get_attr_i387_cw (insn);
9339 /* Jump through a hoop or two for DImode, since the hardware has no
9340 non-popping instruction. We used to do this a different way, but
9341 that was somewhat fragile and broke with post-reload splitters. */
9342 if ((dimode_p || fisttp) && !stack_top_dies)
9343 output_asm_insn ("fld\t%y1", operands);
9345 gcc_assert (STACK_TOP_P (operands[1]));
9346 gcc_assert (MEM_P (operands[0]));
9347 gcc_assert (GET_MODE (operands[1]) != TFmode);
9350 output_asm_insn ("fisttp%z0\t%0", operands);
/* Sequence without fisttp: load the control word in operand 3 (unless
   the insn needs no particular control word), store with fist or the
   popping fistp, then load the control word in operand 2.  */
9353 if (round_mode != I387_CW_ANY)
9354 output_asm_insn ("fldcw\t%3", operands);
9355 if (stack_top_dies || dimode_p)
9356 output_asm_insn ("fistp%z0\t%0", operands);
9358 output_asm_insn ("fist%z0\t%0", operands);
9359 if (round_mode != I387_CW_ANY)
9360 output_asm_insn ("fldcw\t%2", operands);
9366 /* Output code for x87 ffreep insn. The OPNO argument, which may only
9367 have the values zero or one, indicates the ffreep insn's operand
9368 from the OPERANDS array. When TARGET_USE_FFREEP does not hold, an
fstp of the same operand is returned instead. */
9371 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
9373 if (TARGET_USE_FFREEP)
9374 #if HAVE_AS_IX86_FFREEP
9375 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
/* Raw .word template, presumably used when the assembler has no ffreep
   mnemonic.  The underscore at index 9 is a placeholder that is
   overwritten below with the digit of the stack register number
   relative to FIRST_STACK_REG.  */
9378 static char retval[] = ".word\t0xc_df";
9379 int regno = REGNO (operands[opno]);
9381 gcc_assert (FP_REGNO_P (regno));
9383 retval[9] = '0' + (regno - FIRST_STACK_REG);
9388 return opno ? "fstp\t%y1" : "fstp\t%y0";
9392 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
9393 should be used. UNORDERED_P is true when fucom should be used.
When either of the first two operands is an SSE register one of the
comiss, ucomiss, comisd or ucomisd templates is returned. REG_DEAD
notes on INSN decide whether the popping 387 forms are used. */
9396 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
9399 rtx cmp_op0, cmp_op1;
9400 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
9404 cmp_op0 = operands[0];
9405 cmp_op1 = operands[1];
9409 cmp_op0 = operands[1];
9410 cmp_op1 = operands[2];
9415 if (GET_MODE (operands[0]) == SFmode)
9417 return "ucomiss\t{%1, %0|%0, %1}";
9419 return "comiss\t{%1, %0|%0, %1}";
9422 return "ucomisd\t{%1, %0|%0, %1}";
9424 return "comisd\t{%1, %0|%0, %1}";
9427 gcc_assert (STACK_TOP_P (cmp_op0));
9429 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
/* Comparison against zero uses ftst.  */
9431 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
9435 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
9436 return output_387_ffreep (operands, 1);
9439 return "ftst\n\tfnstsw\t%0";
9442 if (STACK_REG_P (cmp_op1)
9444 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
9445 && REGNO (cmp_op1) != FIRST_STACK_REG)
9447 /* If both the top of the 387 stack dies, and the other operand
9448 is also a stack register that dies, then this must be a
9449 `fcompp' float compare */
9453 /* There is no double popping fcomi variant. Fortunately,
9454 eflags is immune from the fstp's cc clobbering. */
9456 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
9458 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
9459 return output_387_ffreep (operands, 0);
9464 return "fucompp\n\tfnstsw\t%0";
9466 return "fcompp\n\tfnstsw\t%0";
9471 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
9473 static const char * const alt[16] =
9475 "fcom%z2\t%y2\n\tfnstsw\t%0",
9476 "fcomp%z2\t%y2\n\tfnstsw\t%0",
9477 "fucom%z2\t%y2\n\tfnstsw\t%0",
9478 "fucomp%z2\t%y2\n\tfnstsw\t%0",
9480 "ficom%z2\t%y2\n\tfnstsw\t%0",
9481 "ficomp%z2\t%y2\n\tfnstsw\t%0",
9485 "fcomi\t{%y1, %0|%0, %y1}",
9486 "fcomip\t{%y1, %0|%0, %y1}",
9487 "fucomi\t{%y1, %0|%0, %y1}",
9488 "fucomip\t{%y1, %0|%0, %y1}",
/* Build the table index: bit 3 is EFLAGS_P, bit 2 is set when the
   second compare operand has an integer mode, bit 1 is UNORDERED_P and
   bit 0 is set when the stack top dies in INSN.  */
9499 mask = eflags_p << 3;
9500 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
9501 mask |= unordered_p << 1;
9502 mask |= stack_top_dies;
9504 gcc_assert (mask < 16);
/* Write to FILE one line consisting of a data directive followed by
   LPREFIX and the label number VALUE.  The directive defaults to
   ASM_LONG; ASM_QUAD is selected under a condition that is not visible
   in this listing.  */
9513 ix86_output_addr_vec_elt (FILE *file, int value)
9515 const char *directive = ASM_LONG;
9519 directive = ASM_QUAD;
9521 gcc_assert (!TARGET_64BIT);
9524 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
/* Write to FILE one table entry that expresses label number VALUE as a
   relative quantity.  64-bit and VxWorks RTP targets print the
   difference between labels VALUE and REL; otherwise @GOTOFF is used
   when the assembler accepts it in data, the Mach-O function base is
   subtracted on Mach-O, and an expression based on GOT_SYMBOL_NAME is
   the last resort.  The directive is ASM_QUAD for 64-bit targets whose
   CASE_VECTOR_MODE is DImode and ASM_LONG otherwise.  */
9528 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
9530 const char *directive = ASM_LONG;
9533 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
9534 directive = ASM_QUAD;
9536 gcc_assert (!TARGET_64BIT);
9538 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
9539 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
9540 fprintf (file, "%s%s%d-%s%d\n",
9541 directive, LPREFIX, value, LPREFIX, rel);
9542 else if (HAVE_AS_GOTOFF_IN_DATA)
9543 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
9545 else if (TARGET_MACHO)
9547 fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
9548 machopic_output_function_base_name (file);
9549 fprintf(file, "\n");
9553 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
9554 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
8557 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
8561 ix86_expand_clear (rtx dest)
8565 /* We play register width games, which are only valid after reload. */
9566 gcc_assert (reload_completed);
9568 /* Avoid HImode and its attendant prefix byte. */
9569 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
9570 dest = gen_rtx_REG (SImode, REGNO (dest));
9571 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
9573 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
/* NOTE(review): reload_completed is already asserted above, so its
   test in the condition below looks redundant.  */
9574 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
/* The set is wrapped in a PARALLEL together with a clobber of a CCmode
   hard register.  NOTE(review): the register is given as the literal
   17, while other code in this file names FLAGS_REG -- confirm that
   they denote the same register.  */
9576 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
9577 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
9583 /* X is an unchanging MEM. If it is a constant pool reference, return
9584 the constant pool rtx, else NULL. The address of X is first passed
through ix86_delegitimize_address; the result must be a SYMBOL_REF for
which CONSTANT_POOL_ADDRESS_P holds. */
9587 maybe_get_pool_constant (rtx x)
9589 x = ix86_delegitimize_address (XEXP (x, 0));
9591 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
9592 return get_pool_constant (x);
/* Expand a scalar move in MODE described by OPERANDS.  The visible
   steps legitimize a source that is, or contains, a TLS or dllimport
   SYMBOL_REF, legitimize symbolic sources when flag_pic is set (with
   Mach-O specific handling), force awkward sources into registers,
   place floating-point CONST_DOUBLEs in the constant pool, and finally
   emit the SET of op0 from op1.
   NOTE(review): op0 and op1 are presumably OPERANDS[0] and OPERANDS[1];
   their initialization is not visible in this listing -- confirm.  */
9598 ix86_expand_move (enum machine_mode mode, rtx operands[])
9600 int strict = (reload_in_progress || reload_completed);
9602 enum tls_model model;
9607 if (GET_CODE (op1) == SYMBOL_REF)
9609 model = SYMBOL_REF_TLS_MODEL (op1);
9612 op1 = legitimize_tls_address (op1, model, true);
9613 op1 = force_operand (op1, op0);
9617 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
9618 && SYMBOL_REF_DLLIMPORT_P (op1))
9619 op1 = legitimize_dllimport_symbol (op1, false);
/* Symbol plus addend: the symbol is legitimized on its own and the
   addend is added back afterwards.  */
9621 else if (GET_CODE (op1) == CONST
9622 && GET_CODE (XEXP (op1, 0)) == PLUS
9623 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
9625 rtx addend = XEXP (XEXP (op1, 0), 1);
9626 rtx symbol = XEXP (XEXP (op1, 0), 0);
9629 model = SYMBOL_REF_TLS_MODEL (symbol);
9631 tmp = legitimize_tls_address (symbol, model, true);
9632 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
9633 && SYMBOL_REF_DLLIMPORT_P (symbol))
9634 tmp = legitimize_dllimport_symbol (symbol, true);
9638 tmp = force_operand (tmp, NULL);
9639 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
9640 op0, 1, OPTAB_DIRECT);
9646 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
9648 if (TARGET_MACHO && !TARGET_64BIT)
9653 rtx temp = ((reload_in_progress
9654 || ((op0 && REG_P (op0))
9656 ? op0 : gen_reg_rtx (Pmode));
9657 op1 = machopic_indirect_data_reference (op1, temp);
9658 op1 = machopic_legitimize_pic_address (op1, mode,
9659 temp == op1 ? 0 : temp);
9661 else if (MACHOPIC_INDIRECT)
9662 op1 = machopic_indirect_data_reference (op1, 0);
9670 op1 = force_reg (Pmode, op1);
9671 else if (!TARGET_64BIT || !x86_64_movabs_operand (op1, Pmode))
9673 rtx reg = no_new_pseudos ? op0 : NULL_RTX;
9674 op1 = legitimize_pic_address (op1, reg);
9683 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
9684 || !push_operand (op0, mode))
9686 op1 = force_reg (mode, op1);
9688 if (push_operand (op0, mode)
9689 && ! general_no_elim_operand (op1, mode))
9690 op1 = copy_to_mode_reg (mode, op1);
9692 /* Force large constants in 64bit compilation into register
9693 to get them CSEed. */
9694 if (TARGET_64BIT && mode == DImode
9695 && immediate_operand (op1, mode)
9696 && !x86_64_zext_immediate_operand (op1, VOIDmode)
9697 && !register_operand (op0, mode)
9698 && optimize && !reload_completed && !reload_in_progress)
9699 op1 = copy_to_mode_reg (mode, op1);
9701 if (FLOAT_MODE_P (mode))
9703 /* If we are loading a floating point constant to a register,
9704 force the value to memory now, since we'll get better code
9705 out the back end. */
9709 else if (GET_CODE (op1) == CONST_DOUBLE)
9711 op1 = validize_mem (force_const_mem (mode, op1));
9712 if (!register_operand (op0, mode))
9714 rtx temp = gen_reg_rtx (mode);
9715 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
9716 emit_move_insn (op0, temp);
9723 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
/* Expand a vector move in MODE from OPERANDS[1] to OPERANDS[0].
   Before reload, for a register destination, a source for which
   standard_sse_constant_p is not positive is forced into the constant
   pool (one further condition of that test is not visible in this
   listing).  When neither operand is a register the source is first
   forced into a register.  Otherwise a plain SET is emitted.  */
9727 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
9729 rtx op0 = operands[0], op1 = operands[1];
9731 /* Force constants other than zero into memory. We do not know how
9732 the instructions used to build constants modify the upper 64 bits
9733 of the register, once we have that information we may be able
9734 to handle some of them more efficiently. */
9735 if ((reload_in_progress | reload_completed) == 0
9736 && register_operand (op0, mode)
9738 && standard_sse_constant_p (op1) <= 0)
9739 op1 = validize_mem (force_const_mem (mode, op1));
9741 /* Make operand1 a register if it isn't already. */
9743 && !register_operand (op0, mode)
9744 && !register_operand (op1, mode))
9746 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
9750 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
9753 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
9754 straight to ix86_expand_vector_move.

The load path (memory source, addressed through op1) and the store
path (memory destination, MEM_P (op0)) are handled separately. Each
picks movups, movdqu, movupd or a pair of half-width accesses
depending on MODE and on the SSE tuning flags tested below. */
9755 /* Code generation for scalar reg-reg moves of single and double precision data:
9756 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
9760 if (x86_sse_partial_reg_dependency == true)
9765 Code generation for scalar loads of double precision data:
9766 if (x86_sse_split_regs == true)
9767 movlpd mem, reg (gas syntax)
9771 Code generation for unaligned packed loads of single precision data
9772 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
9773 if (x86_sse_unaligned_move_optimal)
9776 if (x86_sse_partial_reg_dependency == true)
9788 Code generation for unaligned packed loads of double precision data
9789 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
9790 if (x86_sse_unaligned_move_optimal)
9793 if (x86_sse_split_regs == true)
9806 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
9815 /* If we're optimizing for size, movups is the smallest. */
9818 op0 = gen_lowpart (V4SFmode, op0);
9819 op1 = gen_lowpart (V4SFmode, op1);
9820 emit_insn (gen_sse_movups (op0, op1));
9824 /* ??? If we have typed data, then it would appear that using
9825 movdqu is the only way to get unaligned data loaded with
9827 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
9829 op0 = gen_lowpart (V16QImode, op0);
9830 op1 = gen_lowpart (V16QImode, op1);
9831 emit_insn (gen_sse2_movdqu (op0, op1));
9835 if (TARGET_SSE2 && mode == V2DFmode)
9839 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
9841 op0 = gen_lowpart (V2DFmode, op0);
9842 op1 = gen_lowpart (V2DFmode, op1);
9843 emit_insn (gen_sse2_movupd (op0, op1));
9847 /* When SSE registers are split into halves, we can avoid
9848 writing to the top half twice. */
9849 if (TARGET_SSE_SPLIT_REGS)
9851 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
9856 /* ??? Not sure about the best option for the Intel chips.
9857 The following would seem to satisfy; the register is
9858 entirely cleared, breaking the dependency chain. We
9859 then store to the upper half, with a dependency depth
9860 of one. A rumor has it that Intel recommends two movsd
9861 followed by an unpacklpd, but this is unconfirmed. And
9862 given that the dependency depth of the unpacklpd would
9863 still be one, I'm not sure why this would be better. */
9864 zero = CONST0_RTX (V2DFmode);
/* Load the two DFmode halves separately, from byte offsets 0 and 8 of
   the source memory.  */
9867 m = adjust_address (op1, DFmode, 0);
9868 emit_insn (gen_sse2_loadlpd (op0, zero, m));
9869 m = adjust_address (op1, DFmode, 8);
9870 emit_insn (gen_sse2_loadhpd (op0, op0, m));
9874 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
9876 op0 = gen_lowpart (V4SFmode, op0);
9877 op1 = gen_lowpart (V4SFmode, op1);
9878 emit_insn (gen_sse_movups (op0, op1));
9882 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
9883 emit_move_insn (op0, CONST0_RTX (mode));
9885 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
/* Load the two V2SFmode halves separately, from byte offsets 0 and 8
   of the source memory.  */
9887 if (mode != V4SFmode)
9888 op0 = gen_lowpart (V4SFmode, op0);
9889 m = adjust_address (op1, V2SFmode, 0);
9890 emit_insn (gen_sse_loadlps (op0, op0, m));
9891 m = adjust_address (op1, V2SFmode, 8);
9892 emit_insn (gen_sse_loadhps (op0, op0, m));
9895 else if (MEM_P (op0))
9897 /* If we're optimizing for size, movups is the smallest. */
9900 op0 = gen_lowpart (V4SFmode, op0);
9901 op1 = gen_lowpart (V4SFmode, op1);
9902 emit_insn (gen_sse_movups (op0, op1));
9906 /* ??? Similar to above, only less clear because of quote
9907 typeless stores unquote. */
9908 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
9909 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
9911 op0 = gen_lowpart (V16QImode, op0);
9912 op1 = gen_lowpart (V16QImode, op1);
9913 emit_insn (gen_sse2_movdqu (op0, op1));
9917 if (TARGET_SSE2 && mode == V2DFmode)
9919 m = adjust_address (op0, DFmode, 0);
9920 emit_insn (gen_sse2_storelpd (m, op1));
9921 m = adjust_address (op0, DFmode, 8);
9922 emit_insn (gen_sse2_storehpd (m, op1));
9926 if (mode != V4SFmode)
9927 op1 = gen_lowpart (V4SFmode, op1);
9928 m = adjust_address (op0, V2SFmode, 0);
9929 emit_insn (gen_sse_storelps (m, op1));
9930 m = adjust_address (op0, V2SFmode, 8);
9931 emit_insn (gen_sse_storehps (m, op1));
9938 /* Expand a push in MODE. This is some mode for which we do not support
9939 proper push instructions, at least from the registers that we expect
9940 the value to live in. */
9943 ix86_expand_push (enum machine_mode mode, rtx x)
9947 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
9948 GEN_INT (-GET_MODE_SIZE (mode)),
9949 stack_pointer_rtx, 1, OPTAB_DIRECT);
9950 if (tmp != stack_pointer_rtx)
9951 emit_move_insn (stack_pointer_rtx, tmp);
9953 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
9954 emit_move_insn (tmp, x);
9957 /* Helper function of ix86_fixup_binary_operands to canonicalize
9958 operand order. Returns true if the operands should be swapped. */
9961 ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
9964 rtx dst = operands[0];
9965 rtx src1 = operands[1];
9966 rtx src2 = operands[2];
9968 /* If the operation is not commutative, we can't do anything. */
9969 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
9972 /* Highest priority is that src1 should match dst. */
9973 if (rtx_equal_p (dst, src1))
9975 if (rtx_equal_p (dst, src2))
9978 /* Next highest priority is that immediate constants come second. */
9979 if (immediate_operand (src2, mode))
9981 if (immediate_operand (src1, mode))
9984 /* Lowest priority is that memory references should come second. */
9994 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
9995 destination to use for the operation. If different from the true
9996 destination in operands[0], a copy operation will be required. */
9999 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
10002 rtx dst = operands[0];
10003 rtx src1 = operands[1];
10004 rtx src2 = operands[2];
10006 /* Canonicalize operand order. */
10007 if (ix86_swap_binary_operands_p (code, mode, operands))
10014 /* Both source operands cannot be in memory. */
10015 if (MEM_P (src1) && MEM_P (src2))
10017 /* Optimization: Only read from memory once. */
10018 if (rtx_equal_p (src1, src2))
10020 src2 = force_reg (mode, src2);
10024 src2 = force_reg (mode, src2);
10027 /* If the destination is memory, and we do not have matching source
10028 operands, do things in registers. */
10029 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
10030 dst = gen_reg_rtx (mode);
10032 /* Source 1 cannot be a constant. */
10033 if (CONSTANT_P (src1))
10034 src1 = force_reg (mode, src1);
10036 /* Source 1 cannot be a non-matching memory. */
10037 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
10038 src1 = force_reg (mode, src1);
10040 operands[1] = src1;
10041 operands[2] = src2;
10045 /* Similarly, but assume that the destination has already been
10046 set up properly. */
10049 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
10050 enum machine_mode mode, rtx operands[])
10052 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
10053 gcc_assert (dst == operands[0]);
10056 /* Attempt to expand a binary operator. Make the expansion closer to the
10057 actual machine, then just general_operand, which will allow 3 separate
10058 memory references (one output, two input) in a single insn. */
10061 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
10064 rtx src1, src2, dst, op, clob;
10066 dst = ix86_fixup_binary_operands (code, mode, operands);
10067 src1 = operands[1];
10068 src2 = operands[2];
10070 /* Emit the instruction. */
10072 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
10073 if (reload_in_progress)
10075 /* Reload doesn't know about the flags register, and doesn't know that
10076 it doesn't want to clobber it. We can only do this with PLUS. */
10077 gcc_assert (code == PLUS);
10082 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
10083 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
10086 /* Fix up the destination if needed. */
10087 if (dst != operands[0])
10088 emit_move_insn (operands[0], dst);
10091 /* Return TRUE or FALSE depending on whether the binary operator meets the
10092 appropriate constraints. */
10095 ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
10098 rtx dst = operands[0];
10099 rtx src1 = operands[1];
10100 rtx src2 = operands[2];
10102 /* Both source operands cannot be in memory. */
10103 if (MEM_P (src1) && MEM_P (src2))
10106 /* Canonicalize operand order for commutative operators. */
10107 if (ix86_swap_binary_operands_p (code, mode, operands))
10114 /* If the destination is memory, we must have a matching source operand. */
10115 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
10118 /* Source 1 cannot be a constant. */
10119 if (CONSTANT_P (src1))
10122 /* Source 1 cannot be a non-matching memory. */
10123 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
10129 /* Attempt to expand a unary operator. Make the expansion closer to the
10130 actual machine, then just general_operand, which will allow 2 separate
10131 memory references (one output, one input) in a single insn. */
10134 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
10137 int matching_memory;
10138 rtx src, dst, op, clob;
10143 /* If the destination is memory, and we do not have matching source
10144 operands, do things in registers. */
10145 matching_memory = 0;
10148 if (rtx_equal_p (dst, src))
10149 matching_memory = 1;
10151 dst = gen_reg_rtx (mode);
10154 /* When source operand is memory, destination must match. */
10155 if (MEM_P (src) && !matching_memory)
10156 src = force_reg (mode, src);
10158 /* Emit the instruction. */
10160 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
10161 if (reload_in_progress || code == NOT)
10163 /* Reload doesn't know about the flags register, and doesn't know that
10164 it doesn't want to clobber it. */
10165 gcc_assert (code == NOT);
10170 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
10171 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
10174 /* Fix up the destination if needed. */
10175 if (dst != operands[0])
10176 emit_move_insn (operands[0], dst);
10179 /* Return TRUE or FALSE depending on whether the unary operator meets the
10180 appropriate constraints. */
10183 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
10184 enum machine_mode mode ATTRIBUTE_UNUSED,
10185 rtx operands[2] ATTRIBUTE_UNUSED)
10187 /* If one of operands is memory, source and destination must match. */
10188 if ((MEM_P (operands[0])
10189 || MEM_P (operands[1]))
10190 && ! rtx_equal_p (operands[0], operands[1]))
10195 /* Post-reload splitter for converting an SF or DFmode value in an
10196 SSE register into an unsigned SImode. */
10199 ix86_split_convert_uns_si_sse (rtx operands[])
10201 enum machine_mode vecmode;
10202 rtx value, large, zero_or_two31, input, two31, x;
10204 large = operands[1];
10205 zero_or_two31 = operands[2];
10206 input = operands[3];
10207 two31 = operands[4];
10208 vecmode = GET_MODE (large);
10209 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
10211 /* Load up the value into the low element. We must ensure that the other
10212 elements are valid floats -- zero is the easiest such value. */
10215 if (vecmode == V4SFmode)
10216 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
10218 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
10222 input = gen_rtx_REG (vecmode, REGNO (input));
10223 emit_move_insn (value, CONST0_RTX (vecmode));
10224 if (vecmode == V4SFmode)
10225 emit_insn (gen_sse_movss (value, value, input));
10227 emit_insn (gen_sse2_movsd (value, value, input));
10230 emit_move_insn (large, two31);
10231 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
10233 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
10234 emit_insn (gen_rtx_SET (VOIDmode, large, x));
10236 x = gen_rtx_AND (vecmode, zero_or_two31, large);
10237 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
10239 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
10240 emit_insn (gen_rtx_SET (VOIDmode, value, x));
10242 large = gen_rtx_REG (V4SImode, REGNO (large));
10243 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
10245 x = gen_rtx_REG (V4SImode, REGNO (value));
10246 if (vecmode == V4SFmode)
10247 emit_insn (gen_sse2_cvttps2dq (x, value));
10249 emit_insn (gen_sse2_cvttpd2dq (x, value));
10252 emit_insn (gen_xorv4si3 (value, value, large));
10255 /* Convert an unsigned DImode value into a DFmode, using only SSE.
10256 Expects the 64-bit DImode to be supplied in a pair of integral
10257 registers. Requires SSE2; will use SSE3 if available. For x86_32,
10258 -mfpmath=sse, !optimize_size only. */
10261 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
10263 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
10264 rtx int_xmm, fp_xmm;
10265 rtx biases, exponents;
10268 int_xmm = gen_reg_rtx (V4SImode);
10269 if (TARGET_INTER_UNIT_MOVES)
10270 emit_insn (gen_movdi_to_sse (int_xmm, input));
10271 else if (TARGET_SSE_SPLIT_REGS)
10273 emit_insn (gen_rtx_CLOBBER (VOIDmode, int_xmm));
10274 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
10278 x = gen_reg_rtx (V2DImode);
10279 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
10280 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
10283 x = gen_rtx_CONST_VECTOR (V4SImode,
10284 gen_rtvec (4, GEN_INT (0x43300000UL),
10285 GEN_INT (0x45300000UL),
10286 const0_rtx, const0_rtx));
10287 exponents = validize_mem (force_const_mem (V4SImode, x));
10289 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
10290 emit_insn (gen_sse2_punpckldq (int_xmm, int_xmm, exponents));
10292 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
10293 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
10294 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
10295 (0x1.0p84 + double(fp_value_hi_xmm)).
10296 Note these exponents differ by 32. */
10298 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
10300 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
10301 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
10302 real_ldexp (&bias_lo_rvt, &dconst1, 52);
10303 real_ldexp (&bias_hi_rvt, &dconst1, 84);
10304 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
10305 x = const_double_from_real_value (bias_hi_rvt, DFmode);
10306 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
10307 biases = validize_mem (force_const_mem (V2DFmode, biases));
10308 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
10310 /* Add the upper and lower DFmode values together. */
10312 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
10315 x = copy_to_mode_reg (V2DFmode, fp_xmm);
10316 emit_insn (gen_sse2_unpckhpd (fp_xmm, fp_xmm, fp_xmm));
10317 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
10320 ix86_expand_vector_extract (false, target, fp_xmm, 0);
10323 /* Convert an unsigned SImode value into a DFmode. Only currently used
10324 for SSE, but applicable anywhere. */
10327 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
10329 REAL_VALUE_TYPE TWO31r;
10332 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
10333 NULL, 1, OPTAB_DIRECT);
10335 fp = gen_reg_rtx (DFmode);
10336 emit_insn (gen_floatsidf2 (fp, x));
10338 real_ldexp (&TWO31r, &dconst1, 31);
10339 x = const_double_from_real_value (TWO31r, DFmode);
10341 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
10343 emit_move_insn (target, x);
10346 /* Convert a signed DImode value into a DFmode. Only used for SSE in
10347 32-bit mode; otherwise we have a direct convert instruction. */
10350 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
10352 REAL_VALUE_TYPE TWO32r;
10353 rtx fp_lo, fp_hi, x;
10355 fp_lo = gen_reg_rtx (DFmode);
10356 fp_hi = gen_reg_rtx (DFmode);
10358 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
10360 real_ldexp (&TWO32r, &dconst1, 32);
10361 x = const_double_from_real_value (TWO32r, DFmode);
10362 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
10364 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
10366 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
10369 emit_move_insn (target, x);
10372 /* Convert an unsigned SImode value into a SFmode, using only SSE.
10373 For x86_32, -mfpmath=sse, !optimize_size only. */
10375 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
10377 REAL_VALUE_TYPE ONE16r;
10378 rtx fp_hi, fp_lo, int_hi, int_lo, x;
10380 real_ldexp (&ONE16r, &dconst1, 16);
10381 x = const_double_from_real_value (ONE16r, SFmode);
10382 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
10383 NULL, 0, OPTAB_DIRECT);
10384 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
10385 NULL, 0, OPTAB_DIRECT);
10386 fp_hi = gen_reg_rtx (SFmode);
10387 fp_lo = gen_reg_rtx (SFmode);
10388 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
10389 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
10390 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
10392 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
10394 if (!rtx_equal_p (target, fp_hi))
10395 emit_move_insn (target, fp_hi);
10398 /* A subroutine of ix86_build_signbit_mask_vector. If VECT is true,
10399 then replicate the value for all elements of the vector
10403 ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
10410 v = gen_rtvec (4, value, value, value, value);
10412 v = gen_rtvec (4, value, CONST0_RTX (SFmode),
10413 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
10414 return gen_rtx_CONST_VECTOR (V4SFmode, v);
10418 v = gen_rtvec (2, value, value);
10420 v = gen_rtvec (2, value, CONST0_RTX (DFmode));
10421 return gen_rtx_CONST_VECTOR (V2DFmode, v);
10424 gcc_unreachable ();
10428 /* A subroutine of ix86_expand_fp_absneg_operator and copysign expanders.
10429 Create a mask for the sign bit in MODE for an SSE register. If VECT is
10430 true, then replicate the mask for all elements of the vector register.
10431 If INVERT is true, then create a mask excluding the sign bit. */
10434 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
10436 enum machine_mode vec_mode;
10437 HOST_WIDE_INT hi, lo;
10442 /* Find the sign bit, sign extended to 2*HWI. */
10443 if (mode == SFmode)
10444 lo = 0x80000000, hi = lo < 0;
10445 else if (HOST_BITS_PER_WIDE_INT >= 64)
10446 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
10448 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
10451 lo = ~lo, hi = ~hi;
10453 /* Force this value into the low part of a fp vector constant. */
10454 mask = immed_double_const (lo, hi, mode == SFmode ? SImode : DImode);
10455 mask = gen_lowpart (mode, mask);
10457 v = ix86_build_const_vector (mode, vect, mask);
10458 vec_mode = (mode == SFmode) ? V4SFmode : V2DFmode;
10459 return force_reg (vec_mode, v);
10462 /* Generate code for floating point ABS or NEG. */
10465 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
10468 rtx mask, set, use, clob, dst, src;
10469 bool matching_memory;
10470 bool use_sse = false;
10471 bool vector_mode = VECTOR_MODE_P (mode);
10472 enum machine_mode elt_mode = mode;
10476 elt_mode = GET_MODE_INNER (mode);
10479 else if (TARGET_SSE_MATH)
10480 use_sse = SSE_FLOAT_MODE_P (mode);
10482 /* NEG and ABS performed with SSE use bitwise mask operations.
10483 Create the appropriate mask now. */
10485 mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS);
10492 /* If the destination is memory, and we don't have matching source
10493 operands or we're using the x87, do things in registers. */
10494 matching_memory = false;
10497 if (use_sse && rtx_equal_p (dst, src))
10498 matching_memory = true;
10500 dst = gen_reg_rtx (mode);
10502 if (MEM_P (src) && !matching_memory)
10503 src = force_reg (mode, src);
10507 set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
10508 set = gen_rtx_SET (VOIDmode, dst, set);
10513 set = gen_rtx_fmt_e (code, mode, src);
10514 set = gen_rtx_SET (VOIDmode, dst, set);
10517 use = gen_rtx_USE (VOIDmode, mask);
10518 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
10519 emit_insn (gen_rtx_PARALLEL (VOIDmode,
10520 gen_rtvec (3, set, use, clob)));
10526 if (dst != operands[0])
10527 emit_move_insn (operands[0], dst);
10530 /* Expand a copysign operation. Special case operand 0 being a constant. */
10533 ix86_expand_copysign (rtx operands[])
10535 enum machine_mode mode, vmode;
10536 rtx dest, op0, op1, mask, nmask;
10538 dest = operands[0];
10542 mode = GET_MODE (dest);
10543 vmode = mode == SFmode ? V4SFmode : V2DFmode;
10545 if (GET_CODE (op0) == CONST_DOUBLE)
10549 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
10550 op0 = simplify_unary_operation (ABS, mode, op0, mode);
10552 if (op0 == CONST0_RTX (mode))
10553 op0 = CONST0_RTX (vmode);
10556 if (mode == SFmode)
10557 v = gen_rtvec (4, op0, CONST0_RTX (SFmode),
10558 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
10560 v = gen_rtvec (2, op0, CONST0_RTX (DFmode));
10561 op0 = force_reg (vmode, gen_rtx_CONST_VECTOR (vmode, v));
10564 mask = ix86_build_signbit_mask (mode, 0, 0);
10566 if (mode == SFmode)
10567 emit_insn (gen_copysignsf3_const (dest, op0, op1, mask));
10569 emit_insn (gen_copysigndf3_const (dest, op0, op1, mask));
10573 nmask = ix86_build_signbit_mask (mode, 0, 1);
10574 mask = ix86_build_signbit_mask (mode, 0, 0);
10576 if (mode == SFmode)
10577 emit_insn (gen_copysignsf3_var (dest, NULL, op0, op1, nmask, mask));
10579 emit_insn (gen_copysigndf3_var (dest, NULL, op0, op1, nmask, mask));
10583 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
10584 be a constant, and so has already been expanded into a vector constant. */
10587 ix86_split_copysign_const (rtx operands[])
10589 enum machine_mode mode, vmode;
10590 rtx dest, op0, op1, mask, x;
10592 dest = operands[0];
10595 mask = operands[3];
10597 mode = GET_MODE (dest);
10598 vmode = GET_MODE (mask);
10600 dest = simplify_gen_subreg (vmode, dest, mode, 0);
10601 x = gen_rtx_AND (vmode, dest, mask);
10602 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10604 if (op0 != CONST0_RTX (vmode))
10606 x = gen_rtx_IOR (vmode, dest, op0);
10607 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10611 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
10612 so we have to do two masks. */
10615 ix86_split_copysign_var (rtx operands[])
10617 enum machine_mode mode, vmode;
10618 rtx dest, scratch, op0, op1, mask, nmask, x;
10620 dest = operands[0];
10621 scratch = operands[1];
10624 nmask = operands[4];
10625 mask = operands[5];
10627 mode = GET_MODE (dest);
10628 vmode = GET_MODE (mask);
10630 if (rtx_equal_p (op0, op1))
10632 /* Shouldn't happen often (it's useless, obviously), but when it does
10633 we'd generate incorrect code if we continue below. */
10634 emit_move_insn (dest, op0);
10638 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
10640 gcc_assert (REGNO (op1) == REGNO (scratch));
10642 x = gen_rtx_AND (vmode, scratch, mask);
10643 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
10646 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
10647 x = gen_rtx_NOT (vmode, dest);
10648 x = gen_rtx_AND (vmode, x, op0);
10649 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10653 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
10655 x = gen_rtx_AND (vmode, scratch, mask);
10657 else /* alternative 2,4 */
10659 gcc_assert (REGNO (mask) == REGNO (scratch));
10660 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
10661 x = gen_rtx_AND (vmode, scratch, op1);
10663 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
10665 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
10667 dest = simplify_gen_subreg (vmode, op0, mode, 0);
10668 x = gen_rtx_AND (vmode, dest, nmask);
10670 else /* alternative 3,4 */
10672 gcc_assert (REGNO (nmask) == REGNO (dest));
10674 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
10675 x = gen_rtx_AND (vmode, dest, op0);
10677 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10680 x = gen_rtx_IOR (vmode, dest, scratch);
10681 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10684 /* Return TRUE or FALSE depending on whether the first SET in INSN
10685 has source and destination with matching CC modes, and that the
10686 CC mode is at least as constrained as REQ_MODE. */
10689 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
10692 enum machine_mode set_mode;
10694 set = PATTERN (insn);
10695 if (GET_CODE (set) == PARALLEL)
10696 set = XVECEXP (set, 0, 0);
10697 gcc_assert (GET_CODE (set) == SET);
10698 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
10700 set_mode = GET_MODE (SET_DEST (set));
10704 if (req_mode != CCNOmode
10705 && (req_mode != CCmode
10706 || XEXP (SET_SRC (set), 1) != const0_rtx))
10710 if (req_mode == CCGCmode)
10714 if (req_mode == CCGOCmode || req_mode == CCNOmode)
10718 if (req_mode == CCZmode)
10725 gcc_unreachable ();
10728 return (GET_MODE (SET_SRC (set)) == set_mode);
10731 /* Generate insn patterns to do an integer compare of OPERANDS. */
10734 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
10736 enum machine_mode cmpmode;
10739 cmpmode = SELECT_CC_MODE (code, op0, op1);
10740 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
10742 /* This is very simple, but making the interface the same as in the
10743 FP case makes the rest of the code easier. */
10744 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
10745 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
10747 /* Return the test that should be put into the flags user, i.e.
10748 the bcc, scc, or cmov instruction. */
10749 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
10752 /* Figure out whether to use ordered or unordered fp comparisons.
10753 Return the appropriate mode to use. */
10756 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
10758 /* ??? In order to make all comparisons reversible, we do all comparisons
10759 non-trapping when compiling for IEEE. Once gcc is able to distinguish
10760 all forms trapping and nontrapping comparisons, we can make inequality
10761 comparisons trapping again, since it results in better code when using
10762 FCOM based compares. */
10763 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
10767 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
10769 enum machine_mode mode = GET_MODE (op0);
10771 if (SCALAR_FLOAT_MODE_P (mode))
10773 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
10774 return ix86_fp_compare_mode (code);
10779 /* Only zero flag is needed. */
10780 case EQ: /* ZF=0 */
10781 case NE: /* ZF!=0 */
10783 /* Codes needing carry flag. */
10784 case GEU: /* CF=0 */
10785 case GTU: /* CF=0 & ZF=0 */
10786 case LTU: /* CF=1 */
10787 case LEU: /* CF=1 | ZF=1 */
10789 /* Codes possibly doable only with sign flag when
10790 comparing against zero. */
10791 case GE: /* SF=OF or SF=0 */
10792 case LT: /* SF<>OF or SF=1 */
10793 if (op1 == const0_rtx)
10796 /* For other cases Carry flag is not required. */
10798 /* Codes doable only with sign flag when comparing
10799 against zero, but we miss jump instruction for it
10800 so we need to use relational tests against overflow
10801 that thus needs to be zero. */
10802 case GT: /* ZF=0 & SF=OF */
10803 case LE: /* ZF=1 | SF<>OF */
10804 if (op1 == const0_rtx)
10808 /* strcmp pattern do (use flags) and combine may ask us for proper
10813 gcc_unreachable ();
10817 /* Return the fixed registers used for condition codes. */
10820 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
10827 /* If two condition code modes are compatible, return a condition code
10828 mode which is compatible with both. Otherwise, return
10831 static enum machine_mode
10832 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
10837 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
10840 if ((m1 == CCGCmode && m2 == CCGOCmode)
10841 || (m1 == CCGOCmode && m2 == CCGCmode))
10847 gcc_unreachable ();
10869 /* These are only compatible with themselves, which we already
10875 /* Split comparison code CODE into comparisons we can do using branch
10876 instructions. BYPASS_CODE is comparison code for branch that will
10877 branch around FIRST_CODE and SECOND_CODE. If some of branches
10878 is not required, set value to UNKNOWN.
10879 We never require more than two branches. */
10882 ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
10883 enum rtx_code *first_code,
10884 enum rtx_code *second_code)
10886 *first_code = code;
10887 *bypass_code = UNKNOWN;
10888 *second_code = UNKNOWN;
10890 /* The fcomi comparison sets flags as follows:
10900 case GT: /* GTU - CF=0 & ZF=0 */
10901 case GE: /* GEU - CF=0 */
10902 case ORDERED: /* PF=0 */
10903 case UNORDERED: /* PF=1 */
10904 case UNEQ: /* EQ - ZF=1 */
10905 case UNLT: /* LTU - CF=1 */
10906 case UNLE: /* LEU - CF=1 | ZF=1 */
10907 case LTGT: /* EQ - ZF=0 */
10909 case LT: /* LTU - CF=1 - fails on unordered */
10910 *first_code = UNLT;
10911 *bypass_code = UNORDERED;
10913 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
10914 *first_code = UNLE;
10915 *bypass_code = UNORDERED;
10917 case EQ: /* EQ - ZF=1 - fails on unordered */
10918 *first_code = UNEQ;
10919 *bypass_code = UNORDERED;
10921 case NE: /* NE - ZF=0 - fails on unordered */
10922 *first_code = LTGT;
10923 *second_code = UNORDERED;
10925 case UNGE: /* GEU - CF=0 - fails on unordered */
10927 *second_code = UNORDERED;
10929 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
10931 *second_code = UNORDERED;
10934 gcc_unreachable ();
10936 if (!TARGET_IEEE_FP)
10938 *second_code = UNKNOWN;
10939 *bypass_code = UNKNOWN;
10943 /* Return cost of comparison done fcom + arithmetics operations on AX.
10944 All following functions do use number of instructions as a cost metrics.
10945 In future this should be tweaked to compute bytes for optimize_size and
10946 take into account performance of various instructions on various CPUs. */
10948 ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
10950 if (!TARGET_IEEE_FP)
10952 /* The cost of code output by ix86_expand_fp_compare. */
10976 gcc_unreachable ();
10980 /* Return cost of comparison done using fcomi operation.
10981 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10983 ix86_fp_comparison_fcomi_cost (enum rtx_code code)
10985 enum rtx_code bypass_code, first_code, second_code;
10986 /* Return arbitrarily high cost when instruction is not supported - this
10987 prevents gcc from using it. */
10990 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10991 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 2;
10994 /* Return cost of comparison done using sahf operation.
10995 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10997 ix86_fp_comparison_sahf_cost (enum rtx_code code)
10999 enum rtx_code bypass_code, first_code, second_code;
11000 /* Return arbitrarily high cost when instruction is not preferred - this
11001 avoids gcc from using it. */
11002 if (!(TARGET_SAHF && (TARGET_USE_SAHF || optimize_size)))
11004 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11005 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 3;
11008 /* Compute cost of the comparison done using any method.
11009 See ix86_fp_comparison_arithmetics_cost for the metrics. */
11011 ix86_fp_comparison_cost (enum rtx_code code)
11013 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
11016 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
11017 sahf_cost = ix86_fp_comparison_sahf_cost (code);
11019 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
11020 if (min > sahf_cost)
11022 if (min > fcomi_cost)
11027 /* Return true if we should use an FCOMI instruction for this
11031 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
11033 enum rtx_code swapped_code = swap_condition (code);
11035 return ((ix86_fp_comparison_cost (code)
11036 == ix86_fp_comparison_fcomi_cost (code))
11037 || (ix86_fp_comparison_cost (swapped_code)
11038 == ix86_fp_comparison_fcomi_cost (swapped_code)));
11041 /* Swap, force into registers, or otherwise massage the two operands
11042 to a fp comparison. The operands are updated in place; the new
11043 comparison code is returned. */
11045 static enum rtx_code
11046 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
11048 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
11049 rtx op0 = *pop0, op1 = *pop1;
11050 enum machine_mode op_mode = GET_MODE (op0);
11051 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
11053 /* All of the unordered compare instructions only work on registers.
11054 The same is true of the fcomi compare instructions. The XFmode
11055 compare instructions require registers except when comparing
11056 against zero or when converting operand 1 from fixed point to
11060 && (fpcmp_mode == CCFPUmode
11061 || (op_mode == XFmode
11062 && ! (standard_80387_constant_p (op0) == 1
11063 || standard_80387_constant_p (op1) == 1)
11064 && GET_CODE (op1) != FLOAT)
11065 || ix86_use_fcomi_compare (code)))
11067 op0 = force_reg (op_mode, op0);
11068 op1 = force_reg (op_mode, op1);
11072 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
11073 things around if they appear profitable, otherwise force op0
11074 into a register. */
11076 if (standard_80387_constant_p (op0) == 0
11078 && ! (standard_80387_constant_p (op1) == 0
11082 tmp = op0, op0 = op1, op1 = tmp;
11083 code = swap_condition (code);
11087 op0 = force_reg (op_mode, op0);
11089 if (CONSTANT_P (op1))
11091 int tmp = standard_80387_constant_p (op1);
11093 op1 = validize_mem (force_const_mem (op_mode, op1));
11097 op1 = force_reg (op_mode, op1);
11100 op1 = force_reg (op_mode, op1);
11104 /* Try to rearrange the comparison to make it cheaper. */
11105 if (ix86_fp_comparison_cost (code)
11106 > ix86_fp_comparison_cost (swap_condition (code))
11107 && (REG_P (op1) || !no_new_pseudos))
11110 tmp = op0, op0 = op1, op1 = tmp;
11111 code = swap_condition (code);
11113 op0 = force_reg (op_mode, op0);
11121 /* Convert comparison codes we use to represent FP comparison to integer
11122 code that will result in proper branch. Return UNKNOWN if no such code
11126 ix86_fp_compare_code_to_integer (enum rtx_code code)
11155 /* Generate insn patterns to do a floating point compare of OPERANDS.
   CODE is the comparison applied to OP0 and OP1.  SCRATCH (or a fresh
   HImode pseudo) holds the fnstsw result on the paths that read the FPU
   status word.  *SECOND_TEST and *BYPASS_TEST are set to NULL_RTX and, on
   the fcomi/sahf path, to extra tests on FLAGS_REG built from second_code
   and bypass_code.  The return value is the test on FLAGS_REG that the
   flags user should consume. */
11158 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
11159 rtx *second_test, rtx *bypass_test)
11161 enum machine_mode fpcmp_mode, intcmp_mode;
11163 int cost = ix86_fp_comparison_cost (code);
11164 enum rtx_code bypass_code, first_code, second_code;
/* The CC mode is chosen from the incoming CODE; the argument preparation
   may then rewrite CODE and both operands. */
11166 fpcmp_mode = ix86_fp_compare_mode (code);
11167 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
11170 *second_test = NULL_RTX;
11172 *bypass_test = NULL_RTX;
/* Fetch the bypass, first and second comparison codes for CODE. */
11174 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11176 /* Do fcomi/sahf based test when profitable.  An extra bypass or second
   test is acceptable only if the caller supplied a place to return it. */
11177 if ((TARGET_CMOVE || TARGET_SAHF)
11178 && (bypass_code == UNKNOWN || bypass_test)
11179 && (second_code == UNKNOWN || second_test)
11180 && ix86_fp_comparison_arithmetics_cost (code) > cost)
11184 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
11185 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
/* Variant that stores the status word (UNSPEC_FNSTSW) into an HImode
   register and then emits the sahf pattern on that register. */
11191 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
11192 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
11194 scratch = gen_reg_rtx (HImode);
11195 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
11196 emit_insn (gen_x86_sahf_1 (scratch));
11199 /* The FP codes work out to act like unsigned. */
11200 intcmp_mode = fpcmp_mode;
/* Build the additional tests on FLAGS_REG for the bypass and second codes. */
11202 if (bypass_code != UNKNOWN)
11203 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
11204 gen_rtx_REG (intcmp_mode, FLAGS_REG),
11206 if (second_code != UNKNOWN)
11207 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
11208 gen_rtx_REG (intcmp_mode, FLAGS_REG),
11213 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
11214 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
11215 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
11217 scratch = gen_reg_rtx (HImode);
11218 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
11220 /* In the unordered case, we have to check C2 for NaN's, which
11221 doesn't happen to work out to anything nice combination-wise.
11222 So do some bit twiddling on the value we've got in AH to come
11223 up with an appropriate set of condition codes. */
/* Flags mode of the final test; every sequence below that ends in
   gen_cmpqi_ext_3 switches it to CCmode. */
11225 intcmp_mode = CCNOmode;
/* NOTE(review): the constants 0x45, 0x44, 0x40, 0x05, 0x04 and 0x01 look
   like masks over the C0/C2/C3 condition bits held in AH -- confirm
   against the x87 status word layout. */
11230 if (code == GT || !TARGET_IEEE_FP)
11232 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
11237 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11238 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
11239 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
11240 intcmp_mode = CCmode;
11246 if (code == LT && TARGET_IEEE_FP)
11248 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11249 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
11250 intcmp_mode = CCmode;
11255 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
11261 if (code == GE || !TARGET_IEEE_FP)
11263 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
11268 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11269 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
11276 if (code == LE && TARGET_IEEE_FP)
11278 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11279 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
11280 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
11281 intcmp_mode = CCmode;
11286 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
11292 if (code == EQ && TARGET_IEEE_FP)
11294 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11295 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
11296 intcmp_mode = CCmode;
11301 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
11308 if (code == NE && TARGET_IEEE_FP)
11310 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11311 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
11317 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
11323 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
11327 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
11332 gcc_unreachable ();
11336 /* Return the test that should be put into the flags user, i.e.
11337 the bcc, scc, or cmov instruction. */
11338 return gen_rtx_fmt_ee (code, VOIDmode,
11339 gen_rtx_REG (intcmp_mode, FLAGS_REG),
/* Build the comparison rtx for CODE applied to the global operands
   ix86_compare_op0 and ix86_compare_op1.  *SECOND_TEST and *BYPASS_TEST are
   reset to NULL_RTX and forwarded to ix86_expand_fp_compare when the
   operands have a scalar floating point mode. */
11344 ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
11347 op0 = ix86_compare_op0;
11348 op1 = ix86_compare_op1;
11351 *second_test = NULL_RTX;
11353 *bypass_test = NULL_RTX;
/* A compare has already been emitted: test its recorded result against
   zero and clear the record so that it is consumed only once. */
11355 if (ix86_compare_emitted)
11357 ret = gen_rtx_fmt_ee (code, VOIDmode, ix86_compare_emitted, const0_rtx);
11358 ix86_compare_emitted = NULL_RTX;
/* Scalar float operands go to the FP expander with no scratch register;
   decimal float modes are asserted not to reach this point. */
11360 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
11362 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
11363 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
11364 second_test, bypass_test);
/* Integer compare expansion. */
11367 ret = ix86_expand_int_compare (code, op0, op1);
11372 /* Return true if the CODE will result in nontrivial jump sequence, i.e.
   when ix86_fp_comparison_codes reports a bypass code or a second code
   for it. */
11374 ix86_fp_jump_nontrivial_p (enum rtx_code code)
11376 enum rtx_code bypass_code, first_code, second_code;
/* first_code is filled in by the helper but does not affect the result. */
11379 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11380 return bypass_code != UNKNOWN || second_code != UNKNOWN;
/* Emit a conditional jump to LABEL for comparison CODE applied to the global
   operands ix86_compare_op0 and ix86_compare_op1.  The expansion dispatches
   on the mode of ix86_compare_op0: floating point compares either go through
   ix86_split_fp_branch or are emitted as one PARALLEL with clobbers, and
   DImode/TImode compares are split into compare and branch sequences on the
   high and low halves. */
11384 ix86_expand_branch (enum rtx_code code, rtx label)
11388 /* If we have emitted a compare insn, go straight to simple.
11389 ix86_expand_compare won't emit anything if ix86_compare_emitted
11391 if (ix86_compare_emitted)
11394 switch (GET_MODE (ix86_compare_op0))
11400 tmp = ix86_expand_compare (code, NULL, NULL);
11401 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
11402 gen_rtx_LABEL_REF (VOIDmode, label),
11404 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
11413 enum rtx_code bypass_code, first_code, second_code;
11415 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
11416 &ix86_compare_op1);
11418 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11420 /* Check whether we will use the natural sequence with one jump. If
11421 so, we can expand jump early. Otherwise delay expansion by
11422 creating compound insn to not confuse optimizers. */
11423 if (bypass_code == UNKNOWN && second_code == UNKNOWN
11426 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
11427 gen_rtx_LABEL_REF (VOIDmode, label),
11428 pc_rtx, NULL_RTX, NULL_RTX);
/* Compound form: the jump is element 0 of a PARALLEL that also clobbers
   hard registers 18 and 17 in CCFPmode; the vector gets one more slot,
   used for an HImode scratch clobber, when fcomi is not used. */
11432 tmp = gen_rtx_fmt_ee (code, VOIDmode,
11433 ix86_compare_op0, ix86_compare_op1);
11434 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
11435 gen_rtx_LABEL_REF (VOIDmode, label),
11437 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
11439 use_fcomi = ix86_use_fcomi_compare (code);
11440 vec = rtvec_alloc (3 + !use_fcomi);
11441 RTVEC_ELT (vec, 0) = tmp;
11443 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
11445 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
11448 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
11450 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
11459 /* Expand DImode branch into multiple compare+branch. */
11461 rtx lo[2], hi[2], label2;
11462 enum rtx_code code1, code2, code3;
11463 enum machine_mode submode;
/* Keep a constant operand second; exchanging the operands requires
   swapping the condition as well. */
11465 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
11467 tmp = ix86_compare_op0;
11468 ix86_compare_op0 = ix86_compare_op1;
11469 ix86_compare_op1 = tmp;
11470 code = swap_condition (code);
/* Split both operands into low and high halves: index 0 is operand 0,
   index 1 is operand 1. */
11472 if (GET_MODE (ix86_compare_op0) == DImode)
11474 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
11475 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
11480 split_ti (&ix86_compare_op0, 1, lo+0, hi+0);
11481 split_ti (&ix86_compare_op1, 1, lo+1, hi+1);
11485 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
11486 avoid two branches. This costs one extra insn, so disable when
11487 optimizing for size. */
11489 if ((code == EQ || code == NE)
11491 || hi[1] == const0_rtx || lo[1] == const0_rtx))
11496 if (hi[1] != const0_rtx)
11497 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
11498 NULL_RTX, 0, OPTAB_WIDEN);
11501 if (lo[1] != const0_rtx)
11502 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
11503 NULL_RTX, 0, OPTAB_WIDEN);
11505 tmp = expand_binop (submode, ior_optab, xor1, xor0,
11506 NULL_RTX, 0, OPTAB_WIDEN);
/* Branch on the combined word compared against zero by recursing. */
11508 ix86_compare_op0 = tmp;
11509 ix86_compare_op1 = const0_rtx;
11510 ix86_expand_branch (code, label);
11514 /* Otherwise, if we are doing less-than or greater-or-equal-than,
11515 op1 is a constant and the low word is zero, then we can just
11516 examine the high word. */
11518 if (CONST_INT_P (hi[1]) && lo[1] == const0_rtx)
11521 case LT: case LTU: case GE: case GEU:
11522 ix86_compare_op0 = hi[0];
11523 ix86_compare_op1 = hi[1];
11524 ix86_expand_branch (code, label);
11530 /* Otherwise, we need two or three jumps. */
11532 label2 = gen_label_rtx ();
/* code2 defaults to the swapped condition; code3 is the unsigned form of
   CODE and is the one applied to the low halves below. */
11535 code2 = swap_condition (code);
11536 code3 = unsigned_condition (code);
11540 case LT: case GT: case LTU: case GTU:
11543 case LE: code1 = LT; code2 = GT; break;
11544 case GE: code1 = GT; code2 = LT; break;
11545 case LEU: code1 = LTU; code2 = GTU; break;
11546 case GEU: code1 = GTU; code2 = LTU; break;
11548 case EQ: code1 = UNKNOWN; code2 = NE; break;
11549 case NE: code2 = UNKNOWN; break;
11552 gcc_unreachable ();
11557 * if (hi(a) < hi(b)) goto true;
11558 * if (hi(a) > hi(b)) goto false;
11559 * if (lo(a) < lo(b)) goto true;
/* High halves first: code1 branches to LABEL, code2 to the local label2. */
11563 ix86_compare_op0 = hi[0];
11564 ix86_compare_op1 = hi[1];
11566 if (code1 != UNKNOWN)
11567 ix86_expand_branch (code1, label);
11568 if (code2 != UNKNOWN)
11569 ix86_expand_branch (code2, label2);
/* Then the low halves, compared with the unsigned condition. */
11571 ix86_compare_op0 = lo[0];
11572 ix86_compare_op1 = lo[1];
11573 ix86_expand_branch (code3, label);
/* label2 is emitted only when a branch to it was generated above. */
11575 if (code2 != UNKNOWN)
11576 emit_label (label2);
11581 gcc_unreachable ();
11585 /* Split branch based on floating point condition.  CODE compares OP1
   with OP2; TARGET1 and TARGET2 are the two arms of the emitted
   IF_THEN_ELSE.  TMP is handed to ix86_expand_fp_compare as its scratch
   argument.  PUSHED, when used, supplies the mode passed to
   ix86_free_from_memory to release a pushed operand. */
11587 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
11588 rtx target1, rtx target2, rtx tmp, rtx pushed)
11590 rtx second, bypass;
11591 rtx label = NULL_RTX;
/* -1 means that no REG_BR_PROB note is attached to the respective jump. */
11593 int bypass_probability = -1, second_probability = -1, probability = -1;
/* A jump whose second arm is not pc_rtx is handled by reversing the
   condition. */
11596 if (target2 != pc_rtx)
11599 code = reverse_condition_maybe_unordered (code);
11604 condition = ix86_expand_fp_compare (code, op1, op2,
11605 tmp, &second, &bypass);
11607 /* Remove pushed operand from stack. */
11609 ix86_free_from_memory (GET_MODE (pushed));
11611 if (split_branch_probability >= 0)
11613 /* Distribute the probabilities across the jumps.
11614 Assume the BYPASS and SECOND to be always test
11616 probability = split_branch_probability;
11618 /* Value of 1 is low enough to make no need for probability
11619 to be updated. Later we may run some experiments and see
11620 if unordered values are more frequent in practice. */
11622 bypass_probability = 1;
11624 second_probability = 1;
/* The bypass test, when present, jumps to a fresh local label that is
   emitted after all other jumps at the end of this function. */
11626 if (bypass != NULL_RTX)
11628 label = gen_label_rtx ();
11629 i = emit_jump_insn (gen_rtx_SET
11631 gen_rtx_IF_THEN_ELSE (VOIDmode,
11633 gen_rtx_LABEL_REF (VOIDmode,
11636 if (bypass_probability >= 0)
11638 = gen_rtx_EXPR_LIST (REG_BR_PROB,
11639 GEN_INT (bypass_probability),
/* Main conditional jump between TARGET1 and TARGET2. */
11642 i = emit_jump_insn (gen_rtx_SET
11644 gen_rtx_IF_THEN_ELSE (VOIDmode,
11645 condition, target1, target2)));
11646 if (probability >= 0)
11648 = gen_rtx_EXPR_LIST (REG_BR_PROB,
11649 GEN_INT (probability),
/* The optional second test becomes one more jump that uses TARGET1. */
11651 if (second != NULL_RTX)
11653 i = emit_jump_insn (gen_rtx_SET
11655 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
11657 if (second_probability >= 0)
11659 = gen_rtx_EXPR_LIST (REG_BR_PROB,
11660 GEN_INT (second_probability),
11663 if (label != NULL_RTX)
11664 emit_label (label);
/* Expand a store of the result of comparison CODE, applied to the global
   operands ix86_compare_op0 and ix86_compare_op1, into DEST, which must
   have QImode.  Returns 0 (FAIL) for double-word compares and 1 (DONE)
   after the insns have been emitted. */
11668 ix86_expand_setcc (enum rtx_code code, rtx dest)
11670 rtx ret, tmp, tmpreg, equiv;
11671 rtx second_test, bypass_test;
/* Double-word compares (TImode on 64-bit targets, DImode otherwise) are
   not handled here. */
11673 if (GET_MODE (ix86_compare_op0) == (TARGET_64BIT ? TImode : DImode))
11674 return 0; /* FAIL */
11676 gcc_assert (GET_MODE (dest) == QImode);
/* The comparison rtx itself, retyped to QImode, is the value that gets
   stored. */
11678 ret = ix86_expand_compare (code, &second_test, &bypass_test);
11679 PUT_MODE (ret, QImode);
11684 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
/* An extra test is materialized into the new pseudo tmp2 and merged with
   the first result by gen_andqi3 or gen_iorqi3.  A bypass test excludes a
   second test and has its condition reversed before it is stored. */
11685 if (bypass_test || second_test)
11687 rtx test = second_test;
11689 rtx tmp2 = gen_reg_rtx (QImode);
11692 gcc_assert (!second_test);
11693 test = bypass_test;
11695 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
11697 PUT_MODE (test, QImode);
11698 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
11701 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
11703 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
11706 /* Attach a REG_EQUAL note describing the comparison result. */
11707 if (ix86_compare_op0 && ix86_compare_op1)
11709 equiv = simplify_gen_relational (code, QImode,
11710 GET_MODE (ix86_compare_op0),
11711 ix86_compare_op0, ix86_compare_op1);
11712 set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
11715 return 1; /* DONE */
11718 /* Expand comparison setting or clearing carry flag. Return true when
11719 successful and set pop for the operation.  CODE compares OP0 with OP1;
   on success *POP is a comparison whose code is LTU or GEU.  Integer
   comparisons are rewritten into one of those two codes, and the rewritten
   operands are left in ix86_compare_op0 and ix86_compare_op1. */
11721 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
/* Use the mode of OP1 when OP0 carries no mode of its own. */
11723 enum machine_mode mode =
11724 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
11726 /* Do not handle DImode compares that go through special path.
11727 Also we can't deal with FP compares yet. This is possible to add. */
11728 if (mode == (TARGET_64BIT ? TImode : DImode))
11731 if (SCALAR_FLOAT_MODE_P (mode))
11733 rtx second_test = NULL, bypass_test = NULL;
11734 rtx compare_op, compare_seq;
11736 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
11738 /* Shortcut: following common codes never translate
11739 into carry flag compares. */
11740 if (code == EQ || code == NE || code == UNEQ || code == LTGT
11741 || code == ORDERED || code == UNORDERED)
11744 /* These comparisons require zero flag; swap operands so they won't. */
11745 if ((code == GT || code == UNLE || code == LE || code == UNGT)
11746 && !TARGET_IEEE_FP)
11751 code = swap_condition (code);
11754 /* Try to expand the comparison and verify that we end up with carry flag
11755 based comparison. This fails to be true only when we decide to expand the
11756 comparison using arithmetic, which is not too common a scenario. */
11758 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
11759 &second_test, &bypass_test);
11760 compare_seq = get_insns ();
/* A comparison that needs an additional test cannot be expressed by the
   carry flag alone. */
11763 if (second_test || bypass_test)
/* For the FP flags modes translate the code into its integer form before
   checking for LTU/GEU. */
11765 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
11766 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
11767 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
11769 code = GET_CODE (compare_op);
11770 if (code != LTU && code != GEU)
11772 emit_insn (compare_seq);
11776 if (!INTEGRAL_MODE_P (mode))
11784 /* Convert a==0 into (unsigned)a<1. */
11787 if (op1 != const0_rtx)
11790 code = (code == EQ ? LTU : GEU);
11793 /* Convert a>b into b<a or, for a constant b, into a>=b+1. */
11796 if (CONST_INT_P (op1))
11798 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
11799 /* Bail out on overflow. We still can swap operands but that
11800 would force loading of the constant into register. */
11801 if (op1 == const0_rtx
11802 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
11804 code = (code == GTU ? GEU : LTU);
11811 code = (code == GTU ? LTU : GEU);
11815 /* Convert a>=0 into (unsigned)a<0x80000000. */
/* NOTE(review): 1 << (bitsize - 1) is evaluated in type int before
   gen_int_mode truncates it; for a 32-bit mode this shifts into the sign
   bit of int -- confirm that this is intended. */
11818 if (mode == DImode || op1 != const0_rtx)
11820 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
11821 code = (code == LT ? GEU : LTU);
11825 if (mode == DImode || op1 != constm1_rtx)
11827 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
11828 code = (code == LE ? GEU : LTU);
11834 /* Swapping operands may cause constant to appear as first operand. */
11835 if (!nonimmediate_operand (op0, VOIDmode))
11837 if (no_new_pseudos)
11839 op0 = force_reg (mode, op0);
/* Publish the rewritten operands and expand; the result has to be a pure
   carry flag test. */
11841 ix86_compare_op0 = op0;
11842 ix86_compare_op1 = op1;
11843 *pop = ix86_expand_compare (code, NULL, NULL);
11844 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
/* Expand an integer conditional move.  operands[0] is the destination,
   operands[1] the comparison (its code is applied to the global operands
   ix86_compare_op0 and ix86_compare_op1), operands[2] the value for a true
   comparison and operands[3] the value for a false one.  When both values
   are CONST_INTs several branch-free arithmetic sequences are tried first.
   Returns 1 (DONE) once insns have been emitted and 0 (FAIL) otherwise. */
11849 ix86_expand_int_movcc (rtx operands[])
11851 enum rtx_code code = GET_CODE (operands[1]), compare_code;
11852 rtx compare_seq, compare_op;
11853 rtx second_test, bypass_test;
11854 enum machine_mode mode = GET_MODE (operands[0]);
11855 bool sign_bit_compare_p = false;;
/* Expand the compare now but only capture its insns in compare_seq; they
   are emitted near the end of this function, in front of the conditional
   move. */
11858 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
11859 compare_seq = get_insns ();
11862 compare_code = GET_CODE (compare_op);
/* x >= 0, x < 0, x > -1 and x <= -1 depend only on the sign bit of x. */
11864 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
11865 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
11866 sign_bit_compare_p = true;
/* Both values constant: ct is the value for true and cf the value for
   false. */
11868 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
11869 HImode insns, we'd be swallowed in word prefix ops. */
11871 if ((mode != HImode || TARGET_FAST_PREFIX)
11872 && (mode != (TARGET_64BIT ? TImode : DImode))
11873 && CONST_INT_P (operands[2])
11874 && CONST_INT_P (operands[3]))
11876 rtx out = operands[0];
11877 HOST_WIDE_INT ct = INTVAL (operands[2]);
11878 HOST_WIDE_INT cf = INTVAL (operands[3]);
11879 HOST_WIDE_INT diff;
11882 /* Sign bit compares are better done using shifts than we do by using
11884 if (sign_bit_compare_p
11885 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
11886 ix86_compare_op1, &compare_op))
11888 /* Detect overlap between destination and compare sources. */
11891 if (!sign_bit_compare_p)
11893 bool fpcmp = false;
11895 compare_code = GET_CODE (compare_op);
11897 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
11898 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
11901 compare_code = ix86_fp_compare_code_to_integer (compare_code);
11904 /* To simplify rest of code, restrict to the GEU case. */
11905 if (compare_code == LTU)
11907 HOST_WIDE_INT tmp = ct;
11910 compare_code = reverse_condition (compare_code);
11911 code = reverse_condition (code);
11916 PUT_CODE (compare_op,
11917 reverse_condition_maybe_unordered
11918 (GET_CODE (compare_op)));
11920 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
/* Compute into a fresh pseudo when the destination is also an input of
   the compare. */
11924 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
11925 || reg_overlap_mentioned_p (out, ix86_compare_op1))
11926 tmp = gen_reg_rtx (mode);
11928 if (mode == DImode)
11929 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
11931 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
11935 if (code == GT || code == GE)
11936 code = reverse_condition (code);
11939 HOST_WIDE_INT tmp = ct;
/* The final argument -1 asks emit_store_flag for a 0 / -1 result. */
11944 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
11945 ix86_compare_op1, VOIDmode, 0, -1);
11958 tmp = expand_simple_binop (mode, PLUS,
11960 copy_rtx (tmp), 1, OPTAB_DIRECT);
11971 tmp = expand_simple_binop (mode, IOR,
11973 copy_rtx (tmp), 1, OPTAB_DIRECT);
11975 else if (diff == -1 && ct)
11985 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
11987 tmp = expand_simple_binop (mode, PLUS,
11988 copy_rtx (tmp), GEN_INT (cf),
11989 copy_rtx (tmp), 1, OPTAB_DIRECT);
11997 * andl cf - ct, dest
12007 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
12010 tmp = expand_simple_binop (mode, AND,
12012 gen_int_mode (cf - ct, mode),
12013 copy_rtx (tmp), 1, OPTAB_DIRECT);
12015 tmp = expand_simple_binop (mode, PLUS,
12016 copy_rtx (tmp), GEN_INT (ct),
12017 copy_rtx (tmp), 1, OPTAB_DIRECT);
12020 if (!rtx_equal_p (tmp, out))
12021 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
12023 return 1; /* DONE */
12028 enum machine_mode cmp_mode = GET_MODE (ix86_compare_op0);
12031 tmp = ct, ct = cf, cf = tmp;
12034 if (SCALAR_FLOAT_MODE_P (cmp_mode))
12036 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
12038 /* We may be reversing unordered compare to normal compare, that
12039 is not valid in general (we may convert non-trapping condition
12040 to trapping one), however on i386 we currently emit all
12041 comparisons unordered. */
12042 compare_code = reverse_condition_maybe_unordered (compare_code);
12043 code = reverse_condition_maybe_unordered (code);
12047 compare_code = reverse_condition (compare_code);
12048 code = reverse_condition (code);
/* From here on compare_code is different from UNKNOWN only for an integer
   compare against a constant that the code below treats specially. */
12052 compare_code = UNKNOWN;
12053 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
12054 && CONST_INT_P (ix86_compare_op1))
12056 if (ix86_compare_op1 == const0_rtx
12057 && (code == LT || code == GE))
12058 compare_code = code;
12059 else if (ix86_compare_op1 == constm1_rtx)
12063 else if (code == GT)
12068 /* Optimize dest = (op0 < 0) ? -1 : cf. */
12069 if (compare_code != UNKNOWN
12070 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
12071 && (cf == -1 || ct == -1))
12073 /* If lea code below could be used, only optimize
12074 if it results in a 2 insn sequence. */
12076 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
12077 || diff == 3 || diff == 5 || diff == 9)
12078 || (compare_code == LT && ct == -1)
12079 || (compare_code == GE && cf == -1))
12082 * notl op1 (if necessary)
12090 code = reverse_condition (code);
12093 out = emit_store_flag (out, code, ix86_compare_op0,
12094 ix86_compare_op1, VOIDmode, 0, -1);
12096 out = expand_simple_binop (mode, IOR,
12098 out, 1, OPTAB_DIRECT);
12099 if (out != operands[0])
12100 emit_move_insn (operands[0], out);
12102 return 1; /* DONE */
/* The listed differences are the ones the lea based sequence below can
   build from a multiplication by (diff & ~1), an optional addition of the
   flag itself and the constant cf. */
12107 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
12108 || diff == 3 || diff == 5 || diff == 9)
12109 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
12111 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
12117 * lea cf(dest*(ct-cf)),dest
12121 * This also catches the degenerate setcc-only case.
12127 out = emit_store_flag (out, code, ix86_compare_op0,
12128 ix86_compare_op1, VOIDmode, 0, 1);
12131 /* On x86_64 the lea instruction operates on Pmode, so we need
12132 to get arithmetics done in proper mode to match. */
12134 tmp = copy_rtx (out);
12138 out1 = copy_rtx (out);
12139 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
12143 tmp = gen_rtx_PLUS (mode, tmp, out1);
12149 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
12152 if (!rtx_equal_p (tmp, out))
12155 out = force_operand (tmp, copy_rtx (out));
12157 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
12159 if (!rtx_equal_p (out, operands[0]))
12160 emit_move_insn (operands[0], copy_rtx (out));
12162 return 1; /* DONE */
12166 * General case: Jumpful:
12167 * xorl dest,dest cmpl op1, op2
12168 * cmpl op1, op2 movl ct, dest
12169 * setcc dest jcc 1f
12170 * decl dest movl cf, dest
12171 * andl (cf-ct),dest 1:
12174 * Size 20. Size 14.
12176 * This is reasonably steep, but branch mispredict costs are
12177 * high on modern cpus, so consider failing only if optimizing
12181 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
12182 && BRANCH_COST >= 2)
12186 enum machine_mode cmp_mode = GET_MODE (ix86_compare_op0);
12191 if (SCALAR_FLOAT_MODE_P (cmp_mode))
12193 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
12195 /* We may be reversing unordered compare to normal compare,
12196 that is not valid in general (we may convert non-trapping
12197 condition to trapping one), however on i386 we currently
12198 emit all comparisons unordered. */
12199 code = reverse_condition_maybe_unordered (code);
12203 code = reverse_condition (code);
12204 if (compare_code != UNKNOWN)
12205 compare_code = reverse_condition (compare_code);
12209 if (compare_code != UNKNOWN)
12211 /* notl op1 (if needed)
12216 For x < 0 (resp. x <= -1) there will be no notl,
12217 so if possible swap the constants to get rid of the
12219 True/false will be -1/0 while code below (store flag
12220 followed by decrement) is 0/-1, so the constants need
12221 to be exchanged once more. */
12223 if (compare_code == GE || !cf)
12225 code = reverse_condition (code);
12230 HOST_WIDE_INT tmp = cf;
12235 out = emit_store_flag (out, code, ix86_compare_op0,
12236 ix86_compare_op1, VOIDmode, 0, -1);
12240 out = emit_store_flag (out, code, ix86_compare_op0,
12241 ix86_compare_op1, VOIDmode, 0, 1);
12243 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
12244 copy_rtx (out), 1, OPTAB_DIRECT);
/* Mask with cf - ct and add ct to map the two flag values onto the two
   constants. */
12247 out = expand_simple_binop (mode, AND, copy_rtx (out),
12248 gen_int_mode (cf - ct, mode),
12249 copy_rtx (out), 1, OPTAB_DIRECT);
12251 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
12252 copy_rtx (out), 1, OPTAB_DIRECT);
12253 if (!rtx_equal_p (out, operands[0]))
12254 emit_move_insn (operands[0], copy_rtx (out));
12256 return 1; /* DONE */
12260 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
12262 /* Try a few things more with specific constants and a variable. */
12265 rtx var, orig_out, out, tmp;
12267 if (BRANCH_COST <= 2)
12268 return 0; /* FAIL */
12270 /* If one of the two operands is an interesting constant, load a
12271 constant with the above and mask it in with a logical operation. */
12273 if (CONST_INT_P (operands[2]))
12276 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
12277 operands[3] = constm1_rtx, op = and_optab;
12278 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
12279 operands[3] = const0_rtx, op = ior_optab;
12281 return 0; /* FAIL */
12283 else if (CONST_INT_P (operands[3]))
12286 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
12287 operands[2] = constm1_rtx, op = and_optab;
/* NOTE(review): the second half of the next condition tests operands[3],
   which was just found to be -1, so it is always true.  By symmetry with
   the branch for a constant operands[2] above, operands[2] looks like the
   intended operand -- confirm. */
12288 else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx)
12289 operands[2] = const0_rtx, op = ior_optab;
12291 return 0; /* FAIL */
12294 return 0; /* FAIL */
12296 orig_out = operands[0];
12297 tmp = gen_reg_rtx (mode);
12300 /* Recurse to get the constant loaded. */
12301 if (ix86_expand_int_movcc (operands) == 0)
12302 return 0; /* FAIL */
12304 /* Mask in the interesting variable. */
12305 out = expand_binop (mode, op, var, tmp, orig_out, 0,
12307 if (!rtx_equal_p (out, orig_out))
12308 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
12310 return 1; /* DONE */
12314 * For comparison with above,
/* Values that are neither registers nor memory are loaded into registers
   first. */
12324 if (! nonimmediate_operand (operands[2], mode))
12325 operands[2] = force_reg (mode, operands[2]);
12326 if (! nonimmediate_operand (operands[3], mode))
12327 operands[3] = force_reg (mode, operands[3]);
/* The conditional moves for a bypass or second test read operands[0]
   back, so a value overlapping the destination is first copied into a
   new pseudo. */
12329 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
12331 rtx tmp = gen_reg_rtx (mode);
12332 emit_move_insn (tmp, operands[3]);
12335 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
12337 rtx tmp = gen_reg_rtx (mode);
12338 emit_move_insn (tmp, operands[2]);
12342 if (! register_operand (operands[2], VOIDmode)
12344 || ! register_operand (operands[3], VOIDmode)))
12345 operands[2] = force_reg (mode, operands[2]);
12348 && ! register_operand (operands[3], VOIDmode))
12349 operands[3] = force_reg (mode, operands[3]);
/* Emit the compare captured at the top, then the conditional move(s). */
12351 emit_insn (compare_seq);
12352 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
12353 gen_rtx_IF_THEN_ELSE (mode,
12354 compare_op, operands[2],
12357 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
12358 gen_rtx_IF_THEN_ELSE (mode,
12360 copy_rtx (operands[3]),
12361 copy_rtx (operands[0]))));
12363 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
12364 gen_rtx_IF_THEN_ELSE (mode,
12366 copy_rtx (operands[2]),
12367 copy_rtx (operands[0]))));
12369 return 1; /* DONE */
12372 /* Swap, force into registers, or otherwise massage the two operands
12373 to an sse comparison with a mask result. Thus we differ a bit from
12374 ix86_prepare_fp_compare_args which expects to produce a flags result.
12376 The DEST operand exists to help determine whether to commute commutative
12377 operators. The POP0/POP1 operands are updated in place. The new
12378 comparison code is returned, or UNKNOWN if not implementable.
   DEST may be a null rtx; the check for commutative operators tests for
   that explicitly. */
12380 static enum rtx_code
12381 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
12382 rtx *pop0, rtx *pop1)
12390 /* We have no LTGT as an operator. We could implement it with
12391 NE & ORDERED, but this requires an extra temporary. It's
12392 not clear that it's worth it. */
12399 /* These are supported directly. */
12406 /* For commutative operators, try to canonicalize the destination
12407 operand to be first in the comparison - this helps reload to
12408 avoid extra moves. */
/* NOTE(review): presumably nothing is changed when DEST is absent or is
   not the second operand -- confirm against the full source. */
12409 if (!dest || !rtx_equal_p (dest, *pop1))
12417 /* These are not supported directly. Swap the comparison operands
12418 to transform into something that is supported. */
/* Exchanging the operands goes together with swapping the condition. */
12422 code = swap_condition (code);
12426 gcc_unreachable ();
12432 /* Detect conditional moves that exactly match min/max operational
12433 semantics. Note that this is IEEE safe, as long as we don't
12434 interchange the operands.
12436 Returns FALSE if this conditional move doesn't match a MIN/MAX,
12437 and TRUE if the operation is successful and instructions are emitted.
   DEST receives the result and supplies the mode.  CODE compares CMP_OP0
   with CMP_OP1; IF_TRUE and IF_FALSE are the two values of the move. */
12440 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
12441 rtx cmp_op1, rtx if_true, rtx if_false)
12443 enum machine_mode mode;
12449 else if (code == UNGE)
12452 if_true = if_false;
/* A MIN/MAX is recognized only when the compared operands are exactly the
   two values of the move, in either order. */
12458 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
12460 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
12465 mode = GET_MODE (dest);
12467 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
12468 but MODE may be a vector mode and thus not appropriate. */
/* Unless both flag_finite_math_only and flag_unsafe_math_optimizations are
   set, the operation is wrapped in an UNSPEC_IEEE_MIN / UNSPEC_IEEE_MAX
   whose operand order is kept as given; the SMIN / SMAX form is built
   further down. */
12469 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
12471 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
12474 if_true = force_reg (mode, if_true);
12475 v = gen_rtvec (2, if_true, if_false);
12476 tmp = gen_rtx_UNSPEC (mode, v, u);
12480 code = is_min ? SMIN : SMAX;
12481 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
12484 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
12488 /* Expand an sse vector comparison. Return the register with the result.
   The comparison CODE of CMP_OP0 and CMP_OP1 is emitted in the mode of
   DEST.  OP_TRUE and OP_FALSE are only inspected for overlap with DEST. */
12491 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
12492 rtx op_true, rtx op_false)
12494 enum machine_mode mode = GET_MODE (dest);
/* The first operand must be a register; the second one may also be
   memory. */
12497 cmp_op0 = force_reg (mode, cmp_op0);
12498 if (!nonimmediate_operand (cmp_op1, mode))
12499 cmp_op1 = force_reg (mode, cmp_op1);
/* Write the mask into a fresh pseudo when DEST overlaps one of the values
   that are still needed afterwards. */
12502 || reg_overlap_mentioned_p (dest, op_true)
12503 || reg_overlap_mentioned_p (dest, op_false))
12504 dest = gen_reg_rtx (mode);
12506 x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
12507 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
12512 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
12513 operations. This is used for both scalar and vector conditional moves.
   CMP is used directly as an operand of AND and NOT, i.e. as a mask in the
   mode of DEST. */
12516 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
12518 enum machine_mode mode = GET_MODE (dest);
/* False value is zero: DEST = CMP & OP_TRUE. */
12521 if (op_false == CONST0_RTX (mode))
12523 op_true = force_reg (mode, op_true);
12524 x = gen_rtx_AND (mode, cmp, op_true);
12525 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
/* True value is zero: DEST = ~CMP & OP_FALSE. */
12527 else if (op_true == CONST0_RTX (mode))
12529 op_false = force_reg (mode, op_false);
12530 x = gen_rtx_NOT (mode, cmp);
12531 x = gen_rtx_AND (mode, x, op_false);
12532 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
/* General form: t2 = OP_TRUE & CMP, t3 = ~CMP & OP_FALSE and
   DEST = t3 | t2. */
12536 op_true = force_reg (mode, op_true);
12537 op_false = force_reg (mode, op_false);
12539 t2 = gen_reg_rtx (mode);
12541 t3 = gen_reg_rtx (mode);
12545 x = gen_rtx_AND (mode, op_true, cmp);
12546 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
12548 x = gen_rtx_NOT (mode, cmp);
12549 x = gen_rtx_AND (mode, x, op_false);
12550 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
12552 x = gen_rtx_IOR (mode, t3, t2);
12553 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
12557 /* Expand a floating-point conditional move. Return true if successful.
   operands[0] is the destination, operands[1] the comparison (its code is
   applied to the global operands ix86_compare_op0 and ix86_compare_op1),
   operands[2] the value for a true comparison and operands[3] the value
   for a false one. */
12560 ix86_expand_fp_movcc (rtx operands[])
12562 enum machine_mode mode = GET_MODE (operands[0]);
12563 enum rtx_code code = GET_CODE (operands[1]);
12564 rtx tmp, compare_op, second_test, bypass_test;
/* SSE math: try a min/max first, else build the result from a compare
   mask and logical operations. */
12566 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
12568 enum machine_mode cmode;
12570 /* Since we've no cmove for sse registers, don't force bad register
12571 allocation just to gain access to it. Deny movcc when the
12572 comparison mode doesn't match the move mode. */
12573 cmode = GET_MODE (ix86_compare_op0);
12574 if (cmode == VOIDmode)
12575 cmode = GET_MODE (ix86_compare_op1);
12579 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
12581 &ix86_compare_op1);
12582 if (code == UNKNOWN)
12585 if (ix86_expand_sse_fp_minmax (operands[0], code, ix86_compare_op0,
12586 ix86_compare_op1, operands[2],
12590 tmp = ix86_expand_sse_cmp (operands[0], code, ix86_compare_op0,
12591 ix86_compare_op1, operands[2], operands[3]);
12592 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
12596 /* The floating point conditional move instructions don't directly
12597 support conditions resulting from a signed integer comparison. */
12599 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
12601 /* The floating point conditional move instructions don't directly
12602 support signed integer comparisons. */
12604 if (!fcmov_comparison_operator (compare_op, VOIDmode))
/* Materialize the condition into a QImode pseudo with setcc and compare
   that pseudo against zero instead. */
12606 gcc_assert (!second_test && !bypass_test);
12607 tmp = gen_reg_rtx (QImode);
12608 ix86_expand_setcc (code, tmp);
12610 ix86_compare_op0 = tmp;
12611 ix86_compare_op1 = const0_rtx;
12612 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
/* The moves for a bypass or second test read operands[0] back, so a value
   overlapping the destination is first copied into a new pseudo. */
12614 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
12616 tmp = gen_reg_rtx (mode);
12617 emit_move_insn (tmp, operands[3]);
12620 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
12622 tmp = gen_reg_rtx (mode);
12623 emit_move_insn (tmp, operands[2]);
/* Main conditional move, followed by the corrections for the bypass test
   (selects operands[3]) and the second test (selects operands[2]). */
12627 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
12628 gen_rtx_IF_THEN_ELSE (mode, compare_op,
12629 operands[2], operands[3])));
12631 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
12632 gen_rtx_IF_THEN_ELSE (mode, bypass_test,
12633 operands[3], operands[0])));
12635 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
12636 gen_rtx_IF_THEN_ELSE (mode, second_test,
12637 operands[2], operands[0])));
12642 /* Expand a floating-point vector conditional move; a vcond operation
12643 rather than a movcc operation. */
/* Operand roles as used in the body: operands[0] is the destination,
   operands[3] supplies the comparison code, operands[4] and operands[5]
   are the values being compared, and operands[1] / operands[2] are the
   two values selected between.  */
12646 ix86_expand_fp_vcond (rtx operands[])
12648 enum rtx_code code = GET_CODE (operands[3]);
/* The helper receives the comparison operands by address, so it may
   replace them as well as return a different comparison code.  */
12651 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
12652 &operands[4], &operands[5]);
12653 if (code == UNKNOWN)
/* A min/max expansion is attempted before the general
   compare-and-select sequence that follows.  */
12656 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
12657 operands[5], operands[1], operands[2]))
12660 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
12661 operands[1], operands[2]);
12662 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
12666 /* Expand a signed integral vector conditional move. */
/* Operand roles as used in the body: operands[0] is the destination,
   operands[3] supplies the comparison code, operands[4] and operands[5]
   are the compared values (copied to cop0/cop1), and operands[1] /
   operands[2] are the values selected between.  Despite the summary
   above, an unsigned comparison is also handled, by rewriting it into
   a signed comparison against zero first.  */
12669 ix86_expand_int_vcond (rtx operands[])
12671 enum machine_mode mode = GET_MODE (operands[0]);
12672 enum rtx_code code = GET_CODE (operands[3]);
/* NEGATE indexes the select operands at the end of the function: when
   it is set, operands[2] and operands[1] trade places.  */
12673 bool negate = false;
12676 cop0 = operands[4];
12677 cop1 = operands[5];
12679 /* Canonicalize the comparison to EQ, GT, GTU. */
12690 code = reverse_condition (code);
12696 code = reverse_condition (code);
12702 code = swap_condition (code);
12703 x = cop0, cop0 = cop1, cop1 = x;
12707 gcc_unreachable ();
12710 /* Unsigned parallel compare is not supported by the hardware. Play some
12711 tricks to turn this into a signed comparison against 0. */
12714 cop0 = force_reg (mode, cop0);
12722 /* Perform a parallel modulo subtraction. */
12723 t1 = gen_reg_rtx (mode);
12724 emit_insn (gen_subv4si3 (t1, cop0, cop1));
12726 /* Extract the original sign bit of op0. */
/* NOTE(review): where int is 32 bits the literal 0x80000000 has unsigned
   type, so the unary minus does not yield a negative value; confirm that
   GEN_INT receives the sign-extended constant intended here.  */
12727 mask = GEN_INT (-0x80000000);
12728 mask = gen_rtx_CONST_VECTOR (mode,
12729 gen_rtvec (4, mask, mask, mask, mask));
12730 mask = force_reg (mode, mask);
12731 t2 = gen_reg_rtx (mode);
12732 emit_insn (gen_andv4si3 (t2, cop0, mask));
12734 /* XOR it back into the result of the subtraction. This results
12735 in the sign bit set iff we saw unsigned underflow. */
12736 x = gen_reg_rtx (mode);
12737 emit_insn (gen_xorv4si3 (x, t1, t2));
12745 /* Perform a parallel unsigned saturating subtraction. */
12746 x = gen_reg_rtx (mode);
12747 emit_insn (gen_rtx_SET (VOIDmode, x,
12748 gen_rtx_US_MINUS (mode, cop0, cop1)));
12755 gcc_unreachable ();
/* After the rewrite the comparison is made against an all-zero vector.  */
12759 cop1 = CONST0_RTX (mode);
/* Emit the compare, then the select; both use the same (possibly
   swapped) pair of value operands.  */
12762 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
12763 operands[1+negate], operands[2-negate]);
12765 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
12766 operands[2-negate]);
12770 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
12771 true if we should do zero extension, else sign extension. HIGH_P is
12772 true if we want the N/2 high elements, else the low elements. */
/* The result is written to operands[0], viewed in the input's mode.  */
12775 ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
12777 enum machine_mode imode = GET_MODE (operands[1]);
/* Generator for the interleave insn, chosen from the input mode and
   from whether the high or the low half is wanted.  */
12778 rtx (*unpack)(rtx, rtx, rtx);
12785 unpack = gen_vec_interleave_highv16qi;
12787 unpack = gen_vec_interleave_lowv16qi;
12791 unpack = gen_vec_interleave_highv8hi;
12793 unpack = gen_vec_interleave_lowv8hi;
12797 unpack = gen_vec_interleave_highv4si;
12799 unpack = gen_vec_interleave_lowv4si;
12802 gcc_unreachable ();
12805 dest = gen_lowpart (imode, operands[0]);
/* SE is the vector interleaved with the input to widen it: either all
   zeros, or the result of comparing 0 > operands[1] element-wise.
   Presumably the first form serves zero extension and the second sign
   extension, selected by UNSIGNED_P -- TODO confirm.  */
12808 se = force_reg (imode, CONST0_RTX (imode));
12810 se = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
12811 operands[1], pc_rtx, pc_rtx);
12813 emit_insn (unpack (dest, operands[1], se));
12816 /* Expand conditional increment or decrement using adc/sbb instructions.
12817 The default case using setcc followed by the conditional move can be
12818 done by generic code. */
/* Operand roles as used in the body: operands[0] is the destination,
   operands[1] carries the comparison code, operands[2] is the value
   being adjusted and operands[3] is the increment.  The compared values
   come from ix86_compare_op0 / ix86_compare_op1.  */
12820 ix86_expand_int_addcc (rtx operands[])
12822 enum rtx_code code = GET_CODE (operands[1]);
12824 rtx val = const0_rtx;
12825 bool fpcmp = false;
12826 enum machine_mode mode = GET_MODE (operands[0]);
/* Only an increment of exactly 1 or -1 passes the test below.  */
12828 if (operands[3] != const1_rtx
12829 && operands[3] != constm1_rtx)
/* The comparison has to be expressible through the carry flag; on
   success COMPARE_OP holds the resulting flags test.  */
12831 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
12832 ix86_compare_op1, &compare_op))
12834 code = GET_CODE (compare_op);
/* Floating-point flag modes need their comparison code translated to
   the integer equivalent.  */
12836 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
12837 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
12840 code = ix86_fp_compare_code_to_integer (code);
12847 PUT_CODE (compare_op,
12848 reverse_condition_maybe_unordered
12849 (GET_CODE (compare_op)));
12851 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
12853 PUT_MODE (compare_op, mode);
12855 /* Construct either adc or sbb insn. */
/* The subtract-with-carry patterns are used when "carry condition is
   LTU" and "increment is -1" are both true or both false; otherwise
   the add-with-carry patterns are used.  */
12856 if ((code == LTU) == (operands[3] == constm1_rtx))
12858 switch (GET_MODE (operands[0]))
12861 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
12864 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
12867 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
12870 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
12873 gcc_unreachable ();
12878 switch (GET_MODE (operands[0]))
12881 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
12884 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
12887 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
12890 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
12893 gcc_unreachable ();
12896 return 1; /* DONE */
12900 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
12901 works for floating point parameters and non-offsettable memories.
12902 For pushes, it returns just stack offsets; the values will be saved
12903 in the right order. Maximally three parts are generated. */
/* OPERAND is the value to split, PARTS receives the pieces (index 0 is
   the piece at offset 0 / the lowest register) and MODE is the mode of
   the whole value.  Callers in this file use the return value as the
   number of parts.  */
12906 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
/* Two ways of computing the part count follow: one in 4-byte units
   (with XFmode fixed at 3) and one in 8-byte units.  Presumably the
   choice depends on the target word size -- TODO confirm.  */
12911 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
12913 size = (GET_MODE_SIZE (mode) + 4) / 8;
/* MMX registers are not accepted, and only 2- or 3-part splits are
   supported.  */
12915 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
12916 gcc_assert (size >= 2 && size <= 3);
12918 /* Optimize constant pool reference to immediates. This is used by fp
12919 moves, that force all constants to memory to allow combining. */
12920 if (MEM_P (operand) && MEM_READONLY_P (operand))
12922 rtx tmp = maybe_get_pool_constant (operand);
12927 if (MEM_P (operand) && !offsettable_memref_p (operand))
12929 /* The only non-offsetable memories we handle are pushes. */
12930 int ok = push_operand (operand, VOIDmode);
/* For a push every part is the same operand, retyped to Pmode.  */
12934 operand = copy_rtx (operand);
12935 PUT_MODE (operand, Pmode);
12936 parts[0] = parts[1] = parts[2] = operand;
12940 if (GET_CODE (operand) == CONST_VECTOR)
12942 enum machine_mode imode = int_mode_for_mode (mode);
12943 /* Caution: if we looked through a constant pool memory above,
12944 the operand may actually have a different mode now. That's
12945 ok, since we want to pun this all the way back to an integer. */
12946 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
12947 gcc_assert (operand != NULL);
/* SImode parts: DImode goes through split_di; other modes are handled
   by kind of operand (register, offsettable memory, CONST_DOUBLE).  */
12953 if (mode == DImode)
12954 split_di (&operand, 1, &parts[0], &parts[1]);
12957 if (REG_P (operand))
/* Consecutive hard register numbers are used, hence the requirement
   that reload has completed.  */
12959 gcc_assert (reload_completed);
12960 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
12961 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
12963 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
12965 else if (offsettable_memref_p (operand))
12967 operand = adjust_address (operand, SImode, 0);
12968 parts[0] = operand;
12969 parts[1] = adjust_address (operand, SImode, 4);
12971 parts[2] = adjust_address (operand, SImode, 8);
12973 else if (GET_CODE (operand) == CONST_DOUBLE)
/* Convert the constant to its target image and emit one integer
   constant per 32-bit word.  */
12978 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
12982 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
12983 parts[2] = gen_int_mode (l[2], SImode);
12986 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
12989 gcc_unreachable ();
12991 parts[1] = gen_int_mode (l[1], SImode);
12992 parts[0] = gen_int_mode (l[0], SImode);
12995 gcc_unreachable ();
/* DImode parts: TImode goes through split_ti; XFmode/TFmode get a
   DImode low part and an SImode (XFmode) or DImode (TFmode) upper
   part.  */
13000 if (mode == TImode)
13001 split_ti (&operand, 1, &parts[0], &parts[1]);
13002 if (mode == XFmode || mode == TFmode)
13004 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
13005 if (REG_P (operand))
13007 gcc_assert (reload_completed);
13008 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
13009 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
13011 else if (offsettable_memref_p (operand))
13013 operand = adjust_address (operand, DImode, 0);
13014 parts[0] = operand;
13015 parts[1] = adjust_address (operand, upper_mode, 8);
13017 else if (GET_CODE (operand) == CONST_DOUBLE)
13022 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
13023 real_to_target (l, &r, mode);
13025 /* Do not use shift by 32 to avoid warning on 32bit systems. */
/* With a 64-bit HOST_WIDE_INT two 32-bit words are packed into one
   value (low word masked to 32 bits, high word shifted up by 32 in
   two steps); otherwise immed_double_const takes the two words.  */
13026 if (HOST_BITS_PER_WIDE_INT >= 64)
13029 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
13030 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
13033 parts[0] = immed_double_const (l[0], l[1], DImode);
13035 if (upper_mode == SImode)
13036 parts[1] = gen_int_mode (l[2], SImode);
13037 else if (HOST_BITS_PER_WIDE_INT >= 64)
13040 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
13041 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
13044 parts[1] = immed_double_const (l[2], l[3], DImode);
13047 gcc_unreachable ();
13054 /* Emit insns to perform a move or push of DI, DF, and XF values.
13055 Return false when normal moves are needed; true when all required
13056 insns have been emitted. Operands 2-4 receive the destination parts
13057 in the correct order; operands 5-7 receive the matching source parts. */
/* operands[0] is the destination and operands[1] the source of the
   original move.  */
13060 ix86_split_long_move (rtx operands[])
/* Number of destination parts that are mentioned in the source
   address.  */
13065 int collisions = 0;
13066 enum machine_mode mode = GET_MODE (operands[0]);
13068 /* The DFmode expanders may ask us to move double.
13069 For 64bit target this is single move. By hiding the fact
13070 here we simplify i386.md splitters. */
13071 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
13073 /* Optimize constant pool reference to immediates. This is used by
13074 fp moves, that force all constants to memory to allow combining. */
13076 if (MEM_P (operands[1])
13077 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
13078 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
13079 operands[1] = get_pool_constant (XEXP (operands[1], 0));
13080 if (push_operand (operands[0], VOIDmode))
13082 operands[0] = copy_rtx (operands[0]);
13083 PUT_MODE (operands[0], Pmode);
13086 operands[0] = gen_lowpart (DImode, operands[0]);
13087 operands[1] = gen_lowpart (DImode, operands[1]);
13088 emit_move_insn (operands[0], operands[1]);
13092 /* The only non-offsettable memory we handle is push. */
13093 if (push_operand (operands[0], VOIDmode))
13096 gcc_assert (!MEM_P (operands[0])
13097 || offsettable_memref_p (operands[0]));
/* part[0][*] are the destination pieces and part[1][*] the source
   pieces; both are split according to the destination's mode.  */
13099 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
13100 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
13102 /* When emitting push, take care for source operands on the stack. */
/* Each lower source part is given the address of the part above it.
   Presumably this compensates for the stack pointer moving with every
   push -- TODO confirm.  */
13103 if (push && MEM_P (operands[1])
13104 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
13107 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
13108 XEXP (part[1][2], 0));
13109 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
13110 XEXP (part[1][1], 0));
13113 /* We need to do copy in the right order in case an address register
13114 of the source overlaps the destination. */
13115 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
13117 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
13119 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
13122 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
13125 /* Collision in the middle part can be handled by reordering. */
13126 if (collisions == 1 && nparts == 3
13127 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
/* Swap parts 1 and 2 in both destination and source.  */
13130 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
13131 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
13134 /* If there are more collisions, we can't handle it by reordering.
13135 Do an lea to the last part and use only one colliding move. */
13136 else if (collisions > 1)
13142 base = part[0][nparts - 1];
13144 /* Handle the case when the last part isn't valid for lea.
13145 Happens in 64-bit mode storing the 12-byte XFmode. */
13146 if (GET_MODE (base) != Pmode)
13147 base = gen_rtx_REG (Pmode, REGNO (base));
/* Load the source address into BASE and re-express every source part
   relative to it.  */
13149 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
13150 part[1][0] = replace_equiv_address (part[1][0], base);
13151 part[1][1] = replace_equiv_address (part[1][1],
13152 plus_constant (base, UNITS_PER_WORD));
13154 part[1][2] = replace_equiv_address (part[1][2],
13155 plus_constant (base, 8));
/* Push path: the highest part is emitted first.  With 128-bit long
   double, an XFmode push first lowers the stack pointer by 4.  */
13165 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
13166 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
13167 emit_move_insn (part[0][2], part[1][2]);
13172 /* In 64bit mode we don't have 32bit push available. In case this is
13173 register, it is OK - we will just use larger counterpart. We also
13174 retype memory - these comes from attempt to avoid REX prefix on
13175 moving of second half of TFmode value. */
13176 if (GET_MODE (part[1][1]) == SImode)
13178 switch (GET_CODE (part[1][1]))
13181 part[1][1] = adjust_address (part[1][1], DImode, 0);
13185 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
13189 gcc_unreachable ();
13192 if (GET_MODE (part[1][0]) == SImode)
13193 part[1][0] = part[1][1];
13196 emit_move_insn (part[0][1], part[1][1]);
13197 emit_move_insn (part[0][0], part[1][0]);
13201 /* Choose correct order to not overwrite the source before it is copied. */
13202 if ((REG_P (part[0][0])
13203 && REG_P (part[1][1])
13204 && (REGNO (part[0][0]) == REGNO (part[1][1])
13206 && REGNO (part[0][0]) == REGNO (part[1][2]))))
13208 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
/* Reversed order: highest part first.  */
13212 operands[2] = part[0][2];
13213 operands[3] = part[0][1];
13214 operands[4] = part[0][0];
13215 operands[5] = part[1][2];
13216 operands[6] = part[1][1];
13217 operands[7] = part[1][0];
13221 operands[2] = part[0][1];
13222 operands[3] = part[0][0];
13223 operands[5] = part[1][1];
13224 operands[6] = part[1][0];
/* Natural order: lowest part first.  */
13231 operands[2] = part[0][0];
13232 operands[3] = part[0][1];
13233 operands[4] = part[0][2];
13234 operands[5] = part[1][0];
13235 operands[6] = part[1][1];
13236 operands[7] = part[1][2];
13240 operands[2] = part[0][0];
13241 operands[3] = part[0][1];
13242 operands[5] = part[1][0];
13243 operands[6] = part[1][1];
13247 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
/* When a later source constant equals an earlier one whose destination
   is a register, the later move reads that register instead of
   repeating the immediate.  */
13250 if (CONST_INT_P (operands[5])
13251 && operands[5] != const0_rtx
13252 && REG_P (operands[2]))
13254 if (CONST_INT_P (operands[6])
13255 && INTVAL (operands[6]) == INTVAL (operands[5]))
13256 operands[6] = operands[2];
13259 && CONST_INT_P (operands[7])
13260 && INTVAL (operands[7]) == INTVAL (operands[5]))
13261 operands[7] = operands[2];
13265 && CONST_INT_P (operands[6])
13266 && operands[6] != const0_rtx
13267 && REG_P (operands[3])
13268 && CONST_INT_P (operands[7])
13269 && INTVAL (operands[7]) == INTVAL (operands[6]))
13270 operands[7] = operands[3];
/* Emit the part moves in the order chosen above.  */
13273 emit_move_insn (operands[2], operands[5]);
13274 emit_move_insn (operands[3], operands[6]);
13276 emit_move_insn (operands[4], operands[7]);
13281 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
13282 left shift by a constant, either using a single shift or
13283 a sequence of add instructions. */
/* OPERAND is shifted in place by COUNT bits.  MODE is the mode of the
   double-word shift being split, so the non-DImode arm of each
   selection below emits the DImode form of the instruction.  */
13286 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
/* Adding OPERAND to itself doubles it, i.e. shifts it left by one.  */
13290 emit_insn ((mode == DImode
13292 : gen_adddi3) (operand, operand, operand));
/* When not optimizing for size, COUNT additions are used if the cost
   table rates them no more expensive than one constant shift.  */
13294 else if (!optimize_size
13295 && count * ix86_cost->add <= ix86_cost->shift_const)
13298 for (i=0; i<count; i++)
13300 emit_insn ((mode == DImode
13302 : gen_adddi3) (operand, operand, operand));
/* Otherwise emit a single shift by the constant.  */
13306 emit_insn ((mode == DImode
13308 : gen_ashldi3) (operand, operand, GEN_INT (count)));
/* Split a double-word left shift into operations on its two halves.
   MODE is the double-word mode: DImode is split with split_di and uses
   32-bit halves, anything else is split with split_ti and uses 64-bit
   halves.  operands[0] is the destination, operands[1] the source and
   operands[2] the shift count.  SCRATCH, when non-null and TARGET_CMOVE
   holds, is passed to the shift-adjust pattern used for variable
   counts.  */
13312 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
13314 rtx low[2], high[2];
13316 const int single_width = mode == DImode ? 32 : 64;
13318 if (CONST_INT_P (operands[2]))
13320 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
/* The constant count is reduced modulo the double-word width.  */
13321 count = INTVAL (operands[2]) & (single_width * 2 - 1);
13323 if (count >= single_width)
/* Shifting by at least one half: the low source half becomes the high
   result half, the low result half is zero, and any remaining count is
   applied to the high half alone.  */
13325 emit_move_insn (high[0], low[1]);
13326 emit_move_insn (low[0], const0_rtx);
13328 if (count > single_width)
13329 ix86_expand_ashl_const (high[0], count - single_width, mode);
/* Shifting by less than one half: copy the source if needed, then
   shld feeds bits from the low half into the high half and the low
   half is shifted on its own.  */
13333 if (!rtx_equal_p (operands[0], operands[1]))
13334 emit_move_insn (operands[0], operands[1]);
13335 emit_insn ((mode == DImode
13337 : gen_x86_64_shld) (high[0], low[0], GEN_INT (count)));
13338 ix86_expand_ashl_const (low[0], count, mode);
/* Variable count: only the destination is split at this point.  */
13343 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
13345 if (operands[1] == const1_rtx)
13347 /* Assuming we've chosen a QImode capable registers, then 1 << N
13348 can be done with two 32/64-bit shifts, no branches, no cmoves. */
13349 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
13351 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
/* Clear both halves, test the count bit equal to the half width, then
   set the low byte of LOW when that bit is clear and the low byte of
   HIGH when it is set.  */
13353 ix86_expand_clear (low[0]);
13354 ix86_expand_clear (high[0]);
13355 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (single_width)));
13357 d = gen_lowpart (QImode, low[0]);
13358 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
13359 s = gen_rtx_EQ (QImode, flags, const0_rtx);
13360 emit_insn (gen_rtx_SET (VOIDmode, d, s));
13362 d = gen_lowpart (QImode, high[0]);
13363 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
13364 s = gen_rtx_NE (QImode, flags, const0_rtx);
13365 emit_insn (gen_rtx_SET (VOIDmode, d, s));
13368 /* Otherwise, we can get the same results by manually performing
13369 a bit extract operation on bit 5/6, and then performing the two
13370 shifts. The two methods of getting 0/1 into low/high are exactly
13371 the same size. Avoiding the shift in the bit extract case helps
13372 pentium4 a bit; no one else seems to care much either way. */
13377 if (TARGET_PARTIAL_REG_STALL && !optimize_size)
13378 x = gen_rtx_ZERO_EXTEND (mode == DImode ? SImode : DImode, operands[2]);
13380 x = gen_lowpart (mode == DImode ? SImode : DImode, operands[2]);
13381 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
/* HIGH = (count >> 5 or 6) & 1; LOW = HIGH ^ 1.  */
13383 emit_insn ((mode == DImode
13385 : gen_lshrdi3) (high[0], high[0], GEN_INT (mode == DImode ? 5 : 6)));
13386 emit_insn ((mode == DImode
13388 : gen_anddi3) (high[0], high[0], GEN_INT (1)));
13389 emit_move_insn (low[0], high[0]);
13390 emit_insn ((mode == DImode
13392 : gen_xordi3) (low[0], low[0], GEN_INT (1)));
/* Both halves are then shifted by the variable count.  */
13395 emit_insn ((mode == DImode
13397 : gen_ashldi3) (low[0], low[0], operands[2]));
13398 emit_insn ((mode == DImode
13400 : gen_ashldi3) (high[0], high[0], operands[2]));
13404 if (operands[1] == constm1_rtx)
13406 /* For -1 << N, we can avoid the shld instruction, because we
13407 know that we're shifting 0...31/63 ones into a -1. */
13408 emit_move_insn (low[0], constm1_rtx);
13410 emit_move_insn (high[0], low[0]);
13412 emit_move_insn (high[0], constm1_rtx);
/* General variable-count case.  */
13416 if (!rtx_equal_p (operands[0], operands[1]))
13417 emit_move_insn (operands[0], operands[1]);
13419 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
13420 emit_insn ((mode == DImode
13422 : gen_x86_64_shld) (high[0], low[0], operands[2]));
13425 emit_insn ((mode == DImode ? gen_ashlsi3 : gen_ashldi3) (low[0], low[0], operands[2]));
/* Fix up the halves via a shift-adjust pattern; the form taking a
   zeroed scratch register is used when cmov and a scratch are
   available.  */
13427 if (TARGET_CMOVE && scratch)
13429 ix86_expand_clear (scratch);
13430 emit_insn ((mode == DImode
13431 ? gen_x86_shift_adj_1
13432 : gen_x86_64_shift_adj) (high[0], low[0], operands[2], scratch));
13435 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
/* Split a double-word arithmetic right shift into operations on its two
   halves.  MODE is the double-word mode: DImode uses split_di and
   32-bit halves, anything else uses split_ti and 64-bit halves.
   operands[0] is the destination, operands[1] the source and
   operands[2] the shift count.  SCRATCH, when non-null and TARGET_CMOVE
   holds, is used for the variable-count adjustment.  */
13439 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
13441 rtx low[2], high[2];
13443 const int single_width = mode == DImode ? 32 : 64;
13445 if (CONST_INT_P (operands[2]))
13447 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
/* The constant count is reduced modulo the double-word width.  */
13448 count = INTVAL (operands[2]) & (single_width * 2 - 1);
13450 if (count == single_width * 2 - 1)
/* Maximum count: both result halves are the high source half shifted
   right arithmetically by (half width - 1).  */
13452 emit_move_insn (high[0], high[1]);
13453 emit_insn ((mode == DImode
13455 : gen_ashrdi3) (high[0], high[0],
13456 GEN_INT (single_width - 1)));
13457 emit_move_insn (low[0], high[0]);
13460 else if (count >= single_width)
/* At least one half: the high source half moves to the low result
   half, the high result half is that value shifted by (half width - 1),
   and the low half takes the remaining count.  */
13462 emit_move_insn (low[0], high[1]);
13463 emit_move_insn (high[0], low[0]);
13464 emit_insn ((mode == DImode
13466 : gen_ashrdi3) (high[0], high[0],
13467 GEN_INT (single_width - 1)));
13468 if (count > single_width)
13469 emit_insn ((mode == DImode
13471 : gen_ashrdi3) (low[0], low[0],
13472 GEN_INT (count - single_width)));
/* Less than one half: shrd feeds bits from the high half into the low
   half, then the high half is shifted on its own.  */
13476 if (!rtx_equal_p (operands[0], operands[1]))
13477 emit_move_insn (operands[0], operands[1]);
13478 emit_insn ((mode == DImode
13480 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
13481 emit_insn ((mode == DImode
13483 : gen_ashrdi3) (high[0], high[0], GEN_INT (count)));
/* Variable count.  */
13488 if (!rtx_equal_p (operands[0], operands[1]))
13489 emit_move_insn (operands[0], operands[1]);
13491 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
13493 emit_insn ((mode == DImode
13495 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
13496 emit_insn ((mode == DImode
13498 : gen_ashrdi3) (high[0], high[0], operands[2]));
13500 if (TARGET_CMOVE && scratch)
/* SCRATCH is loaded with the high half shifted right arithmetically by
   (half width - 1) before being handed to the adjust pattern.  */
13502 emit_move_insn (scratch, high[0]);
13503 emit_insn ((mode == DImode
13505 : gen_ashrdi3) (scratch, scratch,
13506 GEN_INT (single_width - 1)));
13507 emit_insn ((mode == DImode
13508 ? gen_x86_shift_adj_1
13509 : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
13513 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
/* Split a double-word logical right shift into operations on its two
   halves.  MODE is the double-word mode: DImode uses split_di and
   32-bit halves, anything else uses split_ti and 64-bit halves.
   operands[0] is the destination, operands[1] the source and
   operands[2] the shift count.  SCRATCH, when non-null and TARGET_CMOVE
   holds, is cleared and passed to the variable-count adjust pattern.  */
13518 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
13520 rtx low[2], high[2];
13522 const int single_width = mode == DImode ? 32 : 64;
13524 if (CONST_INT_P (operands[2]))
13526 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
/* The constant count is reduced modulo the double-word width.  */
13527 count = INTVAL (operands[2]) & (single_width * 2 - 1);
13529 if (count >= single_width)
/* At least one half: the high source half moves to the low result
   half, the high result half is cleared, and the low half takes the
   remaining count.  */
13531 emit_move_insn (low[0], high[1]);
13532 ix86_expand_clear (high[0]);
13534 if (count > single_width)
13535 emit_insn ((mode == DImode
13537 : gen_lshrdi3) (low[0], low[0],
13538 GEN_INT (count - single_width)));
/* Less than one half: shrd feeds bits from the high half into the low
   half, then the high half is shifted on its own.  */
13542 if (!rtx_equal_p (operands[0], operands[1]))
13543 emit_move_insn (operands[0], operands[1]);
13544 emit_insn ((mode == DImode
13546 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
13547 emit_insn ((mode == DImode
13549 : gen_lshrdi3) (high[0], high[0], GEN_INT (count)));
/* Variable count.  */
13554 if (!rtx_equal_p (operands[0], operands[1]))
13555 emit_move_insn (operands[0], operands[1]);
13557 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
13559 emit_insn ((mode == DImode
13561 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
13562 emit_insn ((mode == DImode
13564 : gen_lshrdi3) (high[0], high[0], operands[2]));
13566 /* Heh. By reversing the arguments, we can reuse this pattern. */
13567 if (TARGET_CMOVE && scratch)
13569 ix86_expand_clear (scratch);
13570 emit_insn ((mode == DImode
13571 ? gen_x86_shift_adj_1
13572 : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
13576 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
13580 /* Predict just emitted jump instruction to be taken with probability PROB. */
/* Callers in this file pass PROB on the REG_BR_PROB_BASE scale.  The
   last emitted insn must be a jump; this is asserted.  */
13582 predict_jump (int prob)
13584 rtx insn = get_last_insn ();
13585 gcc_assert (JUMP_P (insn));
/* The prediction is recorded as a REG_BR_PROB note.  */
13587 = gen_rtx_EXPR_LIST (REG_BR_PROB,
13592 /* Helper function for the string operations below. Test VARIABLE whether
13593 it is aligned to VALUE bytes. If true, jump to the label. */
/* Concretely: VARIABLE & VALUE is computed into a fresh register and a
   jump to a newly created label is emitted when the result is zero.
   Callers in this file use the return value as that label and emit it
   themselves.  */
13595 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
13597 rtx label = gen_label_rtx ();
13598 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
13599 if (GET_MODE (variable) == DImode)
13600 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
13602 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
13603 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
/* Two branch predictions are available, 50% and 90% taken; presumably
   EPILOGUE selects between them -- TODO confirm.  */
13606 predict_jump (REG_BR_PROB_BASE * 50 / 100);
13608 predict_jump (REG_BR_PROB_BASE * 90 / 100);
13612 /* Adjust COUNTER by the VALUE. */
/* VALUE is subtracted from COUNTREG in place (emitted as an addition
   of -VALUE), using the DImode or SImode add to match COUNTREG.  */
13614 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
13616 if (GET_MODE (countreg) == DImode)
13617 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
13619 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
13622 /* Zero extend possibly SImode EXP to Pmode register. */
13624 ix86_zero_extend_to_Pmode (rtx exp)
/* A modeless (VOIDmode) value is simply forced into a Pmode register.  */
13627 if (GET_MODE (exp) == VOIDmode)
13628 return force_reg (Pmode, exp);
/* A value already in Pmode is copied into a Pmode register.  */
13629 if (GET_MODE (exp) == Pmode)
13630 return copy_to_mode_reg (Pmode, exp);
/* Otherwise the SImode-to-DImode zero-extend pattern is used, which
   assumes EXP is SImode and Pmode is DImode at this point.  */
13631 r = gen_reg_rtx (Pmode);
13632 emit_insn (gen_zero_extendsidi2 (r, exp));
13636 /* Divide COUNTREG by SCALE. */
/* A constant count is divided directly.  A register count is shifted
   right logically by exact_log2 (SCALE), so SCALE is expected to be a
   power of two on that path.  */
13638 scale_counter (rtx countreg, int scale)
13641 rtx piece_size_mask;
13645 if (CONST_INT_P (countreg))
13646 return GEN_INT (INTVAL (countreg) / scale);
13647 gcc_assert (REG_P (countreg));
/* NOTE(review): piece_size_mask is computed but has no use in the
   lines visible here -- confirm whether it is still needed.  */
13649 piece_size_mask = GEN_INT (scale - 1);
13650 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
13651 GEN_INT (exact_log2 (scale)),
13652 NULL, 1, OPTAB_DIRECT);
13656 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
13657 DImode for constant loop counts. */
13659 static enum machine_mode
13660 counter_mode (rtx count_exp)
/* A count that already has a mode keeps it.  */
13662 if (GET_MODE (count_exp) != VOIDmode)
13663 return GET_MODE (count_exp);
13664 if (GET_CODE (count_exp) != CONST_INT)
/* NOTE(review): where int is 32 bits the literal 0xffffffff has type
   unsigned int, so ~0xffffffff evaluates to 0 and this test can never
   be true.  If the intent is to detect constants that do not fit in 32
   bits, the mask needs a wider type (e.g. a HOST_WIDE_INT cast) --
   confirm and fix.  */
13666 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
13671 /* When SRCPTR is non-NULL, output simple loop to move memory
13672 pointer to SRCPTR to DESTPTR via chunks of MODE unrolled UNROLL times,
13673 overall size is COUNT specified in bytes. When SRCPTR is NULL, output the
13674 equivalent loop to set memory by VALUE (supposed to be in MODE).
13676 The size is rounded down to whole number of chunk size moved at once.
13677 SRCMEM and DESTMEM provide MEMrtx to feed proper aliasing info. */
/* After the loop DESTPTR (and SRCPTR, when copying) is advanced by the
   number of bytes processed.  EXPECTED_SIZE, when not -1, is used only
   to choose the branch probability of the loop's back edge.  */
13681 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
13682 rtx destptr, rtx srcptr, rtx value,
13683 rtx count, enum machine_mode mode, int unroll,
13686 rtx out_label, top_label, iter, tmp;
13687 enum machine_mode iter_mode = counter_mode (count);
/* Bytes handled per iteration, and the mask that rounds COUNT down to
   a multiple of that amount.  */
13688 rtx piece_size = GEN_INT (GET_MODE_SIZE (mode) * unroll);
13689 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
13695 top_label = gen_label_rtx ();
13696 out_label = gen_label_rtx ();
13697 iter = gen_reg_rtx (iter_mode);
13699 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
13700 NULL, 1, OPTAB_DIRECT);
13701 /* Those two should combine. */
13702 if (piece_size == const1_rtx)
13704 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
13706 predict_jump (REG_BR_PROB_BASE * 10 / 100);
/* ITER is the running byte offset, starting at zero.  */
13708 emit_move_insn (iter, const0_rtx);
13710 emit_label (top_label);
/* Address the current chunk as pointer + ITER.  */
13712 tmp = convert_modes (Pmode, iter_mode, iter, true);
13713 x_addr = gen_rtx_PLUS (Pmode, destptr, tmp);
13714 destmem = change_address (destmem, mode, x_addr);
13718 y_addr = gen_rtx_PLUS (Pmode, srcptr, copy_rtx (tmp));
13719 srcmem = change_address (srcmem, mode, y_addr);
13721 /* When unrolling for chips that reorder memory reads and writes,
13722 we can save registers by using single temporary.
13723 Also using 4 temporaries is overkill in 32bit mode. */
/* NOTE(review): the "&& 0" makes this condition constant false, so the
   memory-to-memory variant guarded by it is never selected.  */
13724 if (!TARGET_64BIT && 0)
13726 for (i = 0; i < unroll; i++)
13731 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
13733 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
13735 emit_move_insn (destmem, srcmem);
/* Variant using temporaries: all loads are emitted first, then all
   stores; at most four temporaries are supported.  */
13741 gcc_assert (unroll <= 4);
13742 for (i = 0; i < unroll; i++)
13744 tmpreg[i] = gen_reg_rtx (mode);
13748 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
13750 emit_move_insn (tmpreg[i], srcmem);
13752 for (i = 0; i < unroll; i++)
13757 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
13759 emit_move_insn (destmem, tmpreg[i]);
/* Set variant: store VALUE into each destination chunk.  */
13764 for (i = 0; i < unroll; i++)
13768 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
13769 emit_move_insn (destmem, value);
/* Advance ITER by one piece and loop while it is below SIZE.  */
13772 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
13773 true, OPTAB_LIB_WIDEN);
13775 emit_move_insn (iter, tmp);
13777 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
/* With a known expected size the back-edge probability is derived from
   the expected number of iterations; otherwise 80% is used.  */
13779 if (expected_size != -1)
13781 expected_size /= GET_MODE_SIZE (mode) * unroll;
13782 if (expected_size == 0)
13784 else if (expected_size > REG_BR_PROB_BASE)
13785 predict_jump (REG_BR_PROB_BASE - 1);
13787 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
13790 predict_jump (REG_BR_PROB_BASE * 80 / 100);
/* Fold the final offset back into the pointer registers.  */
13791 iter = ix86_zero_extend_to_Pmode (iter);
13792 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
13793 true, OPTAB_LIB_WIDEN);
13794 if (tmp != destptr)
13795 emit_move_insn (destptr, tmp);
13798 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
13799 true, OPTAB_LIB_WIDEN);
13801 emit_move_insn (srcptr, tmp);
13803 emit_label (out_label);
13806 /* Output "rep; mov" instruction.
13807 Arguments have same meaning as for previous function */
/* COUNT is scaled by the size of MODE to obtain the element count
   placed in the count register.  */
13809 expand_movmem_via_rep_mov (rtx destmem, rtx srcmem,
13810 rtx destptr, rtx srcptr,
13812 enum machine_mode mode)
13818 /* If the size is known, it is shorter to use rep movs. */
/* This applies to a byte-mode copy whose constant count is a multiple
   of four.  */
13819 if (mode == QImode && CONST_INT_P (count)
13820 && !(INTVAL (count) & 3))
/* Make both memory references BLKmode references based on the pointer
   registers.  */
13823 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
13824 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
13825 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
13826 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
13827 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
/* DESTEXP / SRCEXP are the expressions pointer + byte count: the count
   register is shifted back up to bytes unless the element is a byte.  */
13828 if (mode != QImode)
13830 destexp = gen_rtx_ASHIFT (Pmode, countreg,
13831 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
13832 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
13833 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
13834 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
13835 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
13839 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
13840 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
13842 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
13846 /* Output "rep; stos" instruction.
13847 Arguments have same meaning as for previous function */
/* VALUE is narrowed to MODE and forced into a register; COUNT is scaled
   by the size of MODE to obtain the element count placed in the count
   register.  */
13849 expand_setmem_via_rep_stos (rtx destmem, rtx destptr, rtx value,
13851 enum machine_mode mode)
/* Make the destination a BLKmode reference based on DESTPTR.  */
13856 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
13857 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
13858 value = force_reg (mode, gen_lowpart (mode, value));
13859 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
/* DESTEXP is the expression DESTPTR + byte count: the count register
   is shifted back up to bytes unless the element is a byte.  */
13860 if (mode != QImode)
13862 destexp = gen_rtx_ASHIFT (Pmode, countreg,
13863 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
13864 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
13867 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
13868 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
/* Emit one strmov insn moving a MODE-sized element from SRCMEM to
   DESTMEM.  Both memory references are rebuilt in MODE on top of
   SRCPTR / DESTPTR, with OFFSET passed to
   adjust_automodify_address_nv for each.  */
13872 emit_strmov (rtx destmem, rtx srcmem,
13873 rtx destptr, rtx srcptr, enum machine_mode mode, int offset)
13875 rtx src = adjust_automodify_address_nv (srcmem, mode, srcptr, offset);
13876 rtx dest = adjust_automodify_address_nv (destmem, mode, destptr, offset);
13877 emit_insn (gen_strmov (destptr, dest, srcptr, src));
13880 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST.
For a constant COUNT the tail is emitted as straight-line emit_strmov calls,
one group per bit of COUNT (0x10, 0x08, 0x04, 0x02, 0x01) that is set and
lies below MAX_SIZE.  Otherwise COUNT is either ANDed with MAX_SIZE - 1 and
handed to a QImode copy loop, or examined with ix86_expand_aligntest
(count, 4 / 2 / 1, true); each such call guards one SImode, HImode or QImode
move and the label it returns is emitted right after that move.  The guarded
moves use strmov under TARGET_SINGLE_STRINGOP and plain moves addressed
through an OFFSET register otherwise.  */
13882 expand_movmem_epilogue (rtx destmem, rtx srcmem,
13883 rtx destptr, rtx srcptr, rtx count, int max_size)
13886 if (CONST_INT_P (count))
13888 HOST_WIDE_INT countval = INTVAL (count);
/* 16 bytes: two DImode pieces at OFFSET and OFFSET + 8.  */
13891 if ((countval & 0x10) && max_size > 16)
13895 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
13896 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset + 8);
13899 gcc_unreachable ();
/* 8 bytes: one DImode piece or two SImode pieces.  */
13902 if ((countval & 0x08) && max_size > 8)
13905 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
13908 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
13909 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset + 4);
13913 if ((countval & 0x04) && max_size > 4)
13915 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
13918 if ((countval & 0x02) && max_size > 2)
13920 emit_strmov (destmem, srcmem, destptr, srcptr, HImode, offset);
13923 if ((countval & 0x01) && max_size > 1)
13925 emit_strmov (destmem, srcmem, destptr, srcptr, QImode, offset);
/* Reduce COUNT to count & (max_size - 1) and copy that many bytes with a
   QImode loop.  */
13932 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
13933 count, 1, OPTAB_DIRECT);
13934 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
13935 count, QImode, 1, 4);
13939 /* When there are stringops, we can cheaply increase dest and src pointers.
13940 Otherwise we save code size by maintaining offset (zero is readily
13941 available from preceding rep operation) and using x86 addressing modes.
13943 if (TARGET_SINGLE_STRINGOP)
13947 rtx label = ix86_expand_aligntest (count, 4, true);
13948 src = change_address (srcmem, SImode, srcptr);
13949 dest = change_address (destmem, SImode, destptr);
13950 emit_insn (gen_strmov (destptr, dest, srcptr, src));
13951 emit_label (label);
13952 LABEL_NUSES (label) = 1;
13956 rtx label = ix86_expand_aligntest (count, 2, true);
13957 src = change_address (srcmem, HImode, srcptr);
13958 dest = change_address (destmem, HImode, destptr);
13959 emit_insn (gen_strmov (destptr, dest, srcptr, src));
13960 emit_label (label);
13961 LABEL_NUSES (label) = 1;
13965 rtx label = ix86_expand_aligntest (count, 1, true);
13966 src = change_address (srcmem, QImode, srcptr);
13967 dest = change_address (destmem, QImode, destptr);
13968 emit_insn (gen_strmov (destptr, dest, srcptr, src));
13969 emit_label (label);
13970 LABEL_NUSES (label) = 1;
13975 rtx offset = force_reg (Pmode, const0_rtx);
13980 rtx label = ix86_expand_aligntest (count, 4, true);
13981 src = change_address (srcmem, SImode, srcptr);
13982 dest = change_address (destmem, SImode, destptr);
13983 emit_move_insn (dest, src);
13984 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
13985 true, OPTAB_LIB_WIDEN);
13987 emit_move_insn (offset, tmp);
13988 emit_label (label);
13989 LABEL_NUSES (label) = 1;
13993 rtx label = ix86_expand_aligntest (count, 2, true);
13994 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
13995 src = change_address (srcmem, HImode, tmp);
13996 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
13997 dest = change_address (destmem, HImode, tmp);
13998 emit_move_insn (dest, src);
13999 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
14000 true, OPTAB_LIB_WIDEN);
14002 emit_move_insn (offset, tmp);
14003 emit_label (label);
14004 LABEL_NUSES (label) = 1;
14008 rtx label = ix86_expand_aligntest (count, 1, true);
14009 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
14010 src = change_address (srcmem, QImode, tmp);
14011 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
14012 dest = change_address (destmem, QImode, tmp);
14013 emit_move_insn (dest, src);
14014 emit_label (label);
14015 LABEL_NUSES (label) = 1;
14020 /* Output code to set at most count & (max_size - 1) bytes starting at DEST.
This variant ANDs COUNT with MAX_SIZE - 1 and passes the QImode low part of
VALUE, with QImode as the loop mode, to expand_set_or_movmem_via_loop.  */
14022 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
14023 rtx count, int max_size)
14026 expand_simple_binop (counter_mode (count), AND, count,
14027 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
14028 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
14029 gen_lowpart (QImode, value), count, QImode,
14033 /* Output code to set at most count & (max_size - 1) bytes starting at DEST.
For a constant COUNT the tail is emitted as straight-line strset insns, one
group per bit of COUNT (0x10, 0x08, 0x04, 0x02, 0x01) that is set and lies
below MAX_SIZE.  Otherwise the work is either delegated to
expand_setmem_epilogue_via_loop or split into pieces of 16, 8, 4, 2 and 1
bytes, each guarded by ix86_expand_aligntest on COUNT; the label returned by
the test is emitted right after the guarded stores.  The 4-, 2- and 1-byte
stores narrow VALUE with gen_lowpart.  */
14035 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx count, int max_size)
14039 if (CONST_INT_P (count))
14041 HOST_WIDE_INT countval = INTVAL (count);
/* 16 bytes: two DImode stores at OFFSET and OFFSET + 8.  */
14044 if ((countval & 0x10) && max_size > 16)
14048 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
14049 emit_insn (gen_strset (destptr, dest, value));
14050 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset + 8);
14051 emit_insn (gen_strset (destptr, dest, value));
14054 gcc_unreachable ();
/* 8 bytes: one DImode store or two SImode stores.  */
14057 if ((countval & 0x08) && max_size > 8)
14061 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
14062 emit_insn (gen_strset (destptr, dest, value));
14066 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
14067 emit_insn (gen_strset (destptr, dest, value));
14068 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset + 4);
14069 emit_insn (gen_strset (destptr, dest, value));
14073 if ((countval & 0x04) && max_size > 4)
14075 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
14076 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
14079 if ((countval & 0x02) && max_size > 2)
14081 dest = adjust_automodify_address_nv (destmem, HImode, destptr, offset);
14082 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
14085 if ((countval & 0x01) && max_size > 1)
14087 dest = adjust_automodify_address_nv (destmem, QImode, destptr, offset);
14088 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
/* Hand the tail to the loop-based epilogue.  */
14095 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
/* 16-byte piece: two DImode stores or four SImode stores.  */
14100 rtx label = ix86_expand_aligntest (count, 16, true);
14103 dest = change_address (destmem, DImode, destptr);
14104 emit_insn (gen_strset (destptr, dest, value));
14105 emit_insn (gen_strset (destptr, dest, value));
14109 dest = change_address (destmem, SImode, destptr);
14110 emit_insn (gen_strset (destptr, dest, value));
14111 emit_insn (gen_strset (destptr, dest, value));
14112 emit_insn (gen_strset (destptr, dest, value));
14113 emit_insn (gen_strset (destptr, dest, value));
14115 emit_label (label);
14116 LABEL_NUSES (label) = 1;
/* 8-byte piece: one DImode store or two SImode stores.  */
14120 rtx label = ix86_expand_aligntest (count, 8, true);
14123 dest = change_address (destmem, DImode, destptr);
14124 emit_insn (gen_strset (destptr, dest, value));
14128 dest = change_address (destmem, SImode, destptr);
14129 emit_insn (gen_strset (destptr, dest, value));
14130 emit_insn (gen_strset (destptr, dest, value));
14132 emit_label (label);
14133 LABEL_NUSES (label) = 1;
/* 4-, 2- and 1-byte pieces store the matching low part of VALUE.  */
14137 rtx label = ix86_expand_aligntest (count, 4, true);
14138 dest = change_address (destmem, SImode, destptr);
14139 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
14140 emit_label (label);
14141 LABEL_NUSES (label) = 1;
14145 rtx label = ix86_expand_aligntest (count, 2, true);
14146 dest = change_address (destmem, HImode, destptr);
14147 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
14148 emit_label (label);
14149 LABEL_NUSES (label) = 1;
14153 rtx label = ix86_expand_aligntest (count, 1, true);
14154 dest = change_address (destmem, QImode, destptr);
14155 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
14156 emit_label (label);
14157 LABEL_NUSES (label) = 1;
14161 /* Copy enough from SRC to DEST to align DEST, known to be aligned by ALIGN, to
14162 DESIRED_ALIGNMENT.
Each step calls ix86_expand_aligntest on DESTPTR (for 1, 2 and 4), moves one
QImode, HImode or SImode piece with strmov, passes the piece size to
ix86_adjust_counter for COUNT and then emits the label returned by the
test.  A step is emitted only when ALIGN does not already cover it and
DESIRED_ALIGNMENT exceeds it.  */
14164 expand_movmem_prologue (rtx destmem, rtx srcmem,
14165 rtx destptr, rtx srcptr, rtx count,
14166 int align, int desired_alignment)
/* One-byte step.  */
14168 if (align <= 1 && desired_alignment > 1)
14170 rtx label = ix86_expand_aligntest (destptr, 1, false);
14171 srcmem = change_address (srcmem, QImode, srcptr);
14172 destmem = change_address (destmem, QImode, destptr);
14173 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
14174 ix86_adjust_counter (count, 1);
14175 emit_label (label);
14176 LABEL_NUSES (label) = 1;
/* Two-byte step.  */
14178 if (align <= 2 && desired_alignment > 2)
14180 rtx label = ix86_expand_aligntest (destptr, 2, false);
14181 srcmem = change_address (srcmem, HImode, srcptr);
14182 destmem = change_address (destmem, HImode, destptr);
14183 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
14184 ix86_adjust_counter (count, 2);
14185 emit_label (label);
14186 LABEL_NUSES (label) = 1;
/* Four-byte step.  */
14188 if (align <= 4 && desired_alignment > 4)
14190 rtx label = ix86_expand_aligntest (destptr, 4, false);
14191 srcmem = change_address (srcmem, SImode, srcptr);
14192 destmem = change_address (destmem, SImode, destptr);
14193 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
14194 ix86_adjust_counter (count, 4);
14195 emit_label (label);
14196 LABEL_NUSES (label) = 1;
/* The steps above stop at 4 bytes, so nothing beyond 8-byte alignment can
   be requested.  */
14198 gcc_assert (desired_alignment <= 8);
14201 /* Set enough bytes at DEST to align DEST, known to be aligned by ALIGN, to
14202 DESIRED_ALIGNMENT.
Each step calls ix86_expand_aligntest on DESTPTR (for 1, 2 and 4), stores the
QImode, HImode or SImode low part of VALUE with strset, passes the piece
size to ix86_adjust_counter for COUNT and then emits the label returned by
the test.  */
14204 expand_setmem_prologue (rtx destmem, rtx destptr, rtx value, rtx count,
14205 int align, int desired_alignment)
/* One-byte step.  */
14207 if (align <= 1 && desired_alignment > 1)
14209 rtx label = ix86_expand_aligntest (destptr, 1, false);
14210 destmem = change_address (destmem, QImode, destptr);
14211 emit_insn (gen_strset (destptr, destmem, gen_lowpart (QImode, value)));
14212 ix86_adjust_counter (count, 1);
14213 emit_label (label);
14214 LABEL_NUSES (label) = 1;
/* Two-byte step.  */
14216 if (align <= 2 && desired_alignment > 2)
14218 rtx label = ix86_expand_aligntest (destptr, 2, false);
14219 destmem = change_address (destmem, HImode, destptr);
14220 emit_insn (gen_strset (destptr, destmem, gen_lowpart (HImode, value)));
14221 ix86_adjust_counter (count, 2);
14222 emit_label (label);
14223 LABEL_NUSES (label) = 1;
/* Four-byte step.  */
14225 if (align <= 4 && desired_alignment > 4)
14227 rtx label = ix86_expand_aligntest (destptr, 4, false);
14228 destmem = change_address (destmem, SImode, destptr);
14229 emit_insn (gen_strset (destptr, destmem, gen_lowpart (SImode, value)));
14230 ix86_adjust_counter (count, 4);
14231 emit_label (label);
14232 LABEL_NUSES (label) = 1;
/* The steps above stop at 4 bytes, so nothing beyond 8-byte alignment can
   be requested.  */
14234 gcc_assert (desired_alignment <= 8);
14237 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation.
EXPECTED_SIZE is -1 when no estimate is available.  MEMSET is passed as true
by ix86_expand_setmem and false by ix86_expand_movmem; the memset and memcpy
cost tables of ix86_cost are both consulted here, indexed by TARGET_64BIT.
*DYNAMIC_CHECK is set to -1 on entry and receives a size limit only when
TARGET_INLINE_STRINGOPS_DYNAMICALLY is in effect on the unknown-size path.  */
14238 static enum stringop_alg
14239 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
14240 int *dynamic_check)
14242 const struct stringop_algs * algs;
14244 *dynamic_check = -1;
14246 algs = &ix86_cost->memset[TARGET_64BIT != 0];
14248 algs = &ix86_cost->memcpy[TARGET_64BIT != 0];
/* An explicitly selected algorithm (stringop_alg) wins over every
   heuristic.  */
14249 if (stringop_alg != no_stringop)
14250 return stringop_alg;
14251 /* rep; movq or rep; movl is the smallest variant. */
14252 else if (optimize_size)
/* Byte-granular rep when COUNT is zero or not a multiple of 4, 4-byte rep
   otherwise.  */
14254 if (!count || (count & 3))
14255 return rep_prefix_1_byte;
14257 return rep_prefix_4_byte;
14259 /* Very tiny blocks are best handled via the loop, REP is expensive to setup.
14261 else if (expected_size != -1 && expected_size < 4)
14262 return loop_1_byte;
14263 else if (expected_size != -1)
14266 enum stringop_alg alg = libcall;
14267 for (i = 0; i < NAX_STRINGOP_ALGS; i++)
14269 gcc_assert (algs->size[i].max);
14270 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
14272 if (algs->size[i].alg != libcall)
14273 alg = algs->size[i].alg;
14274 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
14275 last non-libcall inline algorithm. */
14276 if (TARGET_INLINE_ALL_STRINGOPS)
14278 /* When the current size is best to be copied by a libcall,
14279 but we are still forced to inline, run the heuristic below
14280 that will pick code for medium sized blocks. */
14281 if (alg != libcall)
14286 return algs->size[i].alg;
14289 gcc_assert (TARGET_INLINE_ALL_STRINGOPS);
14291 /* When asked to inline the call anyway, try to pick meaningful choice.
14292 We look for maximal size of block that is faster to copy by hand and
14293 take blocks of at most of that size guessing that average size will
14294 be roughly half of the block.
14296 If this turns out to be bad, we might simply specify the preferred
14297 choice in ix86_costs. */
14298 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
14299 && algs->unknown_size == libcall)
14302 enum stringop_alg alg;
/* Keep the size limit of the last table entry whose algorithm is neither
   libcall nor zero.  */
14305 for (i = 0; i < NAX_STRINGOP_ALGS; i++)
14306 if (algs->size[i].alg != libcall && algs->size[i].alg)
14307 max = algs->size[i].max;
/* Re-run the decision with half of that limit as the expected size; the
   nested call must neither request a dynamic check nor pick libcall.  */
14310 alg = decide_alg (count, max / 2, memset, dynamic_check);
14311 gcc_assert (*dynamic_check == -1);
14312 gcc_assert (alg != libcall);
/* Report the limit so the caller can emit the run-time size check.  */
14313 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
14314 *dynamic_check = max;
14317 return algs->unknown_size;
14320 /* Decide on alignment. We know that the operand is already aligned to ALIGN
14321 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed).
The result is never smaller than ALIGN, and it is exactly ALIGN when the
expected size is known and below 4 bytes.  */
14323 decide_alignment (int align,
14324 enum stringop_alg alg,
14327 int desired_align = 0;
14331 gcc_unreachable ();
14333 case unrolled_loop:
14334 desired_align = GET_MODE_SIZE (Pmode);
14336 case rep_prefix_8_byte:
14339 case rep_prefix_4_byte:
14340 /* PentiumPro has special logic triggering for 8 byte aligned blocks.
14341 copying whole cacheline at once. */
14342 if (TARGET_PENTIUMPRO)
14347 case rep_prefix_1_byte:
14348 /* PentiumPro has special logic triggering for 8 byte aligned blocks.
14349 copying whole cacheline at once. */
14350 if (TARGET_PENTIUMPRO)
/* Never ask for less than what is already known.  */
14364 if (desired_align < align)
14365 desired_align = align;
/* Blocks expected to be shorter than 4 bytes get no extra alignment.  */
14366 if (expected_size != -1 && expected_size < 4)
14367 desired_align = align;
14368 return desired_align;
14371 /* Return the smallest power of 2 greater than VAL.
The string-operation expanders in this file call it to round the computed
epilogue size.  */
14373 smallest_pow2_greater_than (int val)
14381 /* Expand string move (memcpy) operation. Use i386 string operations when
14382 profitable. expand_clrmem contains similar code. The code depends upon
14383 architecture, block size and alignment, but always has the same
14386 1) Prologue guard: Conditional that jumps up to epilogues for small
14387 blocks that can be handled by epilogue alone. This is faster but
14388 also needed for correctness, since prologue assume the block is larger
14389 than the desired alignment.
14391 Optional dynamic check for size and libcall for large
14392 blocks is emitted here too, with -minline-stringops-dynamically.
14394 2) Prologue: copy first few bytes in order to get destination aligned
14395 to DESIRED_ALIGN. It is emitted only when ALIGN is less than
14396 DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be copied.
14397 We emit either a jump tree on power of two sized blocks, or a byte loop.
14399 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
14400 with specified algorithm.
14402 4) Epilogue: code copying tail of the block that is too small to be
14403 handled by main body (or up to size guarded by prologue guard). */
14406 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
14407 rtx expected_align_exp, rtx expected_size_exp)
14413 rtx jump_around_label = NULL;
14414 HOST_WIDE_INT align = 1;
14415 unsigned HOST_WIDE_INT count = 0;
14416 HOST_WIDE_INT expected_size = -1;
14417 int size_needed = 0, epilogue_size_needed;
14418 int desired_align = 0;
14419 enum stringop_alg alg;
14422 if (CONST_INT_P (align_exp))
14423 align = INTVAL (align_exp);
14424 /* i386 can do misaligned access on reasonably increased cost. */
14425 if (CONST_INT_P (expected_align_exp)
14426 && INTVAL (expected_align_exp) > align)
14427 align = INTVAL (expected_align_exp);
/* A constant COUNT_EXP serves as both the exact count and the expected
   size; EXPECTED_SIZE_EXP is consulted only while COUNT is still 0.  */
14428 if (CONST_INT_P (count_exp))
14429 count = expected_size = INTVAL (count_exp);
14430 if (CONST_INT_P (expected_size_exp) && count == 0)
14431 expected_size = INTVAL (expected_size_exp);
14433 /* Step 0: Decide on preferred algorithm, desired alignment and
14434 size of chunks to be copied by main loop. */
14436 alg = decide_alg (count, expected_size, false, &dynamic_check);
14437 desired_align = decide_alignment (align, alg, expected_size);
/* Without TARGET_ALIGN_STRINGOPS, treat the block as already aligned so
   that the desired_align > align tests below never fire.  */
14439 if (!TARGET_ALIGN_STRINGOPS)
14440 align = desired_align;
14442 if (alg == libcall)
14444 gcc_assert (alg != no_stringop);
14446 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
14447 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
14448 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
14453 gcc_unreachable ();
14455 size_needed = GET_MODE_SIZE (Pmode);
14457 case unrolled_loop:
14458 size_needed = GET_MODE_SIZE (Pmode) * (TARGET_64BIT ? 4 : 2);
14460 case rep_prefix_8_byte:
14463 case rep_prefix_4_byte:
14466 case rep_prefix_1_byte:
14472 epilogue_size_needed = size_needed;
14474 /* Step 1: Prologue guard. */
14476 /* Alignment code needs count to be in register. */
14477 if (CONST_INT_P (count_exp) && desired_align > align)
14479 enum machine_mode mode = SImode;
14480 if (TARGET_64BIT && (count & ~0xffffffff))
14482 count_exp = force_reg (mode, count_exp);
14484 gcc_assert (desired_align >= 1 && align >= 1);
14486 /* Ensure that alignment prologue won't copy past end of block. */
14487 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
14489 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
14490 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
14491 Make sure it is power of 2. */
14492 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
/* Jump to LABEL when COUNT_EXP is below EPILOGUE_SIZE_NEEDED (unsigned
   comparison).  */
14494 label = gen_label_rtx ();
14495 emit_cmp_and_jump_insns (count_exp,
14496 GEN_INT (epilogue_size_needed),
14497 LTU, 0, counter_mode (count_exp), 1, label);
14498 if (GET_CODE (count_exp) == CONST_INT)
14500 else if (expected_size == -1 || expected_size < epilogue_size_needed)
14501 predict_jump (REG_BR_PROB_BASE * 60 / 100);
14503 predict_jump (REG_BR_PROB_BASE * 20 / 100);
14505 /* Emit code to decide on runtime whether library call or inline should be
14507 if (dynamic_check != -1)
14509 rtx hot_label = gen_label_rtx ();
14510 jump_around_label = gen_label_rtx ();
14511 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
14512 LEU, 0, GET_MODE (count_exp), 1, hot_label);
14513 predict_jump (REG_BR_PROB_BASE * 90 / 100);
14514 emit_block_move_via_libcall (dst, src, count_exp, false);
14515 emit_jump (jump_around_label);
14516 emit_label (hot_label);
14519 /* Step 2: Alignment prologue. */
14521 if (desired_align > align)
14523 /* Except for the first move in epilogue, we no longer know
14524 constant offset in aliasing info. It don't seems to worth
14525 the pain to maintain it for the first move, so throw away
14527 src = change_address (src, BLKmode, srcreg);
14528 dst = change_address (dst, BLKmode, destreg);
14529 expand_movmem_prologue (dst, src, destreg, srcreg, count_exp, align,
14532 if (label && size_needed == 1)
14534 emit_label (label);
14535 LABEL_NUSES (label) = 1;
14539 /* Step 3: Main loop. */
14545 gcc_unreachable ();
14547 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
14548 count_exp, QImode, 1, expected_size);
14551 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
14552 count_exp, Pmode, 1, expected_size);
14554 case unrolled_loop:
14555 /* Unroll only by factor of 2 in 32bit mode, since we don't have enough
14556 registers for 4 temporaries anyway. */
14557 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
14558 count_exp, Pmode, TARGET_64BIT ? 4 : 2,
14561 case rep_prefix_8_byte:
14562 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
14565 case rep_prefix_4_byte:
14566 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
14569 case rep_prefix_1_byte:
14570 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
14574 /* Adjust properly the offset of src and dest memory for aliasing. */
14575 if (CONST_INT_P (count_exp))
14577 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
14578 (count / size_needed) * size_needed);
14579 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
14580 (count / size_needed) * size_needed);
14584 src = change_address (src, BLKmode, srcreg);
14585 dst = change_address (dst, BLKmode, destreg);
14588 /* Step 4: Epilogue to copy the remaining bytes. */
14592 /* When the main loop is done, COUNT_EXP might hold original count,
14593 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
14594 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
14595 bytes. Compensate if needed. */
14597 if (size_needed < epilogue_size_needed)
14600 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
14601 GEN_INT (size_needed - 1), count_exp, 1,
14603 if (tmp != count_exp)
14604 emit_move_insn (count_exp, tmp);
14606 emit_label (label);
14607 LABEL_NUSES (label) = 1;
/* The epilogue is emitted only when COUNT_EXP is not the literal zero and
   more than one byte may be left over.  */
14610 if (count_exp != const0_rtx && epilogue_size_needed > 1)
14611 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
14612 epilogue_size_needed);
/* Target of the jump emitted after the libcall in the dynamic size check.  */
14613 if (jump_around_label)
14614 emit_label (jump_around_label);
/* Overview: return a register of MODE (SImode or DImode, enforced by the
   assert below) derived from VAL.  const0_rtx and other CONST_INT values
   are computed at compile time from the low 8 bits of VAL and loaded with
   copy_to_mode_reg.  For any other VAL the QImode low part is widened to
   MODE with convert_modes and then either multiplied by the promoted
   constant 1, when the cost tables rate the multiply no more expensive
   than the shift/IOR sequence, or combined with copies of itself shifted
   by 8 and 16 bits, plus 32 bits past the SImode check.
   gen_movsi_insv_1 / gen_movdi_insv_1_rex64 appear under
   !TARGET_PARTIAL_REG_STALL.  */
14618 /* Helper function for memcpy. For QImode value 0xXY produce
14619 0xXYXYXYXY of wide specified by MODE. This is essentially
14620 a * 0x10101010, but we can do slightly better than
14621 synth_mult by unwinding the sequence by hand on CPUs with
14624 promote_duplicated_reg (enum machine_mode mode, rtx val)
14626 enum machine_mode valmode = GET_MODE (val);
14628 int nops = mode == DImode ? 3 : 2;
14630 gcc_assert (mode == SImode || mode == DImode);
14631 if (val == const0_rtx)
14632 return copy_to_mode_reg (mode, const0_rtx);
14633 if (CONST_INT_P (val))
14635 HOST_WIDE_INT v = INTVAL (val) & 255;
14639 if (mode == DImode)
14640 v |= (v << 16) << 16;
14641 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
14644 if (valmode == VOIDmode)
14646 if (valmode != QImode)
14647 val = gen_lowpart (QImode, val);
14648 if (mode == QImode)
14650 if (!TARGET_PARTIAL_REG_STALL)
14652 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
14653 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
14654 <= (ix86_cost->shift_const + ix86_cost->add) * nops
14655 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
14657 rtx reg = convert_modes (mode, QImode, val, true);
14658 tmp = promote_duplicated_reg (mode, const1_rtx);
14659 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
14664 rtx reg = convert_modes (mode, QImode, val, true);
14666 if (!TARGET_PARTIAL_REG_STALL)
14667 if (mode == SImode)
14668 emit_insn (gen_movsi_insv_1 (reg, reg));
14670 emit_insn (gen_movdi_insv_1_rex64 (reg, reg));
14673 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
14674 NULL, 1, OPTAB_DIRECT);
14676 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
14678 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
14679 NULL, 1, OPTAB_DIRECT);
14680 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
14681 if (mode == SImode)
14683 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
14684 NULL, 1, OPTAB_DIRECT);
14685 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
14690 /* Duplicate value VAL using promote_duplicated_reg into maximal size that will
14691 be needed by main loop copying SIZE_NEEDED chunks and prologue getting
14692 alignment from ALIGN to DESIRED_ALIGN. */
14694 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align, int align)
/* The widest mode is tried first: a mode is chosen when the main-loop chunk
   or the alignment still to be gained exceeds the next narrower size.  */
14699 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
14700 promoted_val = promote_duplicated_reg (DImode, val);
14701 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
14702 promoted_val = promote_duplicated_reg (SImode, val);
14703 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
/* NOTE(review): promote_duplicated_reg asserts that its mode is SImode or
   DImode, so this HImode request looks like it would trip that assert if
   the branch is ever reached -- confirm.  */
14704 promoted_val = promote_duplicated_reg (HImode, val);
/* Nothing wider than VAL itself is needed.  */
14706 promoted_val = val;
14708 return promoted_val;
14711 /* Expand string clear operation (bzero). Use i386 string operations when
14712 profitable. See expand_movmem comment for explanation of individual
14713 steps performed.
The fill value VAL_EXP is widened through promote_duplicated_reg_to_size:
before the prologue guard when it is a CONST_INT, and again at the start of
the alignment prologue step.  */
14715 ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
14716 rtx expected_align_exp, rtx expected_size_exp)
14721 rtx jump_around_label = NULL;
14722 HOST_WIDE_INT align = 1;
14723 unsigned HOST_WIDE_INT count = 0;
14724 HOST_WIDE_INT expected_size = -1;
14725 int size_needed = 0, epilogue_size_needed;
14726 int desired_align = 0;
14727 enum stringop_alg alg;
14728 rtx promoted_val = NULL;
14729 bool force_loopy_epilogue = false;
14732 if (CONST_INT_P (align_exp))
14733 align = INTVAL (align_exp);
14734 /* i386 can do misaligned access on reasonably increased cost. */
14735 if (CONST_INT_P (expected_align_exp)
14736 && INTVAL (expected_align_exp) > align)
14737 align = INTVAL (expected_align_exp);
/* A constant COUNT_EXP serves as both the exact count and the expected
   size; EXPECTED_SIZE_EXP is consulted only while COUNT is still 0.  */
14738 if (CONST_INT_P (count_exp))
14739 count = expected_size = INTVAL (count_exp);
14740 if (CONST_INT_P (expected_size_exp) && count == 0)
14741 expected_size = INTVAL (expected_size_exp);
14743 /* Step 0: Decide on preferred algorithm, desired alignment and
14744 size of chunks to be copied by main loop. */
14746 alg = decide_alg (count, expected_size, true, &dynamic_check);
14747 desired_align = decide_alignment (align, alg, expected_size);
/* Without TARGET_ALIGN_STRINGOPS, treat the block as already aligned so
   that the desired_align > align tests below never fire.  */
14749 if (!TARGET_ALIGN_STRINGOPS)
14750 align = desired_align;
14752 if (alg == libcall)
14754 gcc_assert (alg != no_stringop);
14756 count_exp = copy_to_mode_reg (counter_mode (count_exp), count_exp);
14757 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
14762 gcc_unreachable ();
14764 size_needed = GET_MODE_SIZE (Pmode);
14766 case unrolled_loop:
14767 size_needed = GET_MODE_SIZE (Pmode) * 4;
14769 case rep_prefix_8_byte:
14772 case rep_prefix_4_byte:
14775 case rep_prefix_1_byte:
14780 epilogue_size_needed = size_needed;
14782 /* Step 1: Prologue guard. */
14784 /* Alignment code needs count to be in register. */
14785 if (CONST_INT_P (count_exp) && desired_align > align)
14787 enum machine_mode mode = SImode;
14788 if (TARGET_64BIT && (count & ~0xffffffff))
14790 count_exp = force_reg (mode, count_exp);
14792 /* Do the cheap promotion to allow better CSE across the
14793 main loop and epilogue (ie one load of the big constant in the
14794 front of all code. */
14795 if (CONST_INT_P (val_exp))
14796 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
14797 desired_align, align);
14798 /* Ensure that alignment prologue won't copy past end of block. */
14799 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
14801 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
14802 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
14803 Make sure it is power of 2. */
14804 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
14806 /* To improve performance of small blocks, we jump around the VAL
14807 promoting mode. This mean that if the promoted VAL is not constant,
14808 we might not use it in the epilogue and have to use byte
14810 if (epilogue_size_needed > 2 && !promoted_val)
14811 force_loopy_epilogue = true;
14812 label = gen_label_rtx ();
14813 emit_cmp_and_jump_insns (count_exp,
14814 GEN_INT (epilogue_size_needed),
14815 LTU, 0, counter_mode (count_exp), 1, label);
14816 if (GET_CODE (count_exp) == CONST_INT)
14818 else if (expected_size == -1 || expected_size <= epilogue_size_needed)
14819 predict_jump (REG_BR_PROB_BASE * 60 / 100);
14821 predict_jump (REG_BR_PROB_BASE * 20 / 100);
14823 if (dynamic_check != -1)
14825 rtx hot_label = gen_label_rtx ();
14826 jump_around_label = gen_label_rtx ();
14827 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
14828 LEU, 0, counter_mode (count_exp), 1, hot_label);
14829 predict_jump (REG_BR_PROB_BASE * 90 / 100);
14830 set_storage_via_libcall (dst, count_exp, val_exp, false);
14831 emit_jump (jump_around_label);
14832 emit_label (hot_label);
14835 /* Step 2: Alignment prologue. */
14837 /* Do the expensive promotion once we branched off the small blocks. */
14839 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
14840 desired_align, align);
14841 gcc_assert (desired_align >= 1 && align >= 1);
14843 if (desired_align > align)
14845 /* Except for the first move in epilogue, we no longer know
14846 constant offset in aliasing info. It don't seems to worth
14847 the pain to maintain it for the first move, so throw away
14849 dst = change_address (dst, BLKmode, destreg);
14850 expand_setmem_prologue (dst, destreg, promoted_val, count_exp, align,
14853 if (label && size_needed == 1)
14855 emit_label (label);
14856 LABEL_NUSES (label) = 1;
14860 /* Step 3: Main loop. */
14866 gcc_unreachable ();
14868 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
14869 count_exp, QImode, 1, expected_size);
14872 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
14873 count_exp, Pmode, 1, expected_size);
14875 case unrolled_loop:
14876 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
14877 count_exp, Pmode, 4, expected_size);
14879 case rep_prefix_8_byte:
14880 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
14883 case rep_prefix_4_byte:
14884 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
14887 case rep_prefix_1_byte:
14888 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
14892 /* Adjust properly the offset of src and dest memory for aliasing. */
14893 if (CONST_INT_P (count_exp))
14894 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
14895 (count / size_needed) * size_needed);
14897 dst = change_address (dst, BLKmode, destreg);
14899 /* Step 4: Epilogue to copy the remaining bytes. */
14903 /* When the main loop is done, COUNT_EXP might hold original count,
14904 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
14905 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
14906 bytes. Compensate if needed. */
/* NOTE(review): ix86_expand_movmem guards the same compensation with
   size_needed < epilogue_size_needed; here the test is against
   desired_align - align and SIZE_NEEDED is reassigned afterwards --
   confirm that the asymmetry is intended.  */
14908 if (size_needed < desired_align - align)
14911 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
14912 GEN_INT (size_needed - 1), count_exp, 1,
14914 size_needed = desired_align - align + 1;
14915 if (tmp != count_exp)
14916 emit_move_insn (count_exp, tmp);
14918 emit_label (label);
14919 LABEL_NUSES (label) = 1;
/* The epilogue is emitted only when COUNT_EXP is not the literal zero and
   more than one byte may be left over.  FORCE_LOOPY_EPILOGUE selects the
   loop variant fed with the original VAL_EXP; otherwise the promoted value
   is used.  */
14921 if (count_exp != const0_rtx && epilogue_size_needed > 1)
14923 if (force_loopy_epilogue)
14924 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
14927 expand_setmem_epilogue (dst, destreg, promoted_val, count_exp,
/* Target of the jump emitted after the libcall in the dynamic size check.  */
14930 if (jump_around_label)
14931 emit_label (jump_around_label);
14935 /* Expand the appropriate insns for doing strlen if not just doing
14938 out = result, initialized with the start address
14939 align_rtx = alignment of the address.
14940 scratch = scratch register, initialized with the startaddress when
14941 not aligned, otherwise undefined
14943 This is just the body. It needs the initializations mentioned above and
14944 some address computing at the end. These things are done in i386.md. */
14947 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
14951 rtx align_2_label = NULL_RTX;
14952 rtx align_3_label = NULL_RTX;
14953 rtx align_4_label = gen_label_rtx ();
14954 rtx end_0_label = gen_label_rtx ();
14956 rtx tmpreg = gen_reg_rtx (SImode);
14957 rtx scratch = gen_reg_rtx (SImode);
14961 if (CONST_INT_P (align_rtx))
14962 align = INTVAL (align_rtx);
14964 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
14966 /* Is there a known alignment and is it less than 4? */
14969 rtx scratch1 = gen_reg_rtx (Pmode);
14970 emit_move_insn (scratch1, out);
14971 /* Is there a known alignment and is it not 2? */
14974 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
14975 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
14977 /* Leave just the 2 lower bits. */
14978 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
14979 NULL_RTX, 0, OPTAB_WIDEN);
14981 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
14982 Pmode, 1, align_4_label);
14983 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
14984 Pmode, 1, align_2_label);
14985 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
14986 Pmode, 1, align_3_label);
14990 /* Since the alignment is 2, we have to check 2 or 0 bytes;
14991 check if is aligned to 4 - byte. */
14993 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
14994 NULL_RTX, 0, OPTAB_WIDEN);
14996 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
14997 Pmode, 1, align_4_label);
15000 mem = change_address (src, QImode, out);
15002 /* Now compare the bytes. */
15004 /* Compare the first n unaligned byte on a byte per byte basis. */
15005 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
15006 QImode, 1, end_0_label);
15008 /* Increment the address. */
15010 emit_insn (gen_adddi3 (out, out, const1_rtx));
15012 emit_insn (gen_addsi3 (out, out, const1_rtx));
15014 /* Not needed with an alignment of 2 */
15017 emit_label (align_2_label);
15019 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
15023 emit_insn (gen_adddi3 (out, out, const1_rtx));
15025 emit_insn (gen_addsi3 (out, out, const1_rtx));
15027 emit_label (align_3_label);
15030 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
15034 emit_insn (gen_adddi3 (out, out, const1_rtx));
15036 emit_insn (gen_addsi3 (out, out, const1_rtx));
15039 /* Generate loop to check 4 bytes at a time. It is not a good idea to
15040 align this loop. It gives only huge programs, but does not help to
15042 emit_label (align_4_label);
15044 mem = change_address (src, SImode, out);
15045 emit_move_insn (scratch, mem);
15047 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
15049 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
15051 /* This formula yields a nonzero result iff one of the bytes is zero.
15052 This saves three branches inside loop and many cycles. */
15054 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
15055 emit_insn (gen_one_cmplsi2 (scratch, scratch));
15056 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
15057 emit_insn (gen_andsi3 (tmpreg, tmpreg,
15058 gen_int_mode (0x80808080, SImode)));
15059 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
15064 rtx reg = gen_reg_rtx (SImode);
15065 rtx reg2 = gen_reg_rtx (Pmode);
15066 emit_move_insn (reg, tmpreg);
15067 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
15069 /* If zero is not in the first two bytes, move two bytes forward. */
15070 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
15071 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
15072 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
15073 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
15074 gen_rtx_IF_THEN_ELSE (SImode, tmp,
15077 /* Emit lea manually to avoid clobbering of flags. */
15078 emit_insn (gen_rtx_SET (SImode, reg2,
15079 gen_rtx_PLUS (Pmode, out, const2_rtx)));
15081 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
15082 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
15083 emit_insn (gen_rtx_SET (VOIDmode, out,
15084 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
15091 rtx end_2_label = gen_label_rtx ();
15092 /* Is zero in the first two bytes? */
15094 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
15095 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
15096 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
15097 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
15098 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
15100 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
15101 JUMP_LABEL (tmp) = end_2_label;
15103 /* Not in the first two. Move two bytes forward. */
15104 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
15106 emit_insn (gen_adddi3 (out, out, const2_rtx));
15108 emit_insn (gen_addsi3 (out, out, const2_rtx));
15110 emit_label (end_2_label);
15114 /* Avoid branch in fixing the byte. */
15115 tmpreg = gen_lowpart (QImode, tmpreg);
15116 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
15117 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, 17), const0_rtx);
15119 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
15121 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));
15123 emit_label (end_0_label);
15126 /* Expand strlen. */
/* OUT receives the length, SRC is the string MEM, EOSCHAR the terminator
   and ALIGN the known alignment.  Two strategies are visible below: the
   unrolled word-at-a-time expansion and a scan built on UNSPEC_SCAS.
   NOTE(review): the return statements and some conditions are not visible
   in this listing.  */
15129 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
15131 rtx addr, scratch1, scratch2, scratch3, scratch4;
15133 /* The generic case of the strlen expander is long. Avoid
15134 expanding it unless TARGET_INLINE_ALL_STRINGOPS. */
15136 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
15137 && !TARGET_INLINE_ALL_STRINGOPS
15139 && (!CONST_INT_P (align) || INTVAL (align) < 4))
15142 addr = force_reg (Pmode, XEXP (src, 0));
15143 scratch1 = gen_reg_rtx (Pmode);
15145 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
15148 /* Well it seems that some optimizer does not combine a call like
15149 foo(strlen(bar), strlen(bar));
15150 when the move and the subtraction is done here. It does calculate
15151 the length just once when these instructions are done inside of
15152 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
15153 often used and I use one fewer register for the lifetime of
15154 output_strlen_unroll() this is better. */
15156 emit_move_insn (out, addr);
15158 ix86_expand_strlensi_unroll_1 (out, src, align);
15160 /* strlensi_unroll_1 returns the address of the zero at the end of
15161 the string, like memchr(), so compute the length by subtracting
15162 the start address. */
/* DImode and SImode variants of the subtraction follow; the selecting
   condition is not visible in this listing.  */
15164 emit_insn (gen_subdi3 (out, out, addr));
15166 emit_insn (gen_subsi3 (out, out, addr));
/* Scan-based strategy: scratch4 holds -1, scratch3 a copy of the start
   address, and EOSCHAR is forced into a QImode register.  */
15171 scratch2 = gen_reg_rtx (Pmode);
15172 scratch3 = gen_reg_rtx (Pmode);
15173 scratch4 = force_reg (Pmode, constm1_rtx);
15175 emit_move_insn (scratch3, addr);
15176 eoschar = force_reg (QImode, eoschar);
15178 src = replace_equiv_address_nv (src, scratch3);
15180 /* If .md starts supporting :P, this can be done in .md. */
15181 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
15182 scratch4), UNSPEC_SCAS);
15183 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
/* OUT = ~scratch1 - 1, again in a DImode and an SImode variant.  */
15186 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
15187 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
15191 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
15192 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
15198 /* For a given symbol (function), construct code to compute the address of its PLT
15199 entry in large x86-64 PIC model. */
15201 construct_plt_address (rtx symbol)
15203 rtx tmp = gen_reg_rtx (Pmode);
15204 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
/* Only SYMBOL_REFs under the CM_LARGE_PIC code model are accepted.  */
15206 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
15207 gcc_assert (ix86_cmodel == CM_LARGE_PIC);
/* TMP = const (UNSPEC_PLTOFF symbol) + PIC register.  The value returned
   to the caller is not visible in this listing (presumably TMP).  */
15209 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
15210 emit_insn (gen_adddi3 (tmp, tmp, pic_offset_table_rtx));
/* Builds and emits the call insn for FNADDR with argument-size operand
   CALLARG1.  RETVAL, when used, becomes the destination of a SET wrapping
   the CALL; POP, when used, adds a stack-pointer adjustment in a PARALLEL.
   Registers the call depends on are collected in USE and attached as
   CALL_INSN_FUNCTION_USAGE.
   NOTE(review): several guarding conditions and preprocessor lines are not
   visible in this listing.  */
15215 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
15216 rtx callarg2 ATTRIBUTE_UNUSED,
15217 rtx pop, int sibcall)
15219 rtx use = NULL, call;
15221 if (pop == const0_rtx)
15223 gcc_assert (!TARGET_64BIT || !pop);
15225 if (TARGET_MACHO && !TARGET_64BIT)
15228 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
15229 fnaddr = machopic_indirect_call_target (fnaddr);
15234 /* Static functions and indirect calls don't need the pic register. */
15235 if (flag_pic && (!TARGET_64BIT || ix86_cmodel == CM_LARGE_PIC)
15236 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
15237 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
15238 use_reg (&use, pic_offset_table_rtx);
/* On 64-bit targets a non-negative CALLARG2 is moved into QImode hard
   register 0 (named `al' here) and recorded as used by the call.  */
15241 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
15243 rtx al = gen_rtx_REG (QImode, 0);
15244 emit_move_insn (al, callarg2);
15245 use_reg (&use, al);
/* Large PIC model: a non-local symbol is called through its PLT address;
   otherwise an address that is not a valid call operand is copied into a
   register first.  */
15248 if (ix86_cmodel == CM_LARGE_PIC
15249 && GET_CODE (fnaddr) == MEM
15250 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
15251 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
15252 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
15253 else if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
15255 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
15256 fnaddr = gen_rtx_MEM (QImode, fnaddr);
/* 64-bit sibling calls through a non-constant address go via R11.  */
15258 if (sibcall && TARGET_64BIT
15259 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
15262 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
15263 fnaddr = gen_rtx_REG (Pmode, R11_REG);
15264 emit_move_insn (fnaddr, addr);
15265 fnaddr = gen_rtx_MEM (QImode, fnaddr);
15268 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
/* The guards for the RETVAL and POP wrappers below are not visible here.  */
15270 call = gen_rtx_SET (VOIDmode, retval, call);
15273 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
15274 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
15275 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
15278 call = emit_call_insn (call);
15280 CALL_INSN_FUNCTION_USAGE (call) = use;
15284 /* Clear stack slot assignments remembered from previous functions.
15285 This is called from INIT_EXPANDERS once before RTL is emitted for each
15288 static struct machine_function *
15289 ix86_init_machine_status (void)
15291 struct machine_function *f;
/* Allocate a zero-filled machine_function with the GC allocator, then set
   the two fields whose initial value is given explicitly here.  The return
   statement is not visible in this listing (presumably returns F).  */
15293 f = ggc_alloc_cleared (sizeof (struct machine_function));
15294 f->use_fast_prologue_epilogue_nregs = -1;
15295 f->tls_descriptor_call_expanded_p = 0;
15300 /* Return a MEM corresponding to a stack slot with mode MODE.
15301 Allocate a new slot if necessary.
15303 The RTL for a function can have several slots available: N is
15304 which slot to use. */
15307 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
15309 struct stack_local_entry *s;
15311 gcc_assert (n < MAX_386_STACK_LOCALS);
/* Reuse an existing entry with the same MODE and N; callers get a copy of
   its RTL rather than the shared rtx.  */
15313 for (s = ix86_stack_locals; s; s = s->next)
15314 if (s->mode == mode && s->n == n)
15315 return copy_rtx (s->rtl);
/* No match: allocate a new entry, give it a fresh stack slot sized for
   MODE, and push it on the front of ix86_stack_locals.
   NOTE(review): the stores to s->n / s->mode and the final return are not
   visible in this listing.  */
15317 s = (struct stack_local_entry *)
15318 ggc_alloc (sizeof (struct stack_local_entry));
15321 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
15323 s->next = ix86_stack_locals;
15324 ix86_stack_locals = s;
15328 /* Construct the SYMBOL_REF for the tls_get_addr function. */
15330 static GTY(()) rtx ix86_tls_symbol;
15332 ix86_tls_get_addr (void)
/* The SYMBOL_REF is created on first use and cached in ix86_tls_symbol.
   One of two spellings of the name is chosen; part of the selecting
   condition is not visible in this listing.  */
15335 if (!ix86_tls_symbol)
15337 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
15338 (TARGET_ANY_GNU_TLS
15340 ? "___tls_get_addr"
15341 : "__tls_get_addr");
15344 return ix86_tls_symbol;
15347 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
15349 static GTY(()) rtx ix86_tls_module_base_symbol;
15351 ix86_tls_module_base (void)
/* Created on first use and cached; the symbol's flags are marked with the
   global-dynamic TLS model.  */
15354 if (!ix86_tls_module_base_symbol)
15356 ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode,
15357 "_TLS_MODULE_BASE_");
15358 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
15359 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
15362 return ix86_tls_module_base_symbol;
15365 /* Calculate the length of the memory address in the instruction
15366 encoding. Does not include the one-byte modrm, opcode, or prefix. */
/* NOTE(review): the length values assigned in each case and the return
   statements are not visible in this listing; only the case analysis is.  */
15369 memory_address_length (rtx addr)
15371 struct ix86_address parts;
15372 rtx base, index, disp;
/* Auto-modify addresses are handled before decomposition.  */
15376 if (GET_CODE (addr) == PRE_DEC
15377 || GET_CODE (addr) == POST_INC
15378 || GET_CODE (addr) == PRE_MODIFY
15379 || GET_CODE (addr) == POST_MODIFY)
15382 ok = ix86_decompose_address (addr, &parts);
/* Look through SUBREGs so base and index are compared as plain registers.  */
15385 if (parts.base && GET_CODE (parts.base) == SUBREG)
15386 parts.base = SUBREG_REG (parts.base);
15387 if (parts.index && GET_CODE (parts.index) == SUBREG)
15388 parts.index = SUBREG_REG (parts.index);
15391 index = parts.index;
15396 - esp as the base always wants an index,
15397 - ebp as the base always wants a displacement. */
15399 /* Register Indirect. */
15400 if (base && !index && !disp)
15402 /* esp (for its index) and ebp (for its displacement) need
15403 the two-byte modrm form. */
15404 if (addr == stack_pointer_rtx
15405 || addr == arg_pointer_rtx
15406 || addr == frame_pointer_rtx
15407 || addr == hard_frame_pointer_rtx)
15411 /* Direct Addressing. */
15412 else if (disp && !base && !index)
15417 /* Find the length of the displacement constant. */
15420 if (base && satisfies_constraint_K (disp))
15425 /* ebp always wants a displacement. */
15426 else if (base == hard_frame_pointer_rtx)
15429 /* An index requires the two-byte modrm form.... */
15431 /* ...like esp, which always wants an index. */
15432 || base == stack_pointer_rtx
15433 || base == arg_pointer_rtx
15434 || base == frame_pointer_rtx)
15441 /* Compute default value for "length_immediate" attribute. When SHORTFORM
15442 is set, expect that the insn has an 8-bit immediate alternative. */
15444 ix86_attr_length_immediate_default (rtx insn, int shortform)
/* Operands are scanned from last to first for a constant.  With SHORTFORM,
   a constant satisfying constraint K is treated specially; otherwise the
   result depends on the insn's mode attribute.
   NOTE(review): the returned lengths and the switch cases are not visible
   in this listing.  */
15448 extract_insn_cached (insn);
15449 for (i = recog_data.n_operands - 1; i >= 0; --i)
15450 if (CONSTANT_P (recog_data.operand[i]))
15453 if (shortform && satisfies_constraint_K (recog_data.operand[i]))
15457 switch (get_attr_mode (insn))
15468 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
15473 fatal_insn ("unknown insn mode", insn);
15479 /* Compute default value for "length_address" attribute. */
15481 ix86_attr_length_address_default (rtx insn)
/* For LEA insns the address is the source of the (first) SET, so its
   length is measured directly.  */
15485 if (get_attr_type (insn) == TYPE_LEA)
15487 rtx set = PATTERN (insn);
15489 if (GET_CODE (set) == PARALLEL)
15490 set = XVECEXP (set, 0, 0);
15492 gcc_assert (GET_CODE (set) == SET);
15494 return memory_address_length (SET_SRC (set));
/* Otherwise use the address of the first MEM found when scanning operands
   from last to first.  The result when no operand is a MEM is not visible
   in this listing.  */
15497 extract_insn_cached (insn);
15498 for (i = recog_data.n_operands - 1; i >= 0; --i)
15499 if (MEM_P (recog_data.operand[i]))
15501 return memory_address_length (XEXP (recog_data.operand[i], 0));
15507 /* Return the maximum number of instructions a cpu can issue. */
/* The processors are grouped into three sets of case labels below.
   NOTE(review): the switch selector, the value returned for each group,
   some case labels and the default are not visible in this listing.  */
15510 ix86_issue_rate (void)
15514 case PROCESSOR_PENTIUM:
15518 case PROCESSOR_PENTIUMPRO:
15519 case PROCESSOR_PENTIUM4:
15520 case PROCESSOR_ATHLON:
15522 case PROCESSOR_AMDFAM10:
15523 case PROCESSOR_NOCONA:
15524 case PROCESSOR_GENERIC32:
15525 case PROCESSOR_GENERIC64:
15528 case PROCESSOR_CORE2:
15536 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
15537 by DEP_INSN and nothing else set by DEP_INSN. */
/* INSN_TYPE is the type attribute of INSN.  DEP_INSN must be either a
   single SET or a PARALLEL of exactly two SETs; in the latter case SET is
   the destination of the first SET and SET2 the destination of the second.
   NOTE(review): return statements and the initialisation of SET2 in the
   single-set branch are not visible in this listing.  */
15540 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
15544 /* Simplify the test for uninteresting insns. */
15545 if (insn_type != TYPE_SETCC
15546 && insn_type != TYPE_ICMOV
15547 && insn_type != TYPE_FCMOV
15548 && insn_type != TYPE_IBR)
15551 if ((set = single_set (dep_insn)) != 0)
15553 set = SET_DEST (set);
15556 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
15557 && XVECLEN (PATTERN (dep_insn), 0) == 2
15558 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
15559 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
/* SET2 must come from the second element.  Taking it from element 0 again
   made it identical to SET, so the final overlap test below rejected every
   two-SET DEP_INSN that had already passed the flags test.  */
15561 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
15562 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
15567 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
15570 /* This test is true if the dependent insn reads the flags but
15571 not any other potentially set register. */
15572 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
15575 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
15581 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
15582 address with operands set by DEP_INSN. */
15585 ix86_agi_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
/* For LEA the address expression is the source of the (first) SET.  The
   rest of this condition is not visible in this listing.  */
15589 if (insn_type == TYPE_LEA
15592 addr = PATTERN (insn);
15594 if (GET_CODE (addr) == PARALLEL)
15595 addr = XVECEXP (addr, 0, 0);
15597 gcc_assert (GET_CODE (addr) == SET);
15599 addr = SET_SRC (addr);
/* Otherwise take the address of the first MEM found when scanning the
   operands from last to first.  What happens when there is no MEM operand
   is not visible in this listing.  */
15604 extract_insn_cached (insn);
15605 for (i = recog_data.n_operands - 1; i >= 0; --i)
15606 if (MEM_P (recog_data.operand[i]))
15608 addr = XEXP (recog_data.operand[i], 0);
15615 return modified_in_p (addr, dep_insn);
/* Computes an adjusted COST for the dependence LINK of INSN on DEP_INSN,
   using per-processor rules (presumably the scheduler's adjust-cost hook --
   confirm against the target hook table).
   NOTE(review): the switch selector, some case labels, the actual cost
   adjustments and the return statements are not visible in this listing.  */
15619 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
15621 enum attr_type insn_type, dep_insn_type;
15622 enum attr_memory memory;
15624 int dep_insn_code_number;
15626 /* Anti and output dependencies have zero cost on all CPUs. */
15627 if (REG_NOTE_KIND (link) != 0)
15630 dep_insn_code_number = recog_memoized (dep_insn);
15632 /* If we can't recognize the insns, we can't really do anything. */
15633 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
15636 insn_type = get_attr_type (insn);
15637 dep_insn_type = get_attr_type (dep_insn);
15641 case PROCESSOR_PENTIUM:
15642 /* Address Generation Interlock adds a cycle of latency. */
15643 if (ix86_agi_dependent (insn, dep_insn, insn_type))
15646 /* ??? Compares pair with jump/setcc. */
15647 if (ix86_flags_dependent (insn, dep_insn, insn_type))
15650 /* Floating point stores require value to be ready one cycle earlier. */
15651 if (insn_type == TYPE_FMOV
15652 && get_attr_memory (insn) == MEMORY_STORE
15653 && !ix86_agi_dependent (insn, dep_insn, insn_type))
15657 case PROCESSOR_PENTIUMPRO:
15658 memory = get_attr_memory (insn);
15660 /* INT->FP conversion is expensive. */
15661 if (get_attr_fp_int_src (dep_insn))
15664 /* There is one cycle extra latency between an FP op and a store. */
15665 if (insn_type == TYPE_FMOV
15666 && (set = single_set (dep_insn)) != NULL_RTX
15667 && (set2 = single_set (insn)) != NULL_RTX
15668 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
15669 && MEM_P (SET_DEST (set2)))
15672 /* Show ability of reorder buffer to hide latency of load by executing
15673 in parallel with previous instruction in case
15674 previous instruction is not needed to compute the address. */
15675 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
15676 && !ix86_agi_dependent (insn, dep_insn, insn_type))
15678 /* Claim moves to take one cycle, as core can issue one load
15679 at time and the next load can start cycle later. */
15680 if (dep_insn_type == TYPE_IMOV
15681 || dep_insn_type == TYPE_FMOV)
/* The case label introducing the following group is not visible in this
   listing.  */
15689 memory = get_attr_memory (insn);
15691 /* The esp dependency is resolved before the instruction is really
15693 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
15694 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
15697 /* INT->FP conversion is expensive. */
15698 if (get_attr_fp_int_src (dep_insn))
15701 /* Show ability of reorder buffer to hide latency of load by executing
15702 in parallel with previous instruction in case
15703 previous instruction is not needed to compute the address. */
15704 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
15705 && !ix86_agi_dependent (insn, dep_insn, insn_type))
15707 /* Claim moves to take one cycle, as core can issue one load
15708 at time and the next load can start cycle later. */
15709 if (dep_insn_type == TYPE_IMOV
15710 || dep_insn_type == TYPE_FMOV)
15719 case PROCESSOR_ATHLON:
15721 case PROCESSOR_AMDFAM10:
15722 case PROCESSOR_GENERIC32:
15723 case PROCESSOR_GENERIC64:
15724 memory = get_attr_memory (insn);
15726 /* Show ability of reorder buffer to hide latency of load by executing
15727 in parallel with previous instruction in case
15728 previous instruction is not needed to compute the address. */
15729 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
15730 && !ix86_agi_dependent (insn, dep_insn, insn_type))
15732 enum attr_unit unit = get_attr_unit (insn);
15735 /* Because of the difference between the length of integer and
15736 floating unit pipeline preparation stages, the memory operands
15737 for floating point are cheaper.
15739 ??? For Athlon the difference is most probably 2. */
15740 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
15743 loadcost = TARGET_ATHLON ? 2 : 0;
15745 if (cost >= loadcost)
15758 /* How many alternative schedules to try. This should be as wide as the
15759 scheduling freedom in the DFA, but no wider. Making this value too
15760 large results in extra work for the scheduler. */
/* The answer depends on ix86_tune: PENTIUM is one case, PENTIUMPRO and K6
   share another.  The values returned are not visible in this listing.  */
15763 ia32_multipass_dfa_lookahead (void)
15765 if (ix86_tune == PROCESSOR_PENTIUM)
15768 if (ix86_tune == PROCESSOR_PENTIUMPRO
15769 || ix86_tune == PROCESSOR_K6)
15777 /* Compute the alignment given to a constant that is being placed in memory.
15778 EXP is the constant and ALIGN is the alignment that the object would
15780 The value of this function is used instead of that alignment to align
15784 ix86_constant_alignment (tree exp, int align)
/* Real constants: DFmode below 64 and 128-bit-aligned modes below 128 are
   special-cased (the values returned are not visible in this listing).  */
15786 if (TREE_CODE (exp) == REAL_CST)
15788 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
15790 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
/* Unless optimizing for size, string constants of length 31 or more are
   raised to word alignment.  */
15793 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
15794 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
15795 return BITS_PER_WORD;
15800 /* Compute the alignment for a static variable.
15801 TYPE is the data type, and ALIGN is the alignment that
15802 the object would ordinarily have. The value of this function is used
15803 instead of that alignment to align the object. */
/* NOTE(review): the values returned by each test and the final fallback
   are not visible in this listing.  */
15806 ix86_data_alignment (tree type, int align)
/* Upper bound for the aggregate rule: one word when optimizing for size,
   otherwise 256 capped by MAX_OFILE_ALIGNMENT.  */
15808 int max_align = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
/* Aggregates of constant size at least MAX_ALIGN whose alignment is still
   below MAX_ALIGN.  */
15810 if (AGGREGATE_TYPE_P (type)
15811 && TYPE_SIZE (type)
15812 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
15813 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
15814 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
15815 && align < max_align)
15818 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
15819 to 16byte boundary. */
15822 if (AGGREGATE_TYPE_P (type)
15823 && TYPE_SIZE (type)
15824 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
15825 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
15826 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
/* Per-kind rules: the element mode for arrays, the type's own mode for
   complex and scalar/vector types, and the mode of the first field for
   records and unions.  */
15830 if (TREE_CODE (type) == ARRAY_TYPE)
15832 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
15834 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
15837 else if (TREE_CODE (type) == COMPLEX_TYPE)
15840 if (TYPE_MODE (type) == DCmode && align < 64)
15842 if (TYPE_MODE (type) == XCmode && align < 128)
15845 else if ((TREE_CODE (type) == RECORD_TYPE
15846 || TREE_CODE (type) == UNION_TYPE
15847 || TREE_CODE (type) == QUAL_UNION_TYPE)
15848 && TYPE_FIELDS (type))
15850 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
15852 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
15855 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
15856 || TREE_CODE (type) == INTEGER_TYPE)
15858 if (TYPE_MODE (type) == DFmode && align < 64)
15860 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
15867 /* Compute the alignment for a local variable.
15868 TYPE is the data type, and ALIGN is the alignment that
15869 the object would ordinarily have. The value of this macro is used
15870 instead of that alignment to align the object. */
/* NOTE(review): the values returned by each test and the final fallback
   are not visible in this listing.  */
15873 ix86_local_alignment (tree type, int align)
15875 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
15876 to 16byte boundary. */
/* NOTE(review): the size threshold here is 16, whereas ix86_data_alignment
   compares the same TYPE_SIZE quantity against 128 under an identical
   comment and against a bit-valued max_align.  If TYPE_SIZE is in bits,
   16 means 2 bytes, not 16 -- confirm whether 128 was intended.  */
15879 if (AGGREGATE_TYPE_P (type)
15880 && TYPE_SIZE (type)
15881 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
15882 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
15883 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
/* Per-kind rules: the element mode for arrays, the type's own mode for
   complex and scalar/vector types, and the mode of the first field for
   records and unions.  */
15886 if (TREE_CODE (type) == ARRAY_TYPE)
15888 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
15890 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
15893 else if (TREE_CODE (type) == COMPLEX_TYPE)
15895 if (TYPE_MODE (type) == DCmode && align < 64)
15897 if (TYPE_MODE (type) == XCmode && align < 128)
15900 else if ((TREE_CODE (type) == RECORD_TYPE
15901 || TREE_CODE (type) == UNION_TYPE
15902 || TREE_CODE (type) == QUAL_UNION_TYPE)
15903 && TYPE_FIELDS (type))
15905 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
15907 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
15910 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
15911 || TREE_CODE (type) == INTEGER_TYPE)
15914 if (TYPE_MODE (type) == DFmode && align < 64)
15916 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
15922 /* Emit RTL insns to initialize the variable parts of a trampoline.
15923 FNADDR is an RTX for the address of the function's pure code.
15924 CXT is an RTX for the static chain value for the function. */
/* TRAMP is the address of the trampoline being filled in.  Two layouts are
   written below: a 10-byte one using a relative displacement, and a longer
   one built up at a running OFFSET.
   NOTE(review): the condition selecting between the layouts and the updates
   of OFFSET are not visible in this listing.  */
15926 x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
15930 /* Compute offset from the end of the jmp to the target function. */
15931 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
15932 plus_constant (tramp, 10),
15933 NULL_RTX, 1, OPTAB_DIRECT);
/* Byte 0: 0xb9, bytes 1-4: CXT, byte 5: 0xe9, bytes 6-9: DISP
   (presumably a move-immediate followed by a relative jump -- confirm the
   opcodes against the instruction set reference).  */
15934 emit_move_insn (gen_rtx_MEM (QImode, tramp),
15935 gen_int_mode (0xb9, QImode));
15936 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
15937 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
15938 gen_int_mode (0xe9, QImode));
15939 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
15944 /* Try to load address using shorter movl instead of movabs.
15945 We may want to support movq for kernel mode, but kernel does not use
15946 trampolines at the moment. */
15947 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
15949 fnaddr = copy_to_mode_reg (DImode, fnaddr);
15950 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
15951 gen_int_mode (0xbb41, HImode));
15952 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
15953 gen_lowpart (SImode, fnaddr));
15958 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
15959 gen_int_mode (0xbb49, HImode));
15960 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
15964 /* Load static chain using movabs to r10. */
15965 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
15966 gen_int_mode (0xba49, HImode));
15967 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
15970 /* Jump to the r11 */
15971 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
15972 gen_int_mode (0xff49, HImode));
15973 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
15974 gen_int_mode (0xe3, QImode));
/* Sanity check that the bytes written fit in the trampoline.  */
15976 gcc_assert (offset <= TRAMPOLINE_SIZE);
/* When ENABLE_EXECUTE_STACK is defined, also emit a library call to
   __enable_execute_stack with TRAMP as its single argument.  */
15979 #ifdef ENABLE_EXECUTE_STACK
15980 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
15981 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
15985 /* Codes for all the SSE/MMX builtins. */
15988 IX86_BUILTIN_ADDPS,
15989 IX86_BUILTIN_ADDSS,
15990 IX86_BUILTIN_DIVPS,
15991 IX86_BUILTIN_DIVSS,
15992 IX86_BUILTIN_MULPS,
15993 IX86_BUILTIN_MULSS,
15994 IX86_BUILTIN_SUBPS,
15995 IX86_BUILTIN_SUBSS,
15997 IX86_BUILTIN_CMPEQPS,
15998 IX86_BUILTIN_CMPLTPS,
15999 IX86_BUILTIN_CMPLEPS,
16000 IX86_BUILTIN_CMPGTPS,
16001 IX86_BUILTIN_CMPGEPS,
16002 IX86_BUILTIN_CMPNEQPS,
16003 IX86_BUILTIN_CMPNLTPS,
16004 IX86_BUILTIN_CMPNLEPS,
16005 IX86_BUILTIN_CMPNGTPS,
16006 IX86_BUILTIN_CMPNGEPS,
16007 IX86_BUILTIN_CMPORDPS,
16008 IX86_BUILTIN_CMPUNORDPS,
16009 IX86_BUILTIN_CMPEQSS,
16010 IX86_BUILTIN_CMPLTSS,
16011 IX86_BUILTIN_CMPLESS,
16012 IX86_BUILTIN_CMPNEQSS,
16013 IX86_BUILTIN_CMPNLTSS,
16014 IX86_BUILTIN_CMPNLESS,
16015 IX86_BUILTIN_CMPNGTSS,
16016 IX86_BUILTIN_CMPNGESS,
16017 IX86_BUILTIN_CMPORDSS,
16018 IX86_BUILTIN_CMPUNORDSS,
16020 IX86_BUILTIN_COMIEQSS,
16021 IX86_BUILTIN_COMILTSS,
16022 IX86_BUILTIN_COMILESS,
16023 IX86_BUILTIN_COMIGTSS,
16024 IX86_BUILTIN_COMIGESS,
16025 IX86_BUILTIN_COMINEQSS,
16026 IX86_BUILTIN_UCOMIEQSS,
16027 IX86_BUILTIN_UCOMILTSS,
16028 IX86_BUILTIN_UCOMILESS,
16029 IX86_BUILTIN_UCOMIGTSS,
16030 IX86_BUILTIN_UCOMIGESS,
16031 IX86_BUILTIN_UCOMINEQSS,
16033 IX86_BUILTIN_CVTPI2PS,
16034 IX86_BUILTIN_CVTPS2PI,
16035 IX86_BUILTIN_CVTSI2SS,
16036 IX86_BUILTIN_CVTSI642SS,
16037 IX86_BUILTIN_CVTSS2SI,
16038 IX86_BUILTIN_CVTSS2SI64,
16039 IX86_BUILTIN_CVTTPS2PI,
16040 IX86_BUILTIN_CVTTSS2SI,
16041 IX86_BUILTIN_CVTTSS2SI64,
16043 IX86_BUILTIN_MAXPS,
16044 IX86_BUILTIN_MAXSS,
16045 IX86_BUILTIN_MINPS,
16046 IX86_BUILTIN_MINSS,
16048 IX86_BUILTIN_LOADUPS,
16049 IX86_BUILTIN_STOREUPS,
16050 IX86_BUILTIN_MOVSS,
16052 IX86_BUILTIN_MOVHLPS,
16053 IX86_BUILTIN_MOVLHPS,
16054 IX86_BUILTIN_LOADHPS,
16055 IX86_BUILTIN_LOADLPS,
16056 IX86_BUILTIN_STOREHPS,
16057 IX86_BUILTIN_STORELPS,
16059 IX86_BUILTIN_MASKMOVQ,
16060 IX86_BUILTIN_MOVMSKPS,
16061 IX86_BUILTIN_PMOVMSKB,
16063 IX86_BUILTIN_MOVNTPS,
16064 IX86_BUILTIN_MOVNTQ,
16066 IX86_BUILTIN_LOADDQU,
16067 IX86_BUILTIN_STOREDQU,
16069 IX86_BUILTIN_PACKSSWB,
16070 IX86_BUILTIN_PACKSSDW,
16071 IX86_BUILTIN_PACKUSWB,
16073 IX86_BUILTIN_PADDB,
16074 IX86_BUILTIN_PADDW,
16075 IX86_BUILTIN_PADDD,
16076 IX86_BUILTIN_PADDQ,
16077 IX86_BUILTIN_PADDSB,
16078 IX86_BUILTIN_PADDSW,
16079 IX86_BUILTIN_PADDUSB,
16080 IX86_BUILTIN_PADDUSW,
16081 IX86_BUILTIN_PSUBB,
16082 IX86_BUILTIN_PSUBW,
16083 IX86_BUILTIN_PSUBD,
16084 IX86_BUILTIN_PSUBQ,
16085 IX86_BUILTIN_PSUBSB,
16086 IX86_BUILTIN_PSUBSW,
16087 IX86_BUILTIN_PSUBUSB,
16088 IX86_BUILTIN_PSUBUSW,
16091 IX86_BUILTIN_PANDN,
16095 IX86_BUILTIN_PAVGB,
16096 IX86_BUILTIN_PAVGW,
16098 IX86_BUILTIN_PCMPEQB,
16099 IX86_BUILTIN_PCMPEQW,
16100 IX86_BUILTIN_PCMPEQD,
16101 IX86_BUILTIN_PCMPGTB,
16102 IX86_BUILTIN_PCMPGTW,
16103 IX86_BUILTIN_PCMPGTD,
16105 IX86_BUILTIN_PMADDWD,
16107 IX86_BUILTIN_PMAXSW,
16108 IX86_BUILTIN_PMAXUB,
16109 IX86_BUILTIN_PMINSW,
16110 IX86_BUILTIN_PMINUB,
16112 IX86_BUILTIN_PMULHUW,
16113 IX86_BUILTIN_PMULHW,
16114 IX86_BUILTIN_PMULLW,
16116 IX86_BUILTIN_PSADBW,
16117 IX86_BUILTIN_PSHUFW,
16119 IX86_BUILTIN_PSLLW,
16120 IX86_BUILTIN_PSLLD,
16121 IX86_BUILTIN_PSLLQ,
16122 IX86_BUILTIN_PSRAW,
16123 IX86_BUILTIN_PSRAD,
16124 IX86_BUILTIN_PSRLW,
16125 IX86_BUILTIN_PSRLD,
16126 IX86_BUILTIN_PSRLQ,
16127 IX86_BUILTIN_PSLLWI,
16128 IX86_BUILTIN_PSLLDI,
16129 IX86_BUILTIN_PSLLQI,
16130 IX86_BUILTIN_PSRAWI,
16131 IX86_BUILTIN_PSRADI,
16132 IX86_BUILTIN_PSRLWI,
16133 IX86_BUILTIN_PSRLDI,
16134 IX86_BUILTIN_PSRLQI,
16136 IX86_BUILTIN_PUNPCKHBW,
16137 IX86_BUILTIN_PUNPCKHWD,
16138 IX86_BUILTIN_PUNPCKHDQ,
16139 IX86_BUILTIN_PUNPCKLBW,
16140 IX86_BUILTIN_PUNPCKLWD,
16141 IX86_BUILTIN_PUNPCKLDQ,
16143 IX86_BUILTIN_SHUFPS,
16145 IX86_BUILTIN_RCPPS,
16146 IX86_BUILTIN_RCPSS,
16147 IX86_BUILTIN_RSQRTPS,
16148 IX86_BUILTIN_RSQRTSS,
16149 IX86_BUILTIN_SQRTPS,
16150 IX86_BUILTIN_SQRTSS,
16152 IX86_BUILTIN_UNPCKHPS,
16153 IX86_BUILTIN_UNPCKLPS,
16155 IX86_BUILTIN_ANDPS,
16156 IX86_BUILTIN_ANDNPS,
16158 IX86_BUILTIN_XORPS,
16161 IX86_BUILTIN_LDMXCSR,
16162 IX86_BUILTIN_STMXCSR,
16163 IX86_BUILTIN_SFENCE,
16165 /* 3DNow! Original */
16166 IX86_BUILTIN_FEMMS,
16167 IX86_BUILTIN_PAVGUSB,
16168 IX86_BUILTIN_PF2ID,
16169 IX86_BUILTIN_PFACC,
16170 IX86_BUILTIN_PFADD,
16171 IX86_BUILTIN_PFCMPEQ,
16172 IX86_BUILTIN_PFCMPGE,
16173 IX86_BUILTIN_PFCMPGT,
16174 IX86_BUILTIN_PFMAX,
16175 IX86_BUILTIN_PFMIN,
16176 IX86_BUILTIN_PFMUL,
16177 IX86_BUILTIN_PFRCP,
16178 IX86_BUILTIN_PFRCPIT1,
16179 IX86_BUILTIN_PFRCPIT2,
16180 IX86_BUILTIN_PFRSQIT1,
16181 IX86_BUILTIN_PFRSQRT,
16182 IX86_BUILTIN_PFSUB,
16183 IX86_BUILTIN_PFSUBR,
16184 IX86_BUILTIN_PI2FD,
16185 IX86_BUILTIN_PMULHRW,
16187 /* 3DNow! Athlon Extensions */
16188 IX86_BUILTIN_PF2IW,
16189 IX86_BUILTIN_PFNACC,
16190 IX86_BUILTIN_PFPNACC,
16191 IX86_BUILTIN_PI2FW,
16192 IX86_BUILTIN_PSWAPDSI,
16193 IX86_BUILTIN_PSWAPDSF,
16196 IX86_BUILTIN_ADDPD,
16197 IX86_BUILTIN_ADDSD,
16198 IX86_BUILTIN_DIVPD,
16199 IX86_BUILTIN_DIVSD,
16200 IX86_BUILTIN_MULPD,
16201 IX86_BUILTIN_MULSD,
16202 IX86_BUILTIN_SUBPD,
16203 IX86_BUILTIN_SUBSD,
16205 IX86_BUILTIN_CMPEQPD,
16206 IX86_BUILTIN_CMPLTPD,
16207 IX86_BUILTIN_CMPLEPD,
16208 IX86_BUILTIN_CMPGTPD,
16209 IX86_BUILTIN_CMPGEPD,
16210 IX86_BUILTIN_CMPNEQPD,
16211 IX86_BUILTIN_CMPNLTPD,
16212 IX86_BUILTIN_CMPNLEPD,
16213 IX86_BUILTIN_CMPNGTPD,
16214 IX86_BUILTIN_CMPNGEPD,
16215 IX86_BUILTIN_CMPORDPD,
16216 IX86_BUILTIN_CMPUNORDPD,
16217 IX86_BUILTIN_CMPEQSD,
16218 IX86_BUILTIN_CMPLTSD,
16219 IX86_BUILTIN_CMPLESD,
16220 IX86_BUILTIN_CMPNEQSD,
16221 IX86_BUILTIN_CMPNLTSD,
16222 IX86_BUILTIN_CMPNLESD,
16223 IX86_BUILTIN_CMPORDSD,
16224 IX86_BUILTIN_CMPUNORDSD,
16226 IX86_BUILTIN_COMIEQSD,
16227 IX86_BUILTIN_COMILTSD,
16228 IX86_BUILTIN_COMILESD,
16229 IX86_BUILTIN_COMIGTSD,
16230 IX86_BUILTIN_COMIGESD,
16231 IX86_BUILTIN_COMINEQSD,
16232 IX86_BUILTIN_UCOMIEQSD,
16233 IX86_BUILTIN_UCOMILTSD,
16234 IX86_BUILTIN_UCOMILESD,
16235 IX86_BUILTIN_UCOMIGTSD,
16236 IX86_BUILTIN_UCOMIGESD,
16237 IX86_BUILTIN_UCOMINEQSD,
16239 IX86_BUILTIN_MAXPD,
16240 IX86_BUILTIN_MAXSD,
16241 IX86_BUILTIN_MINPD,
16242 IX86_BUILTIN_MINSD,
16244 IX86_BUILTIN_ANDPD,
16245 IX86_BUILTIN_ANDNPD,
16247 IX86_BUILTIN_XORPD,
16249 IX86_BUILTIN_SQRTPD,
16250 IX86_BUILTIN_SQRTSD,
16252 IX86_BUILTIN_UNPCKHPD,
16253 IX86_BUILTIN_UNPCKLPD,
16255 IX86_BUILTIN_SHUFPD,
16257 IX86_BUILTIN_LOADUPD,
16258 IX86_BUILTIN_STOREUPD,
16259 IX86_BUILTIN_MOVSD,
16261 IX86_BUILTIN_LOADHPD,
16262 IX86_BUILTIN_LOADLPD,
16264 IX86_BUILTIN_CVTDQ2PD,
16265 IX86_BUILTIN_CVTDQ2PS,
16267 IX86_BUILTIN_CVTPD2DQ,
16268 IX86_BUILTIN_CVTPD2PI,
16269 IX86_BUILTIN_CVTPD2PS,
16270 IX86_BUILTIN_CVTTPD2DQ,
16271 IX86_BUILTIN_CVTTPD2PI,
16273 IX86_BUILTIN_CVTPI2PD,
16274 IX86_BUILTIN_CVTSI2SD,
16275 IX86_BUILTIN_CVTSI642SD,
16277 IX86_BUILTIN_CVTSD2SI,
16278 IX86_BUILTIN_CVTSD2SI64,
16279 IX86_BUILTIN_CVTSD2SS,
16280 IX86_BUILTIN_CVTSS2SD,
16281 IX86_BUILTIN_CVTTSD2SI,
16282 IX86_BUILTIN_CVTTSD2SI64,
16284 IX86_BUILTIN_CVTPS2DQ,
16285 IX86_BUILTIN_CVTPS2PD,
16286 IX86_BUILTIN_CVTTPS2DQ,
16288 IX86_BUILTIN_MOVNTI,
16289 IX86_BUILTIN_MOVNTPD,
16290 IX86_BUILTIN_MOVNTDQ,
16293 IX86_BUILTIN_MASKMOVDQU,
16294 IX86_BUILTIN_MOVMSKPD,
16295 IX86_BUILTIN_PMOVMSKB128,
16297 IX86_BUILTIN_PACKSSWB128,
16298 IX86_BUILTIN_PACKSSDW128,
16299 IX86_BUILTIN_PACKUSWB128,
16301 IX86_BUILTIN_PADDB128,
16302 IX86_BUILTIN_PADDW128,
16303 IX86_BUILTIN_PADDD128,
16304 IX86_BUILTIN_PADDQ128,
16305 IX86_BUILTIN_PADDSB128,
16306 IX86_BUILTIN_PADDSW128,
16307 IX86_BUILTIN_PADDUSB128,
16308 IX86_BUILTIN_PADDUSW128,
16309 IX86_BUILTIN_PSUBB128,
16310 IX86_BUILTIN_PSUBW128,
16311 IX86_BUILTIN_PSUBD128,
16312 IX86_BUILTIN_PSUBQ128,
16313 IX86_BUILTIN_PSUBSB128,
16314 IX86_BUILTIN_PSUBSW128,
16315 IX86_BUILTIN_PSUBUSB128,
16316 IX86_BUILTIN_PSUBUSW128,
16318 IX86_BUILTIN_PAND128,
16319 IX86_BUILTIN_PANDN128,
16320 IX86_BUILTIN_POR128,
16321 IX86_BUILTIN_PXOR128,
16323 IX86_BUILTIN_PAVGB128,
16324 IX86_BUILTIN_PAVGW128,
16326 IX86_BUILTIN_PCMPEQB128,
16327 IX86_BUILTIN_PCMPEQW128,
16328 IX86_BUILTIN_PCMPEQD128,
16329 IX86_BUILTIN_PCMPGTB128,
16330 IX86_BUILTIN_PCMPGTW128,
16331 IX86_BUILTIN_PCMPGTD128,
16333 IX86_BUILTIN_PMADDWD128,
16335 IX86_BUILTIN_PMAXSW128,
16336 IX86_BUILTIN_PMAXUB128,
16337 IX86_BUILTIN_PMINSW128,
16338 IX86_BUILTIN_PMINUB128,
16340 IX86_BUILTIN_PMULUDQ,
16341 IX86_BUILTIN_PMULUDQ128,
16342 IX86_BUILTIN_PMULHUW128,
16343 IX86_BUILTIN_PMULHW128,
16344 IX86_BUILTIN_PMULLW128,
16346 IX86_BUILTIN_PSADBW128,
16347 IX86_BUILTIN_PSHUFHW,
16348 IX86_BUILTIN_PSHUFLW,
16349 IX86_BUILTIN_PSHUFD,
16351 IX86_BUILTIN_PSLLDQI128,
16352 IX86_BUILTIN_PSLLWI128,
16353 IX86_BUILTIN_PSLLDI128,
16354 IX86_BUILTIN_PSLLQI128,
16355 IX86_BUILTIN_PSRAWI128,
16356 IX86_BUILTIN_PSRADI128,
16357 IX86_BUILTIN_PSRLDQI128,
16358 IX86_BUILTIN_PSRLWI128,
16359 IX86_BUILTIN_PSRLDI128,
16360 IX86_BUILTIN_PSRLQI128,
16362 IX86_BUILTIN_PSLLDQ128,
16363 IX86_BUILTIN_PSLLW128,
16364 IX86_BUILTIN_PSLLD128,
16365 IX86_BUILTIN_PSLLQ128,
16366 IX86_BUILTIN_PSRAW128,
16367 IX86_BUILTIN_PSRAD128,
16368 IX86_BUILTIN_PSRLW128,
16369 IX86_BUILTIN_PSRLD128,
16370 IX86_BUILTIN_PSRLQ128,
16372 IX86_BUILTIN_PUNPCKHBW128,
16373 IX86_BUILTIN_PUNPCKHWD128,
16374 IX86_BUILTIN_PUNPCKHDQ128,
16375 IX86_BUILTIN_PUNPCKHQDQ128,
16376 IX86_BUILTIN_PUNPCKLBW128,
16377 IX86_BUILTIN_PUNPCKLWD128,
16378 IX86_BUILTIN_PUNPCKLDQ128,
16379 IX86_BUILTIN_PUNPCKLQDQ128,
16381 IX86_BUILTIN_CLFLUSH,
16382 IX86_BUILTIN_MFENCE,
16383 IX86_BUILTIN_LFENCE,
16385 /* Prescott New Instructions. */
16386 IX86_BUILTIN_ADDSUBPS,
16387 IX86_BUILTIN_HADDPS,
16388 IX86_BUILTIN_HSUBPS,
16389 IX86_BUILTIN_MOVSHDUP,
16390 IX86_BUILTIN_MOVSLDUP,
16391 IX86_BUILTIN_ADDSUBPD,
16392 IX86_BUILTIN_HADDPD,
16393 IX86_BUILTIN_HSUBPD,
16394 IX86_BUILTIN_LDDQU,
16396 IX86_BUILTIN_MONITOR,
16397 IX86_BUILTIN_MWAIT,
16400 IX86_BUILTIN_PHADDW,
16401 IX86_BUILTIN_PHADDD,
16402 IX86_BUILTIN_PHADDSW,
16403 IX86_BUILTIN_PHSUBW,
16404 IX86_BUILTIN_PHSUBD,
16405 IX86_BUILTIN_PHSUBSW,
16406 IX86_BUILTIN_PMADDUBSW,
16407 IX86_BUILTIN_PMULHRSW,
16408 IX86_BUILTIN_PSHUFB,
16409 IX86_BUILTIN_PSIGNB,
16410 IX86_BUILTIN_PSIGNW,
16411 IX86_BUILTIN_PSIGND,
16412 IX86_BUILTIN_PALIGNR,
16413 IX86_BUILTIN_PABSB,
16414 IX86_BUILTIN_PABSW,
16415 IX86_BUILTIN_PABSD,
16417 IX86_BUILTIN_PHADDW128,
16418 IX86_BUILTIN_PHADDD128,
16419 IX86_BUILTIN_PHADDSW128,
16420 IX86_BUILTIN_PHSUBW128,
16421 IX86_BUILTIN_PHSUBD128,
16422 IX86_BUILTIN_PHSUBSW128,
16423 IX86_BUILTIN_PMADDUBSW128,
16424 IX86_BUILTIN_PMULHRSW128,
16425 IX86_BUILTIN_PSHUFB128,
16426 IX86_BUILTIN_PSIGNB128,
16427 IX86_BUILTIN_PSIGNW128,
16428 IX86_BUILTIN_PSIGND128,
16429 IX86_BUILTIN_PALIGNR128,
16430 IX86_BUILTIN_PABSB128,
16431 IX86_BUILTIN_PABSW128,
16432 IX86_BUILTIN_PABSD128,
16434 /* AMDFAM10 - SSE4A New Instructions. */
16435 IX86_BUILTIN_MOVNTSD,
16436 IX86_BUILTIN_MOVNTSS,
16437 IX86_BUILTIN_EXTRQI,
16438 IX86_BUILTIN_EXTRQ,
16439 IX86_BUILTIN_INSERTQI,
16440 IX86_BUILTIN_INSERTQ,
16443 IX86_BUILTIN_BLENDPD,
16444 IX86_BUILTIN_BLENDPS,
16445 IX86_BUILTIN_BLENDVPD,
16446 IX86_BUILTIN_BLENDVPS,
16447 IX86_BUILTIN_PBLENDVB128,
16448 IX86_BUILTIN_PBLENDW128,
16453 IX86_BUILTIN_INSERTPS128,
16455 IX86_BUILTIN_MOVNTDQA,
16456 IX86_BUILTIN_MPSADBW128,
16457 IX86_BUILTIN_PACKUSDW128,
16458 IX86_BUILTIN_PCMPEQQ,
16459 IX86_BUILTIN_PHMINPOSUW128,
16461 IX86_BUILTIN_PMAXSB128,
16462 IX86_BUILTIN_PMAXSD128,
16463 IX86_BUILTIN_PMAXUD128,
16464 IX86_BUILTIN_PMAXUW128,
16466 IX86_BUILTIN_PMINSB128,
16467 IX86_BUILTIN_PMINSD128,
16468 IX86_BUILTIN_PMINUD128,
16469 IX86_BUILTIN_PMINUW128,
16471 IX86_BUILTIN_PMOVSXBW128,
16472 IX86_BUILTIN_PMOVSXBD128,
16473 IX86_BUILTIN_PMOVSXBQ128,
16474 IX86_BUILTIN_PMOVSXWD128,
16475 IX86_BUILTIN_PMOVSXWQ128,
16476 IX86_BUILTIN_PMOVSXDQ128,
16478 IX86_BUILTIN_PMOVZXBW128,
16479 IX86_BUILTIN_PMOVZXBD128,
16480 IX86_BUILTIN_PMOVZXBQ128,
16481 IX86_BUILTIN_PMOVZXWD128,
16482 IX86_BUILTIN_PMOVZXWQ128,
16483 IX86_BUILTIN_PMOVZXDQ128,
16485 IX86_BUILTIN_PMULDQ128,
16486 IX86_BUILTIN_PMULLD128,
16488 IX86_BUILTIN_ROUNDPD,
16489 IX86_BUILTIN_ROUNDPS,
16490 IX86_BUILTIN_ROUNDSD,
16491 IX86_BUILTIN_ROUNDSS,
16493 IX86_BUILTIN_PTESTZ,
16494 IX86_BUILTIN_PTESTC,
16495 IX86_BUILTIN_PTESTNZC,
16497 IX86_BUILTIN_VEC_INIT_V2SI,
16498 IX86_BUILTIN_VEC_INIT_V4HI,
16499 IX86_BUILTIN_VEC_INIT_V8QI,
16500 IX86_BUILTIN_VEC_EXT_V2DF,
16501 IX86_BUILTIN_VEC_EXT_V2DI,
16502 IX86_BUILTIN_VEC_EXT_V4SF,
16503 IX86_BUILTIN_VEC_EXT_V4SI,
16504 IX86_BUILTIN_VEC_EXT_V8HI,
16505 IX86_BUILTIN_VEC_EXT_V2SI,
16506 IX86_BUILTIN_VEC_EXT_V4HI,
16507 IX86_BUILTIN_VEC_EXT_V16QI,
16508 IX86_BUILTIN_VEC_SET_V2DI,
16509 IX86_BUILTIN_VEC_SET_V4SF,
16510 IX86_BUILTIN_VEC_SET_V4SI,
16511 IX86_BUILTIN_VEC_SET_V8HI,
16512 IX86_BUILTIN_VEC_SET_V4HI,
16513 IX86_BUILTIN_VEC_SET_V16QI,
16518 /* Table for the ix86 builtin decls, indexed by enum ix86_builtins code.
   def_builtin stores the decl of each builtin it registers here. */
16519 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
16521 /* Add an ix86 target builtin function with CODE, NAME and TYPE. Do so
16522 * only if target_flags includes at least one bit of MASK and, when MASK
16523 * contains MASK_64BIT, only if TARGET_64BIT holds as well. Stores the
16524 * function decl in the ix86_builtins array at index CODE.
 * Returns the function decl or NULL_TREE, if the builtin was not added. */
16527 def_builtin (int mask, const char *name, tree type, enum ix86_builtins code)
16529 tree decl = NULL_TREE;
/* Builtins tagged with MASK_64BIT are additionally gated on TARGET_64BIT. */
16531 if (mask & target_flags
16532 && (!(mask & MASK_64BIT) || TARGET_64BIT))
16534 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
16536 ix86_builtins[(int) code] = decl;
16542 /* Like def_builtin, but also marks the function decl "const" by
   setting TREE_READONLY on the decl that def_builtin returned. Takes the
   same MASK, NAME, TYPE and CODE arguments and forwards them unchanged. */
16545 def_builtin_const (int mask, const char *name, tree type,
16546 enum ix86_builtins code)
16548 tree decl = def_builtin (mask, name, type, code);
16550 TREE_READONLY (decl) = 1;
16554 /* Bits for builtin_description.flag. */
16556 /* Set when the comparison is not supported natively and has to be
16557 expressed through the swapped comparison instead; e.g. the table entry
   for cmpgtps lists LT together with this flag. */
16558 #define BUILTIN_DESC_SWAP_OPERANDS 1
/* One entry of the bdesc_* builtin descriptor tables. */
16560 struct builtin_description
16562 const unsigned int mask; /* MASK_* bits the entry is tagged with, e.g. MASK_SSE. */
16563 const enum insn_code icode; /* CODE_FOR_* insn code associated with the builtin. */
16564 const char *const name; /* Builtin name string; some table entries use 0 here. */
16565 const enum ix86_builtins code; /* IX86_BUILTIN_* enumerator of the builtin. */
16566 const enum rtx_code comparison; /* Comparison code for comparison entries; 0 in the others. */
16567 const unsigned int flag; /* BUILTIN_DESC_* bits, e.g. BUILTIN_DESC_SWAP_OPERANDS. */
/* Descriptors for the comi/ucomi comparison builtins: the MASK_SSE
   (..SS) entries first, then the MASK_SSE2 (..SD) entries. Each entry
   gives the comparison code in the comparison field; the flag field is 0
   throughout. */
16570 static const struct builtin_description bdesc_comi[] =
16572 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
16573 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
16574 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
16575 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
16576 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
16577 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
16578 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
16579 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
16580 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
16581 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
16582 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
16583 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
16584 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
16585 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
16586 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
16587 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
16588 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
16589 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
16590 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
16591 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
16592 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
16593 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
16594 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
16595 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
/* Descriptors for the ptest builtins. All three entries are tagged
   MASK_SSE4_1 and share CODE_FOR_sse4_1_ptest; they differ only in the
   comparison code (EQ, LTU, GTU). */
16598 static const struct builtin_description bdesc_ptest[] =
16601 { MASK_SSE4_1, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, 0 },
16602 { MASK_SSE4_1, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, 0 },
16603 { MASK_SSE4_1, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, 0 },
16606 /* SSE builtins with 3 arguments, where the last argument must be an
16607 8-bit constant or xmm0. All entries are tagged MASK_SSE4_1; the
   roundsd and roundss entries have 0 in the name field. */
16608 static const struct builtin_description bdesc_sse_3arg[] =
16611 { MASK_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, 0, 0 },
16612 { MASK_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, 0, 0 },
16613 { MASK_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, 0, 0 },
16614 { MASK_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, 0, 0 },
16615 { MASK_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, 0, 0 },
16616 { MASK_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, 0, 0 },
16617 { MASK_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, 0, 0 },
16618 { MASK_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, 0, 0 },
16619 { MASK_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, 0, 0 },
16620 { MASK_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, 0, 0 },
16621 { MASK_SSE4_1, CODE_FOR_sse4_1_roundsd, 0, IX86_BUILTIN_ROUNDSD, 0, 0 },
16622 { MASK_SSE4_1, CODE_FOR_sse4_1_roundss, 0, IX86_BUILTIN_ROUNDSS, 0, 0 },
16625 static const struct builtin_description bdesc_2arg[] =
16628 { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
16629 { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
16630 { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
16631 { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
16632 { MASK_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
16633 { MASK_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
16634 { MASK_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
16635 { MASK_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
16637 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
16638 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
16639 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
16640 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT,
16641 BUILTIN_DESC_SWAP_OPERANDS },
16642 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE,
16643 BUILTIN_DESC_SWAP_OPERANDS },
16644 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
16645 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, 0 },
16646 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, 0 },
16647 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, 0 },
16648 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE,
16649 BUILTIN_DESC_SWAP_OPERANDS },
16650 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT,
16651 BUILTIN_DESC_SWAP_OPERANDS },
16652 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, 0 },
16653 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
16654 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
16655 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
16656 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
16657 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, 0 },
16658 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, 0 },
16659 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, 0 },
16660 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE,
16661 BUILTIN_DESC_SWAP_OPERANDS },
16662 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT,
16663 BUILTIN_DESC_SWAP_OPERANDS },
16664 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, 0 },
16666 { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
16667 { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
16668 { MASK_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
16669 { MASK_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
16671 { MASK_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
16672 { MASK_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
16673 { MASK_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
16674 { MASK_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
16676 { MASK_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
16677 { MASK_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
16678 { MASK_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
16679 { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
16680 { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
16683 { MASK_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
16684 { MASK_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
16685 { MASK_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
16686 { MASK_SSE2, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, 0, 0 },
16687 { MASK_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
16688 { MASK_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
16689 { MASK_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
16690 { MASK_SSE2, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, 0, 0 },
16692 { MASK_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
16693 { MASK_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
16694 { MASK_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
16695 { MASK_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
16696 { MASK_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
16697 { MASK_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
16698 { MASK_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
16699 { MASK_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
16701 { MASK_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
16702 { MASK_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
16703 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
16705 { MASK_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
16706 { MASK_MMX, CODE_FOR_mmx_nandv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
16707 { MASK_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
16708 { MASK_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
16710 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
16711 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
16713 { MASK_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
16714 { MASK_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
16715 { MASK_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
16716 { MASK_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
16717 { MASK_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
16718 { MASK_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
16720 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
16721 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
16722 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
16723 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
16725 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
16726 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
16727 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
16728 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
16729 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
16730 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
16733 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
16734 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
16735 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
16737 { MASK_SSE, CODE_FOR_sse_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
16738 { MASK_SSE, CODE_FOR_sse_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
16739 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 },
16741 { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
16742 { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
16743 { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
16744 { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
16745 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
16746 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
16748 { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
16749 { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
16750 { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
16751 { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
16752 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
16753 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
16755 { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
16756 { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
16757 { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
16758 { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
16760 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
16761 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
16764 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
16765 { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
16766 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
16767 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
16768 { MASK_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
16769 { MASK_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
16770 { MASK_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
16771 { MASK_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
16773 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
16774 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
16775 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
16776 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT,
16777 BUILTIN_DESC_SWAP_OPERANDS },
16778 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE,
16779 BUILTIN_DESC_SWAP_OPERANDS },
16780 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
16781 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, 0 },
16782 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, 0 },
16783 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, 0 },
16784 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE,
16785 BUILTIN_DESC_SWAP_OPERANDS },
16786 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT,
16787 BUILTIN_DESC_SWAP_OPERANDS },
16788 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, 0 },
16789 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
16790 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
16791 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
16792 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
16793 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, 0 },
16794 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, 0 },
16795 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, 0 },
16796 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, 0 },
16798 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
16799 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
16800 { MASK_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
16801 { MASK_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
16803 { MASK_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
16804 { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
16805 { MASK_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
16806 { MASK_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
16808 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
16809 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
16810 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
16813 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
16814 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
16815 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
16816 { MASK_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
16817 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
16818 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
16819 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
16820 { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
16822 { MASK_MMX, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
16823 { MASK_MMX, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
16824 { MASK_MMX, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
16825 { MASK_MMX, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
16826 { MASK_MMX, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
16827 { MASK_MMX, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
16828 { MASK_MMX, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
16829 { MASK_MMX, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
16831 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
16832 { MASK_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
16834 { MASK_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
16835 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
16836 { MASK_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
16837 { MASK_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
16839 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
16840 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
16842 { MASK_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
16843 { MASK_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
16844 { MASK_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
16845 { MASK_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
16846 { MASK_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
16847 { MASK_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
16849 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
16850 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
16851 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
16852 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
16854 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
16855 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
16856 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
16857 { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
16858 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
16859 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
16860 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
16861 { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },
16863 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
16864 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
16865 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
16867 { MASK_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
16868 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
16870 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, 0, IX86_BUILTIN_PMULUDQ, 0, 0 },
16871 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, 0, IX86_BUILTIN_PMULUDQ128, 0, 0 },
16873 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
16874 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
16875 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
16877 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
16878 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
16879 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
16881 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
16882 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
16884 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
16886 { MASK_SSE2, CODE_FOR_sse2_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
16887 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 },
16888 { MASK_SSE2, CODE_FOR_sse2_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
16889 { MASK_SSE2, CODE_FOR_sse2_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 },
16892 { MASK_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, 0, 0 },
16893 { MASK_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, 0, 0 },
16894 { MASK_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, 0, 0 },
16895 { MASK_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, 0, 0 },
16896 { MASK_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, 0, 0 },
16897 { MASK_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, 0, 0 },
16900 { MASK_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, 0, 0 },
16901 { MASK_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, 0, 0 },
16902 { MASK_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, 0, 0 },
16903 { MASK_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, 0, 0 },
16904 { MASK_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, 0, 0 },
16905 { MASK_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, 0, 0 },
16906 { MASK_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, 0, 0 },
16907 { MASK_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, 0, 0 },
16908 { MASK_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, 0, 0 },
16909 { MASK_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, 0, 0 },
16910 { MASK_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, 0, 0 },
16911 { MASK_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, 0, 0 },
16912 { MASK_SSSE3, CODE_FOR_ssse3_pmaddubswv8hi3, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, 0, 0 },
16913 { MASK_SSSE3, CODE_FOR_ssse3_pmaddubswv4hi3, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, 0, 0 },
16914 { MASK_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, 0, 0 },
16915 { MASK_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, 0, 0 },
16916 { MASK_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, 0, 0 },
16917 { MASK_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, 0, 0 },
16918 { MASK_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, 0, 0 },
16919 { MASK_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, 0, 0 },
16920 { MASK_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, 0, 0 },
16921 { MASK_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, 0, 0 },
16922 { MASK_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, 0, 0 },
16923 { MASK_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, 0, 0 },
16926 { MASK_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, 0, 0 },
16927 { MASK_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, 0, 0 },
16928 { MASK_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, 0, 0 },
16929 { MASK_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, 0, 0 },
16930 { MASK_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, 0, 0 },
16931 { MASK_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, 0, 0 },
16932 { MASK_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, 0, 0 },
16933 { MASK_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, 0, 0 },
16934 { MASK_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, 0, 0 },
16935 { MASK_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, 0, 0 },
16936 { MASK_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, 0, IX86_BUILTIN_PMULDQ128, 0, 0 },
16937 { MASK_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, 0, 0 },
/* Table describing the MMX/SSE builtins that operate on one operand.

   ix86_init_mmx_sse_builtins walks this array and, for each entry it
   registers, looks up insn_data[d->icode].operand[1].mode to choose a
   function type and then calls
   def_builtin (d->mask, d->name, type, d->code).

   Reading the initializers positionally, each entry supplies:
     1. the MASK_* target flags for the builtin (d->mask),
     2. the CODE_FOR_* insn code (d->icode),
     3. the user-visible builtin name, or 0 (d->name),
     4. the IX86_BUILTIN_* code (d->code),
     5-6. two further fields that are 0 in every entry of this table.
   NOTE(review): the struct definition is not visible here; the field
   names above are taken from how the walker dereferences D -- confirm
   against struct builtin_description.

   Several entries whose name is 0 (for example IX86_BUILTIN_MOVMSKPS,
   IX86_BUILTIN_RCPPS and IX86_BUILTIN_SQRTPS) are given their names by
   explicit def_builtin calls in ix86_init_mmx_sse_builtins, with
   hand-picked function types; presumably the table walker passes over
   nameless entries -- TODO confirm.

   Per the in-table comment, the final ROUNDPD/ROUNDPS entries are
   "fake" one-argument builtins that additionally take a constant
   smaller than 8 bits.  */
16940 static const struct builtin_description bdesc_1arg[] =
16942 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
16943 { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
16945 { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
16946 { MASK_SSE, CODE_FOR_sse_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
16947 { MASK_SSE, CODE_FOR_sse_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
16949 { MASK_SSE, CODE_FOR_sse_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
16950 { MASK_SSE, CODE_FOR_sse_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
16951 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 },
16952 { MASK_SSE, CODE_FOR_sse_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
16953 { MASK_SSE, CODE_FOR_sse_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
16954 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 },
16956 { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
16957 { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
16959 { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
16961 { MASK_SSE2, CODE_FOR_sse2_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
16962 { MASK_SSE2, CODE_FOR_sse2_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
16964 { MASK_SSE2, CODE_FOR_sse2_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
16965 { MASK_SSE2, CODE_FOR_sse2_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
16966 { MASK_SSE2, CODE_FOR_sse2_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
16967 { MASK_SSE2, CODE_FOR_sse2_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
16968 { MASK_SSE2, CODE_FOR_sse2_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
16970 { MASK_SSE2, CODE_FOR_sse2_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
16972 { MASK_SSE2, CODE_FOR_sse2_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
16973 { MASK_SSE2, CODE_FOR_sse2_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
16974 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 },
16975 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 },
16977 { MASK_SSE2, CODE_FOR_sse2_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
16978 { MASK_SSE2, CODE_FOR_sse2_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
16979 { MASK_SSE2, CODE_FOR_sse2_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },
16982 { MASK_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, 0, 0 },
16983 { MASK_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, 0, 0 },
16986 { MASK_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, 0, 0 },
16987 { MASK_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, 0, 0 },
16988 { MASK_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, 0, 0 },
16989 { MASK_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, 0, 0 },
16990 { MASK_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, 0, 0 },
16991 { MASK_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, 0, 0 },
16994 { MASK_SSE4_1, CODE_FOR_sse4_1_extendv8qiv8hi2, 0, IX86_BUILTIN_PMOVSXBW128, 0, 0 },
16995 { MASK_SSE4_1, CODE_FOR_sse4_1_extendv4qiv4si2, 0, IX86_BUILTIN_PMOVSXBD128, 0, 0 },
16996 { MASK_SSE4_1, CODE_FOR_sse4_1_extendv2qiv2di2, 0, IX86_BUILTIN_PMOVSXBQ128, 0, 0 },
16997 { MASK_SSE4_1, CODE_FOR_sse4_1_extendv4hiv4si2, 0, IX86_BUILTIN_PMOVSXWD128, 0, 0 },
16998 { MASK_SSE4_1, CODE_FOR_sse4_1_extendv2hiv2di2, 0, IX86_BUILTIN_PMOVSXWQ128, 0, 0 },
16999 { MASK_SSE4_1, CODE_FOR_sse4_1_extendv2siv2di2, 0, IX86_BUILTIN_PMOVSXDQ128, 0, 0 },
17000 { MASK_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, 0, IX86_BUILTIN_PMOVZXBW128, 0, 0 },
17001 { MASK_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, 0, IX86_BUILTIN_PMOVZXBD128, 0, 0 },
17002 { MASK_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, 0, IX86_BUILTIN_PMOVZXBQ128, 0, 0 },
17003 { MASK_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, 0, IX86_BUILTIN_PMOVZXWD128, 0, 0 },
17004 { MASK_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, 0, IX86_BUILTIN_PMOVZXWQ128, 0, 0 },
17005 { MASK_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, 0, IX86_BUILTIN_PMOVZXDQ128, 0, 0 },
17006 { MASK_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, 0, 0 },
17008 /* Fake 1 arg builtins with a constant smaller than 8 bits as the
17010 { MASK_SSE4_1, CODE_FOR_sse4_1_roundpd, 0, IX86_BUILTIN_ROUNDPD, 0, 0 },
17011 { MASK_SSE4_1, CODE_FOR_sse4_1_roundps, 0, IX86_BUILTIN_ROUNDPS, 0, 0 },
17014 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
17015 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX builtins. */
17018 ix86_init_mmx_sse_builtins (void)
17020 const struct builtin_description * d;
17023 tree V16QI_type_node = build_vector_type_for_mode (char_type_node, V16QImode);
17024 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
17025 tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
17026 tree V2DI_type_node
17027 = build_vector_type_for_mode (long_long_integer_type_node, V2DImode);
17028 tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
17029 tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
17030 tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
17031 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
17032 tree V8QI_type_node = build_vector_type_for_mode (char_type_node, V8QImode);
17033 tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);
17035 tree pchar_type_node = build_pointer_type (char_type_node);
17036 tree pcchar_type_node = build_pointer_type (
17037 build_type_variant (char_type_node, 1, 0));
17038 tree pfloat_type_node = build_pointer_type (float_type_node);
17039 tree pcfloat_type_node = build_pointer_type (
17040 build_type_variant (float_type_node, 1, 0));
17041 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
17042 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
17043 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
17046 tree int_ftype_v4sf_v4sf
17047 = build_function_type_list (integer_type_node,
17048 V4SF_type_node, V4SF_type_node, NULL_TREE);
17049 tree v4si_ftype_v4sf_v4sf
17050 = build_function_type_list (V4SI_type_node,
17051 V4SF_type_node, V4SF_type_node, NULL_TREE);
17052 /* MMX/SSE/integer conversions. */
17053 tree int_ftype_v4sf
17054 = build_function_type_list (integer_type_node,
17055 V4SF_type_node, NULL_TREE);
17056 tree int64_ftype_v4sf
17057 = build_function_type_list (long_long_integer_type_node,
17058 V4SF_type_node, NULL_TREE);
17059 tree int_ftype_v8qi
17060 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
17061 tree v4sf_ftype_v4sf_int
17062 = build_function_type_list (V4SF_type_node,
17063 V4SF_type_node, integer_type_node, NULL_TREE);
17064 tree v4sf_ftype_v4sf_int64
17065 = build_function_type_list (V4SF_type_node,
17066 V4SF_type_node, long_long_integer_type_node,
17068 tree v4sf_ftype_v4sf_v2si
17069 = build_function_type_list (V4SF_type_node,
17070 V4SF_type_node, V2SI_type_node, NULL_TREE);
17072 /* Miscellaneous. */
17073 tree v8qi_ftype_v4hi_v4hi
17074 = build_function_type_list (V8QI_type_node,
17075 V4HI_type_node, V4HI_type_node, NULL_TREE);
17076 tree v4hi_ftype_v2si_v2si
17077 = build_function_type_list (V4HI_type_node,
17078 V2SI_type_node, V2SI_type_node, NULL_TREE);
17079 tree v4sf_ftype_v4sf_v4sf_int
17080 = build_function_type_list (V4SF_type_node,
17081 V4SF_type_node, V4SF_type_node,
17082 integer_type_node, NULL_TREE);
17083 tree v2si_ftype_v4hi_v4hi
17084 = build_function_type_list (V2SI_type_node,
17085 V4HI_type_node, V4HI_type_node, NULL_TREE);
17086 tree v4hi_ftype_v4hi_int
17087 = build_function_type_list (V4HI_type_node,
17088 V4HI_type_node, integer_type_node, NULL_TREE);
17089 tree v4hi_ftype_v4hi_di
17090 = build_function_type_list (V4HI_type_node,
17091 V4HI_type_node, long_long_unsigned_type_node,
17093 tree v2si_ftype_v2si_di
17094 = build_function_type_list (V2SI_type_node,
17095 V2SI_type_node, long_long_unsigned_type_node,
17097 tree void_ftype_void
17098 = build_function_type (void_type_node, void_list_node);
17099 tree void_ftype_unsigned
17100 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
17101 tree void_ftype_unsigned_unsigned
17102 = build_function_type_list (void_type_node, unsigned_type_node,
17103 unsigned_type_node, NULL_TREE);
17104 tree void_ftype_pcvoid_unsigned_unsigned
17105 = build_function_type_list (void_type_node, const_ptr_type_node,
17106 unsigned_type_node, unsigned_type_node,
17108 tree unsigned_ftype_void
17109 = build_function_type (unsigned_type_node, void_list_node);
17110 tree v2si_ftype_v4sf
17111 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
17112 /* Loads/stores. */
17113 tree void_ftype_v8qi_v8qi_pchar
17114 = build_function_type_list (void_type_node,
17115 V8QI_type_node, V8QI_type_node,
17116 pchar_type_node, NULL_TREE);
17117 tree v4sf_ftype_pcfloat
17118 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
17119 /* @@@ the type is bogus */
17120 tree v4sf_ftype_v4sf_pv2si
17121 = build_function_type_list (V4SF_type_node,
17122 V4SF_type_node, pv2si_type_node, NULL_TREE);
17123 tree void_ftype_pv2si_v4sf
17124 = build_function_type_list (void_type_node,
17125 pv2si_type_node, V4SF_type_node, NULL_TREE);
17126 tree void_ftype_pfloat_v4sf
17127 = build_function_type_list (void_type_node,
17128 pfloat_type_node, V4SF_type_node, NULL_TREE);
17129 tree void_ftype_pdi_di
17130 = build_function_type_list (void_type_node,
17131 pdi_type_node, long_long_unsigned_type_node,
17133 tree void_ftype_pv2di_v2di
17134 = build_function_type_list (void_type_node,
17135 pv2di_type_node, V2DI_type_node, NULL_TREE);
17136 /* Normal vector unops. */
17137 tree v4sf_ftype_v4sf
17138 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
17139 tree v16qi_ftype_v16qi
17140 = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE);
17141 tree v8hi_ftype_v8hi
17142 = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE);
17143 tree v4si_ftype_v4si
17144 = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
17145 tree v8qi_ftype_v8qi
17146 = build_function_type_list (V8QI_type_node, V8QI_type_node, NULL_TREE);
17147 tree v4hi_ftype_v4hi
17148 = build_function_type_list (V4HI_type_node, V4HI_type_node, NULL_TREE);
17150 /* Normal vector binops. */
17151 tree v4sf_ftype_v4sf_v4sf
17152 = build_function_type_list (V4SF_type_node,
17153 V4SF_type_node, V4SF_type_node, NULL_TREE);
17154 tree v8qi_ftype_v8qi_v8qi
17155 = build_function_type_list (V8QI_type_node,
17156 V8QI_type_node, V8QI_type_node, NULL_TREE);
17157 tree v4hi_ftype_v4hi_v4hi
17158 = build_function_type_list (V4HI_type_node,
17159 V4HI_type_node, V4HI_type_node, NULL_TREE);
17160 tree v2si_ftype_v2si_v2si
17161 = build_function_type_list (V2SI_type_node,
17162 V2SI_type_node, V2SI_type_node, NULL_TREE);
17163 tree di_ftype_di_di
17164 = build_function_type_list (long_long_unsigned_type_node,
17165 long_long_unsigned_type_node,
17166 long_long_unsigned_type_node, NULL_TREE);
17168 tree di_ftype_di_di_int
17169 = build_function_type_list (long_long_unsigned_type_node,
17170 long_long_unsigned_type_node,
17171 long_long_unsigned_type_node,
17172 integer_type_node, NULL_TREE);
17174 tree v2si_ftype_v2sf
17175 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
17176 tree v2sf_ftype_v2si
17177 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
17178 tree v2si_ftype_v2si
17179 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
17180 tree v2sf_ftype_v2sf
17181 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
17182 tree v2sf_ftype_v2sf_v2sf
17183 = build_function_type_list (V2SF_type_node,
17184 V2SF_type_node, V2SF_type_node, NULL_TREE);
17185 tree v2si_ftype_v2sf_v2sf
17186 = build_function_type_list (V2SI_type_node,
17187 V2SF_type_node, V2SF_type_node, NULL_TREE);
17188 tree pint_type_node = build_pointer_type (integer_type_node);
17189 tree pdouble_type_node = build_pointer_type (double_type_node);
17190 tree pcdouble_type_node = build_pointer_type (
17191 build_type_variant (double_type_node, 1, 0));
17192 tree int_ftype_v2df_v2df
17193 = build_function_type_list (integer_type_node,
17194 V2DF_type_node, V2DF_type_node, NULL_TREE);
17196 tree void_ftype_pcvoid
17197 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
17198 tree v4sf_ftype_v4si
17199 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
17200 tree v4si_ftype_v4sf
17201 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
17202 tree v2df_ftype_v4si
17203 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
17204 tree v4si_ftype_v2df
17205 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
17206 tree v2si_ftype_v2df
17207 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
17208 tree v4sf_ftype_v2df
17209 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
17210 tree v2df_ftype_v2si
17211 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
17212 tree v2df_ftype_v4sf
17213 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
17214 tree int_ftype_v2df
17215 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
17216 tree int64_ftype_v2df
17217 = build_function_type_list (long_long_integer_type_node,
17218 V2DF_type_node, NULL_TREE);
17219 tree v2df_ftype_v2df_int
17220 = build_function_type_list (V2DF_type_node,
17221 V2DF_type_node, integer_type_node, NULL_TREE);
17222 tree v2df_ftype_v2df_int64
17223 = build_function_type_list (V2DF_type_node,
17224 V2DF_type_node, long_long_integer_type_node,
17226 tree v4sf_ftype_v4sf_v2df
17227 = build_function_type_list (V4SF_type_node,
17228 V4SF_type_node, V2DF_type_node, NULL_TREE);
17229 tree v2df_ftype_v2df_v4sf
17230 = build_function_type_list (V2DF_type_node,
17231 V2DF_type_node, V4SF_type_node, NULL_TREE);
17232 tree v2df_ftype_v2df_v2df_int
17233 = build_function_type_list (V2DF_type_node,
17234 V2DF_type_node, V2DF_type_node,
17237 tree v2df_ftype_v2df_pcdouble
17238 = build_function_type_list (V2DF_type_node,
17239 V2DF_type_node, pcdouble_type_node, NULL_TREE);
17240 tree void_ftype_pdouble_v2df
17241 = build_function_type_list (void_type_node,
17242 pdouble_type_node, V2DF_type_node, NULL_TREE);
17243 tree void_ftype_pint_int
17244 = build_function_type_list (void_type_node,
17245 pint_type_node, integer_type_node, NULL_TREE);
17246 tree void_ftype_v16qi_v16qi_pchar
17247 = build_function_type_list (void_type_node,
17248 V16QI_type_node, V16QI_type_node,
17249 pchar_type_node, NULL_TREE);
17250 tree v2df_ftype_pcdouble
17251 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
17252 tree v2df_ftype_v2df_v2df
17253 = build_function_type_list (V2DF_type_node,
17254 V2DF_type_node, V2DF_type_node, NULL_TREE);
17255 tree v16qi_ftype_v16qi_v16qi
17256 = build_function_type_list (V16QI_type_node,
17257 V16QI_type_node, V16QI_type_node, NULL_TREE);
17258 tree v8hi_ftype_v8hi_v8hi
17259 = build_function_type_list (V8HI_type_node,
17260 V8HI_type_node, V8HI_type_node, NULL_TREE);
17261 tree v4si_ftype_v4si_v4si
17262 = build_function_type_list (V4SI_type_node,
17263 V4SI_type_node, V4SI_type_node, NULL_TREE);
17264 tree v2di_ftype_v2di_v2di
17265 = build_function_type_list (V2DI_type_node,
17266 V2DI_type_node, V2DI_type_node, NULL_TREE);
17267 tree v2di_ftype_v2df_v2df
17268 = build_function_type_list (V2DI_type_node,
17269 V2DF_type_node, V2DF_type_node, NULL_TREE);
17270 tree v2df_ftype_v2df
17271 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
17272 tree v2di_ftype_v2di_int
17273 = build_function_type_list (V2DI_type_node,
17274 V2DI_type_node, integer_type_node, NULL_TREE);
17275 tree v2di_ftype_v2di_v2di_int
17276 = build_function_type_list (V2DI_type_node, V2DI_type_node,
17277 V2DI_type_node, integer_type_node, NULL_TREE);
17278 tree v4si_ftype_v4si_int
17279 = build_function_type_list (V4SI_type_node,
17280 V4SI_type_node, integer_type_node, NULL_TREE);
17281 tree v8hi_ftype_v8hi_int
17282 = build_function_type_list (V8HI_type_node,
17283 V8HI_type_node, integer_type_node, NULL_TREE);
17284 tree v4si_ftype_v8hi_v8hi
17285 = build_function_type_list (V4SI_type_node,
17286 V8HI_type_node, V8HI_type_node, NULL_TREE);
17287 tree di_ftype_v8qi_v8qi
17288 = build_function_type_list (long_long_unsigned_type_node,
17289 V8QI_type_node, V8QI_type_node, NULL_TREE);
17290 tree di_ftype_v2si_v2si
17291 = build_function_type_list (long_long_unsigned_type_node,
17292 V2SI_type_node, V2SI_type_node, NULL_TREE);
17293 tree v2di_ftype_v16qi_v16qi
17294 = build_function_type_list (V2DI_type_node,
17295 V16QI_type_node, V16QI_type_node, NULL_TREE);
17296 tree v2di_ftype_v4si_v4si
17297 = build_function_type_list (V2DI_type_node,
17298 V4SI_type_node, V4SI_type_node, NULL_TREE);
17299 tree int_ftype_v16qi
17300 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
17301 tree v16qi_ftype_pcchar
17302 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
17303 tree void_ftype_pchar_v16qi
17304 = build_function_type_list (void_type_node,
17305 pchar_type_node, V16QI_type_node, NULL_TREE);
17307 tree v2di_ftype_v2di_unsigned_unsigned
17308 = build_function_type_list (V2DI_type_node, V2DI_type_node,
17309 unsigned_type_node, unsigned_type_node,
17311 tree v2di_ftype_v2di_v2di_unsigned_unsigned
17312 = build_function_type_list (V2DI_type_node, V2DI_type_node, V2DI_type_node,
17313 unsigned_type_node, unsigned_type_node,
17315 tree v2di_ftype_v2di_v16qi
17316 = build_function_type_list (V2DI_type_node, V2DI_type_node, V16QI_type_node,
17318 tree v2df_ftype_v2df_v2df_v2df
17319 = build_function_type_list (V2DF_type_node,
17320 V2DF_type_node, V2DF_type_node,
17321 V2DF_type_node, NULL_TREE);
17322 tree v4sf_ftype_v4sf_v4sf_v4sf
17323 = build_function_type_list (V4SF_type_node,
17324 V4SF_type_node, V4SF_type_node,
17325 V4SF_type_node, NULL_TREE);
17326 tree v8hi_ftype_v16qi
17327 = build_function_type_list (V8HI_type_node, V16QI_type_node,
17329 tree v4si_ftype_v16qi
17330 = build_function_type_list (V4SI_type_node, V16QI_type_node,
17332 tree v2di_ftype_v16qi
17333 = build_function_type_list (V2DI_type_node, V16QI_type_node,
17335 tree v4si_ftype_v8hi
17336 = build_function_type_list (V4SI_type_node, V8HI_type_node,
17338 tree v2di_ftype_v8hi
17339 = build_function_type_list (V2DI_type_node, V8HI_type_node,
17341 tree v2di_ftype_v4si
17342 = build_function_type_list (V2DI_type_node, V4SI_type_node,
17344 tree v2di_ftype_pv2di
17345 = build_function_type_list (V2DI_type_node, pv2di_type_node,
17347 tree v16qi_ftype_v16qi_v16qi_int
17348 = build_function_type_list (V16QI_type_node, V16QI_type_node,
17349 V16QI_type_node, integer_type_node,
17351 tree v16qi_ftype_v16qi_v16qi_v16qi
17352 = build_function_type_list (V16QI_type_node, V16QI_type_node,
17353 V16QI_type_node, V16QI_type_node,
17355 tree v8hi_ftype_v8hi_v8hi_int
17356 = build_function_type_list (V8HI_type_node, V8HI_type_node,
17357 V8HI_type_node, integer_type_node,
17359 tree v4si_ftype_v4si_v4si_int
17360 = build_function_type_list (V4SI_type_node, V4SI_type_node,
17361 V4SI_type_node, integer_type_node,
17363 tree int_ftype_v2di_v2di
17364 = build_function_type_list (integer_type_node,
17365 V2DI_type_node, V2DI_type_node,
17369 tree float128_type;
17372 /* The __float80 type. */
17373 if (TYPE_MODE (long_double_type_node) == XFmode)
17374 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
17378 /* The __float80 type. */
17379 float80_type = make_node (REAL_TYPE);
17380 TYPE_PRECISION (float80_type) = 80;
17381 layout_type (float80_type);
17382 (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
17387 float128_type = make_node (REAL_TYPE);
17388 TYPE_PRECISION (float128_type) = 128;
17389 layout_type (float128_type);
17390 (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
17393 /* Add all SSE builtins that are more or less simple operations on
17395 for (i = 0, d = bdesc_sse_3arg;
17396 i < ARRAY_SIZE (bdesc_sse_3arg);
17399 /* Use one of the operands; the target can have a different mode for
17400 mask-generating compares. */
17401 enum machine_mode mode;
17406 mode = insn_data[d->icode].operand[1].mode;
17411 type = v16qi_ftype_v16qi_v16qi_int;
17414 type = v8hi_ftype_v8hi_v8hi_int;
17417 type = v4si_ftype_v4si_v4si_int;
17420 type = v2di_ftype_v2di_v2di_int;
17423 type = v2df_ftype_v2df_v2df_int;
17426 type = v4sf_ftype_v4sf_v4sf_int;
17429 gcc_unreachable ();
17432 /* Override for variable blends. */
17435 case CODE_FOR_sse4_1_blendvpd:
17436 type = v2df_ftype_v2df_v2df_v2df;
17438 case CODE_FOR_sse4_1_blendvps:
17439 type = v4sf_ftype_v4sf_v4sf_v4sf;
17441 case CODE_FOR_sse4_1_pblendvb:
17442 type = v16qi_ftype_v16qi_v16qi_v16qi;
17448 def_builtin (d->mask, d->name, type, d->code);
17451 /* Add all builtins that are more or less simple operations on two
17453 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
17455 /* Use one of the operands; the target can have a different mode for
17456 mask-generating compares. */
17457 enum machine_mode mode;
17462 mode = insn_data[d->icode].operand[1].mode;
17467 type = v16qi_ftype_v16qi_v16qi;
17470 type = v8hi_ftype_v8hi_v8hi;
17473 type = v4si_ftype_v4si_v4si;
17476 type = v2di_ftype_v2di_v2di;
17479 type = v2df_ftype_v2df_v2df;
17482 type = v4sf_ftype_v4sf_v4sf;
17485 type = v8qi_ftype_v8qi_v8qi;
17488 type = v4hi_ftype_v4hi_v4hi;
17491 type = v2si_ftype_v2si_v2si;
17494 type = di_ftype_di_di;
17498 gcc_unreachable ();
17501 /* Override for comparisons. */
17502 if (d->icode == CODE_FOR_sse_maskcmpv4sf3
17503 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3)
17504 type = v4si_ftype_v4sf_v4sf;
17506 if (d->icode == CODE_FOR_sse2_maskcmpv2df3
17507 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
17508 type = v2di_ftype_v2df_v2df;
17510 def_builtin (d->mask, d->name, type, d->code);
17513 /* Add all builtins that are more or less simple operations on 1 operand. */
17514 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
17516 enum machine_mode mode;
17521 mode = insn_data[d->icode].operand[1].mode;
17526 type = v16qi_ftype_v16qi;
17529 type = v8hi_ftype_v8hi;
17532 type = v4si_ftype_v4si;
17535 type = v2df_ftype_v2df;
17538 type = v4sf_ftype_v4sf;
17541 type = v8qi_ftype_v8qi;
17544 type = v4hi_ftype_v4hi;
17547 type = v2si_ftype_v2si;
17554 def_builtin (d->mask, d->name, type, d->code);
17557 /* Add the remaining MMX insns with somewhat more complicated types. */
17558 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
17559 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
17560 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
17561 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
17563 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
17564 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
17565 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
17567 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
17568 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
17570 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
17571 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
17573 /* comi/ucomi insns. */
17574 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
17575 if (d->mask == MASK_SSE2)
17576 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
17578 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
17581 for (i = 0, d = bdesc_ptest; i < ARRAY_SIZE (bdesc_ptest); i++, d++)
17582 def_builtin (d->mask, d->name, int_ftype_v2di_v2di, d->code);
17584 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
17585 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
17586 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
17588 def_builtin (MASK_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
17589 def_builtin (MASK_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
17590 def_builtin_const (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
17591 def_builtin_const (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
17592 def_builtin_const (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
17593 def_builtin_const (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
17594 def_builtin_const (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
17595 def_builtin_const (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
17596 def_builtin_const (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
17597 def_builtin_const (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
17598 def_builtin_const (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);
17600 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
17602 def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
17603 def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
17605 def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
17606 def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
17607 def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
17608 def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
17610 def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
17611 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
17612 def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
17613 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
17615 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
17617 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
17619 def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
17620 def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
17621 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
17622 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
17623 def_builtin_const (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
17624 def_builtin_const (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
17626 def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
17628 /* Original 3DNow! */
17629 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
17630 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
17631 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
17632 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
17633 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
17634 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
17635 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
17636 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
17637 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
17638 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
17639 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
17640 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
17641 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
17642 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
17643 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
17644 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
17645 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
17646 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
17647 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
17648 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
17650 /* 3DNow! extension as used in the Athlon CPU. */
17651 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
17652 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
17653 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
17654 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
17655 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
17656 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
17659 def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
17661 def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
17662 def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
17664 def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADHPD);
17665 def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADLPD);
17667 def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
17668 def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
17669 def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
17670 def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
17671 def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
17673 def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
17674 def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
17675 def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
17676 def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
17678 def_builtin_const (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
17679 def_builtin_const (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
17681 def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
17683 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
17684 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
17686 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
17687 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
17688 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
17689 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
17690 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
17692 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
17694 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
17695 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
17696 def_builtin_const (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
17697 def_builtin_const (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);
17699 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
17700 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
17701 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
17703 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
17704 def_builtin_const (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
17705 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
17706 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
17708 def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
17709 def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
17710 def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
17712 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
17713 def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
17715 def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq", di_ftype_v2si_v2si, IX86_BUILTIN_PMULUDQ);
17716 def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq128", v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULUDQ128);
17718 def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
17719 def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
17720 def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
17721 def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
17722 def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSLLW128);
17723 def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSLLD128);
17724 def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
17726 def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
17727 def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
17728 def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
17729 def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
17730 def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSRLW128);
17731 def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSRLD128);
17732 def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
17734 def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
17735 def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
17736 def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSRAW128);
17737 def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSRAD128);
17739 def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
17741 /* Prescott New Instructions. */
17742 def_builtin (MASK_SSE3, "__builtin_ia32_monitor",
17743 void_ftype_pcvoid_unsigned_unsigned,
17744 IX86_BUILTIN_MONITOR);
17745 def_builtin (MASK_SSE3, "__builtin_ia32_mwait",
17746 void_ftype_unsigned_unsigned,
17747 IX86_BUILTIN_MWAIT);
17748 def_builtin (MASK_SSE3, "__builtin_ia32_lddqu",
17749 v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);
17752 def_builtin (MASK_SSSE3, "__builtin_ia32_palignr128",
17753 v2di_ftype_v2di_v2di_int, IX86_BUILTIN_PALIGNR128);
17754 def_builtin (MASK_SSSE3, "__builtin_ia32_palignr", di_ftype_di_di_int,
17755 IX86_BUILTIN_PALIGNR);
17758 def_builtin (MASK_SSE4_1, "__builtin_ia32_movntdqa",
17759 v2di_ftype_pv2di, IX86_BUILTIN_MOVNTDQA);
17760 def_builtin (MASK_SSE4_1, "__builtin_ia32_pmovsxbw128",
17761 v8hi_ftype_v16qi, IX86_BUILTIN_PMOVSXBW128);
17762 def_builtin (MASK_SSE4_1, "__builtin_ia32_pmovsxbd128",
17763 v4si_ftype_v16qi, IX86_BUILTIN_PMOVSXBD128);
17764 def_builtin (MASK_SSE4_1, "__builtin_ia32_pmovsxbq128",
17765 v2di_ftype_v16qi, IX86_BUILTIN_PMOVSXBQ128);
17766 def_builtin (MASK_SSE4_1, "__builtin_ia32_pmovsxwd128",
17767 v4si_ftype_v8hi, IX86_BUILTIN_PMOVSXWD128);
17768 def_builtin (MASK_SSE4_1, "__builtin_ia32_pmovsxwq128",
17769 v2di_ftype_v8hi, IX86_BUILTIN_PMOVSXWQ128);
17770 def_builtin (MASK_SSE4_1, "__builtin_ia32_pmovsxdq128",
17771 v2di_ftype_v4si, IX86_BUILTIN_PMOVSXDQ128);
17772 def_builtin (MASK_SSE4_1, "__builtin_ia32_pmovzxbw128",
17773 v8hi_ftype_v16qi, IX86_BUILTIN_PMOVZXBW128);
17774 def_builtin (MASK_SSE4_1, "__builtin_ia32_pmovzxbd128",
17775 v4si_ftype_v16qi, IX86_BUILTIN_PMOVZXBD128);
17776 def_builtin (MASK_SSE4_1, "__builtin_ia32_pmovzxbq128",
17777 v2di_ftype_v16qi, IX86_BUILTIN_PMOVZXBQ128);
17778 def_builtin (MASK_SSE4_1, "__builtin_ia32_pmovzxwd128",
17779 v4si_ftype_v8hi, IX86_BUILTIN_PMOVZXWD128);
17780 def_builtin (MASK_SSE4_1, "__builtin_ia32_pmovzxwq128",
17781 v2di_ftype_v8hi, IX86_BUILTIN_PMOVZXWQ128);
17782 def_builtin (MASK_SSE4_1, "__builtin_ia32_pmovzxdq128",
17783 v2di_ftype_v4si, IX86_BUILTIN_PMOVZXDQ128);
17784 def_builtin (MASK_SSE4_1, "__builtin_ia32_pmuldq128",
17785 v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULDQ128);
17786 def_builtin_const (MASK_SSE4_1, "__builtin_ia32_roundpd",
17787 v2df_ftype_v2df_int, IX86_BUILTIN_ROUNDPD);
17788 def_builtin_const (MASK_SSE4_1, "__builtin_ia32_roundps",
17789 v4sf_ftype_v4sf_int, IX86_BUILTIN_ROUNDPS);
17790 def_builtin_const (MASK_SSE4_1, "__builtin_ia32_roundsd",
17791 v2df_ftype_v2df_v2df_int, IX86_BUILTIN_ROUNDSD);
17792 def_builtin_const (MASK_SSE4_1, "__builtin_ia32_roundss",
17793 v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_ROUNDSS);
17795 /* AMDFAM10 SSE4A New built-ins */
17796 def_builtin (MASK_SSE4A, "__builtin_ia32_movntsd",
17797 void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTSD);
17798 def_builtin (MASK_SSE4A, "__builtin_ia32_movntss",
17799 void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTSS);
17800 def_builtin (MASK_SSE4A, "__builtin_ia32_extrqi",
17801 v2di_ftype_v2di_unsigned_unsigned, IX86_BUILTIN_EXTRQI);
17802 def_builtin (MASK_SSE4A, "__builtin_ia32_extrq",
17803 v2di_ftype_v2di_v16qi, IX86_BUILTIN_EXTRQ);
17804 def_builtin (MASK_SSE4A, "__builtin_ia32_insertqi",
17805 v2di_ftype_v2di_v2di_unsigned_unsigned, IX86_BUILTIN_INSERTQI);
17806 def_builtin (MASK_SSE4A, "__builtin_ia32_insertq",
17807 v2di_ftype_v2di_v2di, IX86_BUILTIN_INSERTQ);
17809 /* Access to the vec_init patterns. */
17810 ftype = build_function_type_list (V2SI_type_node, integer_type_node,
17811 integer_type_node, NULL_TREE);
17812 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v2si",
17813 ftype, IX86_BUILTIN_VEC_INIT_V2SI);
17815 ftype = build_function_type_list (V4HI_type_node, short_integer_type_node,
17816 short_integer_type_node,
17817 short_integer_type_node,
17818 short_integer_type_node, NULL_TREE);
17819 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v4hi",
17820 ftype, IX86_BUILTIN_VEC_INIT_V4HI);
17822 ftype = build_function_type_list (V8QI_type_node, char_type_node,
17823 char_type_node, char_type_node,
17824 char_type_node, char_type_node,
17825 char_type_node, char_type_node,
17826 char_type_node, NULL_TREE);
17827 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v8qi",
17828 ftype, IX86_BUILTIN_VEC_INIT_V8QI);
17830 /* Access to the vec_extract patterns. */
17831 ftype = build_function_type_list (double_type_node, V2DF_type_node,
17832 integer_type_node, NULL_TREE);
17833 def_builtin (MASK_SSE2, "__builtin_ia32_vec_ext_v2df",
17834 ftype, IX86_BUILTIN_VEC_EXT_V2DF);
17836 ftype = build_function_type_list (long_long_integer_type_node,
17837 V2DI_type_node, integer_type_node,
17839 def_builtin (MASK_SSE2, "__builtin_ia32_vec_ext_v2di",
17840 ftype, IX86_BUILTIN_VEC_EXT_V2DI);
17842 ftype = build_function_type_list (float_type_node, V4SF_type_node,
17843 integer_type_node, NULL_TREE);
17844 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v4sf",
17845 ftype, IX86_BUILTIN_VEC_EXT_V4SF);
17847 ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
17848 integer_type_node, NULL_TREE);
17849 def_builtin (MASK_SSE2, "__builtin_ia32_vec_ext_v4si",
17850 ftype, IX86_BUILTIN_VEC_EXT_V4SI);
17852 ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
17853 integer_type_node, NULL_TREE);
17854 def_builtin (MASK_SSE2, "__builtin_ia32_vec_ext_v8hi",
17855 ftype, IX86_BUILTIN_VEC_EXT_V8HI);
17857 ftype = build_function_type_list (intHI_type_node, V4HI_type_node,
17858 integer_type_node, NULL_TREE);
17859 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_ext_v4hi",
17860 ftype, IX86_BUILTIN_VEC_EXT_V4HI);
17862 ftype = build_function_type_list (intSI_type_node, V2SI_type_node,
17863 integer_type_node, NULL_TREE);
17864 def_builtin (MASK_MMX, "__builtin_ia32_vec_ext_v2si",
17865 ftype, IX86_BUILTIN_VEC_EXT_V2SI);
17867 ftype = build_function_type_list (intQI_type_node, V16QI_type_node,
17868 integer_type_node, NULL_TREE);
17869 def_builtin (MASK_SSE2, "__builtin_ia32_vec_ext_v16qi",
17870 ftype, IX86_BUILTIN_VEC_EXT_V16QI);
17872 /* Access to the vec_set patterns. */
17873 ftype = build_function_type_list (V2DI_type_node, V2DI_type_node,
17875 integer_type_node, NULL_TREE);
17876 def_builtin (MASK_SSE4_1 | MASK_64BIT, "__builtin_ia32_vec_set_v2di",
17877 ftype, IX86_BUILTIN_VEC_SET_V2DI);
17879 ftype = build_function_type_list (V4SF_type_node, V4SF_type_node,
17881 integer_type_node, NULL_TREE);
17882 def_builtin (MASK_SSE4_1, "__builtin_ia32_vec_set_v4sf",
17883 ftype, IX86_BUILTIN_VEC_SET_V4SF);
17885 ftype = build_function_type_list (V4SI_type_node, V4SI_type_node,
17887 integer_type_node, NULL_TREE);
17888 def_builtin (MASK_SSE4_1, "__builtin_ia32_vec_set_v4si",
17889 ftype, IX86_BUILTIN_VEC_SET_V4SI);
17891 ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
17893 integer_type_node, NULL_TREE);
17894 def_builtin (MASK_SSE2, "__builtin_ia32_vec_set_v8hi",
17895 ftype, IX86_BUILTIN_VEC_SET_V8HI);
17897 ftype = build_function_type_list (V4HI_type_node, V4HI_type_node,
17899 integer_type_node, NULL_TREE);
17900 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_set_v4hi",
17901 ftype, IX86_BUILTIN_VEC_SET_V4HI);
17903 ftype = build_function_type_list (V16QI_type_node, V16QI_type_node,
17905 integer_type_node, NULL_TREE);
17906 def_builtin (MASK_SSE4_1, "__builtin_ia32_vec_set_v16qi",
17907 ftype, IX86_BUILTIN_VEC_SET_V16QI);
/* Builtin initialization entry point.  The only action visible here is
   the call to ix86_init_mmx_sse_builtins.
   NOTE(review): the return type, the braces and any condition guarding
   the call are not visible in this excerpt.  */
17911 ix86_init_builtins (void)
17914 ix86_init_mmx_sse_builtins ();
17917 /* Errors in the source file can cause expand_expr to return const0_rtx
17918 where we expect a vector. To avoid crashing, use one of the vector
17919 clear instructions. */
/* X is an already expanded operand and MODE the mode the caller expects
   it to have.  When X is const0_rtx it is replaced by CONST0_RTX (MODE);
   any other X is left untouched.
   NOTE(review): the return statement is not visible in this excerpt;
   every caller assigns the result back to its operand, so presumably X
   is returned -- confirm.  */
17921 safe_vector_operand (rtx x, enum machine_mode mode)
/* The test is a pointer comparison against the shared const0_rtx.  */
17923 if (x == const0_rtx)
17924 x = CONST0_RTX (mode);
/* Expand the three-argument builtin call EXP through insn pattern ICODE,
   which is generated from (result, op0, op1, op2).  For the variable
   blend patterns the third argument is moved into the first SSE
   register; for every other pattern it is checked against the predicate
   of operand 3 and an error is reported when it is rejected.
   NOTE(review): the remaining parameters, the braces, the switch heads
   and the return paths are missing from this excerpt, and the original
   header comment that follows is cut off before its end.  */
17928 /* Subroutine of ix86_expand_builtin to take care of SSE insns with
17929 4 operands. The third argument must be a constant smaller than 8
17933 ix86_expand_sse_4_operands_builtin (enum insn_code icode, tree exp,
17937 tree arg0 = CALL_EXPR_ARG (exp, 0);
17938 tree arg1 = CALL_EXPR_ARG (exp, 1);
17939 tree arg2 = CALL_EXPR_ARG (exp, 2);
17940 rtx op0 = expand_normal (arg0);
17941 rtx op1 = expand_normal (arg1);
17942 rtx op2 = expand_normal (arg2);
17943 enum machine_mode tmode = insn_data[icode].operand[0].mode;
17944 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
17945 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
17946 enum machine_mode mode2;
17949 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
17950 op0 = copy_to_mode_reg (mode0, op0);
17951 if ((optimize && !register_operand (op1, mode1))
17952 || !(*insn_data[icode].operand[2].predicate) (op1, mode1))
17953 op1 = copy_to_mode_reg (mode1, op1);
17957 case CODE_FOR_sse4_1_blendvpd:
17958 case CODE_FOR_sse4_1_blendvps:
17959 case CODE_FOR_sse4_1_pblendvb:
17960 /* The third argument of variable blends must be xmm0. */
17961 xmm0 = gen_rtx_REG (tmode, FIRST_SSE_REG);
17962 emit_move_insn (xmm0, op2);
/* NOTE(review): mode2 is taken from operand 2, yet the predicate applied
   just below belongs to operand 3 -- confirm that operand[2].mode is
   really the intended mode here.  */
17966 mode2 = insn_data[icode].operand[2].mode;
17967 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
/* The round-scalar patterns get their own diagnostic; the other
   diagnostic below is presumably the default case, whose label is not
   visible in this excerpt.  */
17971 case CODE_FOR_sse4_1_roundsd:
17972 case CODE_FOR_sse4_1_roundss:
17973 error ("the third argument must be a 4-bit immediate");
17976 error ("the third argument must be a 8-bit immediate");
17986 || GET_MODE (target) != tmode
17987 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
17988 target = gen_reg_rtx (tmode);
/* Generate the insn pattern from the result and the three inputs.  */
17989 pat = GEN_FCN (icode) (target, op0, op1, op2);
17996 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
/* ICODE is the insn pattern to generate, EXP the CALL_EXPR whose two
   arguments become the inputs, and TARGET a suggested place for the
   result.
   NOTE(review): the return type, the braces, the declarations of pat
   and xops and the return statements are not visible in this
   excerpt.  */
17999 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
18002 tree arg0 = CALL_EXPR_ARG (exp, 0);
18003 tree arg1 = CALL_EXPR_ARG (exp, 1);
18004 rtx op0 = expand_normal (arg0);
18005 rtx op1 = expand_normal (arg1);
18006 enum machine_mode tmode = insn_data[icode].operand[0].mode;
18007 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
18008 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
/* Replace a stray const0_rtx by a zero of the expected vector mode.  */
18010 if (VECTOR_MODE_P (mode0))
18011 op0 = safe_vector_operand (op0, mode0);
18012 if (VECTOR_MODE_P (mode1))
18013 op1 = safe_vector_operand (op1, mode1);
/* TARGET is only reused when not optimizing and when it already has the
   result mode and satisfies the predicate of operand 0.  */
18015 if (optimize || !target
18016 || GET_MODE (target) != tmode
18017 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18018 target = gen_reg_rtx (tmode);
/* An SImode second input for a TImode operand is first loaded into a
   V4SImode register with gen_sse2_loadd and then viewed as TImode.  */
18020 if (GET_MODE (op1) == SImode && mode1 == TImode)
18022 rtx x = gen_reg_rtx (V4SImode);
18023 emit_insn (gen_sse2_loadd (x, op1));
18024 op1 = gen_lowpart (TImode, x);
18027 /* The insn must want input operands in the same modes as the
18029 gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
18030 && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode));
18032 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
18033 op0 = copy_to_mode_reg (mode0, op0);
18034 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
18035 op1 = copy_to_mode_reg (mode1, op1);
18037 /* ??? Using ix86_fixup_binary_operands is problematic when
18038 we've got mismatched modes. Fake it. */
/* When all three modes agree, ix86_fixup_binary_operands picks the
   result.  Otherwise, when optimizing or when ix86_binary_operator_ok
   rejects the operands, both inputs are forced into registers and a
   fresh result register is allocated.  */
18044 if (tmode == mode0 && tmode == mode1)
18046 target = ix86_fixup_binary_operands (UNKNOWN, tmode, xops);
18050 else if (optimize || !ix86_binary_operator_ok (UNKNOWN, tmode, xops))
18052 op0 = force_reg (mode0, op0);
18053 op1 = force_reg (mode1, op1);
18054 target = gen_reg_rtx (tmode);
18057 pat = GEN_FCN (icode) (target, op0, op1);
18064 /* Subroutine of ix86_expand_builtin to take care of stores. */
/* ICODE is the store pattern and EXP the CALL_EXPR.  The first call
   argument is used as the destination address and the second as the
   value to store.
   NOTE(review): the return type, the braces, the declaration of pat and
   the code following the pattern generation are not visible in this
   excerpt.  */
18067 ix86_expand_store_builtin (enum insn_code icode, tree exp)
18070 tree arg0 = CALL_EXPR_ARG (exp, 0);
18071 tree arg1 = CALL_EXPR_ARG (exp, 1);
18072 rtx op0 = expand_normal (arg0);
18073 rtx op1 = expand_normal (arg1);
18074 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
18075 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
18077 if (VECTOR_MODE_P (mode1))
18078 op1 = safe_vector_operand (op1, mode1);
/* The address goes into a Pmode register and is wrapped in a MEM of the
   mode of operand 0; the value is always copied into a register.  */
18080 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
18081 op1 = copy_to_mode_reg (mode1, op1);
18083 pat = GEN_FCN (icode) (op0, op1);
18089 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
/* ICODE is the insn pattern, EXP the CALL_EXPR and TARGET a suggested
   place for the result.  DO_LOAD presumably selects the path that turns
   the argument into a memory reference -- the condition that tests it
   is not visible in this excerpt.  For the roundpd and roundps patterns
   a second call argument is expanded and passed as an extra operand.
   NOTE(review): the return type, the braces, the switch head and the
   return statements are not visible either.  */
18092 ix86_expand_unop_builtin (enum insn_code icode, tree exp,
18093 rtx target, int do_load)
18096 tree arg0 = CALL_EXPR_ARG (exp, 0);
18097 rtx op0 = expand_normal (arg0);
18098 enum machine_mode tmode = insn_data[icode].operand[0].mode;
18099 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
18101 if (optimize || !target
18102 || GET_MODE (target) != tmode
18103 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18104 target = gen_reg_rtx (tmode);
/* Memory path: the argument is used as an address held in a Pmode
   register.  */
18106 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
/* Value path: sanitize the operand and copy it into a register when
   optimizing or when the predicate of operand 1 rejects it.  */
18109 if (VECTOR_MODE_P (mode0))
18110 op0 = safe_vector_operand (op0, mode0);
18112 if ((optimize && !register_operand (op0, mode0))
18113 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
18114 op0 = copy_to_mode_reg (mode0, op0);
18119 case CODE_FOR_sse4_1_roundpd:
18120 case CODE_FOR_sse4_1_roundps:
18122 tree arg1 = CALL_EXPR_ARG (exp, 1);
18123 rtx op1 = expand_normal (arg1);
18124 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
/* The extra argument must satisfy the predicate of operand 2.  */
18126 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
18128 error ("the second argument must be a 4-bit immediate");
18131 pat = GEN_FCN (icode) (target, op0, op1);
/* Every other pattern takes just the result and one input.  */
18135 pat = GEN_FCN (icode) (target, op0);
18145 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
18146 sqrtss, rsqrtss, rcpss. */
/* The call EXP has a single argument, but the pattern ICODE is generated
   from (result, op0, op1), i.e. it takes two inputs.
   NOTE(review): the statement that gives op1 its value is not visible
   in this excerpt, nor are the return type, the braces, the declaration
   of pat and the return statements.  */
18149 ix86_expand_unop1_builtin (enum insn_code icode, tree exp, rtx target)
18152 tree arg0 = CALL_EXPR_ARG (exp, 0);
18153 rtx op1, op0 = expand_normal (arg0);
18154 enum machine_mode tmode = insn_data[icode].operand[0].mode;
18155 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
18157 if (optimize || !target
18158 || GET_MODE (target) != tmode
18159 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18160 target = gen_reg_rtx (tmode);
18162 if (VECTOR_MODE_P (mode0))
18163 op0 = safe_vector_operand (op0, mode0);
18165 if ((optimize && !register_operand (op0, mode0))
18166 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
18167 op0 = copy_to_mode_reg (mode0, op0);
/* op1 is validated against the predicate of operand 2, but with mode0,
   the mode of operand 1.  */
18170 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
18171 op1 = copy_to_mode_reg (mode0, op1);
18173 pat = GEN_FCN (icode) (target, op0, op1);
18180 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
/* D describes the builtin: d->icode is the insn pattern, d->comparison
   the rtx comparison code and d->flag may carry
   BUILTIN_DESC_SWAP_OPERANDS.  EXP is the CALL_EXPR with the two values
   to compare.  A comparison rtx with code d->comparison and mode mode0
   is built from the two operands and handed to the pattern as its last
   operand, after the result and both inputs.
   NOTE(review): the remaining parameter, the braces, the body of the
   operand swap, the declarations of op2 and pat and the return
   statements are not visible in this excerpt.  */
18183 ix86_expand_sse_compare (const struct builtin_description *d, tree exp,
18187 tree arg0 = CALL_EXPR_ARG (exp, 0);
18188 tree arg1 = CALL_EXPR_ARG (exp, 1);
18189 rtx op0 = expand_normal (arg0);
18190 rtx op1 = expand_normal (arg1);
18192 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
18193 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
18194 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
18195 enum rtx_code comparison = d->comparison;
/* Replace a stray const0_rtx by a zero of the expected vector mode.  */
18197 if (VECTOR_MODE_P (mode0))
18198 op0 = safe_vector_operand (op0, mode0);
18199 if (VECTOR_MODE_P (mode1))
18200 op1 = safe_vector_operand (op1, mode1);
18202 /* Swap operands if we have a comparison that isn't available in
18204 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
18206 rtx tmp = gen_reg_rtx (mode1);
18207 emit_move_insn (tmp, op1);
18212 if (optimize || !target
18213 || GET_MODE (target) != tmode
18214 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
18215 target = gen_reg_rtx (tmode);
18217 if ((optimize && !register_operand (op0, mode0))
18218 || ! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
18219 op0 = copy_to_mode_reg (mode0, op0);
18220 if ((optimize && !register_operand (op1, mode1))
18221 || ! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
18222 op1 = copy_to_mode_reg (mode1, op1);
18224 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
18225 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
18232 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
/* D supplies the insn pattern (d->icode), the comparison code
   (d->comparison) and the BUILTIN_DESC_SWAP_OPERANDS flag; EXP is the
   CALL_EXPR with the two values to compare.  The pattern itself takes
   only the two inputs.  The result is built in a fresh SImode register
   that is first cleared; its QImode low part is then set from a
   comparison of code d->comparison, and the SImode register is
   returned.
   NOTE(review): the remaining parameter, the braces, the body of the
   operand swap and the operands of the final comparison are not visible
   in this excerpt.  */
18235 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
18239 tree arg0 = CALL_EXPR_ARG (exp, 0);
18240 tree arg1 = CALL_EXPR_ARG (exp, 1);
18241 rtx op0 = expand_normal (arg0);
18242 rtx op1 = expand_normal (arg1);
18243 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
18244 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
18245 enum rtx_code comparison = d->comparison;
/* Replace a stray const0_rtx by a zero of the expected vector mode.  */
18247 if (VECTOR_MODE_P (mode0))
18248 op0 = safe_vector_operand (op0, mode0);
18249 if (VECTOR_MODE_P (mode1))
18250 op1 = safe_vector_operand (op1, mode1);
18252 /* Swap operands if we have a comparison that isn't available in
18254 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
18261 target = gen_reg_rtx (SImode);
18262 emit_move_insn (target, const0_rtx);
18263 target = gen_rtx_SUBREG (QImode, target, 0);
18265 if ((optimize && !register_operand (op0, mode0))
18266 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
18267 op0 = copy_to_mode_reg (mode0, op0);
18268 if ((optimize && !register_operand (op1, mode1))
18269 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
18270 op1 = copy_to_mode_reg (mode1, op1);
18272 pat = GEN_FCN (d->icode) (op0, op1);
18276 emit_insn (gen_rtx_SET (VOIDmode,
18277 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
18278 gen_rtx_fmt_ee (comparison, QImode,
18282 return SUBREG_REG (target);
18285 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
/* D supplies the insn pattern (d->icode) and the comparison code
   (d->comparison); EXP is the CALL_EXPR with the two values to test.
   The pattern takes only the two inputs, and the SImode result register
   is returned.
   NOTE(review): the remaining parameter, the braces, the declaration of
   pat and the operands of the final comparison are not visible in this
   excerpt.  */
18288 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
18292 tree arg0 = CALL_EXPR_ARG (exp, 0);
18293 tree arg1 = CALL_EXPR_ARG (exp, 1);
18294 rtx op0 = expand_normal (arg0);
18295 rtx op1 = expand_normal (arg1);
18296 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
18297 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
18298 enum rtx_code comparison = d->comparison;
/* Replace a stray const0_rtx by a zero of the expected vector mode.  */
18300 if (VECTOR_MODE_P (mode0))
18301 op0 = safe_vector_operand (op0, mode0);
18302 if (VECTOR_MODE_P (mode1))
18303 op1 = safe_vector_operand (op1, mode1);
/* The incoming TARGET is not consulted here: a fresh SImode register is
   cleared and from now on TARGET names its QImode low part.  */
18305 target = gen_reg_rtx (SImode);
18306 emit_move_insn (target, const0_rtx);
18307 target = gen_rtx_SUBREG (QImode, target, 0);
/* Copy an input into a register when optimizing and it is not one
   already, or when the pattern's predicate rejects it.  */
18309 if ((optimize && !register_operand (op0, mode0))
18310 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
18311 op0 = copy_to_mode_reg (mode0, op0);
18312 if ((optimize && !register_operand (op1, mode1))
18313 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
18314 op1 = copy_to_mode_reg (mode1, op1);
18316 pat = GEN_FCN (d->icode) (op0, op1);
/* Only the low QImode part is written (STRICT_LOW_PART), so the rest of
   the register keeps the zero stored above.  */
18320 emit_insn (gen_rtx_SET (VOIDmode,
18321 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
18322 gen_rtx_fmt_ee (comparison, QImode,
/* Hand back the full SImode register underneath the SUBREG.  */
18326 return SUBREG_REG (target);
18329 /* Return the integer constant in ARG. Constrain it to be in the range
18330 of the subparts of VEC_TYPE; issue an error if not. */
/* VEC_TYPE is the vector type being indexed and ARG the selector tree.
   NOTE(review): the return type, the braces and the return statements
   are not visible in this excerpt, so the value produced after the
   error is unknown here.  */
18333 get_element_number (tree vec_type, tree arg)
/* The largest valid selector is the number of vector subparts minus
   one.  */
18335 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
/* Reject ARG when host_integerp (ARG, 1) fails or when its value
   exceeds MAX.  */
18337 if (!host_integerp (arg, 1)
18338 || (elt = tree_low_cst (arg, 1), elt > max))
18340 error ("selector must be an integer constant in the range 0..%wi", max);
18347 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
18348 ix86_expand_vector_init. We DO have language-level syntax for this, in
18349 the form of (type){ init-list }. Except that since we can't place emms
18350 instructions from inside the compiler, we can't allow the use of MMX
18351 registers unless the user explicitly asks for it. So we do *not* define
18352 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
18353 we have builtins invoked by mmintrin.h that gives us license to emit
18354 these sorts of instructions. */
/* TYPE is the vector type to build, EXP the CALL_EXPR carrying one
   argument per vector element, and TARGET a suggested place for the
   result.
   NOTE(review): the return type, the braces and the return statement
   are not visible in this excerpt.  */
18357 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
18359 enum machine_mode tmode = TYPE_MODE (type);
18360 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
18361 int i, n_elt = GET_MODE_NUNITS (tmode);
18362 rtvec v = rtvec_alloc (n_elt);
/* The call must supply exactly one argument per element of the vector
   mode.  */
18364 gcc_assert (VECTOR_MODE_P (tmode));
18365 gcc_assert (call_expr_nargs (exp) == n_elt);
/* Expand every argument and store its low part, in the element mode,
   into the rtvec.  */
18367 for (i = 0; i < n_elt; ++i)
18369 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
18370 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
18373 if (!target || !register_operand (target, tmode))
18374 target = gen_reg_rtx (tmode);
/* The collected elements are wrapped in a PARALLEL of the vector
   mode.  */
18376 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
18380 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
18381 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
18382 had a language-level syntax for referencing vector elements. */
/* EXP is the CALL_EXPR: argument 0 is the vector and argument 1 the
   element selector.  TARGET is a suggested place for the extracted
   element.
   NOTE(review): the return type, the braces, the declarations of arg0,
   arg1, elt and op0 and the return statement are not visible in this
   excerpt.  */
18385 ix86_expand_vec_ext_builtin (tree exp, rtx target)
18387 enum machine_mode tmode, mode0;
18392 arg0 = CALL_EXPR_ARG (exp, 0);
18393 arg1 = CALL_EXPR_ARG (exp, 1);
18395 op0 = expand_normal (arg0);
/* The selector is validated against the vector type of argument 0.  */
18396 elt = get_element_number (TREE_TYPE (arg0), arg1);
/* tmode is the mode of the element type, mode0 the mode of the vector
   type.  */
18398 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
18399 mode0 = TYPE_MODE (TREE_TYPE (arg0));
18400 gcc_assert (VECTOR_MODE_P (mode0));
18402 op0 = force_reg (mode0, op0);
18404 if (optimize || !target || !register_operand (target, tmode))
18405 target = gen_reg_rtx (tmode);
18407 ix86_expand_vector_extract (true, target, op0, elt);
18412 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
18413 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
18414 a language-level syntax for referencing vector elements. */
/* EXP is the CALL_EXPR: argument 0 is the source vector, argument 1 the
   new element value and argument 2 the element selector.
   NOTE(review): the return type, the braces, the declaration of elt and
   the return statement are not visible in this excerpt.  */
18417 ix86_expand_vec_set_builtin (tree exp)
18419 enum machine_mode tmode, mode1;
18420 tree arg0, arg1, arg2;
18422 rtx op0, op1, target;
18424 arg0 = CALL_EXPR_ARG (exp, 0);
18425 arg1 = CALL_EXPR_ARG (exp, 1);
18426 arg2 = CALL_EXPR_ARG (exp, 2);
/* tmode is the mode of the vector type, mode1 the mode of its element
   type.  */
18428 tmode = TYPE_MODE (TREE_TYPE (arg0));
18429 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
18430 gcc_assert (VECTOR_MODE_P (tmode));
18432 op0 = expand_expr (arg0, NULL_RTX, tmode, 0);
18433 op1 = expand_expr (arg1, NULL_RTX, mode1, 0);
18434 elt = get_element_number (TREE_TYPE (arg0), arg2);
/* Convert the new value to the element mode when it was expanded in a
   different mode; the conversion is done as unsigned.  */
18436 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
18437 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
18439 op0 = force_reg (tmode, op0);
18440 op1 = force_reg (mode1, op1);
18442 /* OP0 is the source of these builtin functions and shouldn't be
18443 modified. Create a copy, use it and return it as target. */
18444 target = gen_reg_rtx (tmode);
18445 emit_move_insn (target, op0);
18446 ix86_expand_vector_set (true, target, op1, elt);
18451 /* Expand an expression EXP that calls a built-in function,
18452 with result going to TARGET if that's convenient
18453 (and in mode MODE if that's convenient).
18454 SUBTARGET may be used as the target for computing one of EXP's operands.
18455 IGNORE is nonzero if the value is to be ignored. */
/* FCODE is the function code of the decl being called. Builtins that
   need hand-written operand handling are expanded by the individual
   cases; every remaining code is looked up, in order, in bdesc_sse_3arg,
   bdesc_2arg, bdesc_1arg, bdesc_comi and bdesc_ptest. gcc_unreachable
   is reached if nothing matches. */
18458 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
18459 enum machine_mode mode ATTRIBUTE_UNUSED,
18460 int ignore ATTRIBUTE_UNUSED)
18462 const struct builtin_description *d;
18464 enum insn_code icode;
18465 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
18466 tree arg0, arg1, arg2, arg3;
18467 rtx op0, op1, op2, op3, pat;
18468 enum machine_mode tmode, mode0, mode1, mode2, mode3, mode4;
18469 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
18473 case IX86_BUILTIN_EMMS:
18474 emit_insn (gen_mmx_emms ());
18477 case IX86_BUILTIN_SFENCE:
18478 emit_insn (gen_sse_sfence ());
18481 case IX86_BUILTIN_MASKMOVQ:
18482 case IX86_BUILTIN_MASKMOVDQU:
18483 icode = (fcode == IX86_BUILTIN_MASKMOVQ
18484 ? CODE_FOR_mmx_maskmovq
18485 : CODE_FOR_sse2_maskmovdqu);
18486 /* Note the arg order is different from the operand order. */
18487 arg1 = CALL_EXPR_ARG (exp, 0);
18488 arg2 = CALL_EXPR_ARG (exp, 1);
18489 arg0 = CALL_EXPR_ARG (exp, 2);
18490 op0 = expand_normal (arg0);
18491 op1 = expand_normal (arg1);
18492 op2 = expand_normal (arg2);
18493 mode0 = insn_data[icode].operand[0].mode;
18494 mode1 = insn_data[icode].operand[1].mode;
18495 mode2 = insn_data[icode].operand[2].mode;
/* The third call argument (ARG0) is an address: force it into a Pmode
   register and wrap it in a MEM. */
18497 op0 = force_reg (Pmode, op0);
18498 op0 = gen_rtx_MEM (mode1, op0);
18500 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
18501 op0 = copy_to_mode_reg (mode0, op0);
18502 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
18503 op1 = copy_to_mode_reg (mode1, op1);
18504 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
18505 op2 = copy_to_mode_reg (mode2, op2);
18506 pat = GEN_FCN (icode) (op0, op1, op2);
18512 case IX86_BUILTIN_SQRTSS:
18513 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmsqrtv4sf2, exp, target);
18514 case IX86_BUILTIN_RSQRTSS:
18515 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrsqrtv4sf2, exp, target);
18516 case IX86_BUILTIN_RCPSS:
18517 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrcpv4sf2, exp, target);
18519 case IX86_BUILTIN_LOADUPS:
18520 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, exp, target, 1);
18522 case IX86_BUILTIN_STOREUPS:
18523 return ix86_expand_store_builtin (CODE_FOR_sse_movups, exp);
/* Partial loads: ARG0 is forced into a register, ARG1 is a pointer that
   becomes the MEM operand of the insn. */
18525 case IX86_BUILTIN_LOADHPS:
18526 case IX86_BUILTIN_LOADLPS:
18527 case IX86_BUILTIN_LOADHPD:
18528 case IX86_BUILTIN_LOADLPD:
18529 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_loadhps
18530 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_loadlps
18531 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_loadhpd
18532 : CODE_FOR_sse2_loadlpd);
18533 arg0 = CALL_EXPR_ARG (exp, 0);
18534 arg1 = CALL_EXPR_ARG (exp, 1);
18535 op0 = expand_normal (arg0);
18536 op1 = expand_normal (arg1);
18537 tmode = insn_data[icode].operand[0].mode;
18538 mode0 = insn_data[icode].operand[1].mode;
18539 mode1 = insn_data[icode].operand[2].mode;
18541 op0 = force_reg (mode0, op0);
18542 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
18543 if (optimize || target == 0
18544 || GET_MODE (target) != tmode
18545 || !register_operand (target, tmode))
18546 target = gen_reg_rtx (tmode);
18547 pat = GEN_FCN (icode) (target, op0, op1);
/* Partial stores: ARG0 is the destination pointer (turned into a MEM),
   ARG1 is the value, forced into a register. */
18553 case IX86_BUILTIN_STOREHPS:
18554 case IX86_BUILTIN_STORELPS:
18555 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_storehps
18556 : CODE_FOR_sse_storelps);
18557 arg0 = CALL_EXPR_ARG (exp, 0);
18558 arg1 = CALL_EXPR_ARG (exp, 1);
18559 op0 = expand_normal (arg0);
18560 op1 = expand_normal (arg1);
18561 mode0 = insn_data[icode].operand[0].mode;
18562 mode1 = insn_data[icode].operand[1].mode;
18564 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
18565 op1 = force_reg (mode1, op1);
18567 pat = GEN_FCN (icode) (op0, op1);
18573 case IX86_BUILTIN_MOVNTPS:
18574 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, exp);
18575 case IX86_BUILTIN_MOVNTQ:
18576 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, exp);
/* The MXCSR patterns are handed an SImode stack temporary: the new value
   is stored there before ldmxcsr, and the value written by stmxcsr is
   copied from there into a register. */
18578 case IX86_BUILTIN_LDMXCSR:
18579 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
18580 target = assign_386_stack_local (SImode, SLOT_TEMP);
18581 emit_move_insn (target, op0);
18582 emit_insn (gen_sse_ldmxcsr (target));
18585 case IX86_BUILTIN_STMXCSR:
18586 target = assign_386_stack_local (SImode, SLOT_TEMP);
18587 emit_insn (gen_sse_stmxcsr (target));
18588 return copy_to_mode_reg (SImode, target);
/* Shuffles taking a selector mask: the mask argument must satisfy the
   predicate of the insn's last operand, otherwise an error is reported. */
18590 case IX86_BUILTIN_SHUFPS:
18591 case IX86_BUILTIN_SHUFPD:
18592 icode = (fcode == IX86_BUILTIN_SHUFPS
18593 ? CODE_FOR_sse_shufps
18594 : CODE_FOR_sse2_shufpd);
18595 arg0 = CALL_EXPR_ARG (exp, 0);
18596 arg1 = CALL_EXPR_ARG (exp, 1);
18597 arg2 = CALL_EXPR_ARG (exp, 2);
18598 op0 = expand_normal (arg0);
18599 op1 = expand_normal (arg1);
18600 op2 = expand_normal (arg2);
18601 tmode = insn_data[icode].operand[0].mode;
18602 mode0 = insn_data[icode].operand[1].mode;
18603 mode1 = insn_data[icode].operand[2].mode;
18604 mode2 = insn_data[icode].operand[3].mode;
18606 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
18607 op0 = copy_to_mode_reg (mode0, op0);
18608 if ((optimize && !register_operand (op1, mode1))
18609 || !(*insn_data[icode].operand[2].predicate) (op1, mode1))
18610 op1 = copy_to_mode_reg (mode1, op1);
18611 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
18613 /* @@@ better error message */
18614 error ("mask must be an immediate");
18615 return gen_reg_rtx (tmode);
18617 if (optimize || target == 0
18618 || GET_MODE (target) != tmode
18619 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18620 target = gen_reg_rtx (tmode);
18621 pat = GEN_FCN (icode) (target, op0, op1, op2);
18627 case IX86_BUILTIN_PSHUFW:
18628 case IX86_BUILTIN_PSHUFD:
18629 case IX86_BUILTIN_PSHUFHW:
18630 case IX86_BUILTIN_PSHUFLW:
18631 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
18632 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
18633 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
18634 : CODE_FOR_mmx_pshufw);
18635 arg0 = CALL_EXPR_ARG (exp, 0);
18636 arg1 = CALL_EXPR_ARG (exp, 1);
18637 op0 = expand_normal (arg0);
18638 op1 = expand_normal (arg1);
18639 tmode = insn_data[icode].operand[0].mode;
18640 mode1 = insn_data[icode].operand[1].mode;
18641 mode2 = insn_data[icode].operand[2].mode;
18643 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
18644 op0 = copy_to_mode_reg (mode1, op0);
18645 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
18647 /* @@@ better error message */
18648 error ("mask must be an immediate");
18652 || GET_MODE (target) != tmode
18653 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18654 target = gen_reg_rtx (tmode);
18655 pat = GEN_FCN (icode) (target, op0, op1);
/* Vector shifts by an immediate count. A non-constant count is an
   error; a constant outside [0, 255] is replaced by 255. */
18661 case IX86_BUILTIN_PSLLWI128:
18662 icode = CODE_FOR_ashlv8hi3;
18664 case IX86_BUILTIN_PSLLDI128:
18665 icode = CODE_FOR_ashlv4si3;
18667 case IX86_BUILTIN_PSLLQI128:
18668 icode = CODE_FOR_ashlv2di3;
18670 case IX86_BUILTIN_PSRAWI128:
18671 icode = CODE_FOR_ashrv8hi3;
18673 case IX86_BUILTIN_PSRADI128:
18674 icode = CODE_FOR_ashrv4si3;
18676 case IX86_BUILTIN_PSRLWI128:
18677 icode = CODE_FOR_lshrv8hi3;
18679 case IX86_BUILTIN_PSRLDI128:
18680 icode = CODE_FOR_lshrv4si3;
18682 case IX86_BUILTIN_PSRLQI128:
18683 icode = CODE_FOR_lshrv2di3;
18686 arg0 = CALL_EXPR_ARG (exp, 0);
18687 arg1 = CALL_EXPR_ARG (exp, 1);
18688 op0 = expand_normal (arg0);
18689 op1 = expand_normal (arg1);
18691 if (!CONST_INT_P (op1))
18693 error ("shift must be an immediate");
18696 if (INTVAL (op1) < 0 || INTVAL (op1) > 255)
18697 op1 = GEN_INT (255);
18699 tmode = insn_data[icode].operand[0].mode;
18700 mode1 = insn_data[icode].operand[1].mode;
18701 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
18702 op0 = copy_to_reg (op0);
18704 target = gen_reg_rtx (tmode);
18705 pat = GEN_FCN (icode) (target, op0, op1);
/* The same shift patterns, but the count operand is viewed as TImode
   through a subreg (and copied to a register if the predicate of
   operand 2 rejects it). */
18711 case IX86_BUILTIN_PSLLW128:
18712 icode = CODE_FOR_ashlv8hi3;
18714 case IX86_BUILTIN_PSLLD128:
18715 icode = CODE_FOR_ashlv4si3;
18717 case IX86_BUILTIN_PSLLQ128:
18718 icode = CODE_FOR_ashlv2di3;
18720 case IX86_BUILTIN_PSRAW128:
18721 icode = CODE_FOR_ashrv8hi3;
18723 case IX86_BUILTIN_PSRAD128:
18724 icode = CODE_FOR_ashrv4si3;
18726 case IX86_BUILTIN_PSRLW128:
18727 icode = CODE_FOR_lshrv8hi3;
18729 case IX86_BUILTIN_PSRLD128:
18730 icode = CODE_FOR_lshrv4si3;
18732 case IX86_BUILTIN_PSRLQ128:
18733 icode = CODE_FOR_lshrv2di3;
18736 arg0 = CALL_EXPR_ARG (exp, 0);
18737 arg1 = CALL_EXPR_ARG (exp, 1);
18738 op0 = expand_normal (arg0);
18739 op1 = expand_normal (arg1);
18741 tmode = insn_data[icode].operand[0].mode;
18742 mode1 = insn_data[icode].operand[1].mode;
18744 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
18745 op0 = copy_to_reg (op0);
18747 op1 = simplify_gen_subreg (TImode, op1, GET_MODE (op1), 0);
18748 if (! (*insn_data[icode].operand[2].predicate) (op1, TImode))
18749 op1 = copy_to_reg (op1);
18751 target = gen_reg_rtx (tmode);
18752 pat = GEN_FCN (icode) (target, op0, op1);
/* Shifts using the sse2_ashlti3 / sse2_lshrti3 patterns: the result
   pseudo is created in V2DImode and handed to the pattern through a
   subreg in the pattern's own output mode. */
18758 case IX86_BUILTIN_PSLLDQI128:
18759 case IX86_BUILTIN_PSRLDQI128:
18760 icode = (fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
18761 : CODE_FOR_sse2_lshrti3);
18762 arg0 = CALL_EXPR_ARG (exp, 0);
18763 arg1 = CALL_EXPR_ARG (exp, 1);
18764 op0 = expand_normal (arg0);
18765 op1 = expand_normal (arg1);
18766 tmode = insn_data[icode].operand[0].mode;
18767 mode1 = insn_data[icode].operand[1].mode;
18768 mode2 = insn_data[icode].operand[2].mode;
18770 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
18772 op0 = copy_to_reg (op0);
18773 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
18775 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
18777 error ("shift must be an immediate");
18780 target = gen_reg_rtx (V2DImode);
18781 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0),
18788 case IX86_BUILTIN_FEMMS:
18789 emit_insn (gen_mmx_femms ());
/* The PAVGUSB ... PSWAPDSF builtins each expand straight to one mmx_*
   pattern through the generic unary/binary expanders. */
18792 case IX86_BUILTIN_PAVGUSB:
18793 return ix86_expand_binop_builtin (CODE_FOR_mmx_uavgv8qi3, exp, target);
18795 case IX86_BUILTIN_PF2ID:
18796 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2id, exp, target, 0);
18798 case IX86_BUILTIN_PFACC:
18799 return ix86_expand_binop_builtin (CODE_FOR_mmx_haddv2sf3, exp, target);
18801 case IX86_BUILTIN_PFADD:
18802 return ix86_expand_binop_builtin (CODE_FOR_mmx_addv2sf3, exp, target);
18804 case IX86_BUILTIN_PFCMPEQ:
18805 return ix86_expand_binop_builtin (CODE_FOR_mmx_eqv2sf3, exp, target);
18807 case IX86_BUILTIN_PFCMPGE:
18808 return ix86_expand_binop_builtin (CODE_FOR_mmx_gev2sf3, exp, target);
18810 case IX86_BUILTIN_PFCMPGT:
18811 return ix86_expand_binop_builtin (CODE_FOR_mmx_gtv2sf3, exp, target);
18813 case IX86_BUILTIN_PFMAX:
18814 return ix86_expand_binop_builtin (CODE_FOR_mmx_smaxv2sf3, exp, target);
18816 case IX86_BUILTIN_PFMIN:
18817 return ix86_expand_binop_builtin (CODE_FOR_mmx_sminv2sf3, exp, target);
18819 case IX86_BUILTIN_PFMUL:
18820 return ix86_expand_binop_builtin (CODE_FOR_mmx_mulv2sf3, exp, target);
18822 case IX86_BUILTIN_PFRCP:
18823 return ix86_expand_unop_builtin (CODE_FOR_mmx_rcpv2sf2, exp, target, 0);
18825 case IX86_BUILTIN_PFRCPIT1:
18826 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit1v2sf3, exp, target);
18828 case IX86_BUILTIN_PFRCPIT2:
18829 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit2v2sf3, exp, target);
18831 case IX86_BUILTIN_PFRSQIT1:
18832 return ix86_expand_binop_builtin (CODE_FOR_mmx_rsqit1v2sf3, exp, target);
18834 case IX86_BUILTIN_PFRSQRT:
18835 return ix86_expand_unop_builtin (CODE_FOR_mmx_rsqrtv2sf2, exp, target, 0);
18837 case IX86_BUILTIN_PFSUB:
18838 return ix86_expand_binop_builtin (CODE_FOR_mmx_subv2sf3, exp, target);
18840 case IX86_BUILTIN_PFSUBR:
18841 return ix86_expand_binop_builtin (CODE_FOR_mmx_subrv2sf3, exp, target);
18843 case IX86_BUILTIN_PI2FD:
18844 return ix86_expand_unop_builtin (CODE_FOR_mmx_floatv2si2, exp, target, 0);
18846 case IX86_BUILTIN_PMULHRW:
18847 return ix86_expand_binop_builtin (CODE_FOR_mmx_pmulhrwv4hi3, exp, target);
18849 case IX86_BUILTIN_PF2IW:
18850 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2iw, exp, target, 0);
18852 case IX86_BUILTIN_PFNACC:
18853 return ix86_expand_binop_builtin (CODE_FOR_mmx_hsubv2sf3, exp, target);
18855 case IX86_BUILTIN_PFPNACC:
18856 return ix86_expand_binop_builtin (CODE_FOR_mmx_addsubv2sf3, exp, target);
18858 case IX86_BUILTIN_PI2FW:
18859 return ix86_expand_unop_builtin (CODE_FOR_mmx_pi2fw, exp, target, 0);
18861 case IX86_BUILTIN_PSWAPDSI:
18862 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2si2, exp, target, 0);
18864 case IX86_BUILTIN_PSWAPDSF:
18865 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2sf2, exp, target, 0);
18867 case IX86_BUILTIN_SQRTSD:
18868 return ix86_expand_unop1_builtin (CODE_FOR_sse2_vmsqrtv2df2, exp, target);
18869 case IX86_BUILTIN_LOADUPD:
18870 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, exp, target, 1);
18871 case IX86_BUILTIN_STOREUPD:
18872 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, exp);
18874 case IX86_BUILTIN_MFENCE:
18875 emit_insn (gen_sse2_mfence ());
18877 case IX86_BUILTIN_LFENCE:
18878 emit_insn (gen_sse2_lfence ());
18881 case IX86_BUILTIN_CLFLUSH:
18882 arg0 = CALL_EXPR_ARG (exp, 0);
18883 op0 = expand_normal (arg0);
18884 icode = CODE_FOR_sse2_clflush;
18885 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
18886 op0 = copy_to_mode_reg (Pmode, op0);
18888 emit_insn (gen_sse2_clflush (op0));
18891 case IX86_BUILTIN_MOVNTPD:
18892 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, exp);
18893 case IX86_BUILTIN_MOVNTDQ:
18894 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, exp);
18895 case IX86_BUILTIN_MOVNTI:
18896 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, exp);
18898 case IX86_BUILTIN_LOADDQU:
18899 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, exp, target, 1);
18900 case IX86_BUILTIN_STOREDQU:
18901 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, exp);
/* MONITOR and MWAIT: operands are placed in registers, Pmode for the
   first MONITOR operand and SImode for the others. */
18903 case IX86_BUILTIN_MONITOR:
18904 arg0 = CALL_EXPR_ARG (exp, 0);
18905 arg1 = CALL_EXPR_ARG (exp, 1);
18906 arg2 = CALL_EXPR_ARG (exp, 2);
18907 op0 = expand_normal (arg0);
18908 op1 = expand_normal (arg1);
18909 op2 = expand_normal (arg2);
18911 op0 = copy_to_mode_reg (Pmode, op0);
18913 op1 = copy_to_mode_reg (SImode, op1);
18915 op2 = copy_to_mode_reg (SImode, op2);
18917 emit_insn (gen_sse3_monitor (op0, op1, op2));
18919 emit_insn (gen_sse3_monitor64 (op0, op1, op2));
18922 case IX86_BUILTIN_MWAIT:
18923 arg0 = CALL_EXPR_ARG (exp, 0);
18924 arg1 = CALL_EXPR_ARG (exp, 1);
18925 op0 = expand_normal (arg0);
18926 op1 = expand_normal (arg1);
18928 op0 = copy_to_mode_reg (SImode, op0);
18930 op1 = copy_to_mode_reg (SImode, op1);
18931 emit_insn (gen_sse3_mwait (op0, op1));
18934 case IX86_BUILTIN_LDDQU:
18935 return ix86_expand_unop_builtin (CODE_FOR_sse3_lddqu, exp,
18938 case IX86_BUILTIN_PALIGNR:
18939 case IX86_BUILTIN_PALIGNR128:
18940 if (fcode == IX86_BUILTIN_PALIGNR)
18942 icode = CODE_FOR_ssse3_palignrdi;
18947 icode = CODE_FOR_ssse3_palignrti;
18950 arg0 = CALL_EXPR_ARG (exp, 0);
18951 arg1 = CALL_EXPR_ARG (exp, 1);
18952 arg2 = CALL_EXPR_ARG (exp, 2);
18953 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
18954 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
18955 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
18956 tmode = insn_data[icode].operand[0].mode;
18957 mode1 = insn_data[icode].operand[1].mode;
18958 mode2 = insn_data[icode].operand[2].mode;
18959 mode3 = insn_data[icode].operand[3].mode;
/* Inputs that fail their predicate are copied to a register and viewed
   in the pattern's operand mode through a subreg; the last argument
   must satisfy the predicate of operand 3. */
18961 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
18963 op0 = copy_to_reg (op0);
18964 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
18966 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
18968 op1 = copy_to_reg (op1);
18969 op1 = simplify_gen_subreg (mode2, op1, GET_MODE (op1), 0);
18971 if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
18973 error ("shift must be an immediate");
18976 target = gen_reg_rtx (mode);
18977 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, mode, 0),
18984 case IX86_BUILTIN_MOVNTDQA:
18985 return ix86_expand_unop_builtin (CODE_FOR_sse4_1_movntdqa, exp,
18988 case IX86_BUILTIN_MOVNTSD:
18989 return ix86_expand_store_builtin (CODE_FOR_sse4a_vmmovntv2df, exp);
18991 case IX86_BUILTIN_MOVNTSS:
18992 return ix86_expand_store_builtin (CODE_FOR_sse4a_vmmovntv4sf, exp);
/* sse4a extract/insert patterns. In the EXTRQI and INSERTQI forms the
   index and length arguments must satisfy the pattern's predicates;
   otherwise an error is reported and a fresh register is returned. */
18994 case IX86_BUILTIN_INSERTQ:
18995 case IX86_BUILTIN_EXTRQ:
18996 icode = (fcode == IX86_BUILTIN_EXTRQ
18997 ? CODE_FOR_sse4a_extrq
18998 : CODE_FOR_sse4a_insertq);
18999 arg0 = CALL_EXPR_ARG (exp, 0);
19000 arg1 = CALL_EXPR_ARG (exp, 1);
19001 op0 = expand_normal (arg0);
19002 op1 = expand_normal (arg1);
19003 tmode = insn_data[icode].operand[0].mode;
19004 mode1 = insn_data[icode].operand[1].mode;
19005 mode2 = insn_data[icode].operand[2].mode;
19006 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
19007 op0 = copy_to_mode_reg (mode1, op0);
19008 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
19009 op1 = copy_to_mode_reg (mode2, op1);
19010 if (optimize || target == 0
19011 || GET_MODE (target) != tmode
19012 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19013 target = gen_reg_rtx (tmode);
19014 pat = GEN_FCN (icode) (target, op0, op1);
19020 case IX86_BUILTIN_EXTRQI:
19021 icode = CODE_FOR_sse4a_extrqi;
19022 arg0 = CALL_EXPR_ARG (exp, 0);
19023 arg1 = CALL_EXPR_ARG (exp, 1);
19024 arg2 = CALL_EXPR_ARG (exp, 2);
19025 op0 = expand_normal (arg0);
19026 op1 = expand_normal (arg1);
19027 op2 = expand_normal (arg2);
19028 tmode = insn_data[icode].operand[0].mode;
19029 mode1 = insn_data[icode].operand[1].mode;
19030 mode2 = insn_data[icode].operand[2].mode;
19031 mode3 = insn_data[icode].operand[3].mode;
19032 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
19033 op0 = copy_to_mode_reg (mode1, op0);
19034 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
19036 error ("index mask must be an immediate");
19037 return gen_reg_rtx (tmode);
19039 if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
19041 error ("length mask must be an immediate");
19042 return gen_reg_rtx (tmode);
19044 if (optimize || target == 0
19045 || GET_MODE (target) != tmode
19046 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19047 target = gen_reg_rtx (tmode);
19048 pat = GEN_FCN (icode) (target, op0, op1, op2);
19054 case IX86_BUILTIN_INSERTQI:
19055 icode = CODE_FOR_sse4a_insertqi;
19056 arg0 = CALL_EXPR_ARG (exp, 0);
19057 arg1 = CALL_EXPR_ARG (exp, 1);
19058 arg2 = CALL_EXPR_ARG (exp, 2);
19059 arg3 = CALL_EXPR_ARG (exp, 3);
19060 op0 = expand_normal (arg0);
19061 op1 = expand_normal (arg1);
19062 op2 = expand_normal (arg2);
19063 op3 = expand_normal (arg3);
19064 tmode = insn_data[icode].operand[0].mode;
19065 mode1 = insn_data[icode].operand[1].mode;
19066 mode2 = insn_data[icode].operand[2].mode;
19067 mode3 = insn_data[icode].operand[3].mode;
19068 mode4 = insn_data[icode].operand[4].mode;
19070 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
19071 op0 = copy_to_mode_reg (mode1, op0);
19073 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
19074 op1 = copy_to_mode_reg (mode2, op1);
19076 if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
19078 error ("index mask must be an immediate");
19079 return gen_reg_rtx (tmode);
19081 if (! (*insn_data[icode].operand[4].predicate) (op3, mode4))
19083 error ("length mask must be an immediate");
19084 return gen_reg_rtx (tmode);
19086 if (optimize || target == 0
19087 || GET_MODE (target) != tmode
19088 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19089 target = gen_reg_rtx (tmode);
19090 pat = GEN_FCN (icode) (target, op0, op1, op2, op3);
/* Vector init, extract and set builtins are delegated to their own
   subroutines. */
19096 case IX86_BUILTIN_VEC_INIT_V2SI:
19097 case IX86_BUILTIN_VEC_INIT_V4HI:
19098 case IX86_BUILTIN_VEC_INIT_V8QI:
19099 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
19101 case IX86_BUILTIN_VEC_EXT_V2DF:
19102 case IX86_BUILTIN_VEC_EXT_V2DI:
19103 case IX86_BUILTIN_VEC_EXT_V4SF:
19104 case IX86_BUILTIN_VEC_EXT_V4SI:
19105 case IX86_BUILTIN_VEC_EXT_V8HI:
19106 case IX86_BUILTIN_VEC_EXT_V2SI:
19107 case IX86_BUILTIN_VEC_EXT_V4HI:
19108 case IX86_BUILTIN_VEC_EXT_V16QI:
19109 return ix86_expand_vec_ext_builtin (exp, target);
19111 case IX86_BUILTIN_VEC_SET_V2DI:
19112 case IX86_BUILTIN_VEC_SET_V4SF:
19113 case IX86_BUILTIN_VEC_SET_V4SI:
19114 case IX86_BUILTIN_VEC_SET_V8HI:
19115 case IX86_BUILTIN_VEC_SET_V4HI:
19116 case IX86_BUILTIN_VEC_SET_V16QI:
19117 return ix86_expand_vec_set_builtin (exp);
/* Table-driven expansion: search each descriptor table in turn for an
   entry whose code equals FCODE and use the matching generic expander. */
19123 for (i = 0, d = bdesc_sse_3arg;
19124 i < ARRAY_SIZE (bdesc_sse_3arg);
19126 if (d->code == fcode)
19127 return ix86_expand_sse_4_operands_builtin (d->icode, exp,
19130 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
19131 if (d->code == fcode)
19133 /* Compares are treated specially. */
19134 if (d->icode == CODE_FOR_sse_maskcmpv4sf3
19135 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3
19136 || d->icode == CODE_FOR_sse2_maskcmpv2df3
19137 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
19138 return ix86_expand_sse_compare (d, exp, target);
19140 return ix86_expand_binop_builtin (d->icode, exp, target);
19143 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
19144 if (d->code == fcode)
19145 return ix86_expand_unop_builtin (d->icode, exp, target, 0);
19147 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
19148 if (d->code == fcode)
19149 return ix86_expand_sse_comi (d, exp, target);
19151 for (i = 0, d = bdesc_ptest; i < ARRAY_SIZE (bdesc_ptest); i++, d++)
19152 if (d->code == fcode)
19153 return ix86_expand_sse_ptest (d, exp, target);
/* No case and no table entry recognized FCODE. */
19155 gcc_unreachable ();
19158 /* Returns a function decl for a vectorized version of the builtin function
19159 with builtin function code FN and the result vector type TYPE, or NULL_TREE
19160 if it is not available. */
/* Both TYPE_OUT and TYPE_IN are checked to be vector types. A decl from
   ix86_builtins is returned only when the element modes and element
   counts of both types match the combination tested for FN. */
19163 ix86_builtin_vectorized_function (enum built_in_function fn, tree type_out,
19166 enum machine_mode in_mode, out_mode;
19169 if (TREE_CODE (type_out) != VECTOR_TYPE
19170 || TREE_CODE (type_in) != VECTOR_TYPE)
/* OUT_MODE and IN_MODE are the element modes, OUT_N and IN_N the
   element counts, of the two vector types. */
19173 out_mode = TYPE_MODE (TREE_TYPE (type_out));
19174 out_n = TYPE_VECTOR_SUBPARTS (type_out);
19175 in_mode = TYPE_MODE (TREE_TYPE (type_in));
19176 in_n = TYPE_VECTOR_SUBPARTS (type_in);
/* sqrt: two DFmode elements in and out. */
19180 case BUILT_IN_SQRT:
19181 if (out_mode == DFmode && out_n == 2
19182 && in_mode == DFmode && in_n == 2)
19183 return ix86_builtins[IX86_BUILTIN_SQRTPD];
/* sqrtf: four SFmode elements in and out. */
19186 case BUILT_IN_SQRTF:
19187 if (out_mode == SFmode && out_n == 4
19188 && in_mode == SFmode && in_n == 4)
19189 return ix86_builtins[IX86_BUILTIN_SQRTPS];
/* lrintf: four SFmode elements in, four SImode elements out. */
19192 case BUILT_IN_LRINTF:
19193 if (out_mode == SImode && out_n == 4
19194 && in_mode == SFmode && in_n == 4)
19195 return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
19205 /* Returns a decl of a function that implements conversion of the
19206 input vector of type TYPE, or NULL_TREE if it is not available. */
/* CODE is the tree code of the conversion and TYPE must be a vector
   type. The mode of TYPE then selects the builtin: the returns visible
   here are IX86_BUILTIN_CVTDQ2PS and, under FIX_TRUNC_EXPR,
   IX86_BUILTIN_CVTTPS2DQ. */
19209 ix86_builtin_conversion (enum tree_code code, tree type)
19211 if (TREE_CODE (type) != VECTOR_TYPE)
19217 switch (TYPE_MODE (type))
19220 return ix86_builtins[IX86_BUILTIN_CVTDQ2PS];
19225 case FIX_TRUNC_EXPR:
19226 switch (TYPE_MODE (type))
19229 return ix86_builtins[IX86_BUILTIN_CVTTPS2DQ];
19239 /* Store OPERAND to the memory after reload is completed. This means
19240 that we can't easily use assign_stack_local. */
/* MODE is the mode of the MEM that is built in RESULT. The function
   asserts that reload has completed. With a red zone the value is
   stored by an ordinary move; otherwise it is pushed with pre-decrement
   stores through the stack pointer and RESULT addresses the stack
   pointer itself. */
19242 ix86_force_to_memory (enum machine_mode mode, rtx operand)
19246 gcc_assert (reload_completed);
19247 if (TARGET_RED_ZONE)
19249 result = gen_rtx_MEM (mode,
19250 gen_rtx_PLUS (Pmode,
19252 GEN_INT (-RED_ZONE_SIZE)));
19253 emit_move_insn (result, operand);
/* 64-bit target without a red zone: push through a DImode pre-decrement
   of the stack pointer. */
19255 else if (!TARGET_RED_ZONE && TARGET_64BIT)
19261 operand = gen_lowpart (DImode, operand);
19265 gen_rtx_SET (VOIDmode,
19266 gen_rtx_MEM (DImode,
19267 gen_rtx_PRE_DEC (DImode,
19268 stack_pointer_rtx)),
19272 gcc_unreachable ();
19274 result = gen_rtx_MEM (mode, stack_pointer_rtx);
/* Remaining case: a double-word operand is split with split_di and
   pushed as two SImode words; narrower operands use a single push. */
19283 split_di (&operand, 1, operands, operands + 1);
19285 gen_rtx_SET (VOIDmode,
19286 gen_rtx_MEM (SImode,
19287 gen_rtx_PRE_DEC (Pmode,
19288 stack_pointer_rtx)),
19291 gen_rtx_SET (VOIDmode,
19292 gen_rtx_MEM (SImode,
19293 gen_rtx_PRE_DEC (Pmode,
19294 stack_pointer_rtx)),
19299 /* Store HImodes as SImodes. */
19300 operand = gen_lowpart (SImode, operand);
19304 gen_rtx_SET (VOIDmode,
19305 gen_rtx_MEM (GET_MODE (operand),
19306 gen_rtx_PRE_DEC (SImode,
19307 stack_pointer_rtx)),
19311 gcc_unreachable ();
19313 result = gen_rtx_MEM (mode, stack_pointer_rtx);
19318 /* Free operand from the memory. */
/* MODE is the mode of the value whose stack slot is being released.
   The visible work is guarded by !TARGET_RED_ZONE and consists of one
   insn that sets the stack pointer to itself plus an offset. */
19320 ix86_free_from_memory (enum machine_mode mode)
19322 if (!TARGET_RED_ZONE)
/* DImode values, and every value on 64-bit targets, take this branch.
   NOTE(review): the sizes chosen are not visible here -- presumably 8
   bytes versus 4 otherwise; confirm. */
19326 if (mode == DImode || TARGET_64BIT)
19330 /* Use LEA to deallocate stack space. In peephole2 it will be converted
19331 to pop or add instruction if registers are available. */
19332 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
19333 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
19338 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
19339 QImode must go into class Q_REGS.
19340 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
19341 movdf to do mem-to-mem moves through integer regs. */
/* X is the rtx being reloaded and CLASS the candidate register class.
   The checks run in order and the first one that applies decides the
   result, which is CLASS, a narrower class, or NO_REGS. */
19343 ix86_preferred_reload_class (rtx x, enum reg_class class)
19345 enum machine_mode mode = GET_MODE (x);
19347 /* We're only allowed to return a subclass of CLASS. Many of the
19348 following checks fail for NO_REGS, so eliminate that early. */
19349 if (class == NO_REGS)
19352 /* All classes can load zeros. */
19353 if (x == CONST0_RTX (mode))
19356 /* Force constants into memory if we are loading a (nonzero) constant into
19357 an MMX or SSE register. This is because there are no MMX/SSE instructions
19358 to load from a constant. */
19360 && (MAYBE_MMX_CLASS_P (class) || MAYBE_SSE_CLASS_P (class)))
19363 /* Prefer SSE regs only, if we can use them for math. */
19364 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
19365 return SSE_CLASS_P (class) ? class : NO_REGS;
19367 /* Floating-point constants need more complex checks. */
19368 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
19370 /* General regs can load everything. */
19371 if (reg_class_subset_p (class, GENERAL_REGS))
19374 /* Floats can load 0 and 1 plus some others. Note that we eliminated
19375 zero above. We only want to wind up preferring 80387 registers if
19376 we plan on doing computation with them. */
19378 && standard_80387_constant_p (x))
19380 /* Limit class to non-sse. */
19381 if (class == FLOAT_SSE_REGS)
19383 if (class == FP_TOP_SSE_REGS)
19385 if (class == FP_SECOND_SSE_REGS)
19386 return FP_SECOND_REG;
19387 if (class == FLOAT_INT_REGS || class == FLOAT_REGS)
19394 /* Generally when we see PLUS here, it's the function invariant
19395 (plus soft-fp const_int). Which can only be computed into general
19397 if (GET_CODE (x) == PLUS)
19398 return reg_class_subset_p (class, GENERAL_REGS) ? class : NO_REGS;
19400 /* QImode constants are easy to load, but non-constant QImode data
19401 must go into Q_REGS. */
19402 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
/* Two subset tests: CLASS already inside Q_REGS, or Q_REGS contained
   in CLASS. */
19404 if (reg_class_subset_p (class, Q_REGS))
19406 if (reg_class_subset_p (Q_REGS, class))
19414 /* Discourage putting floating-point values in SSE registers unless
19415 SSE math is being used, and likewise for the 387 registers. */
/* X is the rtx being reloaded and CLASS the candidate class for the
   output reload. The decision is based on the mode of X. */
19417 ix86_preferred_output_reload_class (rtx x, enum reg_class class)
19419 enum machine_mode mode = GET_MODE (x);
19421 /* Restrict the output reload class to the register bank that we are doing
19422 math on. If we would like not to return a subset of CLASS, reject this
19423 alternative: if reload cannot do this, it will still use its choice. */
/* NOTE(review): MODE already holds GET_MODE (x) from its initializer
   above, so this assignment is redundant. */
19424 mode = GET_MODE (x);
/* SSE math on an SSE float mode: SSE_REGS if CLASS may contain SSE
   registers, otherwise NO_REGS. */
19425 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
19426 return MAYBE_SSE_CLASS_P (class) ? SSE_REGS : NO_REGS;
/* x87 float modes: the mixed FP_TOP_SSE_REGS and FP_SECOND_SSE_REGS
   classes get special handling; any other class is kept only if it is
   a float class. */
19428 if (X87_FLOAT_MODE_P (mode))
19430 if (class == FP_TOP_SSE_REGS)
19432 else if (class == FP_SECOND_SSE_REGS)
19433 return FP_SECOND_REG;
19435 return FLOAT_CLASS_P (class) ? class : NO_REGS;
19441 /* If we are copying between general and FP registers, we need a memory
19442 location. The same is true for SSE and MMX registers.
19444 The macro can't work reliably when one of the CLASSES is class containing
19445 registers from multiple units (SSE, MMX, integer). We avoid this by never
19446 combining those units in single alternative in the machine description.
19447 Ensure that this constraint holds to avoid unexpected surprises.
19449 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
19450 enforce these sanity checks. */
/* CLASS1 and CLASS2 are the two register classes involved in the copy
   and MODE is the mode of the value being copied. */
19453 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
19454 enum machine_mode mode, int strict)
/* A class for which a MAYBE_*_CLASS_P test and the matching *_CLASS_P
   test disagree is one of the mixed classes described above; reaching
   this branch asserts that STRICT is zero. */
19456 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
19457 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
19458 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
19459 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
19460 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
19461 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
19463 gcc_assert (!strict);
/* Exactly one of the two classes is an x87 float class. */
19467 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
19470 /* ??? This is a lie. We do have moves between mmx/general, and for
19471 mmx/sse2. But by saying we need secondary memory we discourage the
19472 register allocator from using the mmx registers unless needed. */
19473 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
19476 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
19478 /* SSE1 doesn't have any direct moves from other classes. */
19482 /* If the target says that inter-unit moves are more expensive
19483 than moving through memory, then don't generate them. */
19484 if (!TARGET_INTER_UNIT_MOVES)
19487 /* Between SSE and general, we have moves no larger than word size. */
19488 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
19495 /* Return true if the registers in CLASS cannot represent the change from
19496 modes FROM to TO. */
/* The visible tests look at classes that may contain x87 registers, and
   at classes that may contain SSE or MMX registers; for the latter the
   sizes of FROM and TO are compared as explained in the comments below. */
19499 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
19500 enum reg_class class)
19505 /* x87 registers can't do subreg at all, as all values are reformatted
19506 to extended precision. */
19507 if (MAYBE_FLOAT_CLASS_P (class))
19510 if (MAYBE_SSE_CLASS_P (class) || MAYBE_MMX_CLASS_P (class))
19512 /* Vector registers do not support QI or HImode loads. If we don't
19513 disallow a change to these modes, reload will assume it's ok to
19514 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
19515 the vec_dupv4hi pattern. */
19516 if (GET_MODE_SIZE (from) < 4)
19519 /* Vector registers do not support subreg with nonzero offsets, which
19520 are otherwise valid for integer registers. Since we can't see
19521 whether we have a nonzero offset from here, prohibit all
19522 nonparadoxical subregs changing size. */
19523 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
19530 /* Return the cost of moving data from a register in class CLASS1 to
19531 one in class CLASS2.
19533 It is not required that the cost always equal 2 when FROM is the same as TO;
19534 on some machines it is expensive to move between registers if they are not
19535 general registers. */
/* MODE is the mode of the value being moved. The FROM and TO named in
   the comment above correspond to CLASS1 and CLASS2. */
19538 ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
19539 enum reg_class class2)
19541 /* In case we require secondary memory, compute cost of the store followed
19542 by load. In order to avoid bad register allocation choices, we need
19543 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
/* The last argument 0 asks ix86_secondary_memory_needed for its
   non-strict mode. */
19545 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
/* For each class add the larger of the two MEMORY_MOVE_COST values
   (last argument 0 and 1). */
19549 cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
19550 MEMORY_MOVE_COST (mode, class1, 1));
19551 cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
19552 MEMORY_MOVE_COST (mode, class2, 1));
19554 /* In case of copying from general_purpose_register we may emit multiple
19555 stores followed by single load causing memory size mismatch stall.
19556 Count this as arbitrarily high cost of 20. */
19557 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
19560 /* In the case of FP/MMX moves, the registers actually overlap, and we
19561 have to switch modes in order to treat them differently. */
19562 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
19563 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
19569 /* Moves between SSE/MMX and integer unit are expensive. */
19570 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
19571 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
19572 return ix86_cost->mmxsse_to_integer;
/* Otherwise the cost comes from the ix86_cost field for the register
   unit that CLASS1 may belong to. */
19573 if (MAYBE_FLOAT_CLASS_P (class1))
19574 return ix86_cost->fp_move;
19575 if (MAYBE_SSE_CLASS_P (class1))
19576 return ix86_cost->sse_move;
19577 if (MAYBE_MMX_CLASS_P (class1))
19578 return ix86_cost->mmx_move;
19582 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
19585 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
19587 /* Flags, and only flags, can hold CCmode values. */
19588 if (CC_REGNO_P (regno))
19589 return GET_MODE_CLASS (mode) == MODE_CC;
19590 if (GET_MODE_CLASS (mode) == MODE_CC
19591 || GET_MODE_CLASS (mode) == MODE_RANDOM
19592 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
/* x87 registers accept exactly the modes that VALID_FP_MODE_P allows. */
19594 if (FP_REGNO_P (regno))
19595 return VALID_FP_MODE_P (mode);
19596 if (SSE_REGNO_P (regno))
19598 /* We implement the move patterns for all vector modes into and
19599 out of SSE registers, even when no operation instructions
19601 return (VALID_SSE_REG_MODE (mode)
19602 || VALID_SSE2_REG_MODE (mode)
19603 || VALID_MMX_REG_MODE (mode)
19604 || VALID_MMX_REG_MODE_3DNOW (mode));
19606 if (MMX_REGNO_P (regno))
19608 /* We implement the move patterns for 3DNOW modes even in MMX mode,
19609 so if the register is available at all, then we can move data of
19610 the given mode into or out of it. */
19611 return (VALID_MMX_REG_MODE (mode)
19612 || VALID_MMX_REG_MODE_3DNOW (mode));
19615 if (mode == QImode)
19617 /* Take care with QImode values - they can be in non-QI regs,
19618 but then they do cause partial register stalls. */
19619 if (regno < 4 || TARGET_64BIT)
19621 if (!TARGET_PARTIAL_REG_STALL)
19623 return reload_in_progress || reload_completed;
19625 /* We handle both integer and floats in the general purpose registers. */
19626 else if (VALID_INT_MODE_P (mode))
19628 else if (VALID_FP_MODE_P (mode))
19630 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
19631 on to use that value in smaller contexts, this can easily force a
19632 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
19633 supporting DImode, allow it. */
19634 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
19640 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
19641 tieable integer mode. The answer for some modes depends on both
TARGET_64BIT and TARGET_PARTIAL_REG_STALL, and for others on TARGET_64BIT
alone. NOTE(review): which modes select which return is not visible here. */
19644 ix86_tieable_integer_mode_p (enum machine_mode mode)
19653 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
19656 return TARGET_64BIT;
19663 /* Return true if MODE1 is accessible in a register that can hold MODE2
19664 without copying. That is, all register classes that can hold MODE2
19665 can also hold MODE1. The tests below run from the most general case
(identical modes, then two tieable integer modes) to mode-specific ones
keyed on MODE2. */
19668 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
19670 if (mode1 == mode2)
/* Two integer modes are handled together when both satisfy
ix86_tieable_integer_mode_p. */
19673 if (ix86_tieable_integer_mode_p (mode1)
19674 && ix86_tieable_integer_mode_p (mode2))
19677 /* MODE2 being XFmode implies fp stack or general regs, which means we
19678 can tie any smaller floating point modes to it. Note that we do not
19679 tie this with TFmode. */
19680 if (mode2 == XFmode)
19681 return mode1 == SFmode || mode1 == DFmode;
19683 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
19684 that we can tie it with SFmode. */
19685 if (mode2 == DFmode)
19686 return mode1 == SFmode;
19688 /* If MODE2 is only appropriate for an SSE register, then tie with
19689 any other mode acceptable to SSE registers. */
19690 if (GET_MODE_SIZE (mode2) == 16
19691 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
19692 return (GET_MODE_SIZE (mode1) == 16
19693 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
19695 /* If MODE2 is appropriate for an MMX register, then tie
19696 with any other mode acceptable to MMX registers. */
19697 if (GET_MODE_SIZE (mode2) == 8
19698 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
19699 return (GET_MODE_SIZE (mode1) == 8
19700 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
19705 /* Return the cost of moving data of mode MODE between a register of class
19706 CLASS and memory (IN nonzero: load, IN zero: store). A value of 2 is the
19707 default; this cost is relative to those in `REGISTER_MOVE_COST'.
19709 If moving between registers and memory is more expensive than
19710 between two registers, you should define this macro to express the
19713 Model also increased moving costs of QImode registers in non
19717 ix86_memory_move_cost (enum machine_mode mode, enum reg_class class, int in)
19719 if (FLOAT_CLASS_P (class))
19736 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
19738 if (SSE_CLASS_P (class))
19741 switch (GET_MODE_SIZE (mode))
19755 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
19757 if (MMX_CLASS_P (class))
19760 switch (GET_MODE_SIZE (mode))
19771 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
19773 switch (GET_MODE_SIZE (mode))
19777 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
19778 : ix86_cost->movzbl_load);
19780 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
19781 : ix86_cost->int_store[0] + 4);
19784 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
19786 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
19787 if (mode == TFmode)
19789 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
19790 * (((int) GET_MODE_SIZE (mode)
19791 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
19795 /* Compute a (partial) cost for rtx X. Return true if the complete
19796 cost has been computed, and false if subexpressions should be
19797 scanned. In either case, *TOTAL contains the cost result.
OUTER_CODE is handed on to rtx_cost whenever the cost of an operand
of X is folded into *TOTAL here. */
19800 ix86_rtx_costs (rtx x, int code, int outer_code, int *total)
19802 enum machine_mode mode = GET_MODE (x);
19810 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
19812 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
/* Symbolic constants under PIC: a LABEL_REF or a local SYMBOL_REF is
exempted by the tests below. The LABEL_REF test used to read
"!GET_CODE (x) != LABEL_REF", which compares 0 or 1 against the code
and is therefore always true. */
19814 else if (flag_pic && SYMBOLIC_CONST (x)
19816 || (GET_CODE (x) != LABEL_REF
19817 && (GET_CODE (x) != SYMBOL_REF
19818 || !SYMBOL_REF_LOCAL_P (x)))))
19825 if (mode == VOIDmode)
19828 switch (standard_80387_constant_p (x))
19833 default: /* Other constants */
19838 /* Start with (MEM (SYMBOL_REF)), since that's where
19839 it'll probably end up. Add a penalty for size. */
19840 *total = (COSTS_N_INSNS (1)
19841 + (flag_pic != 0 && !TARGET_64BIT)
19842 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
19848 /* Zero extension is often completely free on x86_64, so make
19849 it as cheap as possible. */
19850 if (TARGET_64BIT && mode == DImode
19851 && GET_MODE (XEXP (x, 0)) == SImode)
19853 else if (TARGET_ZERO_EXTEND_WITH_AND)
19854 *total = ix86_cost->add;
19856 *total = ix86_cost->movzx;
19860 *total = ix86_cost->movsx;
19864 if (CONST_INT_P (XEXP (x, 1))
19865 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
19867 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
19870 *total = ix86_cost->add;
19873 if ((value == 2 || value == 3)
19874 && ix86_cost->lea <= ix86_cost->shift_const)
19876 *total = ix86_cost->lea;
19886 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
19888 if (CONST_INT_P (XEXP (x, 1)))
19890 if (INTVAL (XEXP (x, 1)) > 32)
19891 *total = ix86_cost->shift_const + COSTS_N_INSNS (2);
19893 *total = ix86_cost->shift_const * 2;
19897 if (GET_CODE (XEXP (x, 1)) == AND)
19898 *total = ix86_cost->shift_var * 2;
19900 *total = ix86_cost->shift_var * 6 + COSTS_N_INSNS (2);
19905 if (CONST_INT_P (XEXP (x, 1)))
19906 *total = ix86_cost->shift_const;
19908 *total = ix86_cost->shift_var;
19913 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
19915 /* ??? SSE scalar cost should be used here. */
19916 *total = ix86_cost->fmul;
19919 else if (X87_FLOAT_MODE_P (mode))
19921 *total = ix86_cost->fmul;
19924 else if (FLOAT_MODE_P (mode))
19926 /* ??? SSE vector cost should be used here. */
19927 *total = ix86_cost->fmul;
19932 rtx op0 = XEXP (x, 0);
19933 rtx op1 = XEXP (x, 1);
19935 if (CONST_INT_P (XEXP (x, 1)))
19937 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
/* Count the set bits of the constant multiplier. */
19938 for (nbits = 0; value != 0; value &= value - 1)
19942 /* This is arbitrary. */
19945 /* Compute costs correctly for widening multiplication. Both code
tests must look at OP0: the size test dereferences XEXP (op0, 0),
which is only meaningful when OP0 itself is the extension. */
19946 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
19947 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
19948 == GET_MODE_SIZE (mode))
19950 int is_mulwiden = 0;
19951 enum machine_mode inner_mode = GET_MODE (op0);
19953 if (GET_CODE (op0) == GET_CODE (op1))
19954 is_mulwiden = 1, op1 = XEXP (op1, 0);
19955 else if (CONST_INT_P (op1))
19957 if (GET_CODE (op0) == SIGN_EXTEND)
19958 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
19961 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
19965 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
19968 *total = (ix86_cost->mult_init[MODE_INDEX (mode)]
19969 + nbits * ix86_cost->mult_bit
19970 + rtx_cost (op0, outer_code) + rtx_cost (op1, outer_code));
19979 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
19980 /* ??? SSE cost should be used here. */
19981 *total = ix86_cost->fdiv;
19982 else if (X87_FLOAT_MODE_P (mode))
19983 *total = ix86_cost->fdiv;
19984 else if (FLOAT_MODE_P (mode))
19985 /* ??? SSE vector cost should be used here. */
19986 *total = ix86_cost->fdiv;
19988 *total = ix86_cost->divide[MODE_INDEX (mode)];
19992 if (GET_MODE_CLASS (mode) == MODE_INT
19993 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
/* Sums of the forms (a * {2,4,8} + b) + c, a * {2,4,8} + b and
(a + b) + c are costed as a single lea plus their operands. */
19995 if (GET_CODE (XEXP (x, 0)) == PLUS
19996 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
19997 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
19998 && CONSTANT_P (XEXP (x, 1)))
20000 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
20001 if (val == 2 || val == 4 || val == 8)
20003 *total = ix86_cost->lea;
20004 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
20005 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
20007 *total += rtx_cost (XEXP (x, 1), outer_code);
20011 else if (GET_CODE (XEXP (x, 0)) == MULT
20012 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
20014 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
20015 if (val == 2 || val == 4 || val == 8)
20017 *total = ix86_cost->lea;
20018 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
20019 *total += rtx_cost (XEXP (x, 1), outer_code);
20023 else if (GET_CODE (XEXP (x, 0)) == PLUS)
20025 *total = ix86_cost->lea;
20026 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
20027 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
20028 *total += rtx_cost (XEXP (x, 1), outer_code);
20035 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
20037 /* ??? SSE cost should be used here. */
20038 *total = ix86_cost->fadd;
20041 else if (X87_FLOAT_MODE_P (mode))
20043 *total = ix86_cost->fadd;
20046 else if (FLOAT_MODE_P (mode))
20048 /* ??? SSE vector cost should be used here. */
20049 *total = ix86_cost->fadd;
20057 if (!TARGET_64BIT && mode == DImode)
/* An operand that is not itself DImode has its cost doubled. */
20059 *total = (ix86_cost->add * 2
20060 + (rtx_cost (XEXP (x, 0), outer_code)
20061 << (GET_MODE (XEXP (x, 0)) != DImode))
20062 + (rtx_cost (XEXP (x, 1), outer_code)
20063 << (GET_MODE (XEXP (x, 1)) != DImode)));
20069 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
20071 /* ??? SSE cost should be used here. */
20072 *total = ix86_cost->fchs;
20075 else if (X87_FLOAT_MODE_P (mode))
20077 *total = ix86_cost->fchs;
20080 else if (FLOAT_MODE_P (mode))
20082 /* ??? SSE vector cost should be used here. */
20083 *total = ix86_cost->fchs;
20089 if (!TARGET_64BIT && mode == DImode)
20090 *total = ix86_cost->add * 2;
20092 *total = ix86_cost->add;
20096 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
20097 && XEXP (XEXP (x, 0), 1) == const1_rtx
20098 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
20099 && XEXP (x, 1) == const0_rtx)
20101 /* This kind of construct is implemented using test[bwl].
20102 Treat it as if we had an AND. */
20103 *total = (ix86_cost->add
20104 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code)
20105 + rtx_cost (const1_rtx, outer_code));
20111 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
20116 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
20117 /* ??? SSE cost should be used here. */
20118 *total = ix86_cost->fabs;
20119 else if (X87_FLOAT_MODE_P (mode))
20120 *total = ix86_cost->fabs;
20121 else if (FLOAT_MODE_P (mode))
20122 /* ??? SSE vector cost should be used here. */
20123 *total = ix86_cost->fabs;
20127 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
20128 /* ??? SSE cost should be used here. */
20129 *total = ix86_cost->fsqrt;
20130 else if (X87_FLOAT_MODE_P (mode))
20131 *total = ix86_cost->fsqrt;
20132 else if (FLOAT_MODE_P (mode))
20133 /* ??? SSE vector cost should be used here. */
20134 *total = ix86_cost->fsqrt;
20138 if (XINT (x, 1) == UNSPEC_TP)
/* Counter that gives each stub's local labels (LPC$<n>, L<n>$lz) a
unique number. */
20149 static int current_machopic_label_num;
20151 /* Given a symbol name SYMB and the name STUB of its associated stub,
20152 write out the definition of the stub to FILE. */
20155 machopic_output_stub (FILE *file, const char *symb, const char *stub)
20157 unsigned int length;
20158 char *binder_name, *symbol_name, lazy_ptr_name[32];
20159 int label = ++current_machopic_label_num;
20161 /* For 64-bit we shouldn't get here. */
20162 gcc_assert (!TARGET_64BIT);
20164 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
20165 symb = (*targetm.strip_name_encoding) (symb);
/* The generated names get 32 bytes of slack beyond the source name. */
20167 length = strlen (stub);
20168 binder_name = alloca (length + 32);
20169 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
20171 length = strlen (symb);
20172 symbol_name = alloca (length + 32);
20173 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
20175 sprintf (lazy_ptr_name, "L%d$lz", label);
20178 switch_to_section (darwin_sections[machopic_picsymbol_stub_section]);
20180 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
20182 fprintf (file, "%s:\n", stub);
20183 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
20187 fprintf (file, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label, label);
20188 fprintf (file, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
20189 fprintf (file, "\tjmp\t*%%edx\n");
20192 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
20194 fprintf (file, "%s:\n", binder_name);
20198 fprintf (file, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
20199 fprintf (file, "\tpushl\t%%eax\n");
20202 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
20204 fprintf (file, "\tjmp\tdyld_stub_binding_helper\n");
/* Emit the lazy pointer itself; its initial value is the binder. */
20206 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
20207 fprintf (file, "%s:\n", lazy_ptr_name);
20208 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
20209 fprintf (file, "\t.long %s\n", binder_name);
/* End-of-file processing for Darwin on x86; the visible part delegates to
darwin_file_end. */
20213 darwin_x86_file_end (void)
20215 darwin_file_end ();
20218 #endif /* TARGET_MACHO */
20220 /* Order the registers for register allocator by filling in
reg_alloc_order. */
20223 x86_order_regs_for_local_alloc (void)
20228 /* First allocate the call-used general purpose registers. */
20229 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
20230 if (GENERAL_REGNO_P (i) && call_used_regs[i])
20231 reg_alloc_order [pos++] = i;
20233 /* Then the general purpose registers that are not call-used. */
20234 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
20235 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
20236 reg_alloc_order [pos++] = i;
20238 /* x87 registers come first in case we are doing FP math
20240 if (!TARGET_SSE_MATH)
20241 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
20242 reg_alloc_order [pos++] = i;
20244 /* SSE registers. */
20245 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
20246 reg_alloc_order [pos++] = i;
20247 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
20248 reg_alloc_order [pos++] = i;
20250 /* x87 registers. */
20251 if (TARGET_SSE_MATH)
20252 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
20253 reg_alloc_order [pos++] = i;
/* MMX registers. */
20255 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
20256 reg_alloc_order [pos++] = i;
20258 /* Initialize the rest of array as we do not allocate some registers
20260 while (pos < FIRST_PSEUDO_REGISTER)
20261 reg_alloc_order [pos++] = 0;
20264 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
20265 struct attribute_spec.handler. The attribute is dropped with a warning
(*NO_ADD_ATTRS set) when the target is not a struct or union type, or
when the opposite attribute is already present on the type. */
20267 ix86_handle_struct_attribute (tree *node, tree name,
20268 tree args ATTRIBUTE_UNUSED,
20269 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
20272 if (DECL_P (*node))
20274 if (TREE_CODE (*node) == TYPE_DECL)
20275 type = &TREE_TYPE (*node);
20280 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
20281 || TREE_CODE (*type) == UNION_TYPE)))
20283 warning (OPT_Wattributes, "%qs attribute ignored",
20284 IDENTIFIER_POINTER (name));
20285 *no_add_attrs = true;
/* ms_struct and gcc_struct are mutually exclusive on one type. */
20288 else if ((is_attribute_p ("ms_struct", name)
20289 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
20290 || ((is_attribute_p ("gcc_struct", name)
20291 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
20293 warning (OPT_Wattributes, "%qs incompatible attribute ignored",
20294 IDENTIFIER_POINTER (name));
20295 *no_add_attrs = true;
/* Return true if RECORD_TYPE should use the MS bitfield layout: either
TARGET_MS_BITFIELD_LAYOUT is on and the type is not marked "gcc_struct",
or the type is explicitly marked "ms_struct". */
20302 ix86_ms_bitfield_layout_p (tree record_type)
20304 return (TARGET_MS_BITFIELD_LAYOUT &&
20305 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
20306 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
20309 /* Returns an expression indicating where the this parameter is
20310 located on entry to the FUNCTION. The result is a REG or a MEM rtx. */
20313 x86_this_parameter (tree function)
20315 tree type = TREE_TYPE (function);
/* AGGR is set when aggregate_value_p holds for the function's return
type; it shifts THIS by one parameter register or one stack word. */
20316 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
20320 const int *parm_regs;
20322 if (TARGET_64BIT_MS_ABI)
20323 parm_regs = x86_64_ms_abi_int_parameter_registers;
20325 parm_regs = x86_64_int_parameter_registers;
20326 return gen_rtx_REG (DImode, parm_regs[aggr]);
20329 if (ix86_function_regparm (type, function) > 0
20330 && !type_has_variadic_args_p (type))
20333 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
20335 return gen_rtx_REG (SImode, regno);
/* Otherwise THIS lives on the stack, at offset 4 or, with AGGR, 8. */
20338 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, aggr ? 8 : 4));
20341 /* Determine whether x86_output_mi_thunk can succeed. THUNK and DELTA
are not consulted in the visible code; the answer depends on
VCALL_OFFSET and FUNCTION. */
20344 x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED,
20345 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
20346 HOST_WIDE_INT vcall_offset, tree function)
20348 /* 64-bit can handle anything. */
20352 /* For 32-bit, everything's fine if we have one free register. */
20353 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
20356 /* Need a free register for vcall_offset. */
20360 /* Need a free register for GOT references. */
20361 if (flag_pic && !(*targetm.binds_local_p) (function))
20364 /* Otherwise ok. */
20368 /* Output the assembler code for a thunk function. THUNK_DECL is the
20369 declaration for the thunk function itself, FUNCTION is the decl for
20370 the target function. DELTA is an immediate constant offset to be
20371 added to THIS. If VCALL_OFFSET is nonzero, the word at
20372 *(*this + vcall_offset) should be added to THIS. After the
adjustments the thunk jumps to FUNCTION. */
20375 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
20376 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
20377 HOST_WIDE_INT vcall_offset, tree function)
20380 rtx this = x86_this_parameter (function);
20383 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
20384 pull it in now and let DELTA benefit. */
20387 else if (vcall_offset)
20389 /* Put the this parameter into %eax. */
20391 xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
20392 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
20395 this_reg = NULL_RTX;
20397 /* Adjust the this parameter by a fixed constant. */
20400 xops[0] = GEN_INT (delta);
20401 xops[1] = this_reg ? this_reg : this;
/* A DELTA that is not a valid 64-bit general operand goes through
%r10 first. */
20404 if (!x86_64_general_operand (xops[0], DImode))
20406 tmp = gen_rtx_REG (DImode, R10_REG);
20408 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
20412 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
20415 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
20418 /* Adjust the this parameter by a value stored in the vtable. */
20422 tmp = gen_rtx_REG (DImode, R10_REG);
20425 int tmp_regno = 2 /* ECX */;
20426 if (lookup_attribute ("fastcall",
20427 TYPE_ATTRIBUTES (TREE_TYPE (function))))
20428 tmp_regno = 0 /* EAX */;
20429 tmp = gen_rtx_REG (SImode, tmp_regno);
20432 xops[0] = gen_rtx_MEM (Pmode, this_reg);
20435 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
20437 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
20439 /* Adjust the this parameter. */
20440 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
/* If TMP + VCALL_OFFSET is not a valid 64-bit address, load the offset
into %r11 and index with it. */
20441 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
20443 rtx tmp2 = gen_rtx_REG (DImode, R11_REG);
20444 xops[0] = GEN_INT (vcall_offset);
20446 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
20447 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
20449 xops[1] = this_reg;
20451 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
20453 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
20456 /* If necessary, drop THIS back to its stack slot. */
20457 if (this_reg && this_reg != this)
20459 xops[0] = this_reg;
20461 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
/* Finally, jump to FUNCTION; the form of the jump depends on the
target and on whether FUNCTION binds locally. */
20464 xops[0] = XEXP (DECL_RTL (function), 0);
20467 if (!flag_pic || (*targetm.binds_local_p) (function))
20468 output_asm_insn ("jmp\t%P0", xops);
20469 /* All thunks should be in the same object as their target,
20470 and thus binds_local_p should be true. */
20471 else if (TARGET_64BIT_MS_ABI)
20472 gcc_unreachable ();
20475 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
20476 tmp = gen_rtx_CONST (Pmode, tmp);
20477 tmp = gen_rtx_MEM (QImode, tmp);
20479 output_asm_insn ("jmp\t%A0", xops);
20484 if (!flag_pic || (*targetm.binds_local_p) (function))
20485 output_asm_insn ("jmp\t%P0", xops);
20490 rtx sym_ref = XEXP (DECL_RTL (function), 0);
20491 tmp = (gen_rtx_SYMBOL_REF
20493 machopic_indirection_name (sym_ref, /*stub_p=*/true)));
20494 tmp = gen_rtx_MEM (QImode, tmp);
20496 output_asm_insn ("jmp\t%0", xops);
20499 #endif /* TARGET_MACHO */
20501 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
20502 output_set_got (tmp, NULL_RTX);
20505 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
20506 output_asm_insn ("jmp\t{*}%1", xops);
/* Start-of-file output: run the default (and Darwin) file-start code, then
write the optional .version, __fltused and .intel_syntax directives to
asm_out_file. */
20512 x86_file_start (void)
20514 default_file_start ();
20516 darwin_file_start ();
20518 if (X86_FILE_START_VERSION_DIRECTIVE)
20519 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
20520 if (X86_FILE_START_FLTUSED)
20521 fputs ("\t.global\t__fltused\n", asm_out_file);
20522 if (ix86_asm_dialect == ASM_INTEL)
20523 fputs ("\t.intel_syntax\n", asm_out_file);
/* Return the alignment to use for FIELD, given COMPUTED as the alignment
already worked out for it. A field whose mode (the element mode, for an
array) is DFmode, DCmode, or an integer or complex integer mode is limited
to 32. NOTE(review): the result taken when TARGET_64BIT or
TARGET_ALIGN_DOUBLE holds is not visible here. */
20527 x86_field_alignment (tree field, int computed)
20529 enum machine_mode mode;
20530 tree type = TREE_TYPE (field);
20532 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
20534 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
20535 ? get_inner_array_type (type) : type);
20536 if (mode == DFmode || mode == DCmode
20537 || GET_MODE_CLASS (mode) == MODE_INT
20538 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
20539 return MIN (32, computed);
20543 /* Output assembler code to FILE to increment profiler label # LABELNO
20544 for profiling a function entry. Three sequences are emitted below:
one RIP-relative, one going through the GOT via %ebx, and one using
absolute addresses. The counter label is only loaded when
NO_PROFILE_COUNTERS is not defined. */
20546 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
20550 #ifndef NO_PROFILE_COUNTERS
/* A plain RIP-relative reference takes no '@': "sym@(%rip)" has an empty
relocation specifier and is not valid assembler syntax. */
20551 fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
20554 if (!TARGET_64BIT_MS_ABI && flag_pic)
20555 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
20557 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
20561 #ifndef NO_PROFILE_COUNTERS
20562 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
20563 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
20565 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
20569 #ifndef NO_PROFILE_COUNTERS
20570 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
20571 PROFILE_COUNT_REGISTER);
20573 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
20577 /* We don't have exact information about the insn sizes, but we may assume
20578 quite safely that we are informed about all 1 byte insns and memory
20579 address sizes. This is enough to eliminate unnecessary padding in
20583 min_insn_size (rtx insn)
20587 if (!INSN_P (insn) || !active_insn_p (insn))
20590 /* Discard alignments we have emitted and ADDR_VEC/ADDR_DIFF_VEC tables. */
20591 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
20592 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
20595 && (GET_CODE (PATTERN (insn)) == ADDR_VEC
20596 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
20599 /* Important case - calls are always 5 bytes.
20600 It is common to have many calls in a row. */
20602 && symbolic_reference_mentioned_p (PATTERN (insn))
20603 && !SIBLING_CALL_P (insn))
20605 if (get_attr_length (insn) <= 1)
20608 /* For normal instructions we may rely on the sizes of addresses
20609 and the presence of symbol to require 4 bytes of encoding.
20610 This is not the case for jumps where references are PC relative. */
20611 if (!JUMP_P (insn))
20613 l = get_attr_length_address (insn);
20614 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
20623 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
20627 ix86_avoid_jump_misspredicts (void)
20629 rtx insn, start = get_insns ();
20630 int nbytes = 0, njumps = 0;
20633 /* Look for all minimal intervals of instructions containing 4 jumps.
20634 The intervals are bounded by START and INSN. NBYTES is the total
20635 size of instructions in the interval including INSN and not including
20636 START. When the NBYTES is smaller than 16 bytes, it is possible
20637 that the end of START and INSN ends up in the same 16byte page.
20639 The smallest offset in the page INSN can start is the case where START
20640 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
20641 We add p2align to 16byte window with maxskip 15 - NBYTES + sizeof (INSN).
20643 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
20646 nbytes += min_insn_size (insn);
20648 fprintf(dump_file, "Insn %i estimated to %i bytes\n",
20649 INSN_UID (insn), min_insn_size (insn));
20651 && GET_CODE (PATTERN (insn)) != ADDR_VEC
20652 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
20660 start = NEXT_INSN (start);
20661 if ((JUMP_P (start)
20662 && GET_CODE (PATTERN (start)) != ADDR_VEC
20663 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
20665 njumps--, isjump = 1;
20668 nbytes -= min_insn_size (start);
20670 gcc_assert (njumps >= 0);
20672 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
20673 INSN_UID (start), INSN_UID (insn), nbytes);
20675 if (njumps == 3 && isjump && nbytes < 16)
20677 int padsize = 15 - nbytes + min_insn_size (insn);
20680 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
20681 INSN_UID (insn), padsize);
20682 emit_insn_before (gen_align (GEN_INT (padsize)), insn);
20687 /* AMD Athlon works faster
20688 when RET is not destination of conditional jump or directly preceded
20689 by other jump instruction. We avoid the penalty by inserting NOP just
20690 before the RET instructions in such cases. */
20692 ix86_pad_returns (void)
/* Walk every block that has an edge into the exit block. */
20697 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
20699 basic_block bb = e->src;
20700 rtx ret = BB_END (bb);
20702 bool replace = false;
/* Only blocks that are maybe-hot and end in a RETURN jump matter. */
20704 if (!JUMP_P (ret) || GET_CODE (PATTERN (ret)) != RETURN
20705 || !maybe_hot_bb_p (bb))
/* Find the nearest preceding active insn or label. */
20707 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
20708 if (active_insn_p (prev) || LABEL_P (prev))
20710 if (prev && LABEL_P (prev))
20715 FOR_EACH_EDGE (e, ei, bb->preds)
20716 if (EDGE_FREQUENCY (e) && e->src->index >= 0
20717 && !(e->flags & EDGE_FALLTHRU))
20722 prev = prev_active_insn (ret);
20724 && ((JUMP_P (prev) && any_condjump_p (prev))
20727 /* Empty functions get branch mispredict even when the jump destination
20728 is not visible to us. */
20729 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
20734 emit_insn_before (gen_return_internal_long (), ret);
20740 /* Implement machine specific optimizations. We implement padding of returns
20741 for K8 CPUs and a pass to avoid 4 jumps in a single 16 byte window. */
20745 if (TARGET_PAD_RETURNS && optimize && !optimize_size)
20746 ix86_pad_returns ();
20747 if (TARGET_FOUR_JUMP_LIMIT && optimize && !optimize_size)
20748 ix86_avoid_jump_misspredicts ();
20751 /* Return nonzero when INSN uses a QImode register that must be represented via REX prefix
20754 x86_extended_QIreg_mentioned_p (rtx insn)
20757 extract_insn_cached (insn);
20758 for (i = 0; i < recog_data.n_operands; i++)
20759 if (REG_P (recog_data.operand[i])
20760 && REGNO (recog_data.operand[i]) >= 4)
20765 /* Return nonzero when P points to register encoded via REX prefix,
i.e. one for which REX_INT_REGNO_P or REX_SSE_REGNO_P holds. DATA is unused.
20766 Called via for_each_rtx. */
20768 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
20770 unsigned int regno;
20773 regno = REGNO (*p);
20774 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
20777 /* Return true when the pattern of INSN mentions a register that must be encoded using REX
20780 x86_extended_reg_mentioned_p (rtx insn)
20782 return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
20785 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
20786 optabs would emit if we didn't have TFmode patterns. OPERANDS[1] is
the integer input; the result mode is taken from OUT. */
20789 x86_emit_floatuns (rtx operands[2])
20791 rtx neglab, donelab, i0, i1, f0, in, out;
20792 enum machine_mode mode, inmode;
20794 inmode = GET_MODE (operands[1]);
20795 gcc_assert (inmode == SImode || inmode == DImode);
20798 in = force_reg (inmode, operands[1]);
20799 mode = GET_MODE (out);
20800 neglab = gen_label_rtx ();
20801 donelab = gen_label_rtx ();
20802 f0 = gen_reg_rtx (mode);
/* Inputs that compare below zero as signed values take the NEGLAB path. */
20804 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
/* Otherwise convert IN directly. */
20806 expand_float (out, in, 0);
20808 emit_jump_insn (gen_jump (donelab));
20811 emit_label (neglab);
/* Halve IN with a logical shift, OR the dropped low bit back in,
convert the result, and double it. */
20813 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
20815 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
20817 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
20819 expand_float (f0, i0, 0);
20821 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
20823 emit_label (donelab);
20826 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
20827 of mode MODE with all elements equal to VAL. Return true if successful. */
20830 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
20831 rtx target, rtx val)
20833 enum machine_mode smode, wsmode, wvmode;
20848 val = force_reg (GET_MODE_INNER (mode), val);
20849 x = gen_rtx_VEC_DUPLICATE (mode, val);
20850 emit_insn (gen_rtx_SET (VOIDmode, target, x));
20856 if (TARGET_SSE || TARGET_3DNOW_A)
20858 val = gen_lowpart (SImode, val);
20859 x = gen_rtx_TRUNCATE (HImode, val);
20860 x = gen_rtx_VEC_DUPLICATE (mode, x);
20861 emit_insn (gen_rtx_SET (VOIDmode, target, x));
20883 /* Extend HImode to SImode using a paradoxical SUBREG. */
20884 tmp1 = gen_reg_rtx (SImode);
20885 emit_move_insn (tmp1, gen_lowpart (SImode, val));
20886 /* Insert the SImode value as low element of V4SImode vector. */
20887 tmp2 = gen_reg_rtx (V4SImode);
20888 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
20889 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
20890 CONST0_RTX (V4SImode),
20892 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
20893 /* Cast the V4SImode vector back to a V8HImode vector. */
20894 tmp1 = gen_reg_rtx (V8HImode);
20895 emit_move_insn (tmp1, gen_lowpart (V8HImode, tmp2));
20896 /* Duplicate the low short through the whole low SImode word. */
20897 emit_insn (gen_sse2_punpcklwd (tmp1, tmp1, tmp1));
20898 /* Cast the V8HImode vector back to a V4SImode vector. */
20899 tmp2 = gen_reg_rtx (V4SImode);
20900 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
20901 /* Replicate the low element of the V4SImode vector. */
20902 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
20903 /* Cast the V4SImode back to V8HImode, and store in target. */
20904 emit_move_insn (target, gen_lowpart (V8HImode, tmp2));
20915 /* Extend QImode to SImode using a paradoxical SUBREG. */
20916 tmp1 = gen_reg_rtx (SImode);
20917 emit_move_insn (tmp1, gen_lowpart (SImode, val));
20918 /* Insert the SImode value as low element of V4SImode vector. */
20919 tmp2 = gen_reg_rtx (V4SImode);
20920 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
20921 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
20922 CONST0_RTX (V4SImode),
20924 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
20925 /* Cast the V4SImode vector back to a V16QImode vector. */
20926 tmp1 = gen_reg_rtx (V16QImode);
20927 emit_move_insn (tmp1, gen_lowpart (V16QImode, tmp2));
20928 /* Duplicate the low byte through the whole low SImode word; this
takes two interleave steps. */
20929 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
20930 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
20931 /* Cast the V16QImode vector back to a V4SImode vector. */
20932 tmp2 = gen_reg_rtx (V4SImode);
20933 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
20934 /* Replicate the low element of the V4SImode vector. */
20935 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
20936 /* Cast the V4SImode back to V16QImode, and store in target. */
20937 emit_move_insn (target, gen_lowpart (V16QImode, tmp2));
20945 /* Replicate the value once into the next wider mode and recurse. */
20946 val = convert_modes (wsmode, smode, val, true);
20947 x = expand_simple_binop (wsmode, ASHIFT, val,
20948 GEN_INT (GET_MODE_BITSIZE (smode)),
20949 NULL_RTX, 1, OPTAB_LIB_WIDEN);
20950 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
20952 x = gen_reg_rtx (wvmode);
20953 if (!ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val))
20954 gcc_unreachable ();
20955 emit_move_insn (target, gen_lowpart (mode, x));
20963 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
20964 whose ONE_VAR element is VAR, and other elements are zero. Return true
20968 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
20969 rtx target, rtx var, int one_var)
20971 enum machine_mode vsimode;
20987 var = force_reg (GET_MODE_INNER (mode), var);
20988 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
20989 emit_insn (gen_rtx_SET (VOIDmode, target, x));
20994 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
20995 new_target = gen_reg_rtx (mode);
20997 new_target = target;
20998 var = force_reg (GET_MODE_INNER (mode), var);
20999 x = gen_rtx_VEC_DUPLICATE (mode, var);
21000 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
21001 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
21004 /* We need to shuffle the value to the correct position, so
21005 create a new pseudo to store the intermediate result. */
21007 /* With SSE2, we can use the integer shuffle insns. */
21008 if (mode != V4SFmode && TARGET_SSE2)
21010 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
21012 GEN_INT (one_var == 1 ? 0 : 1),
21013 GEN_INT (one_var == 2 ? 0 : 1),
21014 GEN_INT (one_var == 3 ? 0 : 1)));
/* Copy back when the shuffle was done in a scratch pseudo rather than
   in TARGET itself.  */
21015 if (target != new_target)
21016 emit_move_insn (target, new_target);
21020 /* Otherwise convert the intermediate result to V4SFmode and
21021 use the SSE1 shuffle instructions. */
21022 if (mode != V4SFmode)
21024 tmp = gen_reg_rtx (V4SFmode);
21025 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
21030 emit_insn (gen_sse_shufps_1 (tmp, tmp, tmp,
21032 GEN_INT (one_var == 1 ? 0 : 1),
21033 GEN_INT (one_var == 2 ? 0+4 : 1+4),
21034 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
/* Store the shuffled temporary into TARGET: through a V4SImode view
   when MODE is not V4SFmode, otherwise directly (and only if TMP is a
   different rtx from TARGET).  */
21036 if (mode != V4SFmode)
21037 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
21038 else if (tmp != target)
21039 emit_move_insn (target, tmp);
21041 else if (target != new_target)
21042 emit_move_insn (target, new_target);
/* VSIMODE is the SImode-element vector mode used for the recursive
   call below.  */
21047 vsimode = V4SImode;
21053 vsimode = V2SImode;
21059 /* Zero extend the variable element to SImode and recurse. */
21060 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
21062 x = gen_reg_rtx (vsimode);
21063 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
21065 gcc_unreachable ();
/* View the VSIMODE result in MODE and store it in TARGET.  */
21067 emit_move_insn (target, gen_lowpart (mode, x));
21075 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
21076 consisting of the values in VALS. It is known that all elements
21077 except ONE_VAR are constants. Return true if successful. */
21080 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
21081 rtx target, rtx vals, int one_var)
21083 rtx var = XVECEXP (vals, 0, one_var);
21084 enum machine_mode wmode;
/* Build CONST_VEC: a copy of VALS with the ONE_VAR slot replaced by
   zero, wrapped as a CONST_VECTOR of MODE.  */
21087 const_vec = copy_rtx (vals);
21088 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
21089 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
21097 /* For the two element vectors, it's just as easy to use
21098 the general case. */
21114 /* There's no way to set one QImode entry easily. Combine
21115 the variable value with its adjacent constant value, and
21116 promote to an HImode set. */
21117 x = XVECEXP (vals, 0, one_var ^ 1);
/* First arrangement: VAR is zero-extended and shifted into the high
   byte; the adjacent constant is masked down to the low byte.  */
21120 var = convert_modes (HImode, QImode, var, true);
21121 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
21122 NULL_RTX, 1, OPTAB_LIB_WIDEN);
21123 x = GEN_INT (INTVAL (x) & 0xff);
/* Second arrangement: VAR stays in the low byte; the adjacent constant
   is shifted into the high byte.  */
21127 var = convert_modes (HImode, QImode, var, true);
21128 x = gen_int_mode (INTVAL (x) << 8, HImode);
/* OR the constant half in, unless it is zero.  */
21130 if (x != const0_rtx)
21131 var = expand_simple_binop (HImode, IOR, var, x, var,
21132 1, OPTAB_LIB_WIDEN);
/* Load the constant vector viewed in WMODE, set the combined HImode
   value at index ONE_VAR / 2, and copy the result back in MODE.  */
21134 x = gen_reg_rtx (wmode);
21135 emit_move_insn (x, gen_lowpart (wmode, const_vec));
21136 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
21138 emit_move_insn (target, gen_lowpart (mode, x));
/* Load the constant vector into TARGET, then overwrite element ONE_VAR
   with VAR.  */
21145 emit_move_insn (target, const_vec);
21146 ix86_expand_vector_set (mmx_ok, target, var, one_var);
21150 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
21151 all values variable, and none identical. */
21154 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
21155 rtx target, rtx vals)
21157 enum machine_mode half_mode = GET_MODE_INNER (mode);
21158 rtx op0 = NULL, op1 = NULL;
21159 bool use_vec_concat = false;
21165 if (!mmx_ok && !TARGET_SSE)
21171 /* For the two element vectors, we always implement VEC_CONCAT. */
21172 op0 = XVECEXP (vals, 0, 0);
21173 op1 = XVECEXP (vals, 0, 1);
21174 use_vec_concat = true;
21178 half_mode = V2SFmode;
21181 half_mode = V2SImode;
21187 /* For V4SF and V4SI, we implement a concat of two V2 vectors.
21188 Recurse to load the two halves. */
21190 op0 = gen_reg_rtx (half_mode);
21191 v = gen_rtvec (2, XVECEXP (vals, 0, 0), XVECEXP (vals, 0, 1));
21192 ix86_expand_vector_init (false, op0, gen_rtx_PARALLEL (half_mode, v));
21194 op1 = gen_reg_rtx (half_mode);
21195 v = gen_rtvec (2, XVECEXP (vals, 0, 2), XVECEXP (vals, 0, 3));
21196 ix86_expand_vector_init (false, op1, gen_rtx_PARALLEL (half_mode, v));
21198 use_vec_concat = true;
21209 gcc_unreachable ();
/* Concat strategy: make sure both halves are register operands of
   HALF_MODE and emit one VEC_CONCAT into TARGET.  */
21212 if (use_vec_concat)
21214 if (!register_operand (op0, half_mode))
21215 op0 = force_reg (half_mode, op0);
21216 if (!register_operand (op1, half_mode))
21217 op1 = force_reg (half_mode, op1);
21219 emit_insn (gen_rtx_SET (VOIDmode, target,
21220 gen_rtx_VEC_CONCAT (mode, op0, op1)));
/* Integer strategy: pack the elements into word_mode values using
   shifts and ORs, then move the words into the vector.  */
21224 int i, j, n_elts, n_words, n_elt_per_word;
21225 enum machine_mode inner_mode;
21226 rtx words[4], shift;
21228 inner_mode = GET_MODE_INNER (mode);
21229 n_elts = GET_MODE_NUNITS (mode);
21230 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
21231 n_elt_per_word = n_elts / n_words;
21232 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
21234 for (i = 0; i < n_words; ++i)
21236 rtx word = NULL_RTX;
/* Visit the elements of word I from the highest index down; each step
   shifts WORD left by one element width before ORing in the next
   element, so the lowest-indexed element ends up in the low bits.  */
21238 for (j = 0; j < n_elt_per_word; ++j)
21240 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
21241 elt = convert_modes (word_mode, inner_mode, elt, true);
21247 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
21248 word, 1, OPTAB_LIB_WIDEN);
21249 word = expand_simple_binop (word_mode, IOR, word, elt,
21250 word, 1, OPTAB_LIB_WIDEN);
21258 emit_move_insn (target, gen_lowpart (mode, words[0]));
21259 else if (n_words == 2)
/* Two words: assemble them in a scratch register of MODE through its
   low and high word_mode parts.  The CLOBBER precedes the two partial
   stores.  */
21261 rtx tmp = gen_reg_rtx (mode);
21262 emit_insn (gen_rtx_CLOBBER (VOIDmode, tmp));
21263 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
21264 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
21265 emit_move_insn (target, tmp);
21267 else if (n_words == 4)
/* Four words: treat them as the elements of a V4SImode vector, recurse,
   and view the result in MODE.  */
21269 rtx tmp = gen_reg_rtx (V4SImode);
21270 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
21271 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
21272 emit_move_insn (target, gen_lowpart (mode, tmp));
21275 gcc_unreachable ();
21279 /* Initialize vector TARGET via VALS. Suppress the use of MMX
21280 instructions unless MMX_OK is true. */
21283 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
21285 enum machine_mode mode = GET_MODE (target);
21286 enum machine_mode inner_mode = GET_MODE_INNER (mode);
21287 int n_elts = GET_MODE_NUNITS (mode);
21288 int n_var = 0, one_var = -1;
21289 bool all_same = true, all_const_zero = true;
/* Classify the elements: count the non-constant ones (ONE_VAR keeps the
   index of the last one seen), note whether any constant differs from
   zero, and compare every element against element 0.  */
21293 for (i = 0; i < n_elts; ++i)
21295 x = XVECEXP (vals, 0, i);
21296 if (!CONSTANT_P (x))
21297 n_var++, one_var = i;
21298 else if (x != CONST0_RTX (inner_mode))
21299 all_const_zero = false;
21300 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
21304 /* Constants are best loaded from the constant pool. */
21307 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
21311 /* If all values are identical, broadcast the value. */
21313 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
21314 XVECEXP (vals, 0, 0)))
21317 /* Values where only one field is non-constant are best loaded from
21318 the pool and overwritten via move later. */
21322 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
21323 XVECEXP (vals, 0, one_var),
21327 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
/* Fall back to the general expander.  */
21331 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
/* Store the scalar VAL into element ELT of the vector TARGET.  The
   strategy is chosen per vector mode; the two generic endings are a
   VEC_MERGE of a broadcast of VAL, and a round trip through a stack
   temporary.  */
21335 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
21337 enum machine_mode mode = GET_MODE (target);
21338 enum machine_mode inner_mode = GET_MODE_INNER (mode);
21339 bool use_vec_merge = false;
/* Read the other element (index 1 - ELT) into a scalar temporary and
   rebuild the vector with a VEC_CONCAT.  */
21348 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
21349 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
21351 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
21353 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
21354 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
21360 use_vec_merge = TARGET_SSE4_1;
21368 /* For the two element vectors, we implement a VEC_CONCAT with
21369 the extraction of the other element. */
21371 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
21372 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
21375 op0 = val, op1 = tmp;
21377 op0 = tmp, op1 = val;
21379 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
21380 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
21385 use_vec_merge = TARGET_SSE4_1;
21392 use_vec_merge = true;
21396 /* tmp = target = A B C D */
21397 tmp = copy_to_reg (target);
21398 /* target = A A B B */
21399 emit_insn (gen_sse_unpcklps (target, target, target));
21400 /* target = X A B B */
21401 ix86_expand_vector_set (false, target, val, 0);
21402 /* target = A X C D */
21403 emit_insn (gen_sse_shufps_1 (target, target, tmp,
21404 GEN_INT (1), GEN_INT (0),
21405 GEN_INT (2+4), GEN_INT (3+4)));
21409 /* tmp = target = A B C D */
21410 tmp = copy_to_reg (target);
21411 /* tmp = X B C D */
21412 ix86_expand_vector_set (false, tmp, val, 0);
21413 /* target = A B X D */
21414 emit_insn (gen_sse_shufps_1 (target, target, tmp,
21415 GEN_INT (0), GEN_INT (1),
21416 GEN_INT (0+4), GEN_INT (3+4)));
21420 /* tmp = target = A B C D */
21421 tmp = copy_to_reg (target);
21422 /* tmp = X B C D */
21423 ix86_expand_vector_set (false, tmp, val, 0);
21424 /* target = A B C X */
21425 emit_insn (gen_sse_shufps_1 (target, target, tmp,
21426 GEN_INT (0), GEN_INT (1),
21427 GEN_INT (2+4), GEN_INT (0+4)));
21431 gcc_unreachable ();
21436 use_vec_merge = TARGET_SSE4_1;
21440 /* Element 0 handled by vec_merge below. */
21443 use_vec_merge = true;
21449 /* With SSE2, use integer shuffles to swap element 0 and ELT,
21450 store into element 0, then shuffle them back. */
/* ORDER is the identity selection with entries 0 and ELT exchanged, so
   applying the same shuffle a second time undoes the swap.  */
21454 order[0] = GEN_INT (elt);
21455 order[1] = const1_rtx;
21456 order[2] = const2_rtx;
21457 order[3] = GEN_INT (3);
21458 order[elt] = const0_rtx;
21460 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
21461 order[1], order[2], order[3]));
21463 ix86_expand_vector_set (false, target, val, 0);
21465 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
21466 order[1], order[2], order[3]));
21470 /* For SSE1, we have to reuse the V4SF code. */
21471 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
21472 gen_lowpart (SFmode, val), elt);
21477 use_vec_merge = TARGET_SSE2;
21480 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
21484 use_vec_merge = TARGET_SSE4_1;
/* VEC_MERGE ending: merge a broadcast of VAL into TARGET under the
   mask 1 << ELT.  */
21494 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
21495 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
21496 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
/* Memory ending: store TARGET to a stack temporary, overwrite the
   element at byte offset ELT * element size, and load the vector
   back.  */
21500 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
21502 emit_move_insn (mem, target);
21504 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
21505 emit_move_insn (tmp, val);
21507 emit_move_insn (target, mem);
/* Extract element ELT of the vector VEC into the scalar TARGET.  The
   two generic endings are a VEC_SELECT of the element, and a round trip
   through a stack temporary.  */
21512 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
21514 enum machine_mode mode = GET_MODE (vec);
21515 enum machine_mode inner_mode = GET_MODE_INNER (mode);
21516 bool use_vec_extr = false;
21529 use_vec_extr = true;
21533 use_vec_extr = TARGET_SSE4_1;
/* Select element ELT in every position of a fresh register.  */
21545 tmp = gen_reg_rtx (mode);
21546 emit_insn (gen_sse_shufps_1 (tmp, vec, vec,
21547 GEN_INT (elt), GEN_INT (elt),
21548 GEN_INT (elt+4), GEN_INT (elt+4)));
21552 tmp = gen_reg_rtx (mode);
21553 emit_insn (gen_sse_unpckhps (tmp, vec, vec));
21557 gcc_unreachable ();
21560 use_vec_extr = true;
21565 use_vec_extr = TARGET_SSE4_1;
/* Integer counterpart of the shuffle above, using pshufd.  */
21579 tmp = gen_reg_rtx (mode);
21580 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
21581 GEN_INT (elt), GEN_INT (elt),
21582 GEN_INT (elt), GEN_INT (elt)));
21586 tmp = gen_reg_rtx (mode);
21587 emit_insn (gen_sse2_punpckhdq (tmp, vec, vec));
21591 gcc_unreachable ();
21594 use_vec_extr = true;
21599 /* For SSE1, we have to reuse the V4SF code. */
21600 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
21601 gen_lowpart (V4SFmode, vec), elt);
21607 use_vec_extr = TARGET_SSE2;
21610 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
21614 use_vec_extr = TARGET_SSE4_1;
21618 /* ??? Could extract the appropriate HImode element and shift. */
/* VEC_SELECT ending: select element ELT of VEC in INNER_MODE.  */
21625 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
21626 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
21628 /* Let the rtl optimizers know about the zero extension performed. */
21629 if (inner_mode == QImode || inner_mode == HImode)
21631 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
21632 target = gen_lowpart (SImode, target);
21635 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
/* Memory ending: store VEC to a stack temporary and load the element
   from byte offset ELT * element size.  */
21639 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
21641 emit_move_insn (mem, vec);
21643 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
21644 emit_move_insn (target, tmp);
21648 /* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
21649 pattern to reduce; DEST is the destination; IN is the input vector. */
21652 ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
21654 rtx tmp1, tmp2, tmp3;
21656 tmp1 = gen_reg_rtx (V4SFmode);
21657 tmp2 = gen_reg_rtx (V4SFmode);
21658 tmp3 = gen_reg_rtx (V4SFmode);
/* First step: tmp1 = movhlps (IN, IN), then tmp2 = FN (tmp1, IN).  */
21660 emit_insn (gen_sse_movhlps (tmp1, in, in));
21661 emit_insn (fn (tmp2, tmp1, in));
/* Second step: tmp3 selects element 1 of tmp2 in every position, then
   DEST = FN (tmp2, tmp3).  */
21663 emit_insn (gen_sse_shufps_1 (tmp3, tmp2, tmp2,
21664 GEN_INT (1), GEN_INT (1),
21665 GEN_INT (1+4), GEN_INT (1+4)));
21666 emit_insn (fn (dest, tmp2, tmp3));
21669 /* Target hook for scalar_mode_supported_p. */
21671 ix86_scalar_mode_supported_p (enum machine_mode mode)
21673 if (DECIMAL_FLOAT_MODE_P (mode))
/* TFmode is reported as supported exactly when TARGET_64BIT is set.  */
21675 else if (mode == TFmode)
21676 return TARGET_64BIT;
/* Every other mode is decided by the default hook.  */
21678 return default_scalar_mode_supported_p (mode);
21681 /* Implements target hook vector_mode_supported_p. */
21683 ix86_vector_mode_supported_p (enum machine_mode mode)
/* Each test pairs an ISA enable flag with the matching register-mode
   validity macro applied to MODE.  */
21685 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
21687 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
21689 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
21691 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
21696 /* Worker function for TARGET_MD_ASM_CLOBBERS.
21698 We do this in the new i386 backend to maintain source compatibility
21699 with the old cc0-based compiler. */
21702 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
21703 tree inputs ATTRIBUTE_UNUSED,
/* Add string entries for "flags" and "fpsr" to CLOBBERS.  */
21706 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
21708 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
21713 /* Implements target vector targetm.asm.encode_section_info. This
21714 is not used by netware. */
21716 static void ATTRIBUTE_UNUSED
21717 ix86_encode_section_info (tree decl, rtx rtl, int first)
/* Run the generic encoding first, then add the target-specific flag.  */
21719 default_encode_section_info (decl, rtl, first);
/* A static or external variable for which ix86_in_large_data_p holds
   gets SYMBOL_FLAG_FAR_ADDR set on its symbol.  */
21721 if (TREE_CODE (decl) == VAR_DECL
21722 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
21723 && ix86_in_large_data_p (decl))
21724 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
21727 /* Worker function for REVERSE_CONDITION. */
21730 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
/* CCFPmode and CCFPUmode take the maybe-unordered reversal; every other
   mode takes the plain reversal.  */
21732 return (mode != CCFPmode && mode != CCFPUmode
21733 ? reverse_condition (code)
21734 : reverse_condition_maybe_unordered (code));
21737 /* Output code to perform an x87 FP register move, from OPERANDS[1]
21741 output_387_reg_move (rtx insn, rtx *operands)
21743 if (REG_P (operands[0]))
21745 if (REG_P (operands[1])
21746 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
21748 if (REGNO (operands[0]) == FIRST_STACK_REG)
21749 return output_387_ffreep (operands, 0);
21750 return "fstp\t%y0";
21752 if (STACK_TOP_P (operands[0]))
21753 return "fld%z1\t%y1";
21756 else if (MEM_P (operands[0]))
21758 gcc_assert (REG_P (operands[1]));
21759 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
21760 return "fstp%z0\t%y0";
21763 /* There is no non-popping store to memory for XFmode.
21764 So if we need one, follow the store with a load. */
21765 if (GET_MODE (operands[0]) == XFmode)
21766 return "fstp%z0\t%y0\n\tfld%z0\t%y0";
/* Any other mode uses the plain fst template.  */
21768 return "fst%z0\t%y0";
21775 /* Output code to perform a conditional jump to LABEL, if C2 flag in
21776 FP status register is set. */
21779 ix86_emit_fp_unordered_jump (rtx label)
21781 rtx reg = gen_reg_rtx (HImode);
/* Emit fnstsw into the HImode pseudo REG.  */
21784 emit_insn (gen_x86_fnstsw_1 (reg));
/* sahf variant: emit sahf on REG and test UNORDERED on the flags
   register in CCmode.  */
21786 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_size))
21788 emit_insn (gen_x86_sahf_1 (reg));
21790 temp = gen_rtx_REG (CCmode, FLAGS_REG);
21791 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
/* Otherwise test REG against the mask 0x04 with the testqi_ext pattern
   and branch on NE.  */
21795 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
21797 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
21798 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
21801 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
21802 gen_rtx_LABEL_REF (VOIDmode, label),
21804 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
21806 emit_jump_insn (temp);
/* Attach a branch probability of 10% to the jump.  */
21807 predict_jump (REG_BR_PROB_BASE * 10 / 100);
21810 /* Output code to perform a log1p XFmode calculation. */
21812 void ix86_emit_i387_log1p (rtx op0, rtx op1)
21814 rtx label1 = gen_label_rtx ();
21815 rtx label2 = gen_label_rtx ();
21817 rtx tmp = gen_reg_rtx (XFmode);
21818 rtx tmp2 = gen_reg_rtx (XFmode);
/* Compare fabs (op1) against the threshold constant (numerically
   1 - sqrt (2) / 2) and jump to label1 when it is greater or equal.  */
21820 emit_insn (gen_absxf2 (tmp, op1));
21821 emit_insn (gen_cmpxf (tmp,
21822 CONST_DOUBLE_FROM_REAL_VALUE (
21823 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
21825 emit_jump_insn (gen_bge (label1));
/* Below the threshold: use the fyl2xp1 pattern directly on op1.  */
21827 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
21828 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
21829 emit_jump (label2);
/* At or above the threshold: form 1.0 + op1 explicitly and use the
   fyl2x pattern on the sum.  */
21831 emit_label (label1);
21832 emit_move_insn (tmp, CONST1_RTX (XFmode));
21833 emit_insn (gen_addxf3 (tmp, op1, tmp));
21834 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
21835 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
21837 emit_label (label2);
21840 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. A section named
   ".eh_frame" gets a .section directive that carries the "@unwind"
   marker; every other section is handed to
   default_elf_asm_named_section. */
21842 static void ATTRIBUTE_UNUSED
21843 i386_solaris_elf_named_section (const char *name, unsigned int flags,
21846 /* With Binutils 2.15, the "@unwind" marker must be specified on
21847 every occurrence of the ".eh_frame" section, not just the first
21850 && strcmp (name, ".eh_frame") == 0)
21852 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
21853 flags & SECTION_WRITE ? "aw" : "a");
21856 default_elf_asm_named_section (name, flags, decl);
21859 /* Return the mangling of TYPE if it is an extended fundamental type. */
21861 static const char *
21862 ix86_mangle_fundamental_type (tree type)
/* The decision is made on the machine mode of TYPE.  */
21864 switch (TYPE_MODE (type))
21867 /* __float128 is "g". */
21870 /* "long double" or __float80 is "e". */
21877 /* For 32-bit code we can save PIC register setup by using
21878 __stack_chk_fail_local hidden function instead of calling
21879 __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC
21880 register, so it is better to call __stack_chk_fail directly. */
21883 ix86_stack_protect_fail (void)
/* TARGET_64BIT selects the external variant; otherwise the hidden
   variant is used.  */
21885 return TARGET_64BIT
21886 ? default_external_stack_protect_fail ()
21887 : default_hidden_stack_protect_fail ();
21890 /* Select a format to encode pointers in exception handling data. CODE
21891 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
21892 true if the symbol may be affected by dynamic relocations.
21894 ??? All x86 object file formats are capable of representing this.
21895 After all, the relocation needed is the same as for the call insn.
21896 Whether or not a particular assembler allows us to enter such, I
21897 guess we'll have to see. */
21899 asm_preferred_eh_data_format (int code, int global)
/* pc-relative encodings: start from an 8-byte signed value and narrow
   it to 4 bytes when one of the listed conditions holds.  GLOBAL
   additionally makes the encoding indirect.  */
21903 int type = DW_EH_PE_sdata8;
21905 || ix86_cmodel == CM_SMALL_PIC
21906 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
21907 type = DW_EH_PE_sdata4;
21908 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
/* Absolute encodings: 4-byte unsigned for CM_SMALL, and for CM_MEDIUM
   when CODE is nonzero; a full absolute pointer otherwise.  */
21910 if (ix86_cmodel == CM_SMALL
21911 || (ix86_cmodel == CM_MEDIUM && code))
21912 return DW_EH_PE_udata4;
21913 return DW_EH_PE_absptr;
21916 /* Expand copysign from SIGN to the positive value ABS_VALUE
21917 storing in RESULT. If MASK is non-null, it shall be a mask to mask out
21920 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
21922 enum machine_mode mode = GET_MODE (sign);
21923 rtx sgn = gen_reg_rtx (mode);
21924 if (mask == NULL_RTX)
21926 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), false);
21927 if (!VECTOR_MODE_P (mode))
21929 /* We need to generate a scalar mode mask in this case. */
21930 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
21931 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
21932 mask = gen_reg_rtx (mode);
21933 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
21937 mask = gen_rtx_NOT (mode, mask);
/* SGN = MASK & SIGN, then RESULT = ABS_VALUE | SGN.  */
21938 emit_insn (gen_rtx_SET (VOIDmode, sgn,
21939 gen_rtx_AND (mode, mask, sign)));
21940 emit_insn (gen_rtx_SET (VOIDmode, result,
21941 gen_rtx_IOR (mode, abs_value, sgn)));
21944 /* Expand fabs (OP0) and return a new rtx that holds the result. The
21945 mask for masking out the sign-bit is stored in *SMASK, if that is
21948 ix86_expand_sse_fabs (rtx op0, rtx *smask)
21950 enum machine_mode mode = GET_MODE (op0);
21953 xa = gen_reg_rtx (mode);
21954 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), true);
21955 if (!VECTOR_MODE_P (mode))
21957 /* We need to generate a scalar mode mask in this case. */
21958 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
21959 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
21960 mask = gen_reg_rtx (mode);
21961 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
/* XA = OP0 & MASK.  */
21963 emit_insn (gen_rtx_SET (VOIDmode, xa,
21964 gen_rtx_AND (mode, op0, mask)));
21972 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
21973 swapping the operands if SWAP_OPERANDS is true. The expanded
21974 code is a forward jump to a newly created label in case the
21975 comparison is true. The generated label rtx is returned. */
21977 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
21978 bool swap_operands)
/* Emit the compare into the flags register in CCFPUmode, then a
   conditional jump on CODE to the fresh label.  */
21989 label = gen_label_rtx ();
21990 tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
21991 emit_insn (gen_rtx_SET (VOIDmode, tmp,
21992 gen_rtx_COMPARE (CCFPUmode, op0, op1)));
21993 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
21994 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
21995 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
21996 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
/* Record the target label on the emitted jump insn.  */
21997 JUMP_LABEL (tmp) = label;
22002 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
22003 using comparison code CODE. Operands are swapped for the comparison if
22004 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
22006 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
22007 bool swap_operands)
22009 enum machine_mode mode = GET_MODE (op0);
22010 rtx mask = gen_reg_rtx (mode);
/* DFmode uses the SSE2 maskcmpdf3 pattern; the other call emits the
   SSE maskcmpsf3 pattern.  */
22019 if (mode == DFmode)
22020 emit_insn (gen_sse2_maskcmpdf3 (mask, op0, op1,
22021 gen_rtx_fmt_ee (code, mode, op0, op1)));
22023 emit_insn (gen_sse_maskcmpsf3 (mask, op0, op1,
22024 gen_rtx_fmt_ee (code, mode, op0, op1)));
22029 /* Generate and return a rtx of mode MODE for 2**n where n is the number
22030 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
22032 ix86_gen_TWO52 (enum machine_mode mode)
22034 REAL_VALUE_TYPE TWO52r;
/* The exponent is 52 for DFmode and 23 otherwise; the constant is
   forced into a register of MODE.  */
22037 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
22038 TWO52 = const_double_from_real_value (TWO52r, mode);
22039 TWO52 = force_reg (mode, TWO52);
22044 /* Expand SSE sequence for computing lround from OP1 storing
22047 ix86_expand_lround (rtx op0, rtx op1)
22049 /* C code for the stuff we're doing below:
22050 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
22053 enum machine_mode mode = GET_MODE (op1);
22054 const struct real_format *fmt;
22055 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
22058 /* load nextafter (0.5, 0.0) */
/* pred_half = 0.5 - 2**(-p - 1), where p is the P field of the real
   format of MODE.  */
22059 fmt = REAL_MODE_FORMAT (mode);
22060 real_2expN (&half_minus_pred_half, -(fmt->p) - 1);
22061 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
22063 /* adj = copysign (nextafter (0.5, 0.0), op1) */
22064 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
22065 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
22067 /* adj = op1 + adj */
22068 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
22070 /* op0 = (imode)adj */
22071 expand_fix (op0, adj, 0);
22074 /* Expand SSE2 sequence for computing lfloor or lceil from OPERAND1 storing
22077 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
22079 /* C code for the stuff we're doing below (for do_floor):
22081 xi -= (double)xi > op1 ? 1 : 0;
22084 enum machine_mode fmode = GET_MODE (op1);
22085 enum machine_mode imode = GET_MODE (op0);
22086 rtx ireg, freg, label, tmp;
22088 /* reg = (long)op1 */
22089 ireg = gen_reg_rtx (imode);
22090 expand_fix (ireg, op1, 0);
22092 /* freg = (double)reg */
22093 freg = gen_reg_rtx (fmode);
22094 expand_float (freg, ireg, 0);
22096 /* ireg = (freg > op1) ? ireg - 1 : ireg */
22097 label = ix86_expand_sse_compare_and_jump (UNLE,
22098 freg, op1, !do_floor);
/* Executed only when the jump above is not taken: adjust IREG by one,
   downwards for DO_FLOOR and upwards otherwise.  */
22099 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
22100 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
22101 emit_move_insn (ireg, tmp);
22103 emit_label (label);
22104 LABEL_NUSES (label) = 1;
22106 emit_move_insn (op0, ireg);
22109 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
22110 result in OPERAND0. */
22112 ix86_expand_rint (rtx operand0, rtx operand1)
22114 /* C code for the stuff we're doing below:
22115 xa = fabs (operand1);
22116 if (!isless (xa, 2**52))
22118 xa = xa + 2**52 - 2**52;
22119 return copysign (xa, operand1);
22121 enum machine_mode mode = GET_MODE (operand0);
22122 rtx res, xa, label, TWO52, mask;
22124 res = gen_reg_rtx (mode);
22125 emit_move_insn (res, operand1);
22127 /* xa = abs (operand1) */
22128 xa = ix86_expand_sse_fabs (res, &mask);
22130 /* if (!isless (xa, TWO52)) goto label; */
22131 TWO52 = ix86_gen_TWO52 (mode);
22132 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
/* xa = xa + TWO52 - TWO52; */
22134 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
22135 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
/* res = copysign (xa, res), reusing the mask returned by the fabs
   expansion above.  */
22137 ix86_sse_copysign_to_positive (res, xa, res, mask);
22139 emit_label (label);
22140 LABEL_NUSES (label) = 1;
22142 emit_move_insn (operand0, res);
22145 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
22148 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
22150 /* C code for the stuff we expand below.
22151 double xa = fabs (x), x2;
22152 if (!isless (xa, TWO52))
22154 xa = xa + TWO52 - TWO52;
22155 x2 = copysign (xa, x);
22164 enum machine_mode mode = GET_MODE (operand0);
22165 rtx xa, TWO52, tmp, label, one, res, mask;
22167 TWO52 = ix86_gen_TWO52 (mode);
22169 /* Temporary for holding the result, initialized to the input
22170 operand to ease control flow. */
22171 res = gen_reg_rtx (mode);
22172 emit_move_insn (res, operand1);
22174 /* xa = abs (operand1) */
22175 xa = ix86_expand_sse_fabs (res, &mask);
22177 /* if (!isless (xa, TWO52)) goto label; */
22178 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
22180 /* xa = xa + TWO52 - TWO52; */
22181 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
22182 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
22184 /* xa = copysign (xa, operand1) */
22185 ix86_sse_copysign_to_positive (xa, xa, res, mask);
22187 /* generate 1.0 or -1.0 */
22188 one = force_reg (mode,
22189 const_double_from_real_value (do_floor
22190 ? dconst1 : dconstm1, mode));
22192 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
22193 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
/* AND the comparison mask with ONE to obtain the amount to subtract.  */
22194 emit_insn (gen_rtx_SET (VOIDmode, tmp,
22195 gen_rtx_AND (mode, one, tmp)));
22196 /* We always need to subtract here to preserve signed zero. */
22197 tmp = expand_simple_binop (mode, MINUS,
22198 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
22199 emit_move_insn (res, tmp);
22201 emit_label (label);
22202 LABEL_NUSES (label) = 1;
22204 emit_move_insn (operand0, res);
22207 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
22210 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
22212 /* C code for the stuff we expand below.
22213 double xa = fabs (x), x2;
22214 if (!isless (xa, TWO52))
22216 x2 = (double)(long)x;
22223 if (HONOR_SIGNED_ZEROS (mode))
22224 return copysign (x2, x);
22227 enum machine_mode mode = GET_MODE (operand0);
22228 rtx xa, xi, TWO52, tmp, label, one, res, mask;
22230 TWO52 = ix86_gen_TWO52 (mode);
22232 /* Temporary for holding the result, initialized to the input
22233 operand to ease control flow. */
22234 res = gen_reg_rtx (mode);
22235 emit_move_insn (res, operand1);
22237 /* xa = abs (operand1) */
22238 xa = ix86_expand_sse_fabs (res, &mask);
22240 /* if (!isless (xa, TWO52)) goto label; */
22241 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
22243 /* xa = (double)(long)x */
22244 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
22245 expand_fix (xi, res, 0);
22246 expand_float (xa, xi, 0);
/* ONE holds the constant 1.0 in a register of MODE.  */
22249 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
22251 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
22252 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
22253 emit_insn (gen_rtx_SET (VOIDmode, tmp,
22254 gen_rtx_AND (mode, one, tmp)));
22255 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
22256 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
22257 emit_move_insn (res, tmp);
/* When signed zeros are honored, copy the sign of the input back onto
   the result.  */
22259 if (HONOR_SIGNED_ZEROS (mode))
22260 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
22262 emit_label (label);
22263 LABEL_NUSES (label) = 1;
22265 emit_move_insn (operand0, res);
/* ix86_expand_rounddf_32 emits RTL that rounds OPERAND1 to an integral
   value and stores it in OPERAND0 using only floating-point add,
   subtract, compare and mask operations.  The working mode is taken
   from OPERAND0.  The input is copied into a fresh pseudo RES; when
   its absolute value is not below TWO52 the sequence jumps to the
   final label and RES is stored unchanged.  Otherwise the magnitude is
   rounded via xa + TWO52 - TWO52, corrected by 1.0 according to the
   rounding error dxa, and the sign of the input is copied back.  */
22268 /* Expand SSE sequence for computing round from OPERAND1 storing
22269 into OPERAND0. Sequence that works without relying on DImode truncation
22270 via cvttsd2siq that is only available on 64bit targets. */
22272 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
22274 /* C code for the stuff we expand below.
22275 double xa = fabs (x), xa2, x2;
22276 if (!isless (xa, TWO52))
22278 Using the absolute value and copying back sign makes
22279 -0.0 -> -0.0 correct.
22280 xa2 = xa + TWO52 - TWO52;
22285 else if (dxa > 0.5)
22287 x2 = copysign (xa2, x);
22290 enum machine_mode mode = GET_MODE (operand0);
22291 rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;
22293 TWO52 = ix86_gen_TWO52 (mode);
22295 /* Temporary for holding the result, initialized to the input
22296 operand to ease control flow. */
22297 res = gen_reg_rtx (mode);
22298 emit_move_insn (res, operand1);
22300 /* xa = abs (operand1) */
22301 xa = ix86_expand_sse_fabs (res, &mask);
22303 /* if (!isless (xa, TWO52)) goto label; */
22304 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
22306 /* xa2 = xa + TWO52 - TWO52; */
22307 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
22308 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
22310 /* dxa = xa2 - xa; */
22311 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
22313 /* generate 0.5, 1.0 and -0.5 */
/* Only 0.5 is loaded as a constant; 1.0 is computed as half + half
   and -0.5 as half - one.  */
22314 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
22315 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
22316 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
22321 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
/* TMP receives the register returned by the comparison helper, so no
   pseudo needs to be allocated for it beforehand.  */
22322 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
/* tmp = one & tmp: turn the comparison mask into the "? 1 : 0" term.  */
22323 emit_insn (gen_rtx_SET (VOIDmode, tmp,
22324 gen_rtx_AND (mode, one, tmp)));
22325 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
22326 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
/* The test is written as mhalf >= dxa, so an exact tie (dxa == -0.5)
   also moves the magnitude up by one.  */
22327 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
22328 emit_insn (gen_rtx_SET (VOIDmode, tmp,
22329 gen_rtx_AND (mode, one, tmp)));
22330 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
22332 /* res = copysign (xa2, operand1) */
22333 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
/* Both paths meet here: RES holds either the untouched input or the
   rounded value.  The label's use count is set to one.  */
22335 emit_label (label);
22336 LABEL_NUSES (label) = 1;
22338 emit_move_insn (operand0, res);
/* ix86_expand_trunc emits RTL that stores trunc (OPERAND1) in OPERAND0
   by converting the value to an integer and back.  The working mode is
   taken from OPERAND0.  The input is copied into a fresh pseudo RES;
   when its absolute value is not below TWO52 the sequence jumps to the
   final label and RES is stored unchanged.  When signed zeros must be
   honored, the sign of the input is copied back onto the result.  */
22341 /* Expand SSE sequence for computing trunc from OPERAND1 storing
22344 ix86_expand_trunc (rtx operand0, rtx operand1)
22346 /* C code for SSE variant we expand below.
22347 double xa = fabs (x), x2;
22348 if (!isless (xa, TWO52))
22350 x2 = (double)(long)x;
22351 if (HONOR_SIGNED_ZEROS (mode))
22352 return copysign (x2, x);
22355 enum machine_mode mode = GET_MODE (operand0);
22356 rtx xa, xi, TWO52, label, res, mask;
22358 TWO52 = ix86_gen_TWO52 (mode);
22360 /* Temporary for holding the result, initialized to the input
22361 operand to ease control flow. */
22362 res = gen_reg_rtx (mode);
22363 emit_move_insn (res, operand1);
22365 /* xa = abs (operand1) */
22366 xa = ix86_expand_sse_fabs (res, &mask);
22368 /* if (!isless (xa, TWO52)) goto label; */
22369 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
22371 /* x = (double)(long)x */
/* The integer temporary is DImode for DFmode input and SImode for any
   other mode.  The converted value is written straight back to RES.  */
22372 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
22373 expand_fix (xi, res, 0);
22374 expand_float (res, xi, 0);
/* Copy the sign of the input back onto the result when signed zeros
   must be honored.  */
22376 if (HONOR_SIGNED_ZEROS (mode))
22377 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
/* Both paths meet here: RES holds either the untouched input or the
   truncated value.  The label's use count is set to one.  */
22379 emit_label (label);
22380 LABEL_NUSES (label) = 1;
22382 emit_move_insn (operand0, res);
/* ix86_expand_truncdf_32 emits RTL that stores trunc (OPERAND1) in
   OPERAND0 using only floating-point add, subtract, compare and mask
   operations.  The working mode is taken from OPERAND0.  The input is
   copied into a fresh pseudo RES; when its absolute value is not below
   TWO52 the sequence jumps to the final label and RES is stored
   unchanged.  Otherwise the magnitude is rounded via
   xa + TWO52 - TWO52, 1.0 is subtracted where that result exceeds xa,
   and the sign of the input is copied back.  */
22385 /* Expand SSE sequence for computing trunc from OPERAND1 storing
22388 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
22390 enum machine_mode mode = GET_MODE (operand0);
22391 rtx xa, mask, TWO52, label, one, res, smask, tmp;
22393 /* C code for SSE variant we expand below.
22394 double xa = fabs (x), x2;
22395 if (!isless (xa, TWO52))
22397 xa2 = xa + TWO52 - TWO52;
22401 x2 = copysign (xa2, x);
22405 TWO52 = ix86_gen_TWO52 (mode);
22407 /* Temporary for holding the result, initialized to the input
22408 operand to ease control flow. */
22409 res = gen_reg_rtx (mode);
22410 emit_move_insn (res, operand1);
22412 /* xa = abs (operand1) */
/* SMASK is filled in by the fabs helper and is passed to the copysign
   step at the end; MASK below is the separate comparison mask.  */
22413 xa = ix86_expand_sse_fabs (res, &smask);
22415 /* if (!isless (xa, TWO52)) goto label; */
22416 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
22418 /* res = xa + TWO52 - TWO52; */
22419 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
22420 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
22421 emit_move_insn (res, tmp);
/* Load the constant 1.0 into a register of the working mode.  */
22424 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
22426 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
22427 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
/* mask = mask & one: turn the comparison mask into the "? 1 : 0" term.  */
22428 emit_insn (gen_rtx_SET (VOIDmode, mask,
22429 gen_rtx_AND (mode, mask, one)));
22430 tmp = expand_simple_binop (mode, MINUS,
22431 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
22432 emit_move_insn (res, tmp);
22434 /* res = copysign (res, operand1) */
22435 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
/* Both paths meet here: RES holds either the untouched input or the
   truncated value.  The label's use count is set to one.  */
22437 emit_label (label);
22438 LABEL_NUSES (label) = 1;
22440 emit_move_insn (operand0, res);
/* ix86_expand_round emits RTL that stores round (OPERAND1) in OPERAND0.
   The working mode is taken from OPERAND0.  The input is copied into a
   fresh pseudo RES; when its absolute value is not below TWO52 the
   sequence jumps to the final label and RES is stored unchanged.
   Otherwise nextafter (0.5, 0.0) is added to the absolute value, the
   sum is converted to an integer and back, and the sign of the input
   is copied onto the result.  */
22443 /* Expand SSE sequence for computing round from OPERAND1 storing
22446 ix86_expand_round (rtx operand0, rtx operand1)
22448 /* C code for the stuff we're doing below:
22449 double xa = fabs (x);
22450 if (!isless (xa, TWO52))
22452 xa = (double)(long)(xa + nextafter (0.5, 0.0));
22453 return copysign (xa, x);
22455 enum machine_mode mode = GET_MODE (operand0);
22456 rtx res, TWO52, xa, label, xi, half, mask;
22457 const struct real_format *fmt;
22458 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
22460 /* Temporary for holding the result, initialized to the input
22461 operand to ease control flow. */
22462 res = gen_reg_rtx (mode);
22463 emit_move_insn (res, operand1);
/* xa = abs (operand1); jump to LABEL unless xa is less than TWO52.  */
22465 TWO52 = ix86_gen_TWO52 (mode);
22466 xa = ix86_expand_sse_fabs (res, &mask);
22467 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
22469 /* load nextafter (0.5, 0.0) */
/* NOTE(review): real_2expN presumably stores 2**N, which makes
   pred_half equal to 0.5 - 2**(-p-1) with p read from the mode's
   real_format -- confirm against real.c.  */
22470 fmt = REAL_MODE_FORMAT (mode);
22471 real_2expN (&half_minus_pred_half, -(fmt->p) - 1);
22472 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
22474 /* xa = xa + nextafter (0.5, 0.0); HALF holds pred_half, not 0.5.  */
22475 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
22476 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
22478 /* xa = (double)(int64_t)xa */
/* The integer temporary is DImode for DFmode input and SImode for any
   other mode.  */
22479 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
22480 expand_fix (xi, xa, 0);
22481 expand_float (xa, xi, 0);
22483 /* res = copysign (xa, operand1) */
22484 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
/* Both paths meet here: RES holds either the untouched input or the
   rounded value.  The label's use count is set to one.  */
22486 emit_label (label);
22487 LABEL_NUSES (label) = 1;
22489 emit_move_insn (operand0, res);
22493 /* Table of valid machine attributes. */
/* Each entry follows the column order given in the comment below.
   The calling-convention attributes (stdcall, fastcall, cdecl,
   regparm, sseregparm and force_align_arg_pointer) all share
   ix86_handle_cconv_attribute; of these only regparm takes an
   argument (min_len and max_len are both 1).  The table ends with an
   entry whose name is NULL.  */
22494 static const struct attribute_spec ix86_attribute_table[] =
22496 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
22497 /* Stdcall attribute says callee is responsible for popping arguments
22498 if they are not variable. */
22499 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
22500 /* Fastcall attribute says callee is responsible for popping arguments
22501 if they are not variable. */
22502 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
22503 /* Cdecl attribute says the callee is a normal C declaration */
22504 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute },
22505 /* Regparm attribute specifies how many integer arguments are to be
22506 passed in registers. */
22507 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute },
22508 /* Sseregparm attribute says we are using x86_64 calling conventions
22509 for FP arguments. */
22510 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
22511 /* force_align_arg_pointer says this function realigns the stack at entry. */
/* The attribute name for this entry is not a literal: it is read from
   ix86_force_align_arg_pointer_string through a cast.  */
22512 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
22513 false, true, true, ix86_handle_cconv_attribute },
/* DLL-related attributes, present only when
   TARGET_DLLIMPORT_DECL_ATTRIBUTES is nonzero.  Of the three, only
   "shared" sets decl_req.  */
22514 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
22515 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
22516 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
22517 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
/* Structure-layout attributes; both use ix86_handle_struct_attribute.  */
22519 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
22520 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
/* A subtarget may splice additional entries in here.  */
22521 #ifdef SUBTARGET_ATTRIBUTE_TABLE
22522 SUBTARGET_ATTRIBUTE_TABLE,
/* Terminating entry.  */
22524 { NULL, 0, 0, false, false, false, NULL }
/* Target hook overrides.  Each hook macro below is #undef'ed and then
   #define'd to an i386 implementation, a constant, or a generic hook_*
   function.  NOTE(review): these definitions are presumably picked up
   by TARGET_INITIALIZER where targetm is defined -- confirm against
   target-def.h.  */
22527 /* Initialize the GCC target structure. */
22528 #undef TARGET_ATTRIBUTE_TABLE
22529 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
22530 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
22531 # undef TARGET_MERGE_DECL_ATTRIBUTES
22532 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
22535 #undef TARGET_COMP_TYPE_ATTRIBUTES
22536 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
22538 #undef TARGET_INIT_BUILTINS
22539 #define TARGET_INIT_BUILTINS ix86_init_builtins
22540 #undef TARGET_EXPAND_BUILTIN
22541 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
22543 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
22544 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION ix86_builtin_vectorized_function
22545 #undef TARGET_VECTORIZE_BUILTIN_CONVERSION
22546 #define TARGET_VECTORIZE_BUILTIN_CONVERSION ix86_builtin_conversion
22548 #undef TARGET_ASM_FUNCTION_EPILOGUE
22549 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
/* ix86_encode_section_info is selected when
   SUBTARGET_ENCODE_SECTION_INFO is not defined; the alternative
   definition forwards to the subtarget macro.  */
22551 #undef TARGET_ENCODE_SECTION_INFO
22552 #ifndef SUBTARGET_ENCODE_SECTION_INFO
22553 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
22555 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
/* Both parenthesis strings are defined as empty.  */
22558 #undef TARGET_ASM_OPEN_PAREN
22559 #define TARGET_ASM_OPEN_PAREN ""
22560 #undef TARGET_ASM_CLOSE_PAREN
22561 #define TARGET_ASM_CLOSE_PAREN ""
22563 #undef TARGET_ASM_ALIGNED_HI_OP
22564 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
22565 #undef TARGET_ASM_ALIGNED_SI_OP
22566 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
22568 #undef TARGET_ASM_ALIGNED_DI_OP
22569 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
/* The unaligned data directives reuse the aligned ones.  */
22572 #undef TARGET_ASM_UNALIGNED_HI_OP
22573 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
22574 #undef TARGET_ASM_UNALIGNED_SI_OP
22575 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
22576 #undef TARGET_ASM_UNALIGNED_DI_OP
22577 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
22579 #undef TARGET_SCHED_ADJUST_COST
22580 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
22581 #undef TARGET_SCHED_ISSUE_RATE
22582 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
22583 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
22584 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
22585 ia32_multipass_dfa_lookahead
22587 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
22588 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
22591 #undef TARGET_HAVE_TLS
22592 #define TARGET_HAVE_TLS true
22594 #undef TARGET_CANNOT_FORCE_CONST_MEM
22595 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
22596 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
22597 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_rtx_true
22599 #undef TARGET_DELEGITIMIZE_ADDRESS
22600 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
22602 #undef TARGET_MS_BITFIELD_LAYOUT_P
22603 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
/* NOTE(review): TARGET_BINDS_LOCAL_P has two candidate definitions.
   The i386_pe one is tied to TARGET_DLLIMPORT_DECL_ATTRIBUTES; the
   condition selecting the darwin one is not visible here -- confirm
   against the full file.  */
22606 #undef TARGET_BINDS_LOCAL_P
22607 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
22609 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
22610 #undef TARGET_BINDS_LOCAL_P
22611 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
22614 #undef TARGET_ASM_OUTPUT_MI_THUNK
22615 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
22616 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
22617 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
22619 #undef TARGET_ASM_FILE_START
22620 #define TARGET_ASM_FILE_START x86_file_start
22622 #undef TARGET_DEFAULT_TARGET_FLAGS
22623 #define TARGET_DEFAULT_TARGET_FLAGS \
22625 | TARGET_64BIT_DEFAULT \
22626 | TARGET_SUBTARGET_DEFAULT \
22627 | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
22629 #undef TARGET_HANDLE_OPTION
22630 #define TARGET_HANDLE_OPTION ix86_handle_option
22632 #undef TARGET_RTX_COSTS
22633 #define TARGET_RTX_COSTS ix86_rtx_costs
22634 #undef TARGET_ADDRESS_COST
22635 #define TARGET_ADDRESS_COST ix86_address_cost
22637 #undef TARGET_FIXED_CONDITION_CODE_REGS
22638 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
22639 #undef TARGET_CC_MODES_COMPATIBLE
22640 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
22642 #undef TARGET_MACHINE_DEPENDENT_REORG
22643 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
22645 #undef TARGET_BUILD_BUILTIN_VA_LIST
22646 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
22648 #undef TARGET_MD_ASM_CLOBBERS
22649 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
22651 #undef TARGET_PROMOTE_PROTOTYPES
22652 #define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
22653 #undef TARGET_STRUCT_VALUE_RTX
22654 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
22655 #undef TARGET_SETUP_INCOMING_VARARGS
22656 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
22657 #undef TARGET_MUST_PASS_IN_STACK
22658 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
22659 #undef TARGET_PASS_BY_REFERENCE
22660 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
22661 #undef TARGET_INTERNAL_ARG_POINTER
22662 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
22663 #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
22664 #define TARGET_DWARF_HANDLE_FRAME_UNSPEC ix86_dwarf_handle_frame_unspec
22665 #undef TARGET_STRICT_ARGUMENT_NAMING
22666 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
22668 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
22669 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
22671 #undef TARGET_SCALAR_MODE_SUPPORTED_P
22672 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
22674 #undef TARGET_VECTOR_MODE_SUPPORTED_P
22675 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
22678 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
22679 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
/* TARGET_INSERT_ATTRIBUTES is overridden only when the subtarget
   supplies SUBTARGET_INSERT_ATTRIBUTES.  */
22682 #ifdef SUBTARGET_INSERT_ATTRIBUTES
22683 #undef TARGET_INSERT_ATTRIBUTES
22684 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
22687 #undef TARGET_MANGLE_FUNDAMENTAL_TYPE
22688 #define TARGET_MANGLE_FUNDAMENTAL_TYPE ix86_mangle_fundamental_type
22690 #undef TARGET_STACK_PROTECT_FAIL
22691 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
22693 #undef TARGET_FUNCTION_VALUE
22694 #define TARGET_FUNCTION_VALUE ix86_function_value
/* The target hook structure for this back end, initialized from
   TARGET_INITIALIZER.  */
22696 struct gcc_target targetm = TARGET_INITIALIZER;
22698 #include "gt-i386.h"