/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
   2002, 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to
the Free Software Foundation, 51 Franklin Street, Fifth Floor,
Boston, MA 02110-1301, USA.  */
#include "coretypes.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "conditions.h"
#include "insn-codes.h"
#include "insn-attr.h"
#include "basic-block.h"
#include "target-def.h"
#include "langhooks.h"
#include "tree-gimple.h"
#include "tm-constrs.h"
#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif
/* Return index of given mode in mult and division cost tables.  */
#define MODE_INDEX(mode)				\
  ((mode) == QImode ? 0					\
   : (mode) == HImode ? 1				\
   : (mode) == SImode ? 2				\
   : (mode) == DImode ? 3				\
   : 4)
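/* Usage sketch (illustrative, not part of the original source): the
   SImode divide cost of the selected CPU would be fetched as
   ix86_cost->divide[MODE_INDEX (SImode)], i.e. index 2; any mode other
   than QI/HI/SI/DI lands in the "other" slot at index 4.  The `divide'
   field name is assumed from the struct processor_costs declaration
   in i386.h.  */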
/* Processor costs (relative to an add) */
/* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes.  */
#define COSTS_N_BYTES(N) ((N) * 2)
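/* Worked example: an add is assumed to be 2 bytes, so on the size
   scale it costs COSTS_N_BYTES (2) == 4 -- the same number that
   COSTS_N_INSNS (1) == 4 yields on the cycle scale.  "One add" thus
   lines up across the size and speed tables even though the units
   (bytes vs. insns) differ.  */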
#define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
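/* A note on the memcpy/memset descriptors closing each cost table
   below (struct stringop_algs in i386.h): the leading member is the
   algorithm used when the block size is unknown at compile time,
   followed by {max, alg} pairs meaning "use ALG for blocks of at most
   MAX bytes"; max == -1 terminates the list and covers all larger
   sizes.  Each descriptor comes in a 32-bit and a 64-bit variant, and
   DUMMY_STRINGOP_ALGS fills whichever variant can never be consulted
   for that CPU.  */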
struct processor_costs size_cost = {	/* costs for tuning for size */
  COSTS_N_BYTES (2),			/* cost of an add instruction */
  COSTS_N_BYTES (3),			/* cost of a lea instruction */
  COSTS_N_BYTES (2),			/* variable shift costs */
  COSTS_N_BYTES (3),			/* constant shift costs */
  {COSTS_N_BYTES (3),			/* cost of starting multiply for QI */
   COSTS_N_BYTES (3),			/* HI */
   COSTS_N_BYTES (3),			/* SI */
   COSTS_N_BYTES (3),			/* DI */
   COSTS_N_BYTES (5)},			/* other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_BYTES (3),			/* cost of a divide/mod for QI */
   COSTS_N_BYTES (3),			/* HI */
   COSTS_N_BYTES (3),			/* SI */
   COSTS_N_BYTES (3),			/* DI */
   COSTS_N_BYTES (5)},			/* other */
  COSTS_N_BYTES (3),			/* cost of movsx */
  COSTS_N_BYTES (3),			/* cost of movzx */
  2,					/* cost for loading QImode using movzbl */
  {2, 2, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 2, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {2, 2, 2},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {2, 2, 2},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  3,					/* cost of moving MMX register */
  {3, 3},				/* cost of loading MMX registers
					   in SImode and DImode */
  {3, 3},				/* cost of storing MMX registers
					   in SImode and DImode */
  3,					/* cost of moving SSE register */
  {3, 3, 3},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {3, 3, 3},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  0,					/* size of prefetch block */
  0,					/* number of parallel prefetches */
  COSTS_N_BYTES (2),			/* cost of FADD and FSUB insns.  */
  COSTS_N_BYTES (2),			/* cost of FMUL instruction.  */
  COSTS_N_BYTES (2),			/* cost of FDIV instruction.  */
  COSTS_N_BYTES (2),			/* cost of FABS instruction.  */
  COSTS_N_BYTES (2),			/* cost of FCHS instruction.  */
  COSTS_N_BYTES (2),			/* cost of FSQRT instruction.  */
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}}
};
/* Processor costs (relative to an add) */
struct processor_costs i386_cost = {	/* 386 specific costs */
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1),			/* cost of a lea instruction */
  COSTS_N_INSNS (3),			/* variable shift costs */
  COSTS_N_INSNS (2),			/* constant shift costs */
  {COSTS_N_INSNS (6),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (6),			/* HI */
   COSTS_N_INSNS (6),			/* SI */
   COSTS_N_INSNS (6),			/* DI */
   COSTS_N_INSNS (6)},			/* other */
  COSTS_N_INSNS (1),			/* cost of multiply per each bit set */
  {COSTS_N_INSNS (23),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (23),			/* HI */
   COSTS_N_INSNS (23),			/* SI */
   COSTS_N_INSNS (23),			/* DI */
   COSTS_N_INSNS (23)},			/* other */
  COSTS_N_INSNS (3),			/* cost of movsx */
  COSTS_N_INSNS (2),			/* cost of movzx */
  15,					/* "large" insn */
  4,					/* cost for loading QImode using movzbl */
  {2, 4, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 4, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {8, 8, 8},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {8, 8, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {4, 8},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 8},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 8, 16},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 8, 16},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  0,					/* size of prefetch block */
  0,					/* number of parallel prefetches */
  COSTS_N_INSNS (23),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (27),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (88),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (22),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (24),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (122),			/* cost of FSQRT instruction.  */
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
};
struct processor_costs i486_cost = {	/* 486 specific costs */
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1),			/* cost of a lea instruction */
  COSTS_N_INSNS (3),			/* variable shift costs */
  COSTS_N_INSNS (2),			/* constant shift costs */
  {COSTS_N_INSNS (12),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (12),			/* HI */
   COSTS_N_INSNS (12),			/* SI */
   COSTS_N_INSNS (12),			/* DI */
   COSTS_N_INSNS (12)},			/* other */
  1,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (40),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (40),			/* HI */
   COSTS_N_INSNS (40),			/* SI */
   COSTS_N_INSNS (40),			/* DI */
   COSTS_N_INSNS (40)},			/* other */
  COSTS_N_INSNS (3),			/* cost of movsx */
  COSTS_N_INSNS (2),			/* cost of movzx */
  15,					/* "large" insn */
  4,					/* cost for loading QImode using movzbl */
  {2, 4, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 4, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {8, 8, 8},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {8, 8, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {4, 8},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 8},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 8, 16},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 8, 16},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  0,					/* size of prefetch block */
  0,					/* number of parallel prefetches */
  COSTS_N_INSNS (8),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (16),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (73),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (3),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (3),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (83),			/* cost of FSQRT instruction.  */
  {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
};
struct processor_costs pentium_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1),			/* cost of a lea instruction */
  COSTS_N_INSNS (4),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (11),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (11),			/* HI */
   COSTS_N_INSNS (11),			/* SI */
   COSTS_N_INSNS (11),			/* DI */
   COSTS_N_INSNS (11)},			/* other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (25),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (25),			/* HI */
   COSTS_N_INSNS (25),			/* SI */
   COSTS_N_INSNS (25),			/* DI */
   COSTS_N_INSNS (25)},			/* other */
  COSTS_N_INSNS (3),			/* cost of movsx */
  COSTS_N_INSNS (2),			/* cost of movzx */
  8,					/* "large" insn */
  6,					/* cost for loading QImode using movzbl */
  {2, 4, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 4, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {2, 2, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 6},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  8,					/* cost of moving MMX register */
  {8, 8},				/* cost of loading MMX registers
					   in SImode and DImode */
  {8, 8},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 8, 16},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 8, 16},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  0,					/* size of prefetch block */
  0,					/* number of parallel prefetches */
  COSTS_N_INSNS (3),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (3),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (39),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (1),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (70),			/* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
};
struct processor_costs pentiumpro_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1),			/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (4),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),			/* HI */
   COSTS_N_INSNS (4),			/* SI */
   COSTS_N_INSNS (4),			/* DI */
   COSTS_N_INSNS (4)},			/* other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (17),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (17),			/* HI */
   COSTS_N_INSNS (17),			/* SI */
   COSTS_N_INSNS (17),			/* DI */
   COSTS_N_INSNS (17)},			/* other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  2,					/* cost for loading QImode using movzbl */
  {4, 4, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 2, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {2, 2, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 6},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {2, 2},				/* cost of loading MMX registers
					   in SImode and DImode */
  {2, 2},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {2, 2, 8},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {2, 2, 8},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  32,					/* size of prefetch block */
  6,					/* number of parallel prefetches */
  COSTS_N_INSNS (3),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (5),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (56),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (56),			/* cost of FSQRT instruction.  */
  /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes
     (we ensure the alignment).  For small blocks the inline loop is still a
     noticeable win; for bigger blocks either rep movsl or rep movsb is the
     way to go.  Rep movsb apparently has a more expensive startup time in the
     CPU, but after 4K the difference is down in the noise.  */
  {{rep_prefix_4_byte, {{128, loop}, {1024, unrolled_loop},
			{8192, rep_prefix_4_byte}, {-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_4_byte, {{1024, unrolled_loop},
			{8192, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
};
struct processor_costs geode_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1),			/* cost of a lea instruction */
  COSTS_N_INSNS (2),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),			/* HI */
   COSTS_N_INSNS (7),			/* SI */
   COSTS_N_INSNS (7),			/* DI */
   COSTS_N_INSNS (7)},			/* other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (15),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (23),			/* HI */
   COSTS_N_INSNS (39),			/* SI */
   COSTS_N_INSNS (39),			/* DI */
   COSTS_N_INSNS (39)},			/* other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  1,					/* cost for loading QImode using movzbl */
  {1, 1, 1},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {1, 1, 1},				/* cost of storing integer registers */
  1,					/* cost of reg,reg fld/fst */
  {1, 1, 1},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 6, 6},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  1,					/* cost of moving MMX register */
  {1, 1},				/* cost of loading MMX registers
					   in SImode and DImode */
  {1, 1},				/* cost of storing MMX registers
					   in SImode and DImode */
  1,					/* cost of moving SSE register */
  {1, 1, 1},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {1, 1, 1},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  1,					/* MMX or SSE register to integer */
  32,					/* size of prefetch block */
  1,					/* number of parallel prefetches */
  COSTS_N_INSNS (6),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (11),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (47),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (1),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (54),			/* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
};
struct processor_costs k6_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (2),			/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (3),			/* HI */
   COSTS_N_INSNS (3),			/* SI */
   COSTS_N_INSNS (3),			/* DI */
   COSTS_N_INSNS (3)},			/* other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (18),			/* HI */
   COSTS_N_INSNS (18),			/* SI */
   COSTS_N_INSNS (18),			/* DI */
   COSTS_N_INSNS (18)},			/* other */
  COSTS_N_INSNS (2),			/* cost of movsx */
  COSTS_N_INSNS (2),			/* cost of movzx */
  8,					/* "large" insn */
  3,					/* cost for loading QImode using movzbl */
  {4, 5, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 3, 2},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {6, 6, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 4},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {2, 2},				/* cost of loading MMX registers
					   in SImode and DImode */
  {2, 2},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {2, 2, 8},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {2, 2, 8},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  6,					/* MMX or SSE register to integer */
  32,					/* size of prefetch block */
  1,					/* number of parallel prefetches */
  COSTS_N_INSNS (2),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (2),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (56),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (56),			/* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
};
struct processor_costs athlon_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (2),			/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (5),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (5),			/* HI */
   COSTS_N_INSNS (5),			/* SI */
   COSTS_N_INSNS (5),			/* DI */
   COSTS_N_INSNS (5)},			/* other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),			/* HI */
   COSTS_N_INSNS (42),			/* SI */
   COSTS_N_INSNS (74),			/* DI */
   COSTS_N_INSNS (74)},			/* other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  4,					/* cost for loading QImode using movzbl */
  {3, 4, 3},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {3, 4, 3},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {4, 4, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {6, 6, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {4, 4},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 4},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 4, 6},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 4, 5},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  5,					/* MMX or SSE register to integer */
  64,					/* size of prefetch block */
  6,					/* number of parallel prefetches */
  COSTS_N_INSNS (4),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (24),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),			/* cost of FSQRT instruction.  */
  /* For some reason, Athlon deals better with the REP prefix (relative to
     loops) than K8 does.  Alignment becomes important after 8 bytes for
     memcpy and 128 bytes for memset.  */
  {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
};
struct processor_costs k8_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (2),			/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),			/* HI */
   COSTS_N_INSNS (3),			/* SI */
   COSTS_N_INSNS (4),			/* DI */
   COSTS_N_INSNS (5)},			/* other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),			/* HI */
   COSTS_N_INSNS (42),			/* SI */
   COSTS_N_INSNS (74),			/* DI */
   COSTS_N_INSNS (74)},			/* other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  4,					/* cost for loading QImode using movzbl */
  {3, 4, 3},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {3, 4, 3},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {4, 4, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {6, 6, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {3, 3},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 4},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 3, 6},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 4, 5},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  5,					/* MMX or SSE register to integer */
  64,					/* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set the number of simultaneous
     prefetches to a large constant to reflect this (it probably is not a
     good idea not to limit the number of prefetches at all, as their
     execution also takes some time).  */
  100,					/* number of parallel prefetches */
  COSTS_N_INSNS (4),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),			/* cost of FSQRT instruction.  */
  /* K8 has an optimized REP instruction for medium-sized blocks, but for
     very small blocks it is better to use a loop.  For large blocks, the
     libcall can do nontemporal accesses and beat inline code considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
	      {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}}
};
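/* Reading the K8 memcpy descriptor above (illustrative): in 32-bit
   code, blocks of up to 6 bytes use an inline loop, up to 14 bytes an
   unrolled loop, and everything larger rep movsl; in 64-bit code,
   blocks up to 16 bytes use a loop, up to 8192 bytes rep movsq, and
   larger blocks go through the library call.  Blocks of unknown size
   use the leading `libcall' strategy in both cases.  */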
struct processor_costs amdfam10_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (2),			/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),			/* HI */
   COSTS_N_INSNS (3),			/* SI */
   COSTS_N_INSNS (4),			/* DI */
   COSTS_N_INSNS (5)},			/* other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),			/* HI */
   COSTS_N_INSNS (51),			/* SI */
   COSTS_N_INSNS (83),			/* DI */
   COSTS_N_INSNS (83)},			/* other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  4,					/* cost for loading QImode using movzbl */
  {3, 4, 3},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {3, 4, 3},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {4, 4, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {6, 6, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {3, 3},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 4},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 4, 3},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 4, 5},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  					/* On K8:
  					    MOVD reg64, xmmreg	Double	FSTORE 4
					    MOVD reg32, xmmreg	Double	FSTORE 4
					   On AMDFAM10:
					    MOVD reg64, xmmreg	Double	FADD 3
					    MOVD reg32, xmmreg	Double	FADD 3  */
  64,					/* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set the number of simultaneous
     prefetches to a large constant to reflect this (it probably is not a
     good idea not to limit the number of prefetches at all, as their
     execution also takes some time).  */
  100,					/* number of parallel prefetches */
  COSTS_N_INSNS (4),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),			/* cost of FSQRT instruction.  */

  /* AMDFAM10 has an optimized REP instruction for medium-sized blocks, but
     for very small blocks it is better to use a loop.  For large blocks, the
     libcall can do nontemporal accesses and beat inline code considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
	      {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}}
};
struct processor_costs pentium4_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (3),			/* cost of a lea instruction */
  COSTS_N_INSNS (4),			/* variable shift costs */
  COSTS_N_INSNS (4),			/* constant shift costs */
  {COSTS_N_INSNS (15),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (15),			/* HI */
   COSTS_N_INSNS (15),			/* SI */
   COSTS_N_INSNS (15),			/* DI */
   COSTS_N_INSNS (15)},			/* other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (56),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (56),			/* HI */
   COSTS_N_INSNS (56),			/* SI */
   COSTS_N_INSNS (56),			/* DI */
   COSTS_N_INSNS (56)},			/* other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  16,					/* "large" insn */
  2,					/* cost for loading QImode using movzbl */
  {4, 5, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 3, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {2, 2, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 6},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {2, 2},				/* cost of loading MMX registers
					   in SImode and DImode */
  {2, 2},				/* cost of storing MMX registers
					   in SImode and DImode */
  12,					/* cost of moving SSE register */
  {12, 12, 12},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {2, 2, 8},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  10,					/* MMX or SSE register to integer */
  64,					/* size of prefetch block */
  6,					/* number of parallel prefetches */
  COSTS_N_INSNS (5),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (7),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (43),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (43),			/* cost of FSQRT instruction.  */
  {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
	      {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
};
struct processor_costs nocona_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1),			/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (10),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (10),			/* HI */
   COSTS_N_INSNS (10),			/* SI */
   COSTS_N_INSNS (10),			/* DI */
   COSTS_N_INSNS (10)},			/* other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (66),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (66),			/* HI */
   COSTS_N_INSNS (66),			/* SI */
   COSTS_N_INSNS (66),			/* DI */
   COSTS_N_INSNS (66)},			/* other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  16,					/* "large" insn */
  4,					/* cost for loading QImode using movzbl */
  {4, 4, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {4, 4, 4},				/* cost of storing integer registers */
  3,					/* cost of reg,reg fld/fst */
  {12, 12, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 4},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  6,					/* cost of moving MMX register */
  {12, 12},				/* cost of loading MMX registers
					   in SImode and DImode */
  {12, 12},				/* cost of storing MMX registers
					   in SImode and DImode */
  6,					/* cost of moving SSE register */
  {12, 12, 12},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {12, 12, 12},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  8,					/* MMX or SSE register to integer */
  128,					/* size of prefetch block */
  8,					/* number of parallel prefetches */
  COSTS_N_INSNS (6),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (40),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (3),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (3),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (44),			/* cost of FSQRT instruction.  */
  {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
   {libcall, {{32, loop}, {20000, rep_prefix_8_byte},
	      {100000, unrolled_loop}, {-1, libcall}}}},
  {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
	      {-1, libcall}}},
   {libcall, {{24, loop}, {64, unrolled_loop},
	      {8192, rep_prefix_8_byte}, {-1, libcall}}}}
};
struct processor_costs core2_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1) + 1,		/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (3),			/* HI */
   COSTS_N_INSNS (3),			/* SI */
   COSTS_N_INSNS (3),			/* DI */
   COSTS_N_INSNS (3)},			/* other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (22),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (22),			/* HI */
   COSTS_N_INSNS (22),			/* SI */
   COSTS_N_INSNS (22),			/* DI */
   COSTS_N_INSNS (22)},			/* other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  2,					/* cost for loading QImode using movzbl */
  {6, 6, 6},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {4, 4, 4},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {6, 6, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 4},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {6, 6},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 4},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {6, 6, 6},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 4, 4},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  2,					/* MMX or SSE register to integer */
  128,					/* size of prefetch block */
  8,					/* number of parallel prefetches */
  COSTS_N_INSNS (3),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (5),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (32),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (1),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (58),			/* cost of FSQRT instruction.  */
  {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{32, loop}, {64, rep_prefix_4_byte},
	      {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {15, unrolled_loop},
	      {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{24, loop}, {32, unrolled_loop},
	      {8192, rep_prefix_8_byte}, {-1, libcall}}}}
};
/* Generic64 should produce code tuned for Nocona and K8.  */
struct processor_costs generic64_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  /* On all chips taken into consideration, lea is 2 cycles or more.  With
     this cost, however, our current implementation of synth_mult results in
     the use of unnecessary temporary registers, causing regressions on
     several SPECfp benchmarks.  */
  COSTS_N_INSNS (1) + 1,		/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),			/* HI */
   COSTS_N_INSNS (3),			/* SI */
   COSTS_N_INSNS (4),			/* DI */
   COSTS_N_INSNS (2)},			/* other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),			/* HI */
   COSTS_N_INSNS (42),			/* SI */
   COSTS_N_INSNS (74),			/* DI */
   COSTS_N_INSNS (74)},			/* other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  4,					/* cost for loading QImode using movzbl */
  {4, 4, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {4, 4, 4},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {12, 12, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {6, 6, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {8, 8},				/* cost of loading MMX registers
					   in SImode and DImode */
  {8, 8},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {8, 8, 8},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {8, 8, 8},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  5,					/* MMX or SSE register to integer */
  64,					/* size of prefetch block */
  6,					/* number of parallel prefetches */
  /* Benchmarks show large regressions on the K8 sixtrack benchmark when this
     value is increased to the perhaps more appropriate value of 5.  */
  COSTS_N_INSNS (8),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (20),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (8),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (8),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (40),			/* cost of FSQRT instruction.  */
  {DUMMY_STRINGOP_ALGS,
   {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {DUMMY_STRINGOP_ALGS,
   {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}}
};
/* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona
   and K8.  */
struct processor_costs generic32_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1) + 1,		/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),			/* HI */
   COSTS_N_INSNS (3),			/* SI */
   COSTS_N_INSNS (4),			/* DI */
   COSTS_N_INSNS (2)},			/* other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),			/* HI */
   COSTS_N_INSNS (42),			/* SI */
   COSTS_N_INSNS (74),			/* DI */
   COSTS_N_INSNS (74)},			/* other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  4,					/* cost for loading QImode using movzbl */
  {4, 4, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {4, 4, 4},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {12, 12, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {6, 6, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {8, 8},				/* cost of loading MMX registers
					   in SImode and DImode */
  {8, 8},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {8, 8, 8},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {8, 8, 8},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  5,					/* MMX or SSE register to integer */
  64,					/* size of prefetch block */
  6,					/* number of parallel prefetches */
  COSTS_N_INSNS (8),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (20),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (8),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (8),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (40),			/* cost of FSQRT instruction.  */
  {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
};
const struct processor_costs *ix86_cost = &pentium_cost;
/* Processor feature/optimization bitmasks.  */
#define m_386 (1<<PROCESSOR_I386)
#define m_486 (1<<PROCESSOR_I486)
#define m_PENT (1<<PROCESSOR_PENTIUM)
#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
#define m_PENT4 (1<<PROCESSOR_PENTIUM4)
#define m_NOCONA (1<<PROCESSOR_NOCONA)
#define m_CORE2 (1<<PROCESSOR_CORE2)

#define m_GEODE (1<<PROCESSOR_GEODE)
#define m_K6 (1<<PROCESSOR_K6)
#define m_K6_GEODE (m_K6 | m_GEODE)
#define m_K8 (1<<PROCESSOR_K8)
#define m_ATHLON (1<<PROCESSOR_ATHLON)
#define m_ATHLON_K8 (m_K8 | m_ATHLON)
#define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
#define m_ATHLON_K8_AMDFAM10 (m_K8 | m_ATHLON | m_AMDFAM10)

#define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
#define m_GENERIC64 (1<<PROCESSOR_GENERIC64)

/* Generic instruction choice should be a common subset of supported CPUs
   (PPro/PENT4/NOCONA/CORE2/Athlon/K8).  */
#define m_GENERIC (m_GENERIC32 | m_GENERIC64)
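/* Illustrative use of these masks: an entry such as m_K6_GEODE | m_CORE2
   expands to (1<<PROCESSOR_K6) | (1<<PROCESSOR_GEODE) | (1<<PROCESSOR_CORE2),
   so whether the selected CPU participates in a feature is a single bit
   test against a mask like (1 << ix86_tune).  */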
/* Feature tests against the various tunings.  */
unsigned int ix86_tune_features[X86_TUNE_LAST] = {
  /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
     negatively, so enabling it for Generic64 seems like a good code-size
     tradeoff.  We can't enable it for 32bit generic because it does not
     work well with PPro based chips.  */
  m_386 | m_K6_GEODE | m_ATHLON_K8_AMDFAM10 | m_CORE2 | m_GENERIC64,

  /* X86_TUNE_PUSH_MEMORY */
  m_386 | m_K6_GEODE | m_ATHLON_K8_AMDFAM10 | m_PENT4
  | m_NOCONA | m_CORE2 | m_GENERIC,

  /* X86_TUNE_ZERO_EXTEND_WITH_AND */

  /* X86_TUNE_USE_BIT_TEST */

  /* X86_TUNE_UNROLL_STRLEN */
  m_486 | m_PENT | m_PPRO | m_ATHLON_K8_AMDFAM10 | m_K6 | m_CORE2 | m_GENERIC,

  /* X86_TUNE_DEEP_BRANCH_PREDICTION */
  m_PPRO | m_K6_GEODE | m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_GENERIC,

  /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
     on simulation results, but after P4 was made no performance benefit
     was observed with branch hints; they also increase code size.
     As a result, icc never generates branch hints.  */

  /* X86_TUNE_DOUBLE_WITH_ADD */

  /* X86_TUNE_USE_SAHF */
  m_PPRO | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_PENT4
  | m_NOCONA | m_CORE2 | m_GENERIC,

  /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
     partial dependencies.  */
  m_ATHLON_K8_AMDFAM10 | m_PPRO | m_PENT4 | m_NOCONA
  | m_CORE2 | m_GENERIC | m_GEODE /* m_386 | m_K6 */,

  /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
     register stalls on the Generic32 compilation setting as well.  However,
     in the current implementation partial register stalls are not eliminated
     very well - they can be introduced via subregs synthesized by combine
     and can happen in caller/callee saving sequences.  Because this option
     pays back little on PPro based chips and is in conflict with partial reg
     dependencies used by Athlon/P4 based chips, it is better to leave it off
     for generic32 for now.  */

  /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
  m_CORE2 | m_GENERIC,

  /* X86_TUNE_USE_HIMODE_FIOP */
  m_386 | m_486 | m_K6_GEODE,

  /* X86_TUNE_USE_SIMODE_FIOP */
  ~(m_PPRO | m_ATHLON_K8_AMDFAM10 | m_PENT | m_CORE2 | m_GENERIC),

  /* X86_TUNE_USE_MOV0 */

  /* X86_TUNE_USE_CLTD */
  ~(m_PENT | m_K6 | m_CORE2 | m_GENERIC),

  /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx.  */

  /* X86_TUNE_SPLIT_LONG_MOVES */

  /* X86_TUNE_READ_MODIFY_WRITE */

  /* X86_TUNE_READ_MODIFY */

  /* X86_TUNE_PROMOTE_QIMODE */
  m_K6_GEODE | m_PENT | m_386 | m_486 | m_ATHLON_K8_AMDFAM10 | m_CORE2
  | m_GENERIC /* | m_PENT4 ? */,

  /* X86_TUNE_FAST_PREFIX */
  ~(m_PENT | m_486 | m_386),

  /* X86_TUNE_SINGLE_STRINGOP */
  m_386 | m_PENT4 | m_NOCONA,

  /* X86_TUNE_QIMODE_MATH */

  /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
     register stalls.  Just like X86_TUNE_PARTIAL_REG_STALL this option
     might be considered for Generic32 if our scheme for avoiding partial
     stalls were more effective.  */

  /* X86_TUNE_PROMOTE_QI_REGS */

  /* X86_TUNE_PROMOTE_HI_REGS */

  /* X86_TUNE_ADD_ESP_4: Enable if add/sub is preferred over 1/2 push/pop.  */
  m_ATHLON_K8_AMDFAM10 | m_K6_GEODE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,

  /* X86_TUNE_ADD_ESP_8 */
  m_ATHLON_K8_AMDFAM10 | m_PPRO | m_K6_GEODE | m_386
  | m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,

  /* X86_TUNE_SUB_ESP_4 */
  m_ATHLON_K8_AMDFAM10 | m_PPRO | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,

  /* X86_TUNE_SUB_ESP_8 */
  m_ATHLON_K8_AMDFAM10 | m_PPRO | m_386 | m_486
  | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,

  /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
     for DFmode copies */
  ~(m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
    | m_GENERIC | m_GEODE),

  /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
  m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,

  /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
     conflict here between PPro/Pentium4 based chips that treat 128bit
     SSE registers as single units and K8 based chips that divide SSE
     registers into two 64bit halves.  This knob promotes all store
     destinations to be 128bit so as to allow register renaming on 128bit
     SSE units, but usually results in one extra microop on 64bit SSE units.
     Experimental results show that disabling this option on P4 brings over
     20% SPECfp regression, while enabling it on K8 brings a roughly 2.4%
     regression that can be partly masked by careful scheduling of moves.  */
  m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC | m_AMDFAM10,

  /* X86_TUNE_SSE_UNALIGNED_MOVE_OPTIMAL */

  /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
     are resolved on SSE register parts instead of whole registers, so we may
     maintain just the lower part of scalar values in the proper format,
     leaving the upper part undefined.  */

  /* X86_TUNE_SSE_TYPELESS_STORES */
  m_ATHLON_K8_AMDFAM10,

  /* X86_TUNE_SSE_LOAD0_BY_PXOR */
  m_PPRO | m_PENT4 | m_NOCONA,

  /* X86_TUNE_MEMORY_MISMATCH_STALL */
  m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,

  /* X86_TUNE_PROLOGUE_USING_MOVE */
  m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC,

  /* X86_TUNE_EPILOGUE_USING_MOVE */
  m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC,

  /* X86_TUNE_SHIFT1 */

  /* X86_TUNE_USE_FFREEP */
  m_ATHLON_K8_AMDFAM10,

  /* X86_TUNE_INTER_UNIT_MOVES */
  ~(m_ATHLON_K8_AMDFAM10 | m_GENERIC),

  /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
     than 4 branch instructions in the 16 byte window.  */
  m_PPRO | m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,

  /* X86_TUNE_SCHEDULE */
  m_PPRO | m_ATHLON_K8_AMDFAM10 | m_K6_GEODE | m_PENT | m_CORE2 | m_GENERIC,

  /* X86_TUNE_USE_BT */
  m_ATHLON_K8_AMDFAM10,

  /* X86_TUNE_USE_INCDEC */
  ~(m_PENT4 | m_NOCONA | m_GENERIC),

  /* X86_TUNE_PAD_RETURNS */
  m_ATHLON_K8_AMDFAM10 | m_CORE2 | m_GENERIC,

  /* X86_TUNE_EXT_80387_CONSTANTS */
  m_K6_GEODE | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC,

  /* X86_TUNE_SHORTEN_X87_SSE */

  /* X86_TUNE_AVOID_VECTOR_DECODE */

  /* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have the same latency for
     HImode and SImode multiply, but 386 and 486 do HImode multiply
     faster.  */

  /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of 32-bit constant and memory is
     vector path on AMD machines.  */
  m_K8 | m_GENERIC64 | m_AMDFAM10,

  /* X86_TUNE_SLOW_IMUL_IMM8: Imul of 8-bit constant is vector path on AMD
     machines.  */
  m_K8 | m_GENERIC64 | m_AMDFAM10,

  /* X86_TUNE_MOVE_M1_VIA_OR: On pentiums, it is faster to load -1 via OR
     than a mov.  */

  /* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is,
     but one byte longer.  */

  /* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector decoded with a memory
     operand that cannot be represented using a modRM byte.  The XOR
     replacement is long decoded, so this split helps here as well.  */
};
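/* Illustrative: a tuning entry above is consulted roughly as
     ix86_tune_features[X86_TUNE_PUSH_MEMORY] & (1 << ix86_tune)
   (wrapped by the TARGET_*-style macros in i386.h), so one AND answers
   whether the -mtune CPU wants the transformation.  */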
/* Feature tests against the various architecture variations.  */
unsigned int ix86_arch_features[X86_ARCH_LAST] = {
  /* X86_ARCH_CMOVE: Conditional move was added for pentiumpro.  */
  ~(m_386 | m_486 | m_PENT | m_K6),

  /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486.  */

  /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium.  */

  /* X86_ARCH_XADD: Exchange and add was added for 80486.  */

  /* X86_ARCH_BSWAP: Byteswap was added for 80486.  */
};
static const unsigned int x86_accumulate_outgoing_args
  = m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC;

static const unsigned int x86_arch_always_fancy_math_387
  = m_PENT | m_PPRO | m_ATHLON_K8_AMDFAM10 | m_PENT4
    | m_NOCONA | m_CORE2 | m_GENERIC;
static enum stringop_alg stringop_alg = no_stringop;
/* In case the average insn count for a single function invocation is
   lower than this constant, emit fast (but longer) prologue and
   epilogue code.  */
#define FAST_PROLOGUE_INSN_COUNT 20
/* Names for the 8-bit (low), 8-bit (high), and 16-bit registers,
   respectively.  */
static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
/* Array of the smallest class containing reg number REGNO, indexed by
   REGNO.  Used by REGNO_REG_CLASS in i386.h.  */

enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
{
  /* ax, dx, cx, bx */
  AREG, DREG, CREG, BREG,
  /* si, di, bp, sp */
  SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
  FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
  FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
  /* flags, fpsr, fpcr, frame */
  NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
};
/* The "default" register map used in 32bit mode.  */

int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 2, 1, 3, 6, 7, 4, 5,		/* general regs */
  12, 13, 14, 15, 16, 17, 18, 19,	/* fp regs */
  -1, -1, -1, -1, -1,			/* arg, flags, fpsr, fpcr, frame */
  21, 22, 23, 24, 25, 26, 27, 28,	/* SSE */
  29, 30, 31, 32, 33, 34, 35, 36,	/* MMX */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended integer registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended SSE registers */
};
static int const x86_64_int_parameter_registers[6] =
{
  5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
  FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
};

static int const x86_64_ms_abi_int_parameter_registers[4] =
{
  2 /*RCX*/, 1 /*RDX*/,
  FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
};

static int const x86_64_int_return_registers[4] =
{
  0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
};
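/* Illustrative decoding of the tables above: gcc register numbers
   0, 1, 2, 3, 4, 5 are ax, dx, cx, bx, si, di (see regclass_map), so
   x86_64_int_parameter_registers spells out the SysV AMD64 argument
   order rdi, rsi, rdx, rcx, r8, r9, and the ms_abi variant the
   Microsoft x64 order rcx, rdx, r8, r9.  */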
/* The "default" register map used in 64bit mode.  */

int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 1, 2, 3, 4, 5, 6, 7,		/* general regs */
  33, 34, 35, 36, 37, 38, 39, 40,	/* fp regs */
  -1, -1, -1, -1, -1,			/* arg, flags, fpsr, fpcr, frame */
  17, 18, 19, 20, 21, 22, 23, 24,	/* SSE */
  41, 42, 43, 44, 45, 46, 47, 48,	/* MMX */
  8, 9, 10, 11, 12, 13, 14, 15,		/* extended integer registers */
  25, 26, 27, 28, 29, 30, 31, 32,	/* extended SSE registers */
};
/* Define the register numbers to be used in Dwarf debugging information.
   The SVR4 reference port C compiler uses the following register numbers
   in its Dwarf output code:
	0 for %eax (gcc regno = 0)
	1 for %ecx (gcc regno = 2)
	2 for %edx (gcc regno = 1)
	3 for %ebx (gcc regno = 3)
	4 for %esp (gcc regno = 7)
	5 for %ebp (gcc regno = 6)
	6 for %esi (gcc regno = 4)
	7 for %edi (gcc regno = 5)
   The following three DWARF register numbers are never generated by
   the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
   believes these numbers have these meanings.
	8 for %eip (no gcc equivalent)
	9 for %eflags (gcc regno = 17)
	10 for %trapno (no gcc equivalent)
   It is not at all clear how we should number the FP stack registers
   for the x86 architecture.  If the version of SDB on x86/svr4 were
   a bit less brain dead with respect to floating-point then we would
   have a precedent to follow with respect to DWARF register numbers
   for x86 FP registers, but the SDB on x86/svr4 is so completely
   broken with respect to FP registers that it is hardly worth thinking
   of it as something to strive for compatibility with.
   The version of x86/svr4 SDB I have at the moment does (partially)
   seem to believe that DWARF register number 11 is associated with
   the x86 register %st(0), but that's about all.  Higher DWARF
   register numbers don't seem to be associated with anything in
   particular, and even for DWARF regno 11, SDB only seems to
   understand that it should say that a variable lives in %st(0) (when
   asked via an `=' command) if we said it was in DWARF regno 11,
   but SDB still prints garbage when asked for the value of the
   variable in question (via a `/' command).
   (Also note that the labels SDB prints for various FP stack regs
   when doing an `x' command are all wrong.)
   Note that these problems generally don't affect the native SVR4
   C compiler because it doesn't allow the use of -O with -g and
   because when it is *not* optimizing, it allocates a memory
   location for each floating-point variable, and the memory
   location is what gets described in the DWARF AT_location
   attribute for the variable in question.
   Regardless of the severe mental illness of the x86/svr4 SDB, we
   do something sensible here and we use the following DWARF
   register numbers.  Note that these are all stack-top-relative
   numbers:
	11 for %st(0) (gcc regno = 8)
	12 for %st(1) (gcc regno = 9)
	13 for %st(2) (gcc regno = 10)
	14 for %st(3) (gcc regno = 11)
	15 for %st(4) (gcc regno = 12)
	16 for %st(5) (gcc regno = 13)
	17 for %st(6) (gcc regno = 14)
	18 for %st(7) (gcc regno = 15)
*/
int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 2, 1, 3, 6, 7, 5, 4,		/* general regs */
  11, 12, 13, 14, 15, 16, 17, 18,	/* fp regs */
  -1, 9, -1, -1, -1,			/* arg, flags, fpsr, fpcr, frame */
  21, 22, 23, 24, 25, 26, 27, 28,	/* SSE registers */
  29, 30, 31, 32, 33, 34, 35, 36,	/* MMX registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended integer registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended SSE registers */
};
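/* Illustrative: the general-register rows of dbx_register_map and
   svr4_dbx_register_map differ only for %ebp/%esp -- gcc regnos 6 and 7
   map to 4/5 in the default numbering but to 5/4 in the SVR4 DWARF
   numbering spelled out in the comment above.  */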
/* Test and compare insns in i386.md store the information needed to
   generate branch and scc insns here.  */

rtx ix86_compare_op0 = NULL_RTX;
rtx ix86_compare_op1 = NULL_RTX;
rtx ix86_compare_emitted = NULL_RTX;
/* Size of the register save area.  */
#define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
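/* Worked example (illustrative): with the usual 64-bit values
   REGPARM_MAX == 6, SSE_REGPARM_MAX == 8 and UNITS_PER_WORD == 8,
   the register save area is 6*8 + 8*16 == 176 bytes.  */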
/* Define the structure for the machine field in struct function.  */

struct stack_local_entry GTY(())
{
  unsigned short mode;
  struct stack_local_entry *next;
};
/* Structure describing stack frame layout.
   Stack grows downward:

   ...
   saved frame pointer if frame_pointer_needed
					<- HARD_FRAME_POINTER
   ...
   [va_arg registers]
   ...
     > to_allocate			<- FRAME_POINTER
   ...  */
struct ix86_frame
{
  HOST_WIDE_INT frame;
  int outgoing_arguments_size;

  HOST_WIDE_INT to_allocate;
  /* The offsets relative to ARG_POINTER.  */
  HOST_WIDE_INT frame_pointer_offset;
  HOST_WIDE_INT hard_frame_pointer_offset;
  HOST_WIDE_INT stack_pointer_offset;

  /* When save_regs_using_mov is set, emit prologue using
     move instead of push instructions.  */
  bool save_regs_using_mov;
};
/* Code model option.  */
enum cmodel ix86_cmodel;
enum asm_dialect ix86_asm_dialect = ASM_ATT;
enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;

/* Which unit we are generating floating point math for.  */
enum fpmath_unit ix86_fpmath;

/* Which CPU we are scheduling for.  */
enum processor_type ix86_tune;

/* Which instruction set architecture to use.  */
enum processor_type ix86_arch;

/* True if the SSE prefetch instruction is not a NOOP.  */
int x86_prefetch_sse;

/* ix86_regparm_string as a number */
static int ix86_regparm;

/* -mstackrealign option */
extern int ix86_force_align_arg_pointer;
static const char ix86_force_align_arg_pointer_string[] = "force_align_arg_pointer";

/* Preferred alignment for stack boundary in bits.  */
unsigned int ix86_preferred_stack_boundary;

/* Values 1-5: see jump.c */
int ix86_branch_cost;

/* Variables which are this size or smaller are put in the data/bss
   or ldata/lbss sections.  */

int ix86_section_threshold = 65536;

/* Prefix built by ASM_GENERATE_INTERNAL_LABEL.  */
char internal_label_prefix[16];
int internal_label_prefix_len;
/* Register class used for passing a given 64bit part of the argument.
   These represent classes as documented by the PS ABI, with the exception
   of the SSESF and SSEDF classes, which are basically the SSE class, except
   that gcc will use SF or DFmode moves instead of DImode moves to avoid
   reformatting penalties.

   Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
   whenever possible (the upper half does contain padding).  */
enum x86_64_reg_class
  X86_64_INTEGER_CLASS,
  X86_64_INTEGERSI_CLASS,
  X86_64_COMPLEX_X87_CLASS,
static const char * const x86_64_reg_class_name[] =
{
  "no", "integer", "integerSI", "sse", "sseSF", "sseDF",
  "sseup", "x87", "x87up", "cplx87", "no"
};
1526 #define MAX_CLASSES 4
1528 /* Table of constants used by fldpi, fldln2, etc. */
1529 static REAL_VALUE_TYPE ext_80387_constants_table [5];
1530 static bool ext_80387_constants_init = 0;
1533 static struct machine_function * ix86_init_machine_status (void);
1534 static rtx ix86_function_value (tree, tree, bool);
1535 static int ix86_function_regparm (tree, tree);
1536 static void ix86_compute_frame_layout (struct ix86_frame *);
1537 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
1541 /* The svr4 ABI for the i386 says that records and unions are returned in memory. */
1543 #ifndef DEFAULT_PCC_STRUCT_RETURN
1544 #define DEFAULT_PCC_STRUCT_RETURN 1
1547 /* Bit flags that specify the ISA we are compiling for. */
1548 int ix86_isa_flags = TARGET_64BIT_DEFAULT | TARGET_SUBTARGET_ISA_DEFAULT;
1550 /* A mask of ix86_isa_flags that includes bit X if X
1551 was set or cleared on the command line. */
1552 static int ix86_isa_flags_explicit;
1554 /* Define a set of ISAs which aren't available when a given ISA is disabled. MMX
1555 and SSE ISAs are handled separately. */
1557 #define OPTION_MASK_ISA_MMX_UNSET \
1558 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_3DNOW_UNSET)
1559 #define OPTION_MASK_ISA_3DNOW_UNSET OPTION_MASK_ISA_3DNOW_A
1561 #define OPTION_MASK_ISA_SSE_UNSET \
1562 (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE2_UNSET)
1563 #define OPTION_MASK_ISA_SSE2_UNSET \
1564 (OPTION_MASK_ISA_SSE3 | OPTION_MASK_ISA_SSE3_UNSET)
1565 #define OPTION_MASK_ISA_SSE3_UNSET \
1566 (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSSE3_UNSET)
1567 #define OPTION_MASK_ISA_SSSE3_UNSET \
1568 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4_1_UNSET)
1569 #define OPTION_MASK_ISA_SSE4_1_UNSET \
1570 (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_SSE4_2_UNSET)
1571 #define OPTION_MASK_ISA_SSE4_2_UNSET OPTION_MASK_ISA_SSE4A
1573 /* SSE4 includes both SSE4.1 and SSE4.2. -msse4 should be the same
1574 as -msse4.1 -msse4.2. -mno-sse4 should be the same as -mno-sse4.1. */
1575 #define OPTION_MASK_ISA_SSE4 \
1576 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4_2)
1577 #define OPTION_MASK_ISA_SSE4_UNSET OPTION_MASK_ISA_SSE4_1_UNSET
1579 #define OPTION_MASK_ISA_SSE4A_UNSET OPTION_MASK_ISA_SSE4
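/* For illustration, the chaining above makes a single -mno-* option
   cascade downward: e.g. -mno-sse2 clears not only SSE2 but, via
   OPTION_MASK_ISA_SSE2_UNSET, also SSE3, SSSE3, SSE4.1, SSE4.2 and
   SSE4A, since each later ISA presumes the earlier ones.  */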
1581 /* Implement TARGET_HANDLE_OPTION. */
1584 ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
1589 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX;
1592 ix86_isa_flags &= ~OPTION_MASK_ISA_MMX_UNSET;
1593 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_UNSET;
1598 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW;
1601 ix86_isa_flags &= ~OPTION_MASK_ISA_3DNOW_UNSET;
1602 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_UNSET;
1610 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE;
1613 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE_UNSET;
1614 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_UNSET;
1619 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2;
1622 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE2_UNSET;
1623 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_UNSET;
1628 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3;
1631 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE3_UNSET;
1632 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_UNSET;
1637 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3;
1640 ix86_isa_flags &= ~OPTION_MASK_ISA_SSSE3_UNSET;
1641 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_UNSET;
1646 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1;
1649 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_1_UNSET;
1650 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_UNSET;
1655 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2;
1658 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_2_UNSET;
1659 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_UNSET;
1664 ix86_isa_flags |= OPTION_MASK_ISA_SSE4;
1665 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4;
1669 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_UNSET;
1670 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_UNSET;
1674 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A;
1677 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4A_UNSET;
1678 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_UNSET;
1687 /* Sometimes certain combinations of command options do not make
1688 sense on a particular target machine. You can define a macro
1689 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1690 defined, is executed once just after all the command options have
1693 Don't use this macro to turn on various extra optimizations for
1694 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1697 override_options (void)
1700 int ix86_tune_defaulted = 0;
1701 unsigned int ix86_arch_mask, ix86_tune_mask;
1703 /* Comes from final.c -- no real reason to change it. */
1704 #define MAX_CODE_ALIGN 16
1708 const struct processor_costs *cost; /* Processor costs */
1709 const int align_loop; /* Default alignments. */
1710 const int align_loop_max_skip;
1711 const int align_jump;
1712 const int align_jump_max_skip;
1713 const int align_func;
1715 const processor_target_table[PROCESSOR_max] =
1717 {&i386_cost, 4, 3, 4, 3, 4},
1718 {&i486_cost, 16, 15, 16, 15, 16},
1719 {&pentium_cost, 16, 7, 16, 7, 16},
1720 {&pentiumpro_cost, 16, 15, 16, 7, 16},
1721 {&geode_cost, 0, 0, 0, 0, 0},
1722 {&k6_cost, 32, 7, 32, 7, 32},
1723 {&athlon_cost, 16, 7, 16, 7, 16},
1724 {&pentium4_cost, 0, 0, 0, 0, 0},
1725 {&k8_cost, 16, 7, 16, 7, 16},
1726 {&nocona_cost, 0, 0, 0, 0, 0},
1727 {&core2_cost, 16, 7, 16, 7, 16},
1728 {&generic32_cost, 16, 7, 16, 7, 16},
1729 {&generic64_cost, 16, 7, 16, 7, 16},
1730 {&amdfam10_cost, 32, 24, 32, 7, 32}
1733 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
1736 const char *const name; /* processor name or nickname. */
1737 const enum processor_type processor;
1738 const enum pta_flags
1744 PTA_PREFETCH_SSE = 1 << 4,
1746 PTA_3DNOW_A = 1 << 6,
1750 PTA_POPCNT = 1 << 10,
1752 PTA_SSE4A = 1 << 12,
1753 PTA_NO_SAHF = 1 << 13,
1754 PTA_SSE4_1 = 1 << 14,
1755 PTA_SSE4_2 = 1 << 15
1758 const processor_alias_table[] =
1760 {"i386", PROCESSOR_I386, 0},
1761 {"i486", PROCESSOR_I486, 0},
1762 {"i586", PROCESSOR_PENTIUM, 0},
1763 {"pentium", PROCESSOR_PENTIUM, 0},
1764 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
1765 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
1766 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1767 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1768 {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE},
1769 {"i686", PROCESSOR_PENTIUMPRO, 0},
1770 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
1771 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
1772 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE},
1773 {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE},
1774 {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_SSE2},
1775 {"pentium4", PROCESSOR_PENTIUM4, PTA_MMX |PTA_SSE | PTA_SSE2},
1776 {"pentium4m", PROCESSOR_PENTIUM4, PTA_MMX | PTA_SSE | PTA_SSE2},
1777 {"prescott", PROCESSOR_NOCONA, PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3},
1778 {"nocona", PROCESSOR_NOCONA, (PTA_64BIT
1779 | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
1780 | PTA_CX16 | PTA_NO_SAHF)},
1781 {"core2", PROCESSOR_CORE2, (PTA_64BIT
1782 | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
1785 {"geode", PROCESSOR_GEODE, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
1786 | PTA_PREFETCH_SSE)},
1787 {"k6", PROCESSOR_K6, PTA_MMX},
1788 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1789 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1790 {"athlon", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
1791 | PTA_PREFETCH_SSE)},
1792 {"athlon-tbird", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
1793 | PTA_PREFETCH_SSE)},
1794 {"athlon-4", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
1796 {"athlon-xp", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
1798 {"athlon-mp", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
1800 {"x86-64", PROCESSOR_K8, (PTA_64BIT
1801 | PTA_MMX | PTA_SSE | PTA_SSE2
1803 {"k8", PROCESSOR_K8, (PTA_64BIT
1804 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
1805 | PTA_SSE | PTA_SSE2
1807 {"k8-sse3", PROCESSOR_K8, (PTA_64BIT
1808 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
1809 | PTA_SSE | PTA_SSE2 | PTA_SSE3
1811 {"opteron", PROCESSOR_K8, (PTA_64BIT
1812 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
1813 | PTA_SSE | PTA_SSE2
1815 {"opteron-sse3", PROCESSOR_K8, (PTA_64BIT
1816 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
1817 | PTA_SSE | PTA_SSE2 | PTA_SSE3
1819 {"athlon64", PROCESSOR_K8, (PTA_64BIT
1820 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
1821 | PTA_SSE | PTA_SSE2
1823 {"athlon64-sse3", PROCESSOR_K8, (PTA_64BIT
1824 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
1825 | PTA_SSE | PTA_SSE2 | PTA_SSE3
1827 {"athlon-fx", PROCESSOR_K8, (PTA_64BIT
1828 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
1829 | PTA_SSE | PTA_SSE2
1831 {"amdfam10", PROCESSOR_AMDFAM10, (PTA_64BIT
1832 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
1833 | PTA_SSE | PTA_SSE2 | PTA_SSE3
1835 | PTA_CX16 | PTA_ABM)},
1836 {"barcelona", PROCESSOR_AMDFAM10, (PTA_64BIT
1837 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
1838 | PTA_SSE | PTA_SSE2 | PTA_SSE3
1840 | PTA_CX16 | PTA_ABM)},
1841 {"generic32", PROCESSOR_GENERIC32, 0 /* flags are only used for -march switch. */ },
1842 {"generic64", PROCESSOR_GENERIC64, PTA_64BIT /* flags are only used for -march switch. */ },
1845 int const pta_size = ARRAY_SIZE (processor_alias_table);
1847 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1848 SUBTARGET_OVERRIDE_OPTIONS;
1851 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
1852 SUBSUBTARGET_OVERRIDE_OPTIONS;
1855 /* -fPIC is the default for x86_64. */
1856 if (TARGET_MACHO && TARGET_64BIT)
1859 /* Set the default values for switches whose default depends on TARGET_64BIT
1860 in case they weren't overwritten by command line options. */
1863 /* Mach-O doesn't support omitting the frame pointer for now. */
1864 if (flag_omit_frame_pointer == 2)
1865 flag_omit_frame_pointer = (TARGET_MACHO ? 0 : 1);
1866 if (flag_asynchronous_unwind_tables == 2)
1867 flag_asynchronous_unwind_tables = 1;
1868 if (flag_pcc_struct_return == 2)
1869 flag_pcc_struct_return = 0;
1873 if (flag_omit_frame_pointer == 2)
1874 flag_omit_frame_pointer = 0;
1875 if (flag_asynchronous_unwind_tables == 2)
1876 flag_asynchronous_unwind_tables = 0;
1877 if (flag_pcc_struct_return == 2)
1878 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
1881 /* Need to check -mtune=generic first. */
1882 if (ix86_tune_string)
1884 if (!strcmp (ix86_tune_string, "generic")
1885 || !strcmp (ix86_tune_string, "i686")
1886 /* As special support for cross compilers we read -mtune=native
1887 as -mtune=generic. With native compilers we won't see the
1888 -mtune=native, as it was changed by the driver. */
1889 || !strcmp (ix86_tune_string, "native"))
1892 ix86_tune_string = "generic64";
1894 ix86_tune_string = "generic32";
1896 else if (!strncmp (ix86_tune_string, "generic", 7))
1897 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
1901 if (ix86_arch_string)
1902 ix86_tune_string = ix86_arch_string;
1903 if (!ix86_tune_string)
1905 ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT];
1906 ix86_tune_defaulted = 1;
1909 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
1910 need to use a sensible tune option. */
1911 if (!strcmp (ix86_tune_string, "generic")
1912 || !strcmp (ix86_tune_string, "x86-64")
1913 || !strcmp (ix86_tune_string, "i686"))
1916 ix86_tune_string = "generic64";
1918 ix86_tune_string = "generic32";
1921 if (ix86_stringop_string)
1923 if (!strcmp (ix86_stringop_string, "rep_byte"))
1924 stringop_alg = rep_prefix_1_byte;
1925 else if (!strcmp (ix86_stringop_string, "libcall"))
1926 stringop_alg = libcall;
1927 else if (!strcmp (ix86_stringop_string, "rep_4byte"))
1928 stringop_alg = rep_prefix_4_byte;
1929 else if (!strcmp (ix86_stringop_string, "rep_8byte"))
1930 stringop_alg = rep_prefix_8_byte;
1931 else if (!strcmp (ix86_stringop_string, "byte_loop"))
1932 stringop_alg = loop_1_byte;
1933 else if (!strcmp (ix86_stringop_string, "loop"))
1934 stringop_alg = loop;
1935 else if (!strcmp (ix86_stringop_string, "unrolled_loop"))
1936 stringop_alg = unrolled_loop;
1938 error ("bad value (%s) for -mstringop-strategy= switch", ix86_stringop_string);
1940 if (!strcmp (ix86_tune_string, "x86-64"))
1941 warning (OPT_Wdeprecated, "-mtune=x86-64 is deprecated. Use -mtune=k8 or "
1942 "-mtune=generic instead as appropriate.");
1944 if (!ix86_arch_string)
1945 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
1946 if (!strcmp (ix86_arch_string, "generic"))
1947 error ("generic CPU can be used only for -mtune= switch");
1948 if (!strncmp (ix86_arch_string, "generic", 7))
1949 error ("bad value (%s) for -march= switch", ix86_arch_string);
1951 if (ix86_cmodel_string != 0)
1953 if (!strcmp (ix86_cmodel_string, "small"))
1954 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1955 else if (!strcmp (ix86_cmodel_string, "medium"))
1956 ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
1957 else if (!strcmp (ix86_cmodel_string, "large"))
1958 ix86_cmodel = flag_pic ? CM_LARGE_PIC : CM_LARGE;
1960 error ("code model %s does not support PIC mode", ix86_cmodel_string);
1961 else if (!strcmp (ix86_cmodel_string, "32"))
1962 ix86_cmodel = CM_32;
1963 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
1964 ix86_cmodel = CM_KERNEL;
1966 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
1970 /* For TARGET_64BIT_MS_ABI, force pic on, in order to enable the
1971 use of rip-relative addressing. This eliminates fixups that
1972 would otherwise be needed if this object is to be placed in a
1973 DLL, and is essentially just as efficient as direct addressing. */
1974 if (TARGET_64BIT_MS_ABI)
1975 ix86_cmodel = CM_SMALL_PIC, flag_pic = 1;
1976 else if (TARGET_64BIT)
1977 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1979 ix86_cmodel = CM_32;
1981 if (ix86_asm_string != 0)
1984 && !strcmp (ix86_asm_string, "intel"))
1985 ix86_asm_dialect = ASM_INTEL;
1986 else if (!strcmp (ix86_asm_string, "att"))
1987 ix86_asm_dialect = ASM_ATT;
1989 error ("bad value (%s) for -masm= switch", ix86_asm_string);
1991 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
1992 error ("code model %qs not supported in the %s bit mode",
1993 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
1994 if ((TARGET_64BIT != 0) != ((ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
1995 sorry ("%i-bit mode not compiled in",
1996 (ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
1998 for (i = 0; i < pta_size; i++)
1999 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
2001 ix86_arch = processor_alias_table[i].processor;
2002 /* Default cpu tuning to the architecture. */
2003 ix86_tune = ix86_arch;
2005 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
2006 error ("CPU you selected does not support x86-64 "
2009 if (processor_alias_table[i].flags & PTA_MMX
2010 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
2011 ix86_isa_flags |= OPTION_MASK_ISA_MMX;
2012 if (processor_alias_table[i].flags & PTA_3DNOW
2013 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
2014 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
2015 if (processor_alias_table[i].flags & PTA_3DNOW_A
2016 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
2017 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
2018 if (processor_alias_table[i].flags & PTA_SSE
2019 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
2020 ix86_isa_flags |= OPTION_MASK_ISA_SSE;
2021 if (processor_alias_table[i].flags & PTA_SSE2
2022 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
2023 ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
2024 if (processor_alias_table[i].flags & PTA_SSE3
2025 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
2026 ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
2027 if (processor_alias_table[i].flags & PTA_SSSE3
2028 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
2029 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
2030 if (processor_alias_table[i].flags & PTA_SSE4_1
2031 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
2032 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
2033 if (processor_alias_table[i].flags & PTA_SSE4_2
2034 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
2035 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
2036 if (processor_alias_table[i].flags & PTA_SSE4A
2037 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
2038 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
2040 if (processor_alias_table[i].flags & PTA_ABM)
2042 if (processor_alias_table[i].flags & PTA_CX16)
2043 x86_cmpxchg16b = true;
2044 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM))
2046 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
2047 x86_prefetch_sse = true;
2048 if ((processor_alias_table[i].flags & PTA_NO_SAHF) && !TARGET_64BIT)
2055 error ("bad value (%s) for -march= switch", ix86_arch_string);
2057 ix86_arch_mask = 1u << ix86_arch;
2058 for (i = 0; i < X86_ARCH_LAST; ++i)
2059 ix86_arch_features[i] &= ix86_arch_mask;
2061 for (i = 0; i < pta_size; i++)
2062 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
2064 ix86_tune = processor_alias_table[i].processor;
2065 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
2067 if (ix86_tune_defaulted)
2069 ix86_tune_string = "x86-64";
2070 for (i = 0; i < pta_size; i++)
2071 if (! strcmp (ix86_tune_string,
2072 processor_alias_table[i].name))
2074 ix86_tune = processor_alias_table[i].processor;
2077 error ("CPU you selected does not support x86-64 "
2080 /* Intel CPUs have always interpreted SSE prefetch instructions as
2081 NOPs; so, we can enable SSE prefetch instructions even when
2082 -mtune (rather than -march) points us to a processor that has them.
2083 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
2084 higher processors. */
2086 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
2087 x86_prefetch_sse = true;
2091 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
2093 ix86_tune_mask = 1u << ix86_tune;
2094 for (i = 0; i < X86_TUNE_LAST; ++i)
2095 ix86_tune_features[i] &= ix86_tune_mask;
2098 ix86_cost = &size_cost;
2100 ix86_cost = processor_target_table[ix86_tune].cost;
2102 /* Arrange to set up i386_stack_locals for all functions. */
2103 init_machine_status = ix86_init_machine_status;
2105 /* Validate -mregparm= value. */
2106 if (ix86_regparm_string)
2109 warning (0, "-mregparm is ignored in 64-bit mode");
2110 i = atoi (ix86_regparm_string);
2111 if (i < 0 || i > REGPARM_MAX)
2112 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
2117 ix86_regparm = REGPARM_MAX;
2119 /* If the user has provided any of the -malign-* options,
2120 warn and use that value only if -falign-* is not set.
2121 Remove this code in GCC 3.2 or later. */
2122 if (ix86_align_loops_string)
2124 warning (0, "-malign-loops is obsolete, use -falign-loops");
2125 if (align_loops == 0)
2127 i = atoi (ix86_align_loops_string);
2128 if (i < 0 || i > MAX_CODE_ALIGN)
2129 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2131 align_loops = 1 << i;
2135 if (ix86_align_jumps_string)
2137 warning (0, "-malign-jumps is obsolete, use -falign-jumps");
2138 if (align_jumps == 0)
2140 i = atoi (ix86_align_jumps_string);
2141 if (i < 0 || i > MAX_CODE_ALIGN)
2142 error ("-malign-jumps=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2144 align_jumps = 1 << i;
2148 if (ix86_align_funcs_string)
2150 warning (0, "-malign-functions is obsolete, use -falign-functions");
2151 if (align_functions == 0)
2153 i = atoi (ix86_align_funcs_string);
2154 if (i < 0 || i > MAX_CODE_ALIGN)
2155 error ("-malign-functions=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2157 align_functions = 1 << i;
2161 /* Default align_* from the processor table. */
2162 if (align_loops == 0)
2164 align_loops = processor_target_table[ix86_tune].align_loop;
2165 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
2167 if (align_jumps == 0)
2169 align_jumps = processor_target_table[ix86_tune].align_jump;
2170 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
2172 if (align_functions == 0)
2174 align_functions = processor_target_table[ix86_tune].align_func;
2177 /* Validate -mbranch-cost= value, or provide default. */
2178 ix86_branch_cost = ix86_cost->branch_cost;
2179 if (ix86_branch_cost_string)
2181 i = atoi (ix86_branch_cost_string);
2183 error ("-mbranch-cost=%d is not between 0 and 5", i);
2185 ix86_branch_cost = i;
2187 if (ix86_section_threshold_string)
2189 i = atoi (ix86_section_threshold_string);
2191 error ("-mlarge-data-threshold=%d is negative", i);
2193 ix86_section_threshold = i;
2196 if (ix86_tls_dialect_string)
2198 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
2199 ix86_tls_dialect = TLS_DIALECT_GNU;
2200 else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
2201 ix86_tls_dialect = TLS_DIALECT_GNU2;
2202 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
2203 ix86_tls_dialect = TLS_DIALECT_SUN;
2205 error ("bad value (%s) for -mtls-dialect= switch",
2206 ix86_tls_dialect_string);
2209 if (ix87_precision_string)
2211 i = atoi (ix87_precision_string);
2212 if (i != 32 && i != 64 && i != 80)
2213 error ("pc%d is not a valid precision setting (32, 64 or 80)", i);
2216 /* Keep nonleaf frame pointers. */
2217 if (flag_omit_frame_pointer)
2218 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
2219 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
2220 flag_omit_frame_pointer = 1;
2222 /* If we're doing fast math, we don't care about comparison order
2223 wrt NaNs. This lets us use a shorter comparison sequence. */
2224 if (flag_finite_math_only)
2225 target_flags &= ~MASK_IEEE_FP;
2227 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
2228 since the insns won't need emulation. */
2229 if (x86_arch_always_fancy_math_387 & ix86_arch_mask)
2230 target_flags &= ~MASK_NO_FANCY_MATH_387;
2232 /* Likewise, if the target doesn't have a 387, or we've specified
2233 software floating point, don't use 387 inline intrinsics. */
2235 target_flags |= MASK_NO_FANCY_MATH_387;
2237 /* Turn on SSE4.1 builtins and popcnt instruction for -msse4.2. */
2240 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
2244 /* Turn on SSSE3 builtins for -msse4.1. */
2246 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
2248 /* Turn on SSE3 builtins for -mssse3. */
2250 ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
2252 /* Turn on SSE3 builtins for -msse4a. */
2254 ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
2256 /* Turn on SSE2 builtins for -msse3. */
2258 ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
2260 /* Turn on SSE builtins for -msse2. */
2262 ix86_isa_flags |= OPTION_MASK_ISA_SSE;
2264 /* Turn on MMX builtins for -msse. */
2267 ix86_isa_flags |= OPTION_MASK_ISA_MMX & ~ix86_isa_flags_explicit;
2268 x86_prefetch_sse = true;
2271 /* Turn on MMX builtins for 3Dnow. */
2273 ix86_isa_flags |= OPTION_MASK_ISA_MMX;
2275 /* Turn on POPCNT builtins for -mabm. */
2281 target_flags |= TARGET_SUBTARGET64_DEFAULT & ~target_flags_explicit;
2283 /* Enable by default the SSE and MMX builtins. Do allow the user to
2284 explicitly disable any of these. In particular, disabling SSE and
2285 MMX for kernel code is extremely useful. */
2287 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
2288 | TARGET_SUBTARGET64_ISA_DEFAULT) & ~ix86_isa_flags_explicit);
2291 warning (0, "-mrtd is ignored in 64-bit mode");
2295 target_flags |= TARGET_SUBTARGET32_DEFAULT & ~target_flags_explicit;
2298 |= TARGET_SUBTARGET32_DEFAULT & ~ix86_isa_flags_explicit;
2300 /* The i386 ABI does not specify a red zone. It still makes sense to use
2301 one when the programmer takes care to keep the stack from being destroyed. */
2302 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
2303 target_flags |= MASK_NO_RED_ZONE;
2306 /* Validate -mpreferred-stack-boundary= value, or provide default.
2307 The default of 128 bits is for Pentium III's SSE __m128. We can't
2308 change it because of optimize_size. Otherwise, we can't mix object
2309 files compiled with -Os and -On. */
2310 ix86_preferred_stack_boundary = 128;
2311 if (ix86_preferred_stack_boundary_string)
2313 i = atoi (ix86_preferred_stack_boundary_string);
2314 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
2315 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
2316 TARGET_64BIT ? 4 : 2);
2318 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
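/* E.g. the default of -mpreferred-stack-boundary=4 yields (1 << 4) * 8
   == 128 bits, i.e. 16-byte alignment; the smallest value accepted is
   2 (4 bytes) for 32-bit code and 4 (16 bytes) for 64-bit code.  */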
2321 /* Accept -msseregparm only if at least SSE support is enabled. */
2322 if (TARGET_SSEREGPARM
2324 error ("-msseregparm used without SSE enabled");
2326 ix86_fpmath = TARGET_FPMATH_DEFAULT;
2327 if (ix86_fpmath_string != 0)
2329 if (! strcmp (ix86_fpmath_string, "387"))
2330 ix86_fpmath = FPMATH_387;
2331 else if (! strcmp (ix86_fpmath_string, "sse"))
2335 warning (0, "SSE instruction set disabled, using 387 arithmetic");
2336 ix86_fpmath = FPMATH_387;
2339 ix86_fpmath = FPMATH_SSE;
2341 else if (! strcmp (ix86_fpmath_string, "387,sse")
2342 || ! strcmp (ix86_fpmath_string, "sse,387"))
2346 warning (0, "SSE instruction set disabled, using 387 arithmetic");
2347 ix86_fpmath = FPMATH_387;
2349 else if (!TARGET_80387)
2351 warning (0, "387 instruction set disabled, using SSE arithmetic");
2352 ix86_fpmath = FPMATH_SSE;
2355 ix86_fpmath = FPMATH_SSE | FPMATH_387;
2358 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
2361 /* If the i387 is disabled, then do not return values in it. */
2363 target_flags &= ~MASK_FLOAT_RETURNS;
2365 if ((x86_accumulate_outgoing_args & ix86_tune_mask)
2366 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
2368 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
2370 /* ??? Unwind info is not correct around the CFG unless either a frame
2371 pointer is present or M_A_O_A is set. Fixing this requires rewriting
2372 unwind info generation to be aware of the CFG and propagating states
2374 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
2375 || flag_exceptions || flag_non_call_exceptions)
2376 && flag_omit_frame_pointer
2377 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
2379 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
2380 warning (0, "unwind tables currently require either a frame pointer "
2381 "or -maccumulate-outgoing-args for correctness");
2382 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
2385 /* For sane SSE instruction set generation we need the fcomi instruction.
2386 It is safe to enable all CMOVE instructions. */
2390 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
2393 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
2394 p = strchr (internal_label_prefix, 'X');
2395 internal_label_prefix_len = p - internal_label_prefix;
2399 /* When the scheduling description is not available, disable the scheduler
2400 pass so it won't slow down compilation and make x87 code slower. */
2401 if (!TARGET_SCHEDULE)
2402 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
2404 if (!PARAM_SET_P (PARAM_SIMULTANEOUS_PREFETCHES))
2405 set_param_value ("simultaneous-prefetches",
2406 ix86_cost->simultaneous_prefetches);
2407 if (!PARAM_SET_P (PARAM_L1_CACHE_LINE_SIZE))
2408 set_param_value ("l1-cache-line-size", ix86_cost->prefetch_block);
2411 /* Return true if this goes in large data/bss. */
2414 ix86_in_large_data_p (tree exp)
2416 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
2419 /* Functions are never large data. */
2420 if (TREE_CODE (exp) == FUNCTION_DECL)
2423 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
2425 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
2426 if (strcmp (section, ".ldata") == 0
2427 || strcmp (section, ".lbss") == 0)
2433 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
2435 /* If this is an incomplete type with size 0, then we can't put it
2436 in data because it might be too big when completed. */
2437 if (!size || size > ix86_section_threshold)
2444 /* Switch to the appropriate section for output of DECL.
2445 DECL is either a `VAR_DECL' node or a constant of some sort.
2446 RELOC indicates whether forming the initial value of DECL requires
2447 link-time relocations. */
2449 static section * x86_64_elf_select_section (tree, int, unsigned HOST_WIDE_INT)
2453 x86_64_elf_select_section (tree decl, int reloc,
2454 unsigned HOST_WIDE_INT align)
2456 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2457 && ix86_in_large_data_p (decl))
2459 const char *sname = NULL;
2460 unsigned int flags = SECTION_WRITE;
2461 switch (categorize_decl_for_section (decl, reloc))
2466 case SECCAT_DATA_REL:
2467 sname = ".ldata.rel";
2469 case SECCAT_DATA_REL_LOCAL:
2470 sname = ".ldata.rel.local";
2472 case SECCAT_DATA_REL_RO:
2473 sname = ".ldata.rel.ro";
2475 case SECCAT_DATA_REL_RO_LOCAL:
2476 sname = ".ldata.rel.ro.local";
2480 flags |= SECTION_BSS;
2483 case SECCAT_RODATA_MERGE_STR:
2484 case SECCAT_RODATA_MERGE_STR_INIT:
2485 case SECCAT_RODATA_MERGE_CONST:
2489 case SECCAT_SRODATA:
2496 /* We don't split these for the medium model. Place them into
2497 default sections and hope for the best. */
2502 /* We might get called with string constants, but get_named_section
2503 doesn't like them as they are not DECLs. Also, we need to set
2504 flags in that case. */
2506 return get_section (sname, flags, NULL);
2507 return get_named_section (decl, sname, reloc);
2510 return default_elf_select_section (decl, reloc, align);
2513 /* Build up a unique section name, expressed as a
2514 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
2515 RELOC indicates whether the initial value of EXP requires
2516 link-time relocations. */
2518 static void ATTRIBUTE_UNUSED
2519 x86_64_elf_unique_section (tree decl, int reloc)
2521 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2522 && ix86_in_large_data_p (decl))
2524 const char *prefix = NULL;
2525 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
2526 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
2528 switch (categorize_decl_for_section (decl, reloc))
2531 case SECCAT_DATA_REL:
2532 case SECCAT_DATA_REL_LOCAL:
2533 case SECCAT_DATA_REL_RO:
2534 case SECCAT_DATA_REL_RO_LOCAL:
2535 prefix = one_only ? ".gnu.linkonce.ld." : ".ldata.";
2538 prefix = one_only ? ".gnu.linkonce.lb." : ".lbss.";
2541 case SECCAT_RODATA_MERGE_STR:
2542 case SECCAT_RODATA_MERGE_STR_INIT:
2543 case SECCAT_RODATA_MERGE_CONST:
2544 prefix = one_only ? ".gnu.linkonce.lr." : ".lrodata.";
2546 case SECCAT_SRODATA:
2553 /* We don't split these for the medium model. Place them into
2554 default sections and hope for the best. */
2562 plen = strlen (prefix);
2564 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
2565 name = targetm.strip_name_encoding (name);
2566 nlen = strlen (name);
2568 string = alloca (nlen + plen + 1);
2569 memcpy (string, prefix, plen);
2570 memcpy (string + plen, name, nlen + 1);
2572 DECL_SECTION_NAME (decl) = build_string (nlen + plen, string);
2576 default_unique_section (decl, reloc);
2579 #ifdef COMMON_ASM_OP
2580 /* This says how to output assembler code to declare an
2581 uninitialized external linkage data object.
2583 For medium model x86-64 we need to use the .largecomm opcode for large objects. */
2586 x86_elf_aligned_common (FILE *file,
2587 const char *name, unsigned HOST_WIDE_INT size,
2590 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2591 && size > (unsigned int)ix86_section_threshold)
2592 fprintf (file, ".largecomm\t");
2594 fprintf (file, "%s", COMMON_ASM_OP);
2595 assemble_name (file, name);
2596 fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
2597 size, align / BITS_PER_UNIT);
2601 /* Utility function for targets to use in implementing
2602 ASM_OUTPUT_ALIGNED_BSS. */
2605 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
2606 const char *name, unsigned HOST_WIDE_INT size,
2609 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2610 && size > (unsigned int)ix86_section_threshold)
2611 switch_to_section (get_named_section (decl, ".lbss", 0));
2613 switch_to_section (bss_section);
2614 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
2615 #ifdef ASM_DECLARE_OBJECT_NAME
2616 last_assemble_variable_decl = decl;
2617 ASM_DECLARE_OBJECT_NAME (file, name, decl);
2619 /* The standard thing is to just output a label for the object. */
2620 ASM_OUTPUT_LABEL (file, name);
2621 #endif /* ASM_DECLARE_OBJECT_NAME */
2622 ASM_OUTPUT_SKIP (file, size ? size : 1);
2626 optimization_options (int level, int size ATTRIBUTE_UNUSED)
2628 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
2629 make the problem with not enough registers even worse. */
2630 #ifdef INSN_SCHEDULING
2632 flag_schedule_insns = 0;
2636 /* The Darwin libraries never set errno, so we might as well
2637 avoid calling them when that's the only reason we would. */
2638 flag_errno_math = 0;
2640 /* The default values of these switches depend on TARGET_64BIT,
2641 which is not known at this moment. Mark these values with 2 and
2642 let the user override them. In case there is no command line option
2643 specifying them, we will set the defaults in override_options. */
2645 flag_omit_frame_pointer = 2;
2646 flag_pcc_struct_return = 2;
2647 flag_asynchronous_unwind_tables = 2;
2648 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
2649 SUBTARGET_OPTIMIZATION_OPTIONS;
2653 /* Decide whether we can make a sibling call to a function. DECL is the
2654 declaration of the function being targeted by the call and EXP is the
2655 CALL_EXPR representing the call. */
2658 ix86_function_ok_for_sibcall (tree decl, tree exp)
2663 /* If we are generating position-independent code, we cannot sibcall
2664 optimize any indirect call, or a direct call to a global function,
2665 as the PLT requires %ebx be live. */
2666 if (!TARGET_64BIT && flag_pic && (!decl || !targetm.binds_local_p (decl)))
2673 func = TREE_TYPE (CALL_EXPR_FN (exp));
2674 if (POINTER_TYPE_P (func))
2675 func = TREE_TYPE (func);
2678 /* Check that the return value locations are the same. For example,
2679 if we are returning floats on the 80387 register stack, we cannot
2680 make a sibcall from a function that doesn't return a float to a
2681 function that does or, conversely, from a function that does return
2682 a float to a function that doesn't; the necessary stack adjustment
2683 would not be executed. This is also the place we notice
2684 differences in the return value ABI. Note that it is ok for one
2685 of the functions to have void return type as long as the return
2686 value of the other is passed in a register. */
2687 a = ix86_function_value (TREE_TYPE (exp), func, false);
2688 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
2690 if (STACK_REG_P (a) || STACK_REG_P (b))
2692 if (!rtx_equal_p (a, b))
2695 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
2697 else if (!rtx_equal_p (a, b))
2700 /* If this call is indirect, we'll need to be able to use a call-clobbered
2701 register for the address of the target function. Make sure that all
2702 such registers are not used for passing parameters. */
2703 if (!decl && !TARGET_64BIT)
2707 /* We're looking at the CALL_EXPR, we need the type of the function. */
2708 type = CALL_EXPR_FN (exp); /* pointer expression */
2709 type = TREE_TYPE (type); /* pointer type */
2710 type = TREE_TYPE (type); /* function type */
2712 if (ix86_function_regparm (type, NULL) >= 3)
2714 /* ??? Need to count the actual number of registers to be used,
2715 not the possible number of registers. Fix later. */
2720 /* Dllimport'd functions are also called indirectly. */
2721 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
2722 && decl && DECL_DLLIMPORT_P (decl)
2723 && ix86_function_regparm (TREE_TYPE (decl), NULL) >= 3)
2726 /* If we force-aligned the stack, then sibcalling would unalign the
2727 stack, which may break the called function. */
2728 if (cfun->machine->force_align_arg_pointer)
2731 /* Otherwise okay. That also includes certain types of indirect calls. */
2735 /* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm"
2736 calling convention attributes;
2737 arguments as in struct attribute_spec.handler. */
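/* For illustration, typical 32-bit uses of the conventions handled here:

     void __attribute__((fastcall)) f (int, int);
       first two integer arguments in %ecx and %edx, callee pops;
     void __attribute__((stdcall)) g (int);
       arguments on the stack, callee pops;
     void __attribute__((regparm(3))) h (int, int, int);
       arguments in %eax, %edx and %ecx.  */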
2740 ix86_handle_cconv_attribute (tree *node, tree name,
2742 int flags ATTRIBUTE_UNUSED,
2745 if (TREE_CODE (*node) != FUNCTION_TYPE
2746 && TREE_CODE (*node) != METHOD_TYPE
2747 && TREE_CODE (*node) != FIELD_DECL
2748 && TREE_CODE (*node) != TYPE_DECL)
2750 warning (OPT_Wattributes, "%qs attribute only applies to functions",
2751 IDENTIFIER_POINTER (name));
2752 *no_add_attrs = true;
2756 /* Can combine regparm with all attributes but fastcall. */
2757 if (is_attribute_p ("regparm", name))
2761 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2763 error ("fastcall and regparm attributes are not compatible");
2766 cst = TREE_VALUE (args);
2767 if (TREE_CODE (cst) != INTEGER_CST)
2769 warning (OPT_Wattributes,
2770 "%qs attribute requires an integer constant argument",
2771 IDENTIFIER_POINTER (name));
2772 *no_add_attrs = true;
2774 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
2776 warning (OPT_Wattributes, "argument to %qs attribute larger than %d",
2777 IDENTIFIER_POINTER (name), REGPARM_MAX);
2778 *no_add_attrs = true;
2782 && lookup_attribute (ix86_force_align_arg_pointer_string,
2783 TYPE_ATTRIBUTES (*node))
2784 && compare_tree_int (cst, REGPARM_MAX-1))
2786 error ("%s functions limited to %d register parameters",
2787 ix86_force_align_arg_pointer_string, REGPARM_MAX-1);
2795 /* Do not warn when emulating the MS ABI. */
2796 if (!TARGET_64BIT_MS_ABI)
2797 warning (OPT_Wattributes, "%qs attribute ignored",
2798 IDENTIFIER_POINTER (name));
2799 *no_add_attrs = true;
2803 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
2804 if (is_attribute_p ("fastcall", name))
2806 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
2808 error ("fastcall and cdecl attributes are not compatible");
2810 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
2812 error ("fastcall and stdcall attributes are not compatible");
2814 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
2816 error ("fastcall and regparm attributes are not compatible");
2820 /* Can combine stdcall with fastcall (redundant), regparm and sseregparm. */
2822 else if (is_attribute_p ("stdcall", name))
2824 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
2826 error ("stdcall and cdecl attributes are not compatible");
2828 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2830 error ("stdcall and fastcall attributes are not compatible");
2834 /* Can combine cdecl with regparm and sseregparm. */
2835 else if (is_attribute_p ("cdecl", name))
2837 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
2839 error ("stdcall and cdecl attributes are not compatible");
2841 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2843 error ("fastcall and cdecl attributes are not compatible");
2847 /* Can combine sseregparm with all attributes. */
2852 /* Return 0 if the attributes for two types are incompatible, 1 if they
2853 are compatible, and 2 if they are nearly compatible (which causes a
2854 warning to be generated). */
2857 ix86_comp_type_attributes (tree type1, tree type2)
2859 /* Check for mismatch of non-default calling convention. */
2860 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
2862 if (TREE_CODE (type1) != FUNCTION_TYPE)
2865 /* Check for mismatched fastcall/regparm types. */
2866 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
2867 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
2868 || (ix86_function_regparm (type1, NULL)
2869 != ix86_function_regparm (type2, NULL)))
2872 /* Check for mismatched sseregparm types. */
2873 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
2874 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
2877 /* Check for mismatched return types (cdecl vs stdcall). */
2878 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
2879 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
2885 /* Return the regparm value for a function with the indicated TYPE and DECL.
2886 DECL may be NULL when calling function indirectly
2887 or considering a libcall. */
2890 ix86_function_regparm (tree type, tree decl)
2893 int regparm = ix86_regparm;
2898 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
2900 return TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
2902 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
2905 /* Use register calling convention for local functions when possible. */
2906 if (decl && TREE_CODE (decl) == FUNCTION_DECL
2907 && flag_unit_at_a_time && !profile_flag)
2909 struct cgraph_local_info *i = cgraph_local_info (decl);
2912 int local_regparm, globals = 0, regno;
2915 /* Make sure no regparm register is taken by a
2916 global register variable. */
2917 for (local_regparm = 0; local_regparm < 3; local_regparm++)
2918 if (global_regs[local_regparm])
2921 /* We can't use regparm(3) for nested functions as these use
2922 the static chain pointer in the third argument. */
2923 if (local_regparm == 3
2924 && (decl_function_context (decl)
2925 || ix86_force_align_arg_pointer)
2926 && !DECL_NO_STATIC_CHAIN (decl))
2929 /* If the function realigns its stack pointer, the prologue will
2930 clobber %ecx. If we've already generated code for the callee,
2931 the callee DECL_STRUCT_FUNCTION is gone, so we fall back to
2932 scanning the attributes for the self-realigning property. */
2933 f = DECL_STRUCT_FUNCTION (decl);
2934 if (local_regparm == 3
2935 && (f ? !!f->machine->force_align_arg_pointer
2936 : !!lookup_attribute (ix86_force_align_arg_pointer_string,
2937 TYPE_ATTRIBUTES (TREE_TYPE (decl)))))
2940 /* Each global register variable increases register pressure,
2941 so the more global register variables there are, the less useful
2942 the regparm optimization becomes, unless the user requested it explicitly. */
2943 for (regno = 0; regno < 6; regno++)
2944 if (global_regs[regno])
2947 = globals < local_regparm ? local_regparm - globals : 0;
2949 if (local_regparm > regparm)
2950 regparm = local_regparm;
2957 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
2958 DFmode (2) arguments in SSE registers for a function with the
2959 indicated TYPE and DECL. DECL may be NULL when calling function
2960 indirectly or considering a libcall. Otherwise return 0. */
2963 ix86_function_sseregparm (tree type, tree decl)
2965 gcc_assert (!TARGET_64BIT);
2967 /* Use SSE registers to pass SFmode and DFmode arguments if requested
2968 by the sseregparm attribute. */
2969 if (TARGET_SSEREGPARM
2970 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
2975 error ("Calling %qD with attribute sseregparm without "
2976 "SSE/SSE2 enabled", decl);
2978 error ("Calling %qT with attribute sseregparm without "
2979 "SSE/SSE2 enabled", type);
2986 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
2987 (and DFmode for SSE2) arguments in SSE registers. */
2988 if (decl && TARGET_SSE_MATH && flag_unit_at_a_time && !profile_flag)
2990 struct cgraph_local_info *i = cgraph_local_info (decl);
2992 return TARGET_SSE2 ? 2 : 1;
2998 /* Return true if EAX is live at the start of the function. Used by
2999 ix86_expand_prologue to determine if we need special help before
3000 calling allocate_stack_worker. */
3003 ix86_eax_live_at_start_p (void)
3005 /* Cheat. Don't bother working forward from ix86_function_regparm
3006 to the function type to whether an actual argument is located in
3007 eax. Instead just look at cfg info, which is still close enough
3008 to correct at this point. This gives false positives for broken
3009 functions that might use uninitialized data that happens to be
3010 allocated in eax, but who cares? */
3011 return REGNO_REG_SET_P (ENTRY_BLOCK_PTR->il.rtl->global_live_at_end, 0);
3014 /* Return true if TYPE has a variable argument list. */
3017 type_has_variadic_args_p (tree type)
3019 tree n, t = TYPE_ARG_TYPES (type);
3024 while ((n = TREE_CHAIN (t)) != NULL)
3027 return TREE_VALUE (t) != void_type_node;
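/* For illustration: for `int f (int, ...)' the final TREE_VALUE is not
   void_type_node, so this returns true; a prototyped `int f (int)' ends
   its argument list with void_type_node and returns false.  */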
3030 /* Value is the number of bytes of arguments automatically
3031 popped when returning from a subroutine call.
3032 FUNDECL is the declaration node of the function (as a tree),
3033 FUNTYPE is the data type of the function (as a tree),
3034 or for a library call it is an identifier node for the subroutine name.
3035 SIZE is the number of bytes of arguments passed on the stack.
3037 On the 80386, the RTD insn may be used to pop them if the number
3038 of args is fixed, but if the number is variable then the caller
3039 must pop them all. RTD can't be used for library calls now
3040 because the library is compiled with the Unix compiler.
3041 Use of RTD is a selectable option, since it is incompatible with
3042 standard Unix calling sequences. If the option is not selected,
3043 the caller must always pop the args.
3045 The attribute stdcall is equivalent to RTD on a per module basis. */
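/* For illustration: a 32-bit `__attribute__((stdcall)) void f (int, int)'
   returns with `ret $8', popping its 8 bytes of stack arguments itself,
   whereas a cdecl function returns 0 here and leaves the pop to the
   caller.  */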
3048 ix86_return_pops_args (tree fundecl, tree funtype, int size)
3052 /* None of the 64-bit ABIs pop arguments. */
3056 rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
3058 /* Cdecl functions override -mrtd, and never pop the stack. */
3059 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype)))
3061 /* Stdcall and fastcall functions will pop the stack if not
3063 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
3064 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
3067 if (rtd && ! type_has_variadic_args_p (funtype))
3071 /* Lose any fake structure return argument if it is passed on the stack. */
3072 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
3073 && !KEEP_AGGREGATE_RETURN_POINTER)
3075 int nregs = ix86_function_regparm (funtype, fundecl);
3077 return GET_MODE_SIZE (Pmode);
3083 /* Argument support functions. */
3085 /* Return true when register may be used to pass function parameters. */
3087 ix86_function_arg_regno_p (int regno)
3090 const int *parm_regs;
3095 return (regno < REGPARM_MAX
3096 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
3098 return (regno < REGPARM_MAX
3099 || (TARGET_MMX && MMX_REGNO_P (regno)
3100 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
3101 || (TARGET_SSE && SSE_REGNO_P (regno)
3102 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
3107 if (SSE_REGNO_P (regno) && TARGET_SSE)
3112 if (TARGET_SSE && SSE_REGNO_P (regno)
3113 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
3117 /* RAX is used as a hidden argument to va_arg functions. */
3118 if (!TARGET_64BIT_MS_ABI && regno == 0)
3121 if (TARGET_64BIT_MS_ABI)
3122 parm_regs = x86_64_ms_abi_int_parameter_registers;
3124 parm_regs = x86_64_int_parameter_registers;
3125 for (i = 0; i < REGPARM_MAX; i++)
3126 if (regno == parm_regs[i])
3131 /* Return true if we do not know how to pass TYPE solely in registers. */
3134 ix86_must_pass_in_stack (enum machine_mode mode, tree type)
3136 if (must_pass_in_stack_var_size_or_pad (mode, type))
3139 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
3140 The layout_type routine is crafty and tries to trick us into passing
3141 currently unsupported vector types on the stack by using TImode. */
3142 return (!TARGET_64BIT && mode == TImode
3143 && type && TREE_CODE (type) != VECTOR_TYPE);
3146 /* Initialize a variable CUM of type CUMULATIVE_ARGS
3147 for a call to a function whose data type is FNTYPE.
3148 For a library call, FNTYPE is 0. */
3151 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
3152 tree fntype, /* tree ptr for function decl */
3153 rtx libname, /* SYMBOL_REF of library name or 0 */
3156 memset (cum, 0, sizeof (*cum));
3158 /* Set up the number of registers to use for passing arguments. */
3159 cum->nregs = ix86_regparm;
3161 cum->sse_nregs = SSE_REGPARM_MAX;
3163 cum->mmx_nregs = MMX_REGPARM_MAX;
3164 cum->warn_sse = true;
3165 cum->warn_mmx = true;
3166 cum->maybe_vaarg = (fntype
3167 ? (!TYPE_ARG_TYPES (fntype)
3168 || type_has_variadic_args_p (fntype))
3173 /* If there are variable arguments, then we won't pass anything
3174 in registers in 32-bit mode. */
3175 if (cum->maybe_vaarg)
3185 /* Use ecx and edx registers if function has fastcall attribute,
3186 else look for regparm information. */
3189 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
3195 cum->nregs = ix86_function_regparm (fntype, fndecl);
3198 /* Set up the number of SSE registers used for passing SFmode
3199 and DFmode arguments. Warn for mismatching ABI. */
3200 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl);
3204 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
3205 But in the case of vector types, it is some vector mode.
3207 When we have only some of our vector isa extensions enabled, then there
3208 are some modes for which vector_mode_supported_p is false. For these
3209 modes, the generic vector support in gcc will choose some non-vector mode
3210 in order to implement the type. By computing the natural mode, we'll
3211 select the proper ABI location for the operand and not depend on whatever
3212 the middle-end decides to do with these vector types. */
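/* E.g. a `float __attribute__((vector_size (16)))' type has the natural
   mode V4SFmode even when SSE is disabled and TYPE_MODE would have to
   fall back to a non-vector mode.  */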
3214 static enum machine_mode
3215 type_natural_mode (tree type)
3217 enum machine_mode mode = TYPE_MODE (type);
3219 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
3221 HOST_WIDE_INT size = int_size_in_bytes (type);
3222 if ((size == 8 || size == 16)
3223 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
3224 && TYPE_VECTOR_SUBPARTS (type) > 1)
3226 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
3228 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
3229 mode = MIN_MODE_VECTOR_FLOAT;
3231 mode = MIN_MODE_VECTOR_INT;
3233 /* Get the mode which has this inner mode and number of units. */
3234 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
3235 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
3236 && GET_MODE_INNER (mode) == innermode)
3246 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
3247 this may not agree with the mode that the type system has chosen for the
3248 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
3249 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
3252 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
3257 if (orig_mode != BLKmode)
3258 tmp = gen_rtx_REG (orig_mode, regno);
3261 tmp = gen_rtx_REG (mode, regno);
3262 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
3263 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
3269 /* x86-64 register passing implementation. See the x86-64 ABI for details. The
3270 goal of this code is to classify each eightbyte of an incoming argument by
3271 register class and assign registers accordingly. */
3273 /* Return the union class of CLASS1 and CLASS2.
3274 See the x86-64 PS ABI for details. */
3276 static enum x86_64_reg_class
3277 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
3279 /* Rule #1: If both classes are equal, this is the resulting class. */
3280 if (class1 == class2)
3283 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
3285 if (class1 == X86_64_NO_CLASS)
3287 if (class2 == X86_64_NO_CLASS)
3290 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
3291 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
3292 return X86_64_MEMORY_CLASS;
3294 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
3295 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
3296 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
3297 return X86_64_INTEGERSI_CLASS;
3298 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
3299 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
3300 return X86_64_INTEGER_CLASS;
3302 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
3304 if (class1 == X86_64_X87_CLASS
3305 || class1 == X86_64_X87UP_CLASS
3306 || class1 == X86_64_COMPLEX_X87_CLASS
3307 || class2 == X86_64_X87_CLASS
3308 || class2 == X86_64_X87UP_CLASS
3309 || class2 == X86_64_COMPLEX_X87_CLASS)
3310 return X86_64_MEMORY_CLASS;
3312 /* Rule #6: Otherwise class SSE is used. */
3313 return X86_64_SSE_CLASS;
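/* For illustration, a few merges implied by the rules above:
   (INTEGERSI, SSESF) -> INTEGERSI by rule #4, (NO_CLASS, SSEDF) -> SSEDF
   by rule #2, and (X87, SSE) -> MEMORY by rule #5.  */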
3316 /* Classify the argument of type TYPE and mode MODE.
3317 CLASSES will be filled by the register class used to pass each word
3318 of the operand. The number of words is returned. In case the parameter
3319 should be passed in memory, 0 is returned. As a special case for zero
3320 sized containers, classes[0] will be NO_CLASS and 1 is returned.
3322 BIT_OFFSET is used internally for handling records; it specifies the
3323 offset in bits, modulo 256, to avoid overflow cases.
3325 See the x86-64 PS ABI for details.
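   As a worked example, `struct { long l; double d; }' occupies two
   eightbytes: the first classifies as INTEGER and the second as SSEDF,
   so as a first argument it would travel in %rdi and %xmm0.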
3329 classify_argument (enum machine_mode mode, tree type,
3330 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
3332 HOST_WIDE_INT bytes =
3333 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3334 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3336 /* Variable sized entities are always passed/returned in memory. */
3340 if (mode != VOIDmode
3341 && targetm.calls.must_pass_in_stack (mode, type))
3344 if (type && AGGREGATE_TYPE_P (type))
3348 enum x86_64_reg_class subclasses[MAX_CLASSES];
3350 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
3354 for (i = 0; i < words; i++)
3355 classes[i] = X86_64_NO_CLASS;
3357 /* Zero-sized arrays or structures are NO_CLASS. We return 0 to
3358 signal the memory class, so handle them as a special case. */
3361 classes[0] = X86_64_NO_CLASS;
3365 /* Classify each field of record and merge classes. */
3366 switch (TREE_CODE (type))
3369 /* And now merge the fields of the structure. */
3370 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3372 if (TREE_CODE (field) == FIELD_DECL)
3376 if (TREE_TYPE (field) == error_mark_node)
3379 /* Bitfields are always classified as integer. Handle them
3380 early, since later code would consider them to be
3381 misaligned integers. */
3382 if (DECL_BIT_FIELD (field))
3384 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
3385 i < ((int_bit_position (field) + (bit_offset % 64))
3386 + tree_low_cst (DECL_SIZE (field), 0)
3389 merge_classes (X86_64_INTEGER_CLASS,
3394 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3395 TREE_TYPE (field), subclasses,
3396 (int_bit_position (field)
3397 + bit_offset) % 256);
3400 for (i = 0; i < num; i++)
3403 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
3405 merge_classes (subclasses[i], classes[i + pos]);
3413 /* Arrays are handled as small records. */
3416 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
3417 TREE_TYPE (type), subclasses, bit_offset);
3421 /* The partial classes are now full classes. */
3422 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
3423 subclasses[0] = X86_64_SSE_CLASS;
3424 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
3425 subclasses[0] = X86_64_INTEGER_CLASS;
3427 for (i = 0; i < words; i++)
3428 classes[i] = subclasses[i % num];
3433 case QUAL_UNION_TYPE:
3434 /* Unions are similar to RECORD_TYPE but the offset is always 0.
3436 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3438 if (TREE_CODE (field) == FIELD_DECL)
3442 if (TREE_TYPE (field) == error_mark_node)
3445 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3446 TREE_TYPE (field), subclasses,
3450 for (i = 0; i < num; i++)
3451 classes[i] = merge_classes (subclasses[i], classes[i]);
3460 /* Final merger cleanup. */
3461 for (i = 0; i < words; i++)
3463 /* If one class is MEMORY, everything should be passed in memory. */
3465 if (classes[i] == X86_64_MEMORY_CLASS)
3468 /* The X86_64_SSEUP_CLASS should always be preceded by
3469 X86_64_SSE_CLASS. */
3470 if (classes[i] == X86_64_SSEUP_CLASS
3471 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
3472 classes[i] = X86_64_SSE_CLASS;
3474 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
3475 if (classes[i] == X86_64_X87UP_CLASS
3476 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
3477 classes[i] = X86_64_SSE_CLASS;
3482 /* Compute the alignment needed. We align all types to their natural
3483 boundaries, with the exception of XFmode, which is aligned to 64 bits. */
3484 if (mode != VOIDmode && mode != BLKmode)
3486 int mode_alignment = GET_MODE_BITSIZE (mode);
3489 mode_alignment = 128;
3490 else if (mode == XCmode)
3491 mode_alignment = 256;
3492 if (COMPLEX_MODE_P (mode))
3493 mode_alignment /= 2;
3494 /* Misaligned fields are always returned in memory. */
3495 if (bit_offset % mode_alignment)
3499 /* For V1xx modes, just use the base mode. */
3500 if (VECTOR_MODE_P (mode)
3501 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
3502 mode = GET_MODE_INNER (mode);
3504 /* Classification of atomic types. */
3509 classes[0] = X86_64_SSE_CLASS;
3512 classes[0] = X86_64_SSE_CLASS;
3513 classes[1] = X86_64_SSEUP_CLASS;
3522 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3523 classes[0] = X86_64_INTEGERSI_CLASS;
3525 classes[0] = X86_64_INTEGER_CLASS;
3529 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
3534 if (!(bit_offset % 64))
3535 classes[0] = X86_64_SSESF_CLASS;
3537 classes[0] = X86_64_SSE_CLASS;
3540 classes[0] = X86_64_SSEDF_CLASS;
3543 classes[0] = X86_64_X87_CLASS;
3544 classes[1] = X86_64_X87UP_CLASS;
3547 classes[0] = X86_64_SSE_CLASS;
3548 classes[1] = X86_64_SSEUP_CLASS;
3551 classes[0] = X86_64_SSE_CLASS;
3554 classes[0] = X86_64_SSEDF_CLASS;
3555 classes[1] = X86_64_SSEDF_CLASS;
3558 classes[0] = X86_64_COMPLEX_X87_CLASS;
3561 /* This mode is larger than 16 bytes. */
3569 classes[0] = X86_64_SSE_CLASS;
3570 classes[1] = X86_64_SSEUP_CLASS;
3576 classes[0] = X86_64_SSE_CLASS;
3582 gcc_assert (VECTOR_MODE_P (mode));
3587 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
3589 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3590 classes[0] = X86_64_INTEGERSI_CLASS;
3592 classes[0] = X86_64_INTEGER_CLASS;
3593 classes[1] = X86_64_INTEGER_CLASS;
3594 return 1 + (bytes > 8);
3598 /* Examine the argument and set the number of registers required in each
3599 class. Return 0 iff the parameter should be passed in memory. */
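/* For illustration (example values, not from the source): for
   "struct { long a; double b; }" this sets *int_nregs = 1 and
   *sse_nregs = 1 and returns nonzero, while a 32-byte struct fails
   classification and yields 0, i.e. the parameter goes in memory.  */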
3601 examine_argument (enum machine_mode mode, tree type, int in_return,
3602 int *int_nregs, int *sse_nregs)
3604 enum x86_64_reg_class class[MAX_CLASSES];
3605 int n = classify_argument (mode, type, class, 0);
3611 for (n--; n >= 0; n--)
3614 case X86_64_INTEGER_CLASS:
3615 case X86_64_INTEGERSI_CLASS:
3618 case X86_64_SSE_CLASS:
3619 case X86_64_SSESF_CLASS:
3620 case X86_64_SSEDF_CLASS:
3623 case X86_64_NO_CLASS:
3624 case X86_64_SSEUP_CLASS:
3626 case X86_64_X87_CLASS:
3627 case X86_64_X87UP_CLASS:
3631 case X86_64_COMPLEX_X87_CLASS:
3632 return in_return ? 2 : 0;
3633 case X86_64_MEMORY_CLASS:
3639 /* Construct a container for the argument used by the GCC interface. See
3640 FUNCTION_ARG for the detailed description. */
3643 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
3644 tree type, int in_return, int nintregs, int nsseregs,
3645 const int *intreg, int sse_regno)
3647 /* Static flags recording which of these errors have already been issued. */
3648 static bool issued_sse_arg_error;
3649 static bool issued_sse_ret_error;
3650 static bool issued_x87_ret_error;
3652 enum machine_mode tmpmode;
3654 int bytes = (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3655 enum x86_64_reg_class class[MAX_CLASSES];
3659 int needed_sseregs, needed_intregs;
3660 rtx exp[MAX_CLASSES];
3663 n = classify_argument (mode, type, class, 0);
3666 if (!examine_argument (mode, type, in_return, &needed_intregs,
3669 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
3672 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
3673 some less clueful developer tries to use floating-point anyway. */
3674 if (needed_sseregs && !TARGET_SSE)
3678 if (!issued_sse_ret_error)
3680 error ("SSE register return with SSE disabled");
3681 issued_sse_ret_error = true;
3684 else if (!issued_sse_arg_error)
3686 error ("SSE register argument with SSE disabled");
3687 issued_sse_arg_error = true;
3692 /* Likewise, error if the ABI requires us to return values in the
3693 x87 registers and the user specified -mno-80387. */
3694 if (!TARGET_80387 && in_return)
3695 for (i = 0; i < n; i++)
3696 if (class[i] == X86_64_X87_CLASS
3697 || class[i] == X86_64_X87UP_CLASS
3698 || class[i] == X86_64_COMPLEX_X87_CLASS)
3700 if (!issued_x87_ret_error)
3702 error ("x87 register return with x87 disabled");
3703 issued_x87_ret_error = true;
3708 /* First construct simple cases. Avoid SCmode, since we want to use
3709 a single register to pass this type. */
3710 if (n == 1 && mode != SCmode)
3713 case X86_64_INTEGER_CLASS:
3714 case X86_64_INTEGERSI_CLASS:
3715 return gen_rtx_REG (mode, intreg[0]);
3716 case X86_64_SSE_CLASS:
3717 case X86_64_SSESF_CLASS:
3718 case X86_64_SSEDF_CLASS:
3719 return gen_reg_or_parallel (mode, orig_mode, SSE_REGNO (sse_regno));
3720 case X86_64_X87_CLASS:
3721 case X86_64_COMPLEX_X87_CLASS:
3722 return gen_rtx_REG (mode, FIRST_STACK_REG);
3723 case X86_64_NO_CLASS:
3724 /* Zero sized array, struct or class. */
3729 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS
3731 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
3734 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
3735 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
3736 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
3737 && class[1] == X86_64_INTEGER_CLASS
3738 && (mode == CDImode || mode == TImode || mode == TFmode)
3739 && intreg[0] + 1 == intreg[1])
3740 return gen_rtx_REG (mode, intreg[0]);
3742 /* Otherwise figure out the entries of the PARALLEL. */
3743 for (i = 0; i < n; i++)
3747 case X86_64_NO_CLASS:
3749 case X86_64_INTEGER_CLASS:
3750 case X86_64_INTEGERSI_CLASS:
3751 /* Merge TImodes on aligned occasions here too. */
3752 if (i * 8 + 8 > bytes)
3753 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
3754 else if (class[i] == X86_64_INTEGERSI_CLASS)
3758 /* We've requested 24 bytes for which we don't have a mode. Use DImode. */
3759 if (tmpmode == BLKmode)
3761 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3762 gen_rtx_REG (tmpmode, *intreg),
3766 case X86_64_SSESF_CLASS:
3767 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3768 gen_rtx_REG (SFmode,
3769 SSE_REGNO (sse_regno)),
3773 case X86_64_SSEDF_CLASS:
3774 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3775 gen_rtx_REG (DFmode,
3776 SSE_REGNO (sse_regno)),
3780 case X86_64_SSE_CLASS:
3781 if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
3785 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3786 gen_rtx_REG (tmpmode,
3787 SSE_REGNO (sse_regno)),
3789 if (tmpmode == TImode)
3798 /* Empty aligned struct, union or class. */
3802 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
3803 for (i = 0; i < nexps; i++)
3804 XVECEXP (ret, 0, i) = exp [i];
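  /* Illustrative shape of the result (register numbering is
     schematic): for "struct { long a; double b; }" the loop above builds

	(parallel [(expr_list (reg:DI di) (const_int 0))
		   (expr_list (reg:DF xmm0) (const_int 8))])

     so the first eightbyte is passed in an integer register and the
     second in an SSE register, at byte offsets 0 and 8.  */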
3808 /* Update the data in CUM to advance over an argument of mode MODE
3809 and data type TYPE. (TYPE is null for libcalls where that information
3810 may not be available.) */
3813 function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3814 tree type, HOST_WIDE_INT bytes, HOST_WIDE_INT words)
3830 cum->words += words;
3831 cum->nregs -= words;
3832 cum->regno += words;
3834 if (cum->nregs <= 0)
3842 if (cum->float_in_sse < 2)
3845 if (cum->float_in_sse < 1)
3856 if (!type || !AGGREGATE_TYPE_P (type))
3858 cum->sse_words += words;
3859 cum->sse_nregs -= 1;
3860 cum->sse_regno += 1;
3861 if (cum->sse_nregs <= 0)
3873 if (!type || !AGGREGATE_TYPE_P (type))
3875 cum->mmx_words += words;
3876 cum->mmx_nregs -= 1;
3877 cum->mmx_regno += 1;
3878 if (cum->mmx_nregs <= 0)
3889 function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3890 tree type, HOST_WIDE_INT words)
3892 int int_nregs, sse_nregs;
3894 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
3895 cum->words += words;
3896 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
3898 cum->nregs -= int_nregs;
3899 cum->sse_nregs -= sse_nregs;
3900 cum->regno += int_nregs;
3901 cum->sse_regno += sse_nregs;
3904 cum->words += words;
3908 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
3909 HOST_WIDE_INT words)
3911 /* Otherwise, this should be passed indirectly. */
3912 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
3914 cum->words += words;
3923 function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3924 tree type, int named ATTRIBUTE_UNUSED)
3926 HOST_WIDE_INT bytes, words;
3928 if (mode == BLKmode)
3929 bytes = int_size_in_bytes (type);
3931 bytes = GET_MODE_SIZE (mode);
3932 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3935 mode = type_natural_mode (type);
3937 if (TARGET_64BIT_MS_ABI)
3938 function_arg_advance_ms_64 (cum, bytes, words);
3939 else if (TARGET_64BIT)
3940 function_arg_advance_64 (cum, mode, type, words);
3942 function_arg_advance_32 (cum, mode, type, bytes, words);
3945 /* Define where to put the arguments to a function.
3946 Value is zero to push the argument on the stack,
3947 or a hard register in which to store the argument.
3949 MODE is the argument's machine mode.
3950 TYPE is the data type of the argument (as a tree).
3951 This is null for libcalls where that information may
3953 CUM is a variable of type CUMULATIVE_ARGS which gives info about
3954 the preceding args and about the function being called.
3955 NAMED is nonzero if this argument is a named parameter
3956 (otherwise it is an extra parameter matching an ellipsis). */
3959 function_arg_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3960 enum machine_mode orig_mode, tree type,
3961 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
3963 static bool warnedsse, warnedmmx;
3965 /* Avoid the AL settings for the Unix64 ABI. */
3966 if (mode == VOIDmode)
3982 if (words <= cum->nregs)
3984 int regno = cum->regno;
3986 /* Fastcall allocates the first two DWORD (SImode) or
3987 smaller arguments to ECX and EDX. */
3990 if (mode == BLKmode || mode == DImode)
3993 /* ECX, not EAX, is the first allocated register. */
3997 return gen_rtx_REG (mode, regno);
4002 if (cum->float_in_sse < 2)
4005 if (cum->float_in_sse < 1)
4015 if (!type || !AGGREGATE_TYPE_P (type))
4017 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
4020 warning (0, "SSE vector argument without SSE enabled "
4024 return gen_reg_or_parallel (mode, orig_mode,
4025 cum->sse_regno + FIRST_SSE_REG);
4033 if (!type || !AGGREGATE_TYPE_P (type))
4035 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
4038 warning (0, "MMX vector argument without MMX enabled "
4042 return gen_reg_or_parallel (mode, orig_mode,
4043 cum->mmx_regno + FIRST_MMX_REG);
4052 function_arg_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4053 enum machine_mode orig_mode, tree type)
4055 /* Handle a hidden AL argument containing number of registers
4056 for varargs x86-64 functions. */
4057 if (mode == VOIDmode)
4058 return GEN_INT (cum->maybe_vaarg
4059 ? (cum->sse_nregs < 0
4064 return construct_container (mode, orig_mode, type, 0, cum->nregs,
4066 &x86_64_int_parameter_registers [cum->regno],
4071 function_arg_ms_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4072 enum machine_mode orig_mode, int named)
4076 /* Avoid the AL settings for the Unix64 ABI. */
4077 if (mode == VOIDmode)
4080 /* If we've run out of registers, it goes on the stack. */
4081 if (cum->nregs == 0)
4084 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
4086 /* Only floating point modes are passed in anything but integer regs. */
4087 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
4090 regno = cum->regno + FIRST_SSE_REG;
4095 /* Unnamed floating parameters are passed in both the
4096 SSE and integer registers. */
4097 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
4098 t2 = gen_rtx_REG (mode, regno);
4099 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
4100 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
4101 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
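      /* Example (illustrative): a vararg double occupying the third
	 argument slot is described as a PARALLEL of XMM2 and R8; the
	 Microsoft x64 convention duplicates unnamed floating-point
	 arguments in both register files so the callee can spill them
	 without knowing their types.  */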
4105 return gen_reg_or_parallel (mode, orig_mode, regno);
4109 function_arg (CUMULATIVE_ARGS *cum, enum machine_mode omode,
4110 tree type, int named)
4112 enum machine_mode mode = omode;
4113 HOST_WIDE_INT bytes, words;
4115 if (mode == BLKmode)
4116 bytes = int_size_in_bytes (type);
4118 bytes = GET_MODE_SIZE (mode);
4119 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4121 /* To simplify the code below, represent vector types with a vector mode
4122 even if MMX/SSE are not active. */
4123 if (type && TREE_CODE (type) == VECTOR_TYPE)
4124 mode = type_natural_mode (type);
4126 if (TARGET_64BIT_MS_ABI)
4127 return function_arg_ms_64 (cum, mode, omode, named);
4128 else if (TARGET_64BIT)
4129 return function_arg_64 (cum, mode, omode, type);
4131 return function_arg_32 (cum, mode, omode, type, bytes, words);
4134 /* A C expression that indicates when an argument must be passed by
4135 reference. If nonzero for an argument, a copy of that argument is
4136 made in memory and a pointer to the argument is passed instead of
4137 the argument itself. The pointer is passed in whatever way is
4138 appropriate for passing a pointer to that type. */
4141 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
4142 enum machine_mode mode ATTRIBUTE_UNUSED,
4143 tree type, bool named ATTRIBUTE_UNUSED)
4145 if (TARGET_64BIT_MS_ABI)
4149 /* Arrays are passed by reference. */
4150 if (TREE_CODE (type) == ARRAY_TYPE)
4153 if (AGGREGATE_TYPE_P (type))
4155 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
4156 are passed by reference. */
4157 int el2 = exact_log2 (int_size_in_bytes (type));
4158 return !(el2 >= 0 && el2 <= 3);
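      /* For illustration: an 8-byte struct gives el2 == 3 and is
	 passed by value, while a 12-byte struct gives el2 == -1
	 (not a power of two) and is therefore passed by reference.  */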
4162 /* __m128 is passed by reference. */
4163 /* ??? How to handle complex? For now treat them as structs,
4164 and pass them by reference if they're too large. */
4165 if (GET_MODE_SIZE (mode) > 8)
4168 else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
4174 /* Return true when TYPE should be 128-bit aligned for the 32-bit argument passing
4175 ABI. Only called if TARGET_SSE. */
4177 contains_128bit_aligned_vector_p (tree type)
4179 enum machine_mode mode = TYPE_MODE (type);
4180 if (SSE_REG_MODE_P (mode)
4181 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
4183 if (TYPE_ALIGN (type) < 128)
4186 if (AGGREGATE_TYPE_P (type))
4188 /* Walk the aggregates recursively. */
4189 switch (TREE_CODE (type))
4193 case QUAL_UNION_TYPE:
4197 /* Walk all the structure fields. */
4198 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
4200 if (TREE_CODE (field) == FIELD_DECL
4201 && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
4208 /* Just for use if some language passes arrays by value. */
4209 if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
4220 /* Gives the alignment boundary, in bits, of an argument with the
4221 specified mode and type. */
4224 ix86_function_arg_boundary (enum machine_mode mode, tree type)
4228 align = TYPE_ALIGN (type);
4230 align = GET_MODE_ALIGNMENT (mode);
4231 if (align < PARM_BOUNDARY)
4232 align = PARM_BOUNDARY;
4235 /* The i386 ABI defines all arguments to be 4-byte aligned. We have to
4236 make an exception for SSE modes since these require 128-bit alignment.
4239 The handling here differs from field_alignment. ICC aligns MMX
4240 arguments to 4-byte boundaries, while structure fields are aligned
4241 to 8-byte boundaries. */
4243 align = PARM_BOUNDARY;
4246 if (!SSE_REG_MODE_P (mode))
4247 align = PARM_BOUNDARY;
4251 if (!contains_128bit_aligned_vector_p (type))
4252 align = PARM_BOUNDARY;
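  /* Illustrative outcomes on ia32: a double argument gets the default
     PARM_BOUNDARY alignment, while an __m128 argument, or an aggregate
     containing one, keeps its 128-bit boundary.  */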
4260 /* Return true if N is a possible register number of function value. */
4263 ix86_function_value_regno_p (int regno)
4270 case FIRST_FLOAT_REG:
4271 if (TARGET_64BIT_MS_ABI)
4273 return TARGET_FLOAT_RETURNS_IN_80387;
4279 if (TARGET_MACHO || TARGET_64BIT)
4287 /* Define how to find the value returned by a function.
4288 VALTYPE is the data type of the value (as a tree).
4289 If the precise function being called is known, FUNC is its FUNCTION_DECL;
4290 otherwise, FUNC is 0. */
4293 function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
4294 tree fntype, tree fn)
4298 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
4299 we normally prevent this case when mmx is not available. However
4300 some ABIs may require the result to be returned like DImode. */
4301 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
4302 regno = TARGET_MMX ? FIRST_MMX_REG : 0;
4304 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
4305 we prevent this case when sse is not available. However some ABIs
4306 may require the result to be returned like integer TImode. */
4307 else if (mode == TImode
4308 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
4309 regno = TARGET_SSE ? FIRST_SSE_REG : 0;
4311 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
4312 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
4313 regno = FIRST_FLOAT_REG;
4315 /* Most things go in %eax. */
4318 /* Override FP return register with %xmm0 for local functions when
4319 SSE math is enabled or for functions with sseregparm attribute. */
4320 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
4322 int sse_level = ix86_function_sseregparm (fntype, fn);
4323 if ((sse_level >= 1 && mode == SFmode)
4324 || (sse_level == 2 && mode == DFmode))
4325 regno = FIRST_SSE_REG;
4328 return gen_rtx_REG (orig_mode, regno);
4332 function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
4337 /* Handle libcalls, which don't provide a type node. */
4338 if (valtype == NULL)
4350 return gen_rtx_REG (mode, FIRST_SSE_REG);
4353 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
4357 return gen_rtx_REG (mode, 0);
4361 ret = construct_container (mode, orig_mode, valtype, 1,
4362 REGPARM_MAX, SSE_REGPARM_MAX,
4363 x86_64_int_return_registers, 0);
4365 /* For zero-sized structures, construct_container returns NULL, but we
4366 need to keep the rest of the compiler happy by returning a meaningful value. */
4368 ret = gen_rtx_REG (orig_mode, 0);
4374 function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode)
4376 unsigned int regno = 0;
4380 if (mode == SFmode || mode == DFmode)
4381 regno = FIRST_SSE_REG;
4382 else if (VECTOR_MODE_P (mode) || GET_MODE_SIZE (mode) == 16)
4383 regno = FIRST_SSE_REG;
4386 return gen_rtx_REG (orig_mode, regno);
4390 ix86_function_value_1 (tree valtype, tree fntype_or_decl,
4391 enum machine_mode orig_mode, enum machine_mode mode)
4396 if (fntype_or_decl && DECL_P (fntype_or_decl))
4397 fn = fntype_or_decl;
4398 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
4400 if (TARGET_64BIT_MS_ABI)
4401 return function_value_ms_64 (orig_mode, mode);
4402 else if (TARGET_64BIT)
4403 return function_value_64 (orig_mode, mode, valtype);
4405 return function_value_32 (orig_mode, mode, fntype, fn);
4409 ix86_function_value (tree valtype, tree fntype_or_decl,
4410 bool outgoing ATTRIBUTE_UNUSED)
4412 enum machine_mode mode, orig_mode;
4414 orig_mode = TYPE_MODE (valtype);
4415 mode = type_natural_mode (valtype);
4416 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
4420 ix86_libcall_value (enum machine_mode mode)
4422 return ix86_function_value_1 (NULL, NULL, mode, mode);
4425 /* Return true iff type is returned in memory. */
4428 return_in_memory_32 (tree type, enum machine_mode mode)
4432 if (mode == BLKmode)
4435 size = int_size_in_bytes (type);
4437 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
4440 if (VECTOR_MODE_P (mode) || mode == TImode)
4442 /* User-created vectors small enough to fit in EAX. */
4446 /* MMX/3dNow values are returned in MM0,
4447 except when it doesn't exist. */
4449 return (TARGET_MMX ? 0 : 1);
4451 /* SSE values are returned in XMM0, except when it doesn't exist. */
4453 return (TARGET_SSE ? 0 : 1);
4468 return_in_memory_64 (tree type, enum machine_mode mode)
4470 int needed_intregs, needed_sseregs;
4471 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
4475 return_in_memory_ms_64 (tree type, enum machine_mode mode)
4477 HOST_WIDE_INT size = int_size_in_bytes (type);
4479 /* __m128 and friends are returned in xmm0. */
4480 if (size == 16 && VECTOR_MODE_P (mode))
4483 /* Otherwise, the size must be exactly 1, 2, 4 or 8 bytes. */
4484 return (size != 1 && size != 2 && size != 4 && size != 8);
4488 ix86_return_in_memory (tree type)
4490 enum machine_mode mode = type_natural_mode (type);
4492 if (TARGET_64BIT_MS_ABI)
4493 return return_in_memory_ms_64 (type, mode);
4494 else if (TARGET_64BIT)
4495 return return_in_memory_64 (type, mode);
4497 return return_in_memory_32 (type, mode);
4500 /* Return true iff TYPE is returned in memory. This version is used
4501 on Solaris 10. It is similar to the generic ix86_return_in_memory,
4502 but differs notably in that when MMX is available, 8-byte vectors
4503 are returned in memory, rather than in MMX registers. */
4506 ix86_sol10_return_in_memory (tree type)
4509 enum machine_mode mode = type_natural_mode (type);
4512 return return_in_memory_64 (type, mode);
4514 if (mode == BLKmode)
4517 size = int_size_in_bytes (type);
4519 if (VECTOR_MODE_P (mode))
4521 /* Return in memory only if MMX registers *are* available. This
4522 seems backwards, but it is consistent with the existing
4529 else if (mode == TImode)
4531 else if (mode == XFmode)
4537 /* When returning SSE vector types, we have a choice of either
4538 (1) being ABI-incompatible with a -march switch, or
4539 (2) generating an error.
4540 Given no good solution, I think the safest thing is one warning.
4541 The user won't be able to use -Werror, but....
4543 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
4544 called in response to actually generating a caller or callee that
4545 uses such a type. As opposed to RETURN_IN_MEMORY, which is called
4546 via aggregate_value_p for general type probing from tree-ssa. */
4549 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
4551 static bool warnedsse, warnedmmx;
4553 if (!TARGET_64BIT && type)
4555 /* Look at the return type of the function, not the function type. */
4556 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
4558 if (!TARGET_SSE && !warnedsse)
4561 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
4564 warning (0, "SSE vector return without SSE enabled "
4569 if (!TARGET_MMX && !warnedmmx)
4571 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
4574 warning (0, "MMX vector return without MMX enabled "
4584 /* Create the va_list data type. */
4587 ix86_build_builtin_va_list (void)
4589 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
4591 /* For i386 we use a plain pointer to the argument area. */
4592 if (!TARGET_64BIT || TARGET_64BIT_MS_ABI)
4593 return build_pointer_type (char_type_node);
4595 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
4596 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
4598 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
4599 unsigned_type_node);
4600 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
4601 unsigned_type_node);
4602 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
4604 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
4607 va_list_gpr_counter_field = f_gpr;
4608 va_list_fpr_counter_field = f_fpr;
4610 DECL_FIELD_CONTEXT (f_gpr) = record;
4611 DECL_FIELD_CONTEXT (f_fpr) = record;
4612 DECL_FIELD_CONTEXT (f_ovf) = record;
4613 DECL_FIELD_CONTEXT (f_sav) = record;
4615 TREE_CHAIN (record) = type_decl;
4616 TYPE_NAME (record) = type_decl;
4617 TYPE_FIELDS (record) = f_gpr;
4618 TREE_CHAIN (f_gpr) = f_fpr;
4619 TREE_CHAIN (f_fpr) = f_ovf;
4620 TREE_CHAIN (f_ovf) = f_sav;
4622 layout_type (record);
4624 /* The correct type is an array type of one element. */
4625 return build_array_type (record, build_index_type (size_zero_node));
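/* For reference, a sketch (not emitted code) of the C-level layout the
   record above corresponds to under the SysV AMD64 ABI:

	typedef struct {
	  unsigned int gp_offset;	  bytes consumed from the GP part
					  of reg_save_area
	  unsigned int fp_offset;	  likewise for the SSE part
	  void *overflow_arg_area;	  next stack-passed argument
	  void *reg_save_area;		  base of the register save area
	} __va_list_tag[1];  */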
4628 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
4631 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
4641 if (! cfun->va_list_gpr_size && ! cfun->va_list_fpr_size)
4644 /* Indicate that we must allocate space on the stack for the varargs save area. */
4645 ix86_save_varrargs_registers = 1;
4646 cfun->stack_alignment_needed = 128;
4648 save_area = frame_pointer_rtx;
4649 set = get_varargs_alias_set ();
4651 for (i = cum->regno;
4653 && i < cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
4656 mem = gen_rtx_MEM (Pmode,
4657 plus_constant (save_area, i * UNITS_PER_WORD));
4658 MEM_NOTRAP_P (mem) = 1;
4659 set_mem_alias_set (mem, set);
4660 emit_move_insn (mem, gen_rtx_REG (Pmode,
4661 x86_64_int_parameter_registers[i]));
4664 if (cum->sse_nregs && cfun->va_list_fpr_size)
4666 /* Now emit code to save SSE registers. The AX parameter contains the
4667 number of SSE parameter registers used to call this function. We use
4668 the sse_prologue_save insn template, which produces a computed jump
4669 across the SSE saves. We need some preparation work to get this working. */
4671 label = gen_label_rtx ();
4672 label_ref = gen_rtx_LABEL_REF (Pmode, label);
4674 /* Compute the address to jump to:
4675 label - 5*eax + nnamed_sse_arguments*5. */
4676 tmp_reg = gen_reg_rtx (Pmode);
4677 nsse_reg = gen_reg_rtx (Pmode);
4678 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
4679 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
4680 gen_rtx_MULT (Pmode, nsse_reg,
4685 gen_rtx_CONST (DImode,
4686 gen_rtx_PLUS (DImode,
4688 GEN_INT (cum->sse_regno * 4))));
4690 emit_move_insn (nsse_reg, label_ref);
4691 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
4693 /* Compute the address of the memory block we save into. We always use a
4694 pointer pointing 127 bytes after the first byte to store - this is
4695 needed to keep each save instruction's size limited to 4 bytes. */
4696 tmp_reg = gen_reg_rtx (Pmode);
4697 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
4698 plus_constant (save_area,
4699 8 * REGPARM_MAX + 127)));
4700 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
4701 MEM_NOTRAP_P (mem) = 1;
4702 set_mem_alias_set (mem, set);
4703 set_mem_align (mem, BITS_PER_WORD);
4705 /* And finally do the dirty job! */
4706 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
4707 GEN_INT (cum->sse_regno), label));
4712 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
4714 int set = get_varargs_alias_set ();
4717 for (i = cum->regno; i < REGPARM_MAX; i++)
4721 mem = gen_rtx_MEM (Pmode,
4722 plus_constant (virtual_incoming_args_rtx,
4723 i * UNITS_PER_WORD));
4724 MEM_NOTRAP_P (mem) = 1;
4725 set_mem_alias_set (mem, set);
4727 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
4728 emit_move_insn (mem, reg);
4733 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4734 tree type, int *pretend_size ATTRIBUTE_UNUSED,
4737 CUMULATIVE_ARGS next_cum;
4741 /* This argument doesn't appear to be used anymore. Which is good,
4742 because the old code here didn't suppress rtl generation. */
4743 gcc_assert (!no_rtl);
4748 fntype = TREE_TYPE (current_function_decl);
4749 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
4750 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
4751 != void_type_node));
4753 /* For varargs, we do not want to skip the dummy va_dcl argument.
4754 For stdargs, we do want to skip the last named argument. */
4757 function_arg_advance (&next_cum, mode, type, 1);
4759 if (TARGET_64BIT_MS_ABI)
4760 setup_incoming_varargs_ms_64 (&next_cum);
4762 setup_incoming_varargs_64 (&next_cum);
4765 /* Implement va_start. */
4768 ix86_va_start (tree valist, rtx nextarg)
4770 HOST_WIDE_INT words, n_gpr, n_fpr;
4771 tree f_gpr, f_fpr, f_ovf, f_sav;
4772 tree gpr, fpr, ovf, sav, t;
4775 /* Only the 64-bit target needs something special. */
4776 if (!TARGET_64BIT || TARGET_64BIT_MS_ABI)
4778 std_expand_builtin_va_start (valist, nextarg);
4782 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4783 f_fpr = TREE_CHAIN (f_gpr);
4784 f_ovf = TREE_CHAIN (f_fpr);
4785 f_sav = TREE_CHAIN (f_ovf);
4787 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
4788 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
4789 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
4790 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
4791 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
4793 /* Count number of gp and fp argument registers used. */
4794 words = current_function_args_info.words;
4795 n_gpr = current_function_args_info.regno;
4796 n_fpr = current_function_args_info.sse_regno;
4798 if (cfun->va_list_gpr_size)
4800 type = TREE_TYPE (gpr);
4801 t = build2 (GIMPLE_MODIFY_STMT, type, gpr,
4802 build_int_cst (type, n_gpr * 8));
4803 TREE_SIDE_EFFECTS (t) = 1;
4804 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4807 if (cfun->va_list_fpr_size)
4809 type = TREE_TYPE (fpr);
4810 t = build2 (GIMPLE_MODIFY_STMT, type, fpr,
4811 build_int_cst (type, n_fpr * 16 + 8*REGPARM_MAX));
4812 TREE_SIDE_EFFECTS (t) = 1;
4813 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4816 /* Find the overflow area. */
4817 type = TREE_TYPE (ovf);
4818 t = make_tree (type, virtual_incoming_args_rtx);
4820 t = build2 (PLUS_EXPR, type, t,
4821 build_int_cst (type, words * UNITS_PER_WORD));
4822 t = build2 (GIMPLE_MODIFY_STMT, type, ovf, t);
4823 TREE_SIDE_EFFECTS (t) = 1;
4824 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4826 if (cfun->va_list_gpr_size || cfun->va_list_fpr_size)
4828 /* Find the register save area.
4829 The function prologue saves it right above the stack frame. */
4830 type = TREE_TYPE (sav);
4831 t = make_tree (type, frame_pointer_rtx);
4832 t = build2 (GIMPLE_MODIFY_STMT, type, sav, t);
4833 TREE_SIDE_EFFECTS (t) = 1;
4834 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
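  /* Worked example (illustrative): in "int f (const char *fmt, ...)"
     the single named argument consumes one GP register, so va_start
     leaves gp_offset == 8 and, with no named SSE arguments,
     fp_offset == 8 * REGPARM_MAX == 48; ovf points at the first
     stack-passed vararg and sav at the register save area built by
     the prologue.  */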
4838 /* Implement va_arg. */
4841 ix86_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p)
4843 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
4844 tree f_gpr, f_fpr, f_ovf, f_sav;
4845 tree gpr, fpr, ovf, sav, t;
4847 tree lab_false, lab_over = NULL_TREE;
4852 enum machine_mode nat_mode;
4854 /* Only the 64-bit target needs something special. */
4855 if (!TARGET_64BIT || TARGET_64BIT_MS_ABI)
4856 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
4858 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4859 f_fpr = TREE_CHAIN (f_gpr);
4860 f_ovf = TREE_CHAIN (f_fpr);
4861 f_sav = TREE_CHAIN (f_ovf);
4863 valist = build_va_arg_indirect_ref (valist);
4864 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
4865 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
4866 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
4867 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
4869 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
4871 type = build_pointer_type (type);
4872 size = int_size_in_bytes (type);
4873 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4875 nat_mode = type_natural_mode (type);
4876 container = construct_container (nat_mode, TYPE_MODE (type), type, 0,
4877 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
4879 /* Pull the value out of the saved registers. */
4881 addr = create_tmp_var (ptr_type_node, "addr");
4882 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
4886 int needed_intregs, needed_sseregs;
4888 tree int_addr, sse_addr;
4890 lab_false = create_artificial_label ();
4891 lab_over = create_artificial_label ();
4893 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
4895 need_temp = (!REG_P (container)
4896 && ((needed_intregs && TYPE_ALIGN (type) > 64)
4897 || TYPE_ALIGN (type) > 128));
4899 /* In case we are passing a structure, verify that it is a consecutive
4900 block in the register save area. If not, we need to do moves. */
4901 if (!need_temp && !REG_P (container))
4903 /* Verify that all registers are strictly consecutive. */
4904 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
4908 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
4910 rtx slot = XVECEXP (container, 0, i);
4911 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
4912 || INTVAL (XEXP (slot, 1)) != i * 16)
4920 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
4922 rtx slot = XVECEXP (container, 0, i);
4923 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
4924 || INTVAL (XEXP (slot, 1)) != i * 8)
4936 int_addr = create_tmp_var (ptr_type_node, "int_addr");
4937 DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
4938 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
4939 DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();
4942 /* First ensure that we fit completely in registers. */
4945 t = build_int_cst (TREE_TYPE (gpr),
4946 (REGPARM_MAX - needed_intregs + 1) * 8);
4947 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
4948 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
4949 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
4950 gimplify_and_add (t, pre_p);
4954 t = build_int_cst (TREE_TYPE (fpr),
4955 (SSE_REGPARM_MAX - needed_sseregs + 1) * 16
4957 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
4958 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
4959 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
4960 gimplify_and_add (t, pre_p);
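      /* The bound above, by example: REGPARM_MAX == 6, so with
	 needed_intregs == 2 the threshold is (6 - 2 + 1) * 8 == 40;
	 once gp_offset reaches 40 only one 8-byte slot remains and the
	 argument must be fetched from the overflow area instead.  */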
4963 /* Compute index to start of area used for integer regs. */
4966 /* int_addr = gpr + sav; */
4967 t = fold_convert (ptr_type_node, fold_convert (size_type_node, gpr));
4968 t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
4969 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, int_addr, t);
4970 gimplify_and_add (t, pre_p);
4974 /* sse_addr = fpr + sav; */
4975 t = fold_convert (ptr_type_node, fold_convert (size_type_node, fpr));
4976 t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
4977 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, sse_addr, t);
4978 gimplify_and_add (t, pre_p);
4983 tree temp = create_tmp_var (type, "va_arg_tmp");
4986 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
4987 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, addr, t);
4988 gimplify_and_add (t, pre_p);
4990 for (i = 0; i < XVECLEN (container, 0); i++)
4992 rtx slot = XVECEXP (container, 0, i);
4993 rtx reg = XEXP (slot, 0);
4994 enum machine_mode mode = GET_MODE (reg);
4995 tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
4996 tree addr_type = build_pointer_type (piece_type);
4999 tree dest_addr, dest;
5001 if (SSE_REGNO_P (REGNO (reg)))
5003 src_addr = sse_addr;
5004 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
5008 src_addr = int_addr;
5009 src_offset = REGNO (reg) * 8;
5011 src_addr = fold_convert (addr_type, src_addr);
5012 src_addr = fold_build2 (PLUS_EXPR, addr_type, src_addr,
5013 build_int_cst (addr_type, src_offset));
5014 src = build_va_arg_indirect_ref (src_addr);
5016 dest_addr = fold_convert (addr_type, addr);
5017 dest_addr = fold_build2 (PLUS_EXPR, addr_type, dest_addr,
5018 build_int_cst (addr_type, INTVAL (XEXP (slot, 1))));
5019 dest = build_va_arg_indirect_ref (dest_addr);
5021 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, dest, src);
5022 gimplify_and_add (t, pre_p);
5028 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
5029 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
5030 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (gpr), gpr, t);
5031 gimplify_and_add (t, pre_p);
5035 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
5036 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
5037 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (fpr), fpr, t);
5038 gimplify_and_add (t, pre_p);
5041 t = build1 (GOTO_EXPR, void_type_node, lab_over);
5042 gimplify_and_add (t, pre_p);
5044 t = build1 (LABEL_EXPR, void_type_node, lab_false);
5045 append_to_statement_list (t, pre_p);
5048 /* ... otherwise out of the overflow area. */
5050 /* Care for on-stack alignment if needed. */
5051 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64
5052 || integer_zerop (TYPE_SIZE (type)))
5056 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
5057 t = build2 (PLUS_EXPR, TREE_TYPE (ovf), ovf,
5058 build_int_cst (TREE_TYPE (ovf), align - 1));
5059 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
5060 build_int_cst (TREE_TYPE (t), -align));
5062 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
5064 t2 = build2 (GIMPLE_MODIFY_STMT, void_type_node, addr, t);
5065 gimplify_and_add (t2, pre_p);
5067 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
5068 build_int_cst (TREE_TYPE (t), rsize * UNITS_PER_WORD));
5069 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (ovf), ovf, t);
5070 gimplify_and_add (t, pre_p);
5074 t = build1 (LABEL_EXPR, void_type_node, lab_over);
5075 append_to_statement_list (t, pre_p);
5078 ptrtype = build_pointer_type (type);
5079 addr = fold_convert (ptrtype, addr);
5082 addr = build_va_arg_indirect_ref (addr);
5083 return build_va_arg_indirect_ref (addr);
5086 /* Return nonzero if OPNUM's MEM should be matched
5087 in movabs* patterns. */
5090 ix86_check_movabs (rtx insn, int opnum)
5094 set = PATTERN (insn);
5095 if (GET_CODE (set) == PARALLEL)
5096 set = XVECEXP (set, 0, 0);
5097 gcc_assert (GET_CODE (set) == SET);
5098 mem = XEXP (set, opnum);
5099 while (GET_CODE (mem) == SUBREG)
5100 mem = SUBREG_REG (mem);
5101 gcc_assert (MEM_P (mem));
5102 return (volatile_ok || !MEM_VOLATILE_P (mem));
5105 /* Initialize the table of extra 80387 mathematical constants. */
5108 init_ext_80387_constants (void)
5110 static const char * cst[5] =
5112 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
5113 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
5114 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
5115 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
5116 "3.1415926535897932385128089594061862044", /* 4: fldpi */
5120 for (i = 0; i < 5; i++)
5122 real_from_string (&ext_80387_constants_table[i], cst[i]);
5123 /* Ensure each constant is rounded to XFmode precision. */
5124 real_convert (&ext_80387_constants_table[i],
5125 XFmode, &ext_80387_constants_table[i]);
5128 ext_80387_constants_init = 1;
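/* A sketch of how the table is consumed (indices as assumed by
   standard_80387_constant_p below): entries 0-4 are reported as return
   values 3-7, which standard_80387_constant_opcode maps to the
   dedicated loads fldlg2, fldln2, fldl2e, fldl2t and fldpi.  */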
5131 /* Return true if the constant is something that can be loaded with
5132 a special instruction. */
5135 standard_80387_constant_p (rtx x)
5137 enum machine_mode mode = GET_MODE (x);
5141 if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
5144 if (x == CONST0_RTX (mode))
5146 if (x == CONST1_RTX (mode))
5149 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
5151 /* For XFmode constants, try to find a special 80387 instruction when
5152 optimizing for size or on those CPUs that benefit from them. */
5154 && (optimize_size || TARGET_EXT_80387_CONSTANTS))
5158 if (! ext_80387_constants_init)
5159 init_ext_80387_constants ();
5161 for (i = 0; i < 5; i++)
5162 if (real_identical (&r, &ext_80387_constants_table[i]))
5166 /* A load of the constant -0.0 or -1.0 will be split into an
5167 fldz;fchs or fld1;fchs sequence. */
5168 if (real_isnegzero (&r))
5170 if (real_identical (&r, &dconstm1))
5176 /* Return the opcode of the special instruction to be used to load the constant X. */
5180 standard_80387_constant_opcode (rtx x)
5182 switch (standard_80387_constant_p (x))
5206 /* Return the CONST_DOUBLE representing the 80387 constant that is
5207 loaded by the specified special instruction. The argument IDX
5208 matches the return value from standard_80387_constant_p. */
5211 standard_80387_constant_rtx (int idx)
5215 if (! ext_80387_constants_init)
5216 init_ext_80387_constants ();
5232 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
5236 /* Return 1 if mode is a valid mode for SSE. */
5238 standard_sse_mode_p (enum machine_mode mode)
5255 /* Return 1 if X is an FP constant we can load into an SSE register without using memory. */
5258 standard_sse_constant_p (rtx x)
5260 enum machine_mode mode = GET_MODE (x);
5262 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
5264 if (vector_all_ones_operand (x, mode)
5265 && standard_sse_mode_p (mode))
5266 return TARGET_SSE2 ? 2 : -1;
5271 /* Return the opcode of the special instruction to be used to load the constant X. */
5275 standard_sse_constant_opcode (rtx insn, rtx x)
5277 switch (standard_sse_constant_p (x))
5280 if (get_attr_mode (insn) == MODE_V4SF)
5281 return "xorps\t%0, %0";
5282 else if (get_attr_mode (insn) == MODE_V2DF)
5283 return "xorpd\t%0, %0";
5285 return "pxor\t%0, %0";
5287 return "pcmpeqd\t%0, %0";
5292 /* Returns 1 if OP contains a symbol reference. */
5295 symbolic_reference_mentioned_p (rtx op)
5300 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
5303 fmt = GET_RTX_FORMAT (GET_CODE (op));
5304 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
5310 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
5311 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
5315 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
5322 /* Return 1 if it is appropriate to emit `ret' instructions in the
5323 body of a function. Do this only if the epilogue is simple, needing a
5324 couple of insns. Prior to reloading, we can't tell how many registers
5325 must be saved, so return 0 then. Return 0 if there is no frame
5326 marker to de-allocate. */
5329 ix86_can_use_return_insn_p (void)
5331 struct ix86_frame frame;
5333 if (! reload_completed || frame_pointer_needed)
5336 /* Don't allow popping more than 32k bytes of arguments, since that's all
5337 we can do with one instruction. */
5338 if (current_function_pops_args
5339 && current_function_args_size >= 32768)
5342 ix86_compute_frame_layout (&frame);
5343 return frame.to_allocate == 0 && frame.nregs == 0;
5346 /* Value should be nonzero if functions must have frame pointers.
5347 Zero means the frame pointer need not be set up (and parms may
5348 be accessed via the stack pointer) in functions that seem suitable. */
5351 ix86_frame_pointer_required (void)
5353 /* If we accessed previous frames, then the generated code expects
5354 to be able to access the saved ebp value in our frame. */
5355 if (cfun->machine->accesses_prev_frame)
5358 /* Several x86 OSes need a frame pointer for other reasons,
5359 usually pertaining to setjmp. */
5360 if (SUBTARGET_FRAME_POINTER_REQUIRED)
5363 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
5364 the frame pointer by default. Turn it back on now if we've not
5365 got a leaf function. */
5366 if (TARGET_OMIT_LEAF_FRAME_POINTER
5367 && (!current_function_is_leaf
5368 || ix86_current_function_calls_tls_descriptor))
5371 if (current_function_profile)
5377 /* Record that the current function accesses previous call frames. */
5380 ix86_setup_frame_addresses (void)
5382 cfun->machine->accesses_prev_frame = 1;
5385 #if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
5386 # define USE_HIDDEN_LINKONCE 1
5388 # define USE_HIDDEN_LINKONCE 0
5391 static int pic_labels_used;
5393 /* Fills in the label name that should be used for a pc thunk for
5394 the given register. */
5397 get_pc_thunk_name (char name[32], unsigned int regno)
5399 gcc_assert (!TARGET_64BIT);
5401 if (USE_HIDDEN_LINKONCE)
5402 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
5404 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
5408 /* This function generates code for -fpic that loads %ebx with
5409 the return address of the caller and then returns. */
5412 ix86_file_end (void)
5417 for (regno = 0; regno < 8; ++regno)
5421 if (! ((pic_labels_used >> regno) & 1))
5424 get_pc_thunk_name (name, regno);
5429 switch_to_section (darwin_sections[text_coal_section]);
5430 fputs ("\t.weak_definition\t", asm_out_file);
5431 assemble_name (asm_out_file, name);
5432 fputs ("\n\t.private_extern\t", asm_out_file);
5433 assemble_name (asm_out_file, name);
5434 fputs ("\n", asm_out_file);
5435 ASM_OUTPUT_LABEL (asm_out_file, name);
5439 if (USE_HIDDEN_LINKONCE)
5443 decl = build_decl (FUNCTION_DECL, get_identifier (name),
5445 TREE_PUBLIC (decl) = 1;
5446 TREE_STATIC (decl) = 1;
5447 DECL_ONE_ONLY (decl) = 1;
5449 (*targetm.asm_out.unique_section) (decl, 0);
5450 switch_to_section (get_named_section (decl, NULL, 0));
5452 (*targetm.asm_out.globalize_label) (asm_out_file, name);
5453 fputs ("\t.hidden\t", asm_out_file);
5454 assemble_name (asm_out_file, name);
5455 fputc ('\n', asm_out_file);
5456 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
5460 switch_to_section (text_section);
5461 ASM_OUTPUT_LABEL (asm_out_file, name);
5464 xops[0] = gen_rtx_REG (SImode, regno);
5465 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
5466 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
5467 output_asm_insn ("ret", xops);
5470 if (NEED_INDICATE_EXEC_STACK)
5471 file_end_indicate_exec_stack ();
5474 /* Emit code for the SET_GOT patterns. */
5477 output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
5483 if (TARGET_VXWORKS_RTP && flag_pic)
5485 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
5486 xops[2] = gen_rtx_MEM (Pmode,
5487 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
5488 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
5490 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
5491 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
5492 an unadorned address. */
5493 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
5494 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
5495 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
5499 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
5501 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
5503 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
5506 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
5508 output_asm_insn ("call\t%a2", xops);
5511 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
5512 is what will be referenced by the Mach-O PIC subsystem. */
5514 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
5517 (*targetm.asm_out.internal_label) (asm_out_file, "L",
5518 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
5521 output_asm_insn ("pop{l}\t%0", xops);
5526 get_pc_thunk_name (name, REGNO (dest));
5527 pic_labels_used |= 1 << REGNO (dest);
5529 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
5530 xops[2] = gen_rtx_MEM (QImode, xops[2]);
5531 output_asm_insn ("call\t%X2", xops);
5532 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
5533 is what will be referenced by the Mach-O PIC subsystem. */
5536 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
5538 targetm.asm_out.internal_label (asm_out_file, "L",
5539 CODE_LABEL_NUMBER (label));
5546 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
5547 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
5549 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
5554 /* Generate a "push" pattern for input ARG. */
5559 return gen_rtx_SET (VOIDmode,
5561 gen_rtx_PRE_DEC (Pmode,
5562 stack_pointer_rtx)),
5566 /* Return >= 0 if there is an unused call-clobbered register available
5567 for the entire function. */
5570 ix86_select_alt_pic_regnum (void)
5572 if (current_function_is_leaf && !current_function_profile
5573 && !ix86_current_function_calls_tls_descriptor)
5576 for (i = 2; i >= 0; --i)
5577 if (!regs_ever_live[i])
5581 return INVALID_REGNUM;
5584 /* Return 1 if we need to save REGNO. */
5586 ix86_save_reg (unsigned int regno, int maybe_eh_return)
5588 if (pic_offset_table_rtx
5589 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
5590 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
5591 || current_function_profile
5592 || current_function_calls_eh_return
5593 || current_function_uses_const_pool))
5595 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
5600 if (current_function_calls_eh_return && maybe_eh_return)
5605 unsigned test = EH_RETURN_DATA_REGNO (i);
5606 if (test == INVALID_REGNUM)
5613 if (cfun->machine->force_align_arg_pointer
5614 && regno == REGNO (cfun->machine->force_align_arg_pointer))
5617 return (regs_ever_live[regno]
5618 && !call_used_regs[regno]
5619 && !fixed_regs[regno]
5620 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
5623 /* Return number of registers to be saved on the stack. */
5626 ix86_nsaved_regs (void)
5631 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
5632 if (ix86_save_reg (regno, true))
5637 /* Return the offset between two registers, one to be eliminated, and the other
5638 its replacement, at the start of a routine. */
5641 ix86_initial_elimination_offset (int from, int to)
5643 struct ix86_frame frame;
5644 ix86_compute_frame_layout (&frame);
5646 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
5647 return frame.hard_frame_pointer_offset;
5648 else if (from == FRAME_POINTER_REGNUM
5649 && to == HARD_FRAME_POINTER_REGNUM)
5650 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
5653 gcc_assert (to == STACK_POINTER_REGNUM);
5655 if (from == ARG_POINTER_REGNUM)
5656 return frame.stack_pointer_offset;
5658 gcc_assert (from == FRAME_POINTER_REGNUM);
5659 return frame.stack_pointer_offset - frame.frame_pointer_offset;
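/* Example (illustrative): on ia32 with a frame pointer,
   ix86_compute_frame_layout places the hard frame pointer
   UNITS_PER_WORD * 2 == 8 bytes below the incoming argument pointer
   (return address plus saved %ebp), so the ARG_POINTER ->
   HARD_FRAME_POINTER elimination above returns 8.  */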
5663 /* Fill the structure ix86_frame describing the frame of the current function. */
5666 ix86_compute_frame_layout (struct ix86_frame *frame)
5668 HOST_WIDE_INT total_size;
5669 unsigned int stack_alignment_needed;
5670 HOST_WIDE_INT offset;
5671 unsigned int preferred_alignment;
5672 HOST_WIDE_INT size = get_frame_size ();
5674 frame->nregs = ix86_nsaved_regs ();
5677 stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
5678 preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
5680 /* During reload iteration the number of registers saved can change.
5681 Recompute the value as needed. Do not recompute when the number of
5682 registers didn't change, as reload does multiple calls to the function
5683 and does not expect the decision to change within a single iteration. */
5685 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
5687 int count = frame->nregs;
5689 cfun->machine->use_fast_prologue_epilogue_nregs = count;
5690 /* The fast prologue uses move instead of push to save registers. This
5691 is significantly longer, but also executes faster as modern hardware
5692 can execute the moves in parallel, but can't do that for push/pop.
5694 Be careful about choosing what prologue to emit: when the function takes
5695 many instructions to execute, we may use the slow version, as well as
5696 when the function is known to be outside a hot spot (known only with
5697 profile feedback). Weight the size of the function by the number of
5698 registers to save, as it is cheap to use one or two push instructions
5699 but very slow to use many of them. */
5701 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
5702 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
5703 || (flag_branch_probabilities
5704 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
5705 cfun->machine->use_fast_prologue_epilogue = false;
5707 cfun->machine->use_fast_prologue_epilogue
5708 = !expensive_function_p (count);
5710 if (TARGET_PROLOGUE_USING_MOVE
5711 && cfun->machine->use_fast_prologue_epilogue)
5712 frame->save_regs_using_mov = true;
5714 frame->save_regs_using_mov = false;
5717 /* Skip return address and saved base pointer. */
5718 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
5720 frame->hard_frame_pointer_offset = offset;
5722 /* Do some sanity checking of stack_alignment_needed and
5723 preferred_alignment, since the i386 port is the only one using these
5724 features, and they may break easily. */
5726 gcc_assert (!size || stack_alignment_needed);
5727 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
5728 gcc_assert (preferred_alignment <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
5729 gcc_assert (stack_alignment_needed
5730 <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
5732 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
5733 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
5735 /* Register save area */
5736 offset += frame->nregs * UNITS_PER_WORD;
5739 if (ix86_save_varrargs_registers)
5741 offset += X86_64_VARARGS_SIZE;
5742 frame->va_arg_size = X86_64_VARARGS_SIZE;
5745 frame->va_arg_size = 0;
5747 /* Align start of frame for local function. */
5748 frame->padding1 = ((offset + stack_alignment_needed - 1)
5749 & -stack_alignment_needed) - offset;
5751 offset += frame->padding1;
5753 /* Frame pointer points here. */
5754 frame->frame_pointer_offset = offset;
5758 /* Add the outgoing arguments area. It can be skipped if we eliminated
5759 all the function calls as dead code.
5760 Skipping is, however, impossible when the function calls alloca: the
5761 alloca expander assumes that the last current_function_outgoing_args_size
5762 bytes of the stack frame are unused. */
5763 if (ACCUMULATE_OUTGOING_ARGS
5764 && (!current_function_is_leaf || current_function_calls_alloca
5765 || ix86_current_function_calls_tls_descriptor))
5767 offset += current_function_outgoing_args_size;
5768 frame->outgoing_arguments_size = current_function_outgoing_args_size;
5771 frame->outgoing_arguments_size = 0;
5773 /* Align stack boundary. Only needed if we're calling another function or using alloca. */
5775 if (!current_function_is_leaf || current_function_calls_alloca
5776 || ix86_current_function_calls_tls_descriptor)
5777 frame->padding2 = ((offset + preferred_alignment - 1)
5778 & -preferred_alignment) - offset;
5780 frame->padding2 = 0;
5782 offset += frame->padding2;
5784 /* We've reached end of stack frame. */
5785 frame->stack_pointer_offset = offset;
5787 /* Size prologue needs to allocate. */
5788 frame->to_allocate =
5789 (size + frame->padding1 + frame->padding2
5790 + frame->outgoing_arguments_size + frame->va_arg_size);
5792 if ((!frame->to_allocate && frame->nregs <= 1)
5793 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
5794 frame->save_regs_using_mov = false;
5796 if (TARGET_RED_ZONE && current_function_sp_is_unchanging
5797 && current_function_is_leaf
5798 && !ix86_current_function_calls_tls_descriptor)
5800 frame->red_zone_size = frame->to_allocate;
5801 if (frame->save_regs_using_mov)
5802 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
5803 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
5804 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
5807 frame->red_zone_size = 0;
5808 frame->to_allocate -= frame->red_zone_size;
5809 frame->stack_pointer_offset -= frame->red_zone_size;
5811 fprintf (stderr, "\n");
5812 fprintf (stderr, "nregs: %ld\n", (long)frame->nregs);
5813 fprintf (stderr, "size: %ld\n", (long)size);
5814 fprintf (stderr, "alignment1: %ld\n", (long)stack_alignment_needed);
5815 fprintf (stderr, "padding1: %ld\n", (long)frame->padding1);
5816 fprintf (stderr, "va_arg: %ld\n", (long)frame->va_arg_size);
5817 fprintf (stderr, "padding2: %ld\n", (long)frame->padding2);
5818 fprintf (stderr, "to_allocate: %ld\n", (long)frame->to_allocate);
5819 fprintf (stderr, "red_zone_size: %ld\n", (long)frame->red_zone_size);
5820 fprintf (stderr, "frame_pointer_offset: %ld\n", (long)frame->frame_pointer_offset);
5821 fprintf (stderr, "hard_frame_pointer_offset: %ld\n",
5822 (long)frame->hard_frame_pointer_offset);
5823 fprintf (stderr, "stack_pointer_offset: %ld\n", (long)frame->stack_pointer_offset);
5824 fprintf (stderr, "current_function_is_leaf: %ld\n", (long)current_function_is_leaf);
5825 fprintf (stderr, "current_function_calls_alloca: %ld\n", (long)current_function_calls_alloca);
5826 fprintf (stderr, "x86_current_function_calls_tls_descriptor: %ld\n", (long)ix86_current_function_calls_tls_descriptor);
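/* Red-zone example (illustrative): a leaf x86-64 function with 40
   bytes of locals and no calls fits entirely within the 128-byte red
   zone, so the adjustment above moves those bytes out of to_allocate
   and the prologue emits no explicit stack-pointer subtraction.  */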
5830 /* Emit code to save registers in the prologue. */
5833 ix86_emit_save_regs (void)
5838 for (regno = FIRST_PSEUDO_REGISTER; regno-- > 0; )
5839 if (ix86_save_reg (regno, true))
5841 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
5842 RTX_FRAME_RELATED_P (insn) = 1;
5846 /* Emit code to save registers using MOV insns. The first register
5847 is stored at POINTER + OFFSET. */
5849 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
5854 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5855 if (ix86_save_reg (regno, true))
5857 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
5859 gen_rtx_REG (Pmode, regno));
5860 RTX_FRAME_RELATED_P (insn) = 1;
5861 offset += UNITS_PER_WORD;
5865 /* Expand a prologue or epilogue stack adjustment.
5866 The pattern exists to put a dependency on all ebp-based memory accesses.
5867 STYLE should be negative if instructions should be marked as frame related,
5868 zero if the %r11 register is live and cannot be freely used, and positive
5872 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
5877 insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
5878 else if (x86_64_immediate_operand (offset, DImode))
5879 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
5883 /* r11 is used by indirect sibcall return as well, set before the
5884 epilogue and used after the epilogue. ATM indirect sibcall
5885 shouldn't be used together with huge frame sizes in one
5886 function because of the frame_size check in sibcall.c. */
5888 r11 = gen_rtx_REG (DImode, R11_REG);
5889 insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
5891 RTX_FRAME_RELATED_P (insn) = 1;
5892 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
5896 RTX_FRAME_RELATED_P (insn) = 1;
5899 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
5902 ix86_internal_arg_pointer (void)
5904 bool has_force_align_arg_pointer =
5905 (0 != lookup_attribute (ix86_force_align_arg_pointer_string,
5906 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))));
5907 if ((FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN
5908 && DECL_NAME (current_function_decl)
5909 && MAIN_NAME_P (DECL_NAME (current_function_decl))
5910 && DECL_FILE_SCOPE_P (current_function_decl))
5911 || ix86_force_align_arg_pointer
5912 || has_force_align_arg_pointer)
5914 /* Nested functions can't realign the stack due to a register conflict. */
5916 if (DECL_CONTEXT (current_function_decl)
5917 && TREE_CODE (DECL_CONTEXT (current_function_decl)) == FUNCTION_DECL)
5919 if (ix86_force_align_arg_pointer)
5920 warning (0, "-mstackrealign ignored for nested functions");
5921 if (has_force_align_arg_pointer)
5922 error ("%s not supported for nested functions",
5923 ix86_force_align_arg_pointer_string);
5924 return virtual_incoming_args_rtx;
5926 cfun->machine->force_align_arg_pointer = gen_rtx_REG (Pmode, 2);
5927 return copy_to_reg (cfun->machine->force_align_arg_pointer);
5930 return virtual_incoming_args_rtx;
5933 /* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook.
5934 This is called from dwarf2out.c to emit call frame instructions
5935 for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs. */
5937 ix86_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
5939 rtx unspec = SET_SRC (pattern);
5940 gcc_assert (GET_CODE (unspec) == UNSPEC);
5944 case UNSPEC_REG_SAVE:
5945 dwarf2out_reg_save_reg (label, XVECEXP (unspec, 0, 0),
5946 SET_DEST (pattern));
5948 case UNSPEC_DEF_CFA:
5949 dwarf2out_def_cfa (label, REGNO (SET_DEST (pattern)),
5950 INTVAL (XVECEXP (unspec, 0, 0)));
5957 /* Expand the prologue into a bunch of separate insns. */
5960 ix86_expand_prologue (void)
5964 struct ix86_frame frame;
5965 HOST_WIDE_INT allocate;
5967 ix86_compute_frame_layout (&frame);
5969 if (cfun->machine->force_align_arg_pointer)
5973 /* Grab the argument pointer. */
5974 x = plus_constant (stack_pointer_rtx, 4);
5975 y = cfun->machine->force_align_arg_pointer;
5976 insn = emit_insn (gen_rtx_SET (VOIDmode, y, x));
5977 RTX_FRAME_RELATED_P (insn) = 1;
5979 /* The unwind info consists of two parts: install the fafp as the cfa,
5980 and record the fafp as the "save register" of the stack pointer.
5981 The latter is there so that the unwinder can see where it
5982 should restore the stack pointer across the "and" insn. */
5983 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx), UNSPEC_DEF_CFA);
5984 x = gen_rtx_SET (VOIDmode, y, x);
5985 RTX_FRAME_RELATED_P (x) = 1;
5986 y = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, stack_pointer_rtx),
5988 y = gen_rtx_SET (VOIDmode, cfun->machine->force_align_arg_pointer, y);
5989 RTX_FRAME_RELATED_P (y) = 1;
5990 x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y));
5991 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
5992 REG_NOTES (insn) = x;
5994 /* Align the stack. */
5995 emit_insn (gen_andsi3 (stack_pointer_rtx, stack_pointer_rtx,
5998 /* And here we cheat like madmen with the unwind info. We force the
5999 cfa register back to sp+4, which is exactly what it was at the
6000 start of the function. Re-pushing the return address results in
6001 the return address being at the same spot relative to the cfa, and
6002 thus is correct wrt the unwind info. */
6003 x = cfun->machine->force_align_arg_pointer;
6004 x = gen_frame_mem (Pmode, plus_constant (x, -4));
6005 insn = emit_insn (gen_push (x));
6006 RTX_FRAME_RELATED_P (insn) = 1;
6009 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, x), UNSPEC_DEF_CFA);
6010 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
6011 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
6012 REG_NOTES (insn) = x;
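/* A minimal sketch of what this realignment block emits for a 32-bit
   function (assuming reg 2 is %ecx and a 16-byte preferred boundary):

	leal	4(%esp), %ecx		; grab the incoming argument pointer
	andl	$-16, %esp		; align the stack
	pushl	-4(%ecx)		; re-push the return address

   which matches the unwind bookkeeping above: %ecx carries the cfa,
   and the re-pushed return address restores the sp+4 invariant. */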
6015 /* Note: AT&T enter does NOT have reversed args. Enter is probably
6016 slower on all targets. Also sdb doesn't like it. */
6018 if (frame_pointer_needed)
6020 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
6021 RTX_FRAME_RELATED_P (insn) = 1;
6023 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
6024 RTX_FRAME_RELATED_P (insn) = 1;
6027 allocate = frame.to_allocate;
6029 if (!frame.save_regs_using_mov)
6030 ix86_emit_save_regs ();
6032 allocate += frame.nregs * UNITS_PER_WORD;
6034 /* When using the red zone we may start register saving before allocating
6035 the stack frame, saving one cycle of the prologue. */
6036 if (TARGET_RED_ZONE && frame.save_regs_using_mov)
6037 ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
6038 : stack_pointer_rtx,
6039 -frame.nregs * UNITS_PER_WORD);
6043 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
6044 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
6045 GEN_INT (-allocate), -1);
6048 /* Only valid for Win32 targets (and the Win64 MS ABI). */
6049 rtx eax = gen_rtx_REG (Pmode, 0);
6053 gcc_assert (!TARGET_64BIT || TARGET_64BIT_MS_ABI);
6055 if (TARGET_64BIT_MS_ABI)
6058 eax_live = ix86_eax_live_at_start_p ();
6062 emit_insn (gen_push (eax));
6063 allocate -= UNITS_PER_WORD;
6066 emit_move_insn (eax, GEN_INT (allocate));
6069 insn = gen_allocate_stack_worker_64 (eax);
6071 insn = gen_allocate_stack_worker_32 (eax);
6072 insn = emit_insn (insn);
6073 RTX_FRAME_RELATED_P (insn) = 1;
6074 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
6075 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
6076 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
6077 t, REG_NOTES (insn));
6081 if (frame_pointer_needed)
6082 t = plus_constant (hard_frame_pointer_rtx,
6085 - frame.nregs * UNITS_PER_WORD);
6087 t = plus_constant (stack_pointer_rtx, allocate);
6088 emit_move_insn (eax, gen_rtx_MEM (Pmode, t));
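/* Rough shape of the probed allocation, assuming a 32-bit target where
   the worker expands to a chkstk-style helper call (exact symbol is
   target-dependent):

	push	%eax			; only if eax was live at start
	mov	$allocate, %eax
	call	___chkstk		; helper name illustrative
	mov	disp(%esp), %eax	; reload eax if it was live

   The REG_FRAME_RELATED_EXPR note above records the net sp change. */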
6092 if (frame.save_regs_using_mov && !TARGET_RED_ZONE)
6094 if (!frame_pointer_needed || !frame.to_allocate)
6095 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
6097 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
6098 -frame.nregs * UNITS_PER_WORD);
6101 pic_reg_used = false;
6102 if (pic_offset_table_rtx
6103 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
6104 || current_function_profile))
6106 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
6108 if (alt_pic_reg_used != INVALID_REGNUM)
6109 REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
6111 pic_reg_used = true;
6118 if (ix86_cmodel == CM_LARGE_PIC)
6120 rtx tmp_reg = gen_rtx_REG (DImode,
6121 FIRST_REX_INT_REG + 3 /* R11 */);
6122 rtx label = gen_label_rtx ();
6124 LABEL_PRESERVE_P (label) = 1;
6125 gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
6126 insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx, label));
6127 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
6128 insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
6129 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
6130 insn = emit_insn (gen_adddi3 (pic_offset_table_rtx,
6131 pic_offset_table_rtx, tmp_reg));
6134 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
6137 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
6139 /* Even with accurate pre-reload life analysis, we can wind up
6140 deleting all references to the pic register after reload.
6141 Consider the case where cross-jumping unifies two sides of a branch
6142 controlled by a comparison vs the only read from a global.
6143 In that case, allow the set_got to be deleted, though we're
6144 too late to do anything about the ebx save in the prologue. */
6145 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
6148 /* Prevent function calls from being scheduled before the call to mcount.
6149 In the pic_reg_used case, make sure that the GOT load isn't deleted. */
6150 if (current_function_profile)
6151 emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
6154 /* Emit code to restore saved registers using MOV insns. First register
6155 is restored from POINTER + OFFSET. */
6157 ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
6158 int maybe_eh_return)
6161 rtx base_address = gen_rtx_MEM (Pmode, pointer);
6163 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
6164 if (ix86_save_reg (regno, maybe_eh_return))
6166 /* Ensure that adjust_address won't be forced to produce a pointer
6167 out of the range allowed by the x86-64 instruction set. */
6168 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
6172 r11 = gen_rtx_REG (DImode, R11_REG);
6173 emit_move_insn (r11, GEN_INT (offset));
6174 emit_insn (gen_adddi3 (r11, r11, pointer));
6175 base_address = gen_rtx_MEM (Pmode, r11);
6178 emit_move_insn (gen_rtx_REG (Pmode, regno),
6179 adjust_address (base_address, Pmode, offset));
6180 offset += UNITS_PER_WORD;
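/* Sketch of the out-of-range case handled above: a restore offset that
   does not fit in a signed 32-bit displacement is rebased through %r11,
   e.g. (offset purely illustrative):

	movabs	$0x180000000, %r11
	add	%rsp, %r11
	mov	(%r11), %rbx

   In-range offsets use ordinary disp(%rsp) or disp(%rbp) loads. */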
6184 /* Restore function stack, frame, and registers. */
6187 ix86_expand_epilogue (int style)
6190 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
6191 struct ix86_frame frame;
6192 HOST_WIDE_INT offset;
6194 ix86_compute_frame_layout (&frame);
6196 /* Calculate start of saved registers relative to ebp. Special care
6197 must be taken for the normal return case of a function using
6198 eh_return: the eax and edx registers are marked as saved, but not
6199 restored along this path. */
6200 offset = frame.nregs;
6201 if (current_function_calls_eh_return && style != 2)
6203 offset *= -UNITS_PER_WORD;
6205 /* If we're only restoring one register and sp is not valid then
6206 use a move instruction to restore the register, since it's
6207 less work than reloading sp and popping the register.
6209 The default code results in a stack adjustment using an add/lea instruction,
6210 while this code results in a LEAVE instruction (or its discrete equivalent),
6211 so it is profitable in some other cases as well, especially when there
6212 are no registers to restore. We also use this code when TARGET_USE_LEAVE
6213 is set and there is exactly one register to pop. This heuristic may need
6214 some tuning in the future. */
6215 if ((!sp_valid && frame.nregs <= 1)
6216 || (TARGET_EPILOGUE_USING_MOVE
6217 && cfun->machine->use_fast_prologue_epilogue
6218 && (frame.nregs > 1 || frame.to_allocate))
6219 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
6220 || (frame_pointer_needed && TARGET_USE_LEAVE
6221 && cfun->machine->use_fast_prologue_epilogue
6222 && frame.nregs == 1)
6223 || current_function_calls_eh_return)
6225 /* Restore registers. We can use ebp or esp to address the memory
6226 locations. If both are available, default to ebp, since offsets
6227 are known to be small. The only exception is esp pointing directly to the
6228 end of the block of saved registers, where we may simplify the addressing mode. */
6231 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
6232 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
6233 frame.to_allocate, style == 2);
6235 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
6236 offset, style == 2);
6238 /* eh_return epilogues need %ecx added to the stack pointer. */
6241 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
6243 if (frame_pointer_needed)
6245 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
6246 tmp = plus_constant (tmp, UNITS_PER_WORD);
6247 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
6249 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
6250 emit_move_insn (hard_frame_pointer_rtx, tmp);
6252 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
6257 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
6258 tmp = plus_constant (tmp, (frame.to_allocate
6259 + frame.nregs * UNITS_PER_WORD));
6260 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
6263 else if (!frame_pointer_needed)
6264 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
6265 GEN_INT (frame.to_allocate
6266 + frame.nregs * UNITS_PER_WORD),
6268 /* If not an i386, mov & pop is faster than "leave". */
6269 else if (TARGET_USE_LEAVE || optimize_size
6270 || !cfun->machine->use_fast_prologue_epilogue)
6271 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
6274 pro_epilogue_adjust_stack (stack_pointer_rtx,
6275 hard_frame_pointer_rtx,
6278 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
6280 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
6285 /* First step is to deallocate the stack frame so that we can
6286 pop the registers. */
6289 gcc_assert (frame_pointer_needed);
6290 pro_epilogue_adjust_stack (stack_pointer_rtx,
6291 hard_frame_pointer_rtx,
6292 GEN_INT (offset), style);
6294 else if (frame.to_allocate)
6295 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
6296 GEN_INT (frame.to_allocate), style);
6298 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
6299 if (ix86_save_reg (regno, false))
6302 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
6304 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
6306 if (frame_pointer_needed)
6308 /* "leave" results in shorter dependency chains on CPUs that are
6309 able to execute it fast. */
6310 if (TARGET_USE_LEAVE)
6311 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
6312 else if (TARGET_64BIT)
6313 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
6315 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
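/* For reference: "leave" is equivalent to the discrete pair

	mov	%ebp, %esp
	pop	%ebp

   (mov %rbp,%rsp / pop %rbp in 64-bit mode), so the TARGET_USE_LEAVE
   choice above only trades code size against dependency-chain length. */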
6319 if (cfun->machine->force_align_arg_pointer)
6321 emit_insn (gen_addsi3 (stack_pointer_rtx,
6322 cfun->machine->force_align_arg_pointer,
6326 /* Sibcall epilogues don't want a return instruction. */
6330 if (current_function_pops_args && current_function_args_size)
6332 rtx popc = GEN_INT (current_function_pops_args);
6334 /* i386 can only pop 64K bytes. If asked to pop more, pop the
6335 return address, do an explicit add, and jump indirectly to the caller. */
6338 if (current_function_pops_args >= 65536)
6340 rtx ecx = gen_rtx_REG (SImode, 2);
6342 /* There is no "pascal" calling convention in any 64bit ABI. */
6343 gcc_assert (!TARGET_64BIT);
6345 emit_insn (gen_popsi1 (ecx));
6346 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
6347 emit_jump_insn (gen_return_indirect_internal (ecx));
6350 emit_jump_insn (gen_return_pop_internal (popc));
6353 emit_jump_insn (gen_return_internal ());
6356 /* Reset from the function's potential modifications. */
6359 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
6360 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
6362 if (pic_offset_table_rtx)
6363 REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
6365 /* Mach-O doesn't support labels at the end of objects, so if
6366 it looks like we might want one, insert a NOP. */
6368 rtx insn = get_last_insn ();
6371 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
6372 insn = PREV_INSN (insn);
6376 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
6377 fputs ("\tnop\n", file);
6383 /* Extract the parts of an RTL expression that is a valid memory address
6384 for an instruction. Return 0 if the structure of the address is
6385 grossly off. Return -1 if the address contains ASHIFT, so it is not
6386 strictly valid, but is still used for computing the length of the lea instruction. */
6389 ix86_decompose_address (rtx addr, struct ix86_address *out)
6391 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
6392 rtx base_reg, index_reg;
6393 HOST_WIDE_INT scale = 1;
6394 rtx scale_rtx = NULL_RTX;
6396 enum ix86_address_seg seg = SEG_DEFAULT;
6398 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
6400 else if (GET_CODE (addr) == PLUS)
6410 addends[n++] = XEXP (op, 1);
6413 while (GET_CODE (op) == PLUS);
6418 for (i = n; i >= 0; --i)
6421 switch (GET_CODE (op))
6426 index = XEXP (op, 0);
6427 scale_rtx = XEXP (op, 1);
6431 if (XINT (op, 1) == UNSPEC_TP
6432 && TARGET_TLS_DIRECT_SEG_REFS
6433 && seg == SEG_DEFAULT)
6434 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
6463 else if (GET_CODE (addr) == MULT)
6465 index = XEXP (addr, 0); /* index*scale */
6466 scale_rtx = XEXP (addr, 1);
6468 else if (GET_CODE (addr) == ASHIFT)
6472 /* We're called for lea too, which implements ashift on occasion. */
6473 index = XEXP (addr, 0);
6474 tmp = XEXP (addr, 1);
6475 if (!CONST_INT_P (tmp))
6477 scale = INTVAL (tmp);
6478 if ((unsigned HOST_WIDE_INT) scale > 3)
6484 disp = addr; /* displacement */
6486 /* Extract the integral value of scale. */
6489 if (!CONST_INT_P (scale_rtx))
6491 scale = INTVAL (scale_rtx);
6494 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
6495 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
6497 /* Allow the arg pointer and stack pointer as an index if there is no scaling. */
6498 if (base_reg && index_reg && scale == 1
6499 && (index_reg == arg_pointer_rtx
6500 || index_reg == frame_pointer_rtx
6501 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
6504 tmp = base, base = index, index = tmp;
6505 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
6508 /* Special case: %ebp cannot be encoded as a base without a displacement. */
6509 if ((base_reg == hard_frame_pointer_rtx
6510 || base_reg == frame_pointer_rtx
6511 || base_reg == arg_pointer_rtx) && !disp)
6514 /* Special case: on K6, [%esi] causes the instruction to be vector
6515 decoded. Avoid this by transforming it to [%esi+0]. */
6516 if (ix86_tune == PROCESSOR_K6 && !optimize_size
6517 && base_reg && !index_reg && !disp
6519 && REGNO_REG_CLASS (REGNO (base_reg)) == SIREG)
6522 /* Special case: encode reg+reg instead of reg*2. */
6523 if (!base && index && scale && scale == 2)
6524 base = index, base_reg = index_reg, scale = 1;
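/* E.g. (mult %eax 2) with no base is rewritten here to the cheaper
   base+index form, matching "lea (%eax,%eax), %edx" rather than
   "lea 0(,%eax,2), %edx"; a scaled index with neither base nor
   displacement, in turn, can only be encoded with an explicit zero
   displacement, as in 0(,%eax,4). */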
6526 /* Special case: scaling cannot be encoded without base or displacement. */
6527 if (!base && !disp && index && scale != 1)
6539 /* Return the cost of the memory address x.
6540 For i386, it is better to use a complex address than let gcc copy
6541 the address into a reg and make a new pseudo. But not if the address
6542 requires two regs - that would mean more pseudos with longer lifetimes. */
6545 ix86_address_cost (rtx x)
6547 struct ix86_address parts;
6549 int ok = ix86_decompose_address (x, &parts);
6553 if (parts.base && GET_CODE (parts.base) == SUBREG)
6554 parts.base = SUBREG_REG (parts.base);
6555 if (parts.index && GET_CODE (parts.index) == SUBREG)
6556 parts.index = SUBREG_REG (parts.index);
6558 /* More complex memory references are better. */
6559 if (parts.disp && parts.disp != const0_rtx)
6561 if (parts.seg != SEG_DEFAULT)
6564 /* Attempt to minimize number of registers in the address. */
6566 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
6568 && (!REG_P (parts.index)
6569 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
6573 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
6575 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
6576 && parts.base != parts.index)
6579 /* The AMD K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
6580 since its predecode logic can't detect the length of such instructions
6581 and decoding degenerates to vector decoded. Increase the cost of such
6582 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
6583 to split such addresses or even refuse them at all.
6585 The following addressing modes are affected:
 [base+scale*index]
 [scale*index+disp]
 [base+index]
6590 The first and last cases may be avoidable by explicitly coding the zero into
6591 the memory address, but I don't have an AMD-K6 machine handy to check this theory. */
6595 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
6596 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
6597 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
6603 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
6604 this is used to form addresses to local data when -fPIC is in use. */
6608 darwin_local_data_pic (rtx disp)
6610 if (GET_CODE (disp) == MINUS)
6612 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
6613 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
6614 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
6616 const char *sym_name = XSTR (XEXP (disp, 1), 0);
6617 if (! strcmp (sym_name, "<pic base>"))
6625 /* Determine if a given RTX is a valid constant. We already know this
6626 satisfies CONSTANT_P. */
6629 legitimate_constant_p (rtx x)
6631 switch (GET_CODE (x))
6636 if (GET_CODE (x) == PLUS)
6638 if (!CONST_INT_P (XEXP (x, 1)))
6643 if (TARGET_MACHO && darwin_local_data_pic (x))
6646 /* Only some unspecs are valid as "constants". */
6647 if (GET_CODE (x) == UNSPEC)
6648 switch (XINT (x, 1))
6653 return TARGET_64BIT;
6656 x = XVECEXP (x, 0, 0);
6657 return (GET_CODE (x) == SYMBOL_REF
6658 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
6660 x = XVECEXP (x, 0, 0);
6661 return (GET_CODE (x) == SYMBOL_REF
6662 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
6667 /* We must have drilled down to a symbol. */
6668 if (GET_CODE (x) == LABEL_REF)
6670 if (GET_CODE (x) != SYMBOL_REF)
6675 /* TLS symbols are never valid. */
6676 if (SYMBOL_REF_TLS_MODEL (x))
6679 /* DLLIMPORT symbols are never valid. */
6680 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
6681 && SYMBOL_REF_DLLIMPORT_P (x))
6686 if (GET_MODE (x) == TImode
6687 && x != CONST0_RTX (TImode)
6693 if (x == CONST0_RTX (GET_MODE (x)))
6701 /* Otherwise we handle everything else in the move patterns. */
6705 /* Determine if it's legal to put X into the constant pool. This
6706 is not possible for the address of thread-local symbols, which
6707 is checked above. */
6710 ix86_cannot_force_const_mem (rtx x)
6712 /* We can always put integral constants and vectors in memory. */
6713 switch (GET_CODE (x))
6723 return !legitimate_constant_p (x);
6726 /* Determine if a given RTX is a valid constant address. */
6729 constant_address_p (rtx x)
6731 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
6734 /* Nonzero if the constant value X is a legitimate general operand
6735 when generating PIC code. It is given that flag_pic is on and
6736 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
6739 legitimate_pic_operand_p (rtx x)
6743 switch (GET_CODE (x))
6746 inner = XEXP (x, 0);
6747 if (GET_CODE (inner) == PLUS
6748 && CONST_INT_P (XEXP (inner, 1)))
6749 inner = XEXP (inner, 0);
6751 /* Only some unspecs are valid as "constants". */
6752 if (GET_CODE (inner) == UNSPEC)
6753 switch (XINT (inner, 1))
6758 return TARGET_64BIT;
6760 x = XVECEXP (inner, 0, 0);
6761 return (GET_CODE (x) == SYMBOL_REF
6762 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
6770 return legitimate_pic_address_disp_p (x);
6777 /* Determine if a given CONST RTX is a valid memory displacement in PIC mode. */
6781 legitimate_pic_address_disp_p (rtx disp)
6785 /* In 64bit mode we can allow direct addresses of symbols and labels
6786 when they are not dynamic symbols. */
6789 rtx op0 = disp, op1;
6791 switch (GET_CODE (disp))
6797 if (GET_CODE (XEXP (disp, 0)) != PLUS)
6799 op0 = XEXP (XEXP (disp, 0), 0);
6800 op1 = XEXP (XEXP (disp, 0), 1);
6801 if (!CONST_INT_P (op1)
6802 || INTVAL (op1) >= 16*1024*1024
6803 || INTVAL (op1) < -16*1024*1024)
6805 if (GET_CODE (op0) == LABEL_REF)
6807 if (GET_CODE (op0) != SYMBOL_REF)
6812 /* TLS references should always be enclosed in UNSPEC. */
6813 if (SYMBOL_REF_TLS_MODEL (op0))
6815 if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0)
6816 && ix86_cmodel != CM_LARGE_PIC)
6824 if (GET_CODE (disp) != CONST)
6826 disp = XEXP (disp, 0);
6830 /* It is unsafe to allow PLUS expressions; this limits the allowed
6831 distance of GOT table references. We should not need these anyway. */
6832 if (GET_CODE (disp) != UNSPEC
6833 || (XINT (disp, 1) != UNSPEC_GOTPCREL
6834 && XINT (disp, 1) != UNSPEC_GOTOFF
6835 && XINT (disp, 1) != UNSPEC_PLTOFF))
6838 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
6839 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
6845 if (GET_CODE (disp) == PLUS)
6847 if (!CONST_INT_P (XEXP (disp, 1)))
6849 disp = XEXP (disp, 0);
6853 if (TARGET_MACHO && darwin_local_data_pic (disp))
6856 if (GET_CODE (disp) != UNSPEC)
6859 switch (XINT (disp, 1))
6864 /* We need to check for both symbols and labels because VxWorks loads
6865 text labels with @GOT rather than @GOTOFF. See gotoff_operand for details. */
6867 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
6868 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
6870 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
6871 While the ABI also specifies a 32bit relocation, we don't produce it
6872 in the small PIC model at all. */
6873 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
6874 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
6876 return gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
6878 case UNSPEC_GOTTPOFF:
6879 case UNSPEC_GOTNTPOFF:
6880 case UNSPEC_INDNTPOFF:
6883 disp = XVECEXP (disp, 0, 0);
6884 return (GET_CODE (disp) == SYMBOL_REF
6885 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
6887 disp = XVECEXP (disp, 0, 0);
6888 return (GET_CODE (disp) == SYMBOL_REF
6889 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
6891 disp = XVECEXP (disp, 0, 0);
6892 return (GET_CODE (disp) == SYMBOL_REF
6893 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
6899 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
6900 memory address for an instruction. The MODE argument is the machine mode
6901 for the MEM expression that wants to use this address.
6903 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
6904 convert common non-canonical forms to canonical form so that they will be recognized. */
6908 legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
6909 rtx addr, int strict)
6911 struct ix86_address parts;
6912 rtx base, index, disp;
6913 HOST_WIDE_INT scale;
6914 const char *reason = NULL;
6915 rtx reason_rtx = NULL_RTX;
6917 if (ix86_decompose_address (addr, &parts) <= 0)
6919 reason = "decomposition failed";
6924 index = parts.index;
6926 scale = parts.scale;
6928 /* Validate base register.
6930 Don't allow SUBREGs that span more than a word here. It can lead to spill
6931 failures when the base is one word out of a two word structure, which is
6932 represented internally as a DImode int. */
6941 else if (GET_CODE (base) == SUBREG
6942 && REG_P (SUBREG_REG (base))
6943 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
6945 reg = SUBREG_REG (base);
6948 reason = "base is not a register";
6952 if (GET_MODE (base) != Pmode)
6954 reason = "base is not in Pmode";
6958 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
6959 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
6961 reason = "base is not valid";
6966 /* Validate index register.
6968 Don't allow SUBREGs that span more than a word here -- same as above. */
6977 else if (GET_CODE (index) == SUBREG
6978 && REG_P (SUBREG_REG (index))
6979 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
6981 reg = SUBREG_REG (index);
6984 reason = "index is not a register";
6988 if (GET_MODE (index) != Pmode)
6990 reason = "index is not in Pmode";
6994 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
6995 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
6997 reason = "index is not valid";
7002 /* Validate scale factor. */
7005 reason_rtx = GEN_INT (scale);
7008 reason = "scale without index";
7012 if (scale != 2 && scale != 4 && scale != 8)
7014 reason = "scale is not a valid multiplier";
7019 /* Validate displacement. */
7024 if (GET_CODE (disp) == CONST
7025 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
7026 switch (XINT (XEXP (disp, 0), 1))
7028 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
7029 used. While the ABI also specifies 32bit relocations, we don't produce
7030 them at all and use IP relative addressing instead. */
7033 gcc_assert (flag_pic);
7035 goto is_legitimate_pic;
7036 reason = "64bit address unspec";
7039 case UNSPEC_GOTPCREL:
7040 gcc_assert (flag_pic);
7041 goto is_legitimate_pic;
7043 case UNSPEC_GOTTPOFF:
7044 case UNSPEC_GOTNTPOFF:
7045 case UNSPEC_INDNTPOFF:
7051 reason = "invalid address unspec";
7055 else if (SYMBOLIC_CONST (disp)
7059 && MACHOPIC_INDIRECT
7060 && !machopic_operand_p (disp)
7066 if (TARGET_64BIT && (index || base))
7068 /* foo@dtpoff(%rX) is ok. */
7069 if (GET_CODE (disp) != CONST
7070 || GET_CODE (XEXP (disp, 0)) != PLUS
7071 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
7072 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
7073 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
7074 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
7076 reason = "non-constant pic memory reference";
7080 else if (! legitimate_pic_address_disp_p (disp))
7082 reason = "displacement is an invalid pic construct";
7086 /* This code used to verify that a symbolic pic displacement
7087 includes the pic_offset_table_rtx register.
7089 While this is a good idea, unfortunately these constructs may
7090 be created by the "adds using lea" optimization for incorrect
7099 This code is nonsensical, but results in addressing the
7100 GOT table with the pic_offset_table_rtx base. We can't
7101 just refuse it easily, since it gets matched by the
7102 "addsi3" pattern, which later gets split to lea in the
7103 case the output register differs from the input. While this
7104 could be handled by a separate addsi pattern for this case
7105 that never results in lea, disabling this test seems to be the
7106 easier and correct fix for the crash. */
7108 else if (GET_CODE (disp) != LABEL_REF
7109 && !CONST_INT_P (disp)
7110 && (GET_CODE (disp) != CONST
7111 || !legitimate_constant_p (disp))
7112 && (GET_CODE (disp) != SYMBOL_REF
7113 || !legitimate_constant_p (disp)))
7115 reason = "displacement is not constant";
7118 else if (TARGET_64BIT
7119 && !x86_64_immediate_operand (disp, VOIDmode))
7121 reason = "displacement is out of range";
7126 /* Everything looks valid. */
7133 /* Return a unique alias set for the GOT. */
7135 static HOST_WIDE_INT
7136 ix86_GOT_alias_set (void)
7138 static HOST_WIDE_INT set = -1;
7140 set = new_alias_set ();
7144 /* Return a legitimate reference for ORIG (an address) using the
7145 register REG. If REG is 0, a new pseudo is generated.
7147 There are two types of references that must be handled:
7149 1. Global data references must load the address from the GOT, via
7150 the PIC reg. An insn is emitted to do this load, and the reg is returned.
7153 2. Static data references, constant pool addresses, and code labels
7154 compute the address as an offset from the GOT, whose base is in
7155 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
7156 differentiate them from global data objects. The returned
7157 address is the PIC reg + an unspec constant.
7159 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
7160 reg also appears in the address. */
7163 legitimize_pic_address (rtx orig, rtx reg)
7170 if (TARGET_MACHO && !TARGET_64BIT)
7173 reg = gen_reg_rtx (Pmode);
7174 /* Use the generic Mach-O PIC machinery. */
7175 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
7179 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
7181 else if (TARGET_64BIT
7182 && ix86_cmodel != CM_SMALL_PIC
7183 && gotoff_operand (addr, Pmode))
7186 /* This symbol may be referenced via a displacement from the PIC
7187 base address (@GOTOFF). */
7189 if (reload_in_progress)
7190 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
7191 if (GET_CODE (addr) == CONST)
7192 addr = XEXP (addr, 0);
7193 if (GET_CODE (addr) == PLUS)
7195 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
7197 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
7200 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
7201 new = gen_rtx_CONST (Pmode, new);
7203 tmpreg = gen_reg_rtx (Pmode);
7206 emit_move_insn (tmpreg, new);
7210 new = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
7211 tmpreg, 1, OPTAB_DIRECT);
7214 else new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
7216 else if (!TARGET_64BIT && gotoff_operand (addr, Pmode))
7218 /* This symbol may be referenced via a displacement from the PIC
7219 base address (@GOTOFF). */
7221 if (reload_in_progress)
7222 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
7223 if (GET_CODE (addr) == CONST)
7224 addr = XEXP (addr, 0);
7225 if (GET_CODE (addr) == PLUS)
7227 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
7229 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
7232 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
7233 new = gen_rtx_CONST (Pmode, new);
7234 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
7238 emit_move_insn (reg, new);
7242 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
7243 /* We can't use @GOTOFF for text labels on VxWorks;
7244 see gotoff_operand. */
7245 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
7247 /* Given that we've already handled dllimport variables separately
7248 in legitimize_address, and all other variables should satisfy
7249 legitimate_pic_address_disp_p, we should never arrive here. */
7250 gcc_assert (!TARGET_64BIT_MS_ABI);
7252 if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
7254 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
7255 new = gen_rtx_CONST (Pmode, new);
7256 new = gen_const_mem (Pmode, new);
7257 set_mem_alias_set (new, ix86_GOT_alias_set ());
7260 reg = gen_reg_rtx (Pmode);
7261 /* Use gen_movsi directly; otherwise the address is loaded
7262 into a register for CSE. We don't want to CSE these addresses;
7263 instead we CSE addresses from the GOT table, so skip this. */
7264 emit_insn (gen_movsi (reg, new));
7269 /* This symbol must be referenced via a load from the
7270 Global Offset Table (@GOT). */
7272 if (reload_in_progress)
7273 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
7274 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
7275 new = gen_rtx_CONST (Pmode, new);
7277 new = force_reg (Pmode, new);
7278 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
7279 new = gen_const_mem (Pmode, new);
7280 set_mem_alias_set (new, ix86_GOT_alias_set ());
7283 reg = gen_reg_rtx (Pmode);
7284 emit_move_insn (reg, new);
7290 if (CONST_INT_P (addr)
7291 && !x86_64_immediate_operand (addr, VOIDmode))
7295 emit_move_insn (reg, addr);
7299 new = force_reg (Pmode, addr);
7301 else if (GET_CODE (addr) == CONST)
7303 addr = XEXP (addr, 0);
7305 /* We must match stuff we generated before. Assume the only
7306 unspecs that can get here are ours. Not that we could do
7307 anything with them anyway.... */
7308 if (GET_CODE (addr) == UNSPEC
7309 || (GET_CODE (addr) == PLUS
7310 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
7312 gcc_assert (GET_CODE (addr) == PLUS);
7314 if (GET_CODE (addr) == PLUS)
7316 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
7318 /* Check first to see if this is a constant offset from a @GOTOFF
7319 symbol reference. */
7320 if (gotoff_operand (op0, Pmode)
7321 && CONST_INT_P (op1))
7325 if (reload_in_progress)
7326 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
7327 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
7329 new = gen_rtx_PLUS (Pmode, new, op1);
7330 new = gen_rtx_CONST (Pmode, new);
7331 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
7335 emit_move_insn (reg, new);
7341 if (INTVAL (op1) < -16*1024*1024
7342 || INTVAL (op1) >= 16*1024*1024)
7344 if (!x86_64_immediate_operand (op1, Pmode))
7345 op1 = force_reg (Pmode, op1);
7346 new = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
7352 base = legitimize_pic_address (XEXP (addr, 0), reg);
7353 new = legitimize_pic_address (XEXP (addr, 1),
7354 base == reg ? NULL_RTX : reg);
7356 if (CONST_INT_P (new))
7357 new = plus_constant (base, INTVAL (new));
7360 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
7362 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
7363 new = XEXP (new, 1);
7365 new = gen_rtx_PLUS (Pmode, base, new);
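/* For orientation, a sketch of the main 32-bit PIC forms produced
   above (PIC register assumed in %ebx):

	movl	glob@GOT(%ebx), %eax	; global data: load address from GOT
	leal	loc@GOTOFF(%ebx), %eax	; local data: offset from GOT base

   while 64-bit small-PIC code uses sym@GOTPCREL(%rip) loads instead.
   The exact form depends on the cmodel and target checks above. */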
7373 /* Load the thread pointer. If TO_REG is true, force it into a register. */
7376 get_thread_pointer (int to_reg)
7380 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
7384 reg = gen_reg_rtx (Pmode);
7385 insn = gen_rtx_SET (VOIDmode, reg, tp);
7386 insn = emit_insn (insn);
7391 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
7392 false if we expect this to be used for a memory address and true if
7393 we expect to load the address into a register. */
7396 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
7398 rtx dest, base, off, pic, tp;
7403 case TLS_MODEL_GLOBAL_DYNAMIC:
7404 dest = gen_reg_rtx (Pmode);
7405 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
7407 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
7409 rtx rax = gen_rtx_REG (Pmode, 0), insns;
7412 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
7413 insns = get_insns ();
7416 CONST_OR_PURE_CALL_P (insns) = 1;
7417 emit_libcall_block (insns, dest, rax, x);
7419 else if (TARGET_64BIT && TARGET_GNU2_TLS)
7420 emit_insn (gen_tls_global_dynamic_64 (dest, x));
7422 emit_insn (gen_tls_global_dynamic_32 (dest, x));
7424 if (TARGET_GNU2_TLS)
7426 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
7428 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
7432 case TLS_MODEL_LOCAL_DYNAMIC:
7433 base = gen_reg_rtx (Pmode);
7434 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
7436 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
7438 rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
7441 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
7442 insns = get_insns ();
7445 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
7446 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
7447 CONST_OR_PURE_CALL_P (insns) = 1;
7448 emit_libcall_block (insns, base, rax, note);
7450 else if (TARGET_64BIT && TARGET_GNU2_TLS)
7451 emit_insn (gen_tls_local_dynamic_base_64 (base));
7453 emit_insn (gen_tls_local_dynamic_base_32 (base));
7455 if (TARGET_GNU2_TLS)
7457 rtx x = ix86_tls_module_base ();
7459 set_unique_reg_note (get_last_insn (), REG_EQUIV,
7460 gen_rtx_MINUS (Pmode, x, tp));
7463 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
7464 off = gen_rtx_CONST (Pmode, off);
7466 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
7468 if (TARGET_GNU2_TLS)
7470 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
7472 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
7477 case TLS_MODEL_INITIAL_EXEC:
7481 type = UNSPEC_GOTNTPOFF;
7485 if (reload_in_progress)
7486 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
7487 pic = pic_offset_table_rtx;
7488 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
7490 else if (!TARGET_ANY_GNU_TLS)
7492 pic = gen_reg_rtx (Pmode);
7493 emit_insn (gen_set_got (pic));
7494 type = UNSPEC_GOTTPOFF;
7499 type = UNSPEC_INDNTPOFF;
7502 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
7503 off = gen_rtx_CONST (Pmode, off);
7505 off = gen_rtx_PLUS (Pmode, pic, off);
7506 off = gen_const_mem (Pmode, off);
7507 set_mem_alias_set (off, ix86_GOT_alias_set ());
7509 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7511 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
7512 off = force_reg (Pmode, off);
7513 return gen_rtx_PLUS (Pmode, base, off);
7517 base = get_thread_pointer (true);
7518 dest = gen_reg_rtx (Pmode);
7519 emit_insn (gen_subsi3 (dest, base, off));
7523 case TLS_MODEL_LOCAL_EXEC:
7524 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
7525 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7526 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
7527 off = gen_rtx_CONST (Pmode, off);
7529 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7531 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
7532 return gen_rtx_PLUS (Pmode, base, off);
7536 base = get_thread_pointer (true);
7537 dest = gen_reg_rtx (Pmode);
7538 emit_insn (gen_subsi3 (dest, base, off));
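/* A hedged sketch of the local-exec output on 32-bit GNU targets
   (thread pointer in %gs):

	movl	%gs:0, %eax
	leal	x@ntpoff(%eax), %eax	; TARGET_ANY_GNU_TLS

   versus the fallback above that subtracts x@tpoff from the thread
   pointer. The relocation spellings themselves are emitted by
   output_pic_addr_const below. */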
7549 /* Create or return the unique __imp_DECL dllimport symbol corresponding
7552 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
7553 htab_t dllimport_map;
7556 get_dllimport_decl (tree decl)
7558 struct tree_map *h, in;
7562 size_t namelen, prefixlen;
7568 dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);
7570 in.hash = htab_hash_pointer (decl);
7571 in.base.from = decl;
7572 loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
7577 *loc = h = ggc_alloc (sizeof (struct tree_map));
7579 h->base.from = decl;
7580 h->to = to = build_decl (VAR_DECL, NULL, ptr_type_node);
7581 DECL_ARTIFICIAL (to) = 1;
7582 DECL_IGNORED_P (to) = 1;
7583 DECL_EXTERNAL (to) = 1;
7584 TREE_READONLY (to) = 1;
7586 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
7587 name = targetm.strip_name_encoding (name);
7588 if (name[0] == FASTCALL_PREFIX)
7594 prefix = "*__imp__";
7596 namelen = strlen (name);
7597 prefixlen = strlen (prefix);
7598 imp_name = alloca (namelen + prefixlen + 1);
7599 memcpy (imp_name, prefix, prefixlen);
7600 memcpy (imp_name + prefixlen, name, namelen + 1);
7602 name = ggc_alloc_string (imp_name, namelen + prefixlen);
7603 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
7604 SET_SYMBOL_REF_DECL (rtl, to);
7605 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL;
7607 rtl = gen_const_mem (Pmode, rtl);
7608 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
7610 SET_DECL_RTL (to, rtl);
7615 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
7616 true if we require the result to be a register. */
7619 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
7624 gcc_assert (SYMBOL_REF_DECL (symbol));
7625 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol));
7627 x = DECL_RTL (imp_decl);
7629 x = force_reg (Pmode, x);
7633 /* Try machine-dependent ways of modifying an illegitimate address
7634 to be legitimate. If we find one, return the new, valid address.
7635 This macro is used in only one place: `memory_address' in explow.c.
7637 OLDX is the address as it was before break_out_memory_refs was called.
7638 In some cases it is useful to look at this to decide what needs to be done.
7640 MODE and WIN are passed so that this macro can use
7641 GO_IF_LEGITIMATE_ADDRESS.
7643 It is always safe for this macro to do nothing. It exists to recognize
7644 opportunities to optimize the output.
7646 For the 80386, we handle X+REG by loading X into a register R and
7647 using R+REG. R will go in a general reg and indexing will be used.
7648 However, if REG is a broken-out memory address or multiplication,
7649 nothing needs to be done because REG can certainly go in a general reg.
7651 When -fpic is used, special handling is needed for symbolic references.
7652 See comments by legitimize_pic_address in i386.c for details. */
7655 legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
7660 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
7662 return legitimize_tls_address (x, log, false);
7663 if (GET_CODE (x) == CONST
7664 && GET_CODE (XEXP (x, 0)) == PLUS
7665 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
7666 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
7668 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0), log, false);
7669 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
7672 if (flag_pic && SYMBOLIC_CONST (x))
7673 return legitimize_pic_address (x, 0);
7675 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
7677 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (x))
7678 return legitimize_dllimport_symbol (x, true);
7679 if (GET_CODE (x) == CONST
7680 && GET_CODE (XEXP (x, 0)) == PLUS
7681 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
7682 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (x, 0), 0)))
7684 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (x, 0), 0), true);
7685 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
7689 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
7690 if (GET_CODE (x) == ASHIFT
7691 && CONST_INT_P (XEXP (x, 1))
7692 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
7695 log = INTVAL (XEXP (x, 1));
7696 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
7697 GEN_INT (1 << log));
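/* E.g. (ashift %eax 2) becomes (mult %eax 4) here, the scaled-index
   form the address recognizers expect, as in "lea 0(,%eax,4), %edx".
   Shift counts above 3 have no SIB scale and are left untouched. */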
7700 if (GET_CODE (x) == PLUS)
7702 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
7704 if (GET_CODE (XEXP (x, 0)) == ASHIFT
7705 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
7706 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
7709 log = INTVAL (XEXP (XEXP (x, 0), 1));
7710 XEXP (x, 0) = gen_rtx_MULT (Pmode,
7711 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
7712 GEN_INT (1 << log));
7715 if (GET_CODE (XEXP (x, 1)) == ASHIFT
7716 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
7717 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
7720 log = INTVAL (XEXP (XEXP (x, 1), 1));
7721 XEXP (x, 1) = gen_rtx_MULT (Pmode,
7722 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
7723 GEN_INT (1 << log));
7726 /* Put multiply first if it isn't already. */
7727 if (GET_CODE (XEXP (x, 1)) == MULT)
7729 rtx tmp = XEXP (x, 0);
7730 XEXP (x, 0) = XEXP (x, 1);
7735 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
7736 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
7737 created by virtual register instantiation, register elimination, and
7738 similar optimizations. */
7739 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
7742 x = gen_rtx_PLUS (Pmode,
7743 gen_rtx_PLUS (Pmode, XEXP (x, 0),
7744 XEXP (XEXP (x, 1), 0)),
7745 XEXP (XEXP (x, 1), 1));
7749 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
7750 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
7751 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
7752 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
7753 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
7754 && CONSTANT_P (XEXP (x, 1)))
7757 rtx other = NULL_RTX;
7759 if (CONST_INT_P (XEXP (x, 1)))
7761 constant = XEXP (x, 1);
7762 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
7764 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
7766 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
7767 other = XEXP (x, 1);
7775 x = gen_rtx_PLUS (Pmode,
7776 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
7777 XEXP (XEXP (XEXP (x, 0), 1), 0)),
7778 plus_constant (other, INTVAL (constant)));
7782 if (changed && legitimate_address_p (mode, x, FALSE))
7785 if (GET_CODE (XEXP (x, 0)) == MULT)
7788 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
7791 if (GET_CODE (XEXP (x, 1)) == MULT)
7794 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
7798 && REG_P (XEXP (x, 1))
7799 && REG_P (XEXP (x, 0)))
7802 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
7805 x = legitimize_pic_address (x, 0);
7808 if (changed && legitimate_address_p (mode, x, FALSE))
7811 if (REG_P (XEXP (x, 0)))
7813 rtx temp = gen_reg_rtx (Pmode);
7814 rtx val = force_operand (XEXP (x, 1), temp);
7816 emit_move_insn (temp, val);
7822 else if (REG_P (XEXP (x, 1)))
7824 rtx temp = gen_reg_rtx (Pmode);
7825 rtx val = force_operand (XEXP (x, 0), temp);
7827 emit_move_insn (temp, val);
7837 /* Print an integer constant expression in assembler syntax. Addition
7838 and subtraction are the only arithmetic that may appear in these
7839 expressions. FILE is the stdio stream to write to, X is the rtx, and
7840 CODE is the operand print code from the output string. */
7843 output_pic_addr_const (FILE *file, rtx x, int code)
7847 switch (GET_CODE (x))
7850 gcc_assert (flag_pic);
7855 if (! TARGET_MACHO || TARGET_64BIT)
7856 output_addr_const (file, x);
7859 const char *name = XSTR (x, 0);
7861 /* Mark the decl as referenced so that cgraph will
7862 output the function. */
7863 if (SYMBOL_REF_DECL (x))
7864 mark_decl_referenced (SYMBOL_REF_DECL (x));
7867 if (MACHOPIC_INDIRECT
7868 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
7869 name = machopic_indirection_name (x, /*stub_p=*/true);
7871 assemble_name (file, name);
7873 if (!TARGET_MACHO && !TARGET_64BIT_MS_ABI
7874 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
7875 fputs ("@PLT", file);
7882 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
7883 assemble_name (asm_out_file, buf);
7887 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
7891 /* This used to output parentheses around the expression,
7892 but that does not work on the 386 (either ATT or BSD assembler). */
7893 output_pic_addr_const (file, XEXP (x, 0), code);
7897 if (GET_MODE (x) == VOIDmode)
7899 /* We can use %d if the number is <32 bits and positive. */
7900 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
7901 fprintf (file, "0x%lx%08lx",
7902 (unsigned long) CONST_DOUBLE_HIGH (x),
7903 (unsigned long) CONST_DOUBLE_LOW (x));
7905 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
7908 /* We can't handle floating point constants;
7909 PRINT_OPERAND must handle them. */
7910 output_operand_lossage ("floating constant misused");
7914 /* Some assemblers need integer constants to appear first. */
7915 if (CONST_INT_P (XEXP (x, 0)))
7917 output_pic_addr_const (file, XEXP (x, 0), code);
7919 output_pic_addr_const (file, XEXP (x, 1), code);
7923 gcc_assert (CONST_INT_P (XEXP (x, 1)));
7924 output_pic_addr_const (file, XEXP (x, 1), code);
7926 output_pic_addr_const (file, XEXP (x, 0), code);
7932 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
7933 output_pic_addr_const (file, XEXP (x, 0), code);
7935 output_pic_addr_const (file, XEXP (x, 1), code);
7937 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
7941 gcc_assert (XVECLEN (x, 0) == 1);
7942 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
7943 switch (XINT (x, 1))
7946 fputs ("@GOT", file);
7949 fputs ("@GOTOFF", file);
7952 fputs ("@PLTOFF", file);
7954 case UNSPEC_GOTPCREL:
7955 fputs ("@GOTPCREL(%rip)", file);
7957 case UNSPEC_GOTTPOFF:
7958 /* FIXME: This might be @TPOFF in Sun ld too. */
7959 fputs ("@GOTTPOFF", file);
7962 fputs ("@TPOFF", file);
7966 fputs ("@TPOFF", file);
7968 fputs ("@NTPOFF", file);
7971 fputs ("@DTPOFF", file);
7973 case UNSPEC_GOTNTPOFF:
7975 fputs ("@GOTTPOFF(%rip)", file);
7977 fputs ("@GOTNTPOFF", file);
7979 case UNSPEC_INDNTPOFF:
7980 fputs ("@INDNTPOFF", file);
7983 output_operand_lossage ("invalid UNSPEC as operand");
7989 output_operand_lossage ("invalid expression as operand");
7993 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
7994 We need to emit DTP-relative relocations. */
7996 static void ATTRIBUTE_UNUSED
7997 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
7999 fputs (ASM_LONG, file);
8000 output_addr_const (file, x);
8001 fputs ("@DTPOFF", file);
8007 fputs (", 0", file);
8014 /* In the name of slightly smaller debug output, and to cater to
8015 general assembler lossage, recognize PIC+GOTOFF and turn it back
8016 into a direct symbol reference.
8018 On Darwin, this is necessary to avoid a crash, because Darwin
8019 has a different PIC label for each routine but the DWARF debugging
8020 information is not associated with any particular routine, so it's
8021 necessary to remove references to the PIC label from RTL stored by
8022 the DWARF output code. */
8025 ix86_delegitimize_address (rtx orig_x)
8028 /* reg_addend is NULL or a multiple of some register. */
8029 rtx reg_addend = NULL_RTX;
8030 /* const_addend is NULL or a const_int. */
8031 rtx const_addend = NULL_RTX;
8032 /* This is the result, or NULL. */
8033 rtx result = NULL_RTX;
8040 if (GET_CODE (x) != CONST
8041 || GET_CODE (XEXP (x, 0)) != UNSPEC
8042 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
8045 return XVECEXP (XEXP (x, 0), 0, 0);
8048 if (GET_CODE (x) != PLUS
8049 || GET_CODE (XEXP (x, 1)) != CONST)
8052 if (REG_P (XEXP (x, 0))
8053 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
8054 /* %ebx + GOT/GOTOFF */
8056 else if (GET_CODE (XEXP (x, 0)) == PLUS)
8058 /* %ebx + %reg * scale + GOT/GOTOFF */
8059 reg_addend = XEXP (x, 0);
8060 if (REG_P (XEXP (reg_addend, 0))
8061 && REGNO (XEXP (reg_addend, 0)) == PIC_OFFSET_TABLE_REGNUM)
8062 reg_addend = XEXP (reg_addend, 1);
8063 else if (REG_P (XEXP (reg_addend, 1))
8064 && REGNO (XEXP (reg_addend, 1)) == PIC_OFFSET_TABLE_REGNUM)
8065 reg_addend = XEXP (reg_addend, 0);
8068 if (!REG_P (reg_addend)
8069 && GET_CODE (reg_addend) != MULT
8070 && GET_CODE (reg_addend) != ASHIFT)
8076 x = XEXP (XEXP (x, 1), 0);
8077 if (GET_CODE (x) == PLUS
8078 && CONST_INT_P (XEXP (x, 1)))
8080 const_addend = XEXP (x, 1);
8084 if (GET_CODE (x) == UNSPEC
8085 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x))
8086 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))))
8087 result = XVECEXP (x, 0, 0);
8089 if (TARGET_MACHO && darwin_local_data_pic (x)
8091 result = XEXP (x, 0);
8097 result = gen_rtx_PLUS (Pmode, result, const_addend);
8099 result = gen_rtx_PLUS (Pmode, reg_addend, result);
8103 /* If X is a machine specific address (i.e. a symbol or label being
8104 referenced as a displacement from the GOT implemented using an
8105 UNSPEC), then return the base term. Otherwise return X. */
8108 ix86_find_base_term (rtx x)
8114 if (GET_CODE (x) != CONST)
8117 if (GET_CODE (term) == PLUS
8118 && (CONST_INT_P (XEXP (term, 1))
8119 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
8120 term = XEXP (term, 0);
8121 if (GET_CODE (term) != UNSPEC
8122 || XINT (term, 1) != UNSPEC_GOTPCREL)
8125 term = XVECEXP (term, 0, 0);
8127 if (GET_CODE (term) != SYMBOL_REF
8128 && GET_CODE (term) != LABEL_REF)
8134 term = ix86_delegitimize_address (x);
8136 if (GET_CODE (term) != SYMBOL_REF
8137 && GET_CODE (term) != LABEL_REF)
8144 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
8149 if (mode == CCFPmode || mode == CCFPUmode)
8151 enum rtx_code second_code, bypass_code;
8152 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
8153 gcc_assert (bypass_code == UNKNOWN && second_code == UNKNOWN);
8154 code = ix86_fp_compare_code_to_integer (code);
8158 code = reverse_condition (code);
8169 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
8173 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
8174 Those same assemblers have the same but opposite lossage on cmov. */
8175 gcc_assert (mode == CCmode);
8176 suffix = fp ? "nbe" : "a";
8196 gcc_assert (mode == CCmode);
8218 gcc_assert (mode == CCmode);
8219 suffix = fp ? "nb" : "ae";
8222 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
8226 gcc_assert (mode == CCmode);
8230 suffix = fp ? "u" : "p";
8233 suffix = fp ? "nu" : "np";
8238 fputs (suffix, file);
8241 /* Print the name of register X to FILE based on its machine mode and number.
8242 If CODE is 'w', pretend the mode is HImode.
8243 If CODE is 'b', pretend the mode is QImode.
8244 If CODE is 'k', pretend the mode is SImode.
8245 If CODE is 'q', pretend the mode is DImode.
8246 If CODE is 'h', pretend the reg is the 'high' byte register.
8247 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op. */
8250 print_reg (rtx x, int code, FILE *file)
8252 gcc_assert (REGNO (x) != ARG_POINTER_REGNUM
8253 && REGNO (x) != FRAME_POINTER_REGNUM
8254 && REGNO (x) != FLAGS_REG
8255 && REGNO (x) != FPSR_REG
8256 && REGNO (x) != FPCR_REG);
8258 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
8261 if (code == 'w' || MMX_REG_P (x))
8263 else if (code == 'b')
8265 else if (code == 'k')
8267 else if (code == 'q')
8269 else if (code == 'y')
8271 else if (code == 'h')
8274 code = GET_MODE_SIZE (GET_MODE (x));
8276 /* Irritatingly, AMD extended registers use a different naming convention
8277 from the normal registers. */
8278 if (REX_INT_REG_P (x))
8280 gcc_assert (TARGET_64BIT);
8284 error ("extended registers have no high halves");
8287 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
8290 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
8293 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
8296 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
8299 error ("unsupported operand size for extended register");
8307 if (STACK_TOP_P (x))
8309 fputs ("st(0)", file);
8316 if (! ANY_FP_REG_P (x))
8317 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
8322 fputs (hi_reg_name[REGNO (x)], file);
8325 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
8327 fputs (qi_reg_name[REGNO (x)], file);
8330 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
8332 fputs (qi_high_reg_name[REGNO (x)], file);
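/* Example of the overrides above for hard register 0 in ATT syntax:
   %b0 -> %al, %h0 -> %ah, %w0 -> %ax, %k0 -> %eax, and with
   TARGET_64BIT %q0 -> %rax. */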
8339 /* Locate some local-dynamic symbol still in use by this function
8340 so that we can print its name in some tls_local_dynamic_base pattern. */
8344 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
8348 if (GET_CODE (x) == SYMBOL_REF
8349 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
8351 cfun->machine->some_ld_name = XSTR (x, 0);
8359 get_some_local_dynamic_name (void)
8363 if (cfun->machine->some_ld_name)
8364 return cfun->machine->some_ld_name;
8366 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
8368 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
8369 return cfun->machine->some_ld_name;
8375 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
8376 C -- print opcode suffix for set/cmov insn.
8377 c -- like C, but print reversed condition
8378 F,f -- likewise, but for floating-point.
8379 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
8381 R -- print the prefix for register names.
8382 z -- print the opcode suffix for the size of the current operand.
8383 * -- print a star (in certain assembler syntax)
8384 A -- print an absolute memory reference.
8385 w -- print the operand as if it's a "word" (HImode) even if it isn't.
8386 s -- print a shift double count, followed by the assembler's argument
8388 b -- print the QImode name of the register for the indicated operand.
8389 %b0 would print %al if operands[0] is reg 0.
8390 w -- likewise, print the HImode name of the register.
8391 k -- likewise, print the SImode name of the register.
8392 q -- likewise, print the DImode name of the register.
8393 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
8394 y -- print "st(0)" instead of "st" as a register.
8395 D -- print condition for SSE cmp instruction.
8396 P -- if PIC, print an @PLT suffix.
8397 X -- don't print any sort of PIC '@' suffix for a symbol.
8398 & -- print some in-use local-dynamic symbol name.
8399 H -- print a memory address offset by 8; used for sse high-parts
8403 print_operand (FILE *file, rtx x, int code)
8410 if (ASSEMBLER_DIALECT == ASM_ATT)
8415 assemble_name (file, get_some_local_dynamic_name ());
8419 switch (ASSEMBLER_DIALECT)
8426 /* Intel syntax. For absolute addresses, registers should not
8427 be surrounded by braces. */
8431 PRINT_OPERAND (file, x, 0);
8441 PRINT_OPERAND (file, x, 0);
8446 if (ASSEMBLER_DIALECT == ASM_ATT)
8451 if (ASSEMBLER_DIALECT == ASM_ATT)
8456 if (ASSEMBLER_DIALECT == ASM_ATT)
8461 if (ASSEMBLER_DIALECT == ASM_ATT)
8466 if (ASSEMBLER_DIALECT == ASM_ATT)
8471 if (ASSEMBLER_DIALECT == ASM_ATT)
8476 /* 387 opcodes don't get size suffixes if the operands are registers. */
8478 if (STACK_REG_P (x))
8481 /* Likewise if using Intel opcodes. */
8482 if (ASSEMBLER_DIALECT == ASM_INTEL)
8485 /* Derive the size of the op from the size of the operand. */
8486 switch (GET_MODE_SIZE (GET_MODE (x)))
8495 #ifdef HAVE_GAS_FILDS_FISTS
8505 if (GET_MODE (x) == SFmode)
8520 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
8522 #ifdef GAS_MNEMONICS
8548 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
8550 PRINT_OPERAND (file, x, 0);
8556 /* A little bit of brain damage here. The SSE compare instructions
8557 use completely different names for the comparisons than the
8558 fp conditional moves do. */
8559 switch (GET_CODE (x))
8574 fputs ("unord", file);
8578 fputs ("neq", file);
8582 fputs ("nlt", file);
8586 fputs ("nle", file);
8589 fputs ("ord", file);
8596 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8597 if (ASSEMBLER_DIALECT == ASM_ATT)
8599 switch (GET_MODE (x))
8601 case HImode: putc ('w', file); break;
8603 case SFmode: putc ('l', file); break;
8605 case DFmode: putc ('q', file); break;
8606 default: gcc_unreachable ();
8613 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
8616 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8617 if (ASSEMBLER_DIALECT == ASM_ATT)
8620 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
8623 /* Like above, but reverse condition */
8625 /* Check to see if argument to %c is really a constant
8626 and not a condition code which needs to be reversed. */
8627 if (!COMPARISON_P (x))
8629 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
8632 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
8635 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8636 if (ASSEMBLER_DIALECT == ASM_ATT)
8639 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
8643 /* It doesn't actually matter what mode we use here, as we're
8644 only going to use this for printing. */
8645 x = adjust_address_nv (x, DImode, 8);
8652 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
8655 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
8658 int pred_val = INTVAL (XEXP (x, 0));
8660 if (pred_val < REG_BR_PROB_BASE * 45 / 100
8661 || pred_val > REG_BR_PROB_BASE * 55 / 100)
8663 int taken = pred_val > REG_BR_PROB_BASE / 2;
8664 int cputaken = final_forward_branch_p (current_output_insn) == 0;
8666 /* Emit hints only in the case where the default branch prediction
8667 heuristics would fail. */
8668 if (taken != cputaken)
8670 /* We use the 3e (DS) prefix for taken branches and
8671 the 2e (CS) prefix for not-taken branches. */
8673 fputs ("ds ; ", file);
8675 fputs ("cs ; ", file);
8682 output_operand_lossage ("invalid operand code '%c'", code);
8687 print_reg (x, code, file);
8691 /* No `byte ptr' prefix for call instructions. */
8692 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
8695 switch (GET_MODE_SIZE (GET_MODE (x)))
8697 case 1: size = "BYTE"; break;
8698 case 2: size = "WORD"; break;
8699 case 4: size = "DWORD"; break;
8700 case 8: size = "QWORD"; break;
8701 case 12: size = "XWORD"; break;
8702 case 16: size = "XMMWORD"; break;
8707 /* Check for explicit size override (codes 'b', 'w' and 'k') */
8710 else if (code == 'w')
8712 else if (code == 'k')
8716 fputs (" PTR ", file);
8720 /* Avoid (%rip) for call operands. */
8721 if (CONSTANT_ADDRESS_P (x) && code == 'P'
8722 && !CONST_INT_P (x))
8723 output_addr_const (file, x);
8724 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
8725 output_operand_lossage ("invalid constraints for operand");
8730 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
8735 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8736 REAL_VALUE_TO_TARGET_SINGLE (r, l);
8738 if (ASSEMBLER_DIALECT == ASM_ATT)
8740 fprintf (file, "0x%08lx", l);
8743 /* These float cases don't actually occur as immediate operands. */
8744 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
8748 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
8749 fprintf (file, "%s", dstr);
8752 else if (GET_CODE (x) == CONST_DOUBLE
8753 && GET_MODE (x) == XFmode)
8757 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
8758 fprintf (file, "%s", dstr);
8763 /* We have patterns that allow zero sets of memory, for instance.
8764 In 64-bit mode, we should probably support all 8-byte vectors,
8765 since we can in fact encode that into an immediate. */
8766 if (GET_CODE (x) == CONST_VECTOR)
8768 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
8774 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
8776 if (ASSEMBLER_DIALECT == ASM_ATT)
8779 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
8780 || GET_CODE (x) == LABEL_REF)
8782 if (ASSEMBLER_DIALECT == ASM_ATT)
8785 fputs ("OFFSET FLAT:", file);
8788 if (CONST_INT_P (x))
8789 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
8791 output_pic_addr_const (file, x, code);
8793 output_addr_const (file, x);
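/* The {att|intel} braces seen in the output templates above select between
   the two assembler dialects: text before '|' is emitted for AT&T syntax,
   text after it for Intel. A rough standalone model of that expansion
   (the real work happens in output_asm_insn via ASSEMBLER_DIALECT; this
   helper and its assumptions -- well-formed, non-nested braces -- are
   purely illustrative):  */
#if 0	/* illustrative sketch only, not built */
#include <stdio.h>
#include <string.h>

static void
emit_dialect_template (FILE *f, const char *tmpl, int intel)
{
  for (; *tmpl; tmpl++)
    if (*tmpl == '{')
      {
	const char *bar = strchr (tmpl, '|');
	const char *end = strchr (tmpl, '}');
	if (intel)
	  fwrite (bar + 1, 1, end - bar - 1, f);	/* Intel half */
	else
	  fwrite (tmpl + 1, 1, bar - tmpl - 1, f);	/* AT&T half */
	tmpl = end;
      }
    else
      putc (*tmpl, f);
}
#endif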
8797 /* Print a memory operand whose address is ADDR. */
8800 print_operand_address (FILE *file, rtx addr)
8802 struct ix86_address parts;
8803 rtx base, index, disp;
8805 int ok = ix86_decompose_address (addr, &parts);
8810 index = parts.index;
8812 scale = parts.scale;
8820 if (USER_LABEL_PREFIX[0] == 0)
8822 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
8828 if (!base && !index)
8830 /* A displacement-only address requires special attention. */
8832 if (CONST_INT_P (disp))
8834 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
8836 if (USER_LABEL_PREFIX[0] == 0)
8838 fputs ("ds:", file);
8840 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
8843 output_pic_addr_const (file, disp, 0);
8845 output_addr_const (file, disp);
8847 /* Use the one-byte-shorter RIP-relative addressing for 64-bit mode. */
8850 if (GET_CODE (disp) == CONST
8851 && GET_CODE (XEXP (disp, 0)) == PLUS
8852 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
8853 disp = XEXP (XEXP (disp, 0), 0);
8854 if (GET_CODE (disp) == LABEL_REF
8855 || (GET_CODE (disp) == SYMBOL_REF
8856 && SYMBOL_REF_TLS_MODEL (disp) == 0))
8857 fputs ("(%rip)", file);
8862 if (ASSEMBLER_DIALECT == ASM_ATT)
8867 output_pic_addr_const (file, disp, 0);
8868 else if (GET_CODE (disp) == LABEL_REF)
8869 output_asm_label (disp);
8871 output_addr_const (file, disp);
8876 print_reg (base, 0, file);
8880 print_reg (index, 0, file);
8882 fprintf (file, ",%d", scale);
8888 rtx offset = NULL_RTX;
8892 /* Pull out the offset of a symbol; print any symbol itself. */
8893 if (GET_CODE (disp) == CONST
8894 && GET_CODE (XEXP (disp, 0)) == PLUS
8895 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
8897 offset = XEXP (XEXP (disp, 0), 1);
8898 disp = gen_rtx_CONST (VOIDmode,
8899 XEXP (XEXP (disp, 0), 0));
8903 output_pic_addr_const (file, disp, 0);
8904 else if (GET_CODE (disp) == LABEL_REF)
8905 output_asm_label (disp);
8906 else if (CONST_INT_P (disp))
8909 output_addr_const (file, disp);
8915 print_reg (base, 0, file);
8918 if (INTVAL (offset) >= 0)
8920 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
8924 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
8931 print_reg (index, 0, file);
8933 fprintf (file, "*%d", scale);
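/* For reference, the two syntaxes assembled above for a full
   base/index/scale/displacement address. A simplified model (assumes a
   non-null base and omits segment overrides and PIC; the function is
   hypothetical -- the real code prints each part via print_reg etc.):  */
#if 0	/* illustrative sketch only, not built */
#include <stdio.h>

static void
format_address (FILE *f, const char *base, const char *index,
		int scale, long disp, int intel)
{
  if (!intel)
    {
      /* AT&T:  disp(%base,%index,scale)  */
      if (disp)
	fprintf (f, "%ld", disp);
      fprintf (f, "(%%%s", base);
      if (index)
	fprintf (f, ",%%%s,%d", index, scale);
      putc (')', f);
    }
  else
    {
      /* Intel:  [base+index*scale+disp]  */
      fprintf (f, "[%s", base);
      if (index)
	fprintf (f, "+%s*%d", index, scale);
      if (disp)
	fprintf (f, "%+ld", disp);
      putc (']', f);
    }
}
#endif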
8941 output_addr_const_extra (FILE *file, rtx x)
8945 if (GET_CODE (x) != UNSPEC)
8948 op = XVECEXP (x, 0, 0);
8949 switch (XINT (x, 1))
8951 case UNSPEC_GOTTPOFF:
8952 output_addr_const (file, op);
8953 /* FIXME: This might be @TPOFF in Sun ld. */
8954 fputs ("@GOTTPOFF", file);
8957 output_addr_const (file, op);
8958 fputs ("@TPOFF", file);
8961 output_addr_const (file, op);
8963 fputs ("@TPOFF", file);
8965 fputs ("@NTPOFF", file);
8968 output_addr_const (file, op);
8969 fputs ("@DTPOFF", file);
8971 case UNSPEC_GOTNTPOFF:
8972 output_addr_const (file, op);
8974 fputs ("@GOTTPOFF(%rip)", file);
8976 fputs ("@GOTNTPOFF", file);
8978 case UNSPEC_INDNTPOFF:
8979 output_addr_const (file, op);
8980 fputs ("@INDNTPOFF", file);
8990 /* Split one or more DImode RTL references into pairs of SImode
8991 references. The RTL can be REG, offsettable MEM, integer constant, or
8992 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
8993 split and "num" is its length. lo_half and hi_half are output arrays
8994 that parallel "operands". */
8997 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
9001 rtx op = operands[num];
9003 /* simplify_subreg refuses to split volatile memory addresses,
9004 but we still have to handle them. */
9007 lo_half[num] = adjust_address (op, SImode, 0);
9008 hi_half[num] = adjust_address (op, SImode, 4);
9012 lo_half[num] = simplify_gen_subreg (SImode, op,
9013 GET_MODE (op) == VOIDmode
9014 ? DImode : GET_MODE (op), 0);
9015 hi_half[num] = simplify_gen_subreg (SImode, op,
9016 GET_MODE (op) == VOIDmode
9017 ? DImode : GET_MODE (op), 4);
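/* At run time the split amounts to just this, since the target is
   little-endian: the low SImode half sits at offset 0 and the high half
   at offset 4. A scalar model (function name hypothetical):  */
#if 0	/* illustrative sketch only, not built */
#include <stdint.h>

static void
split_u64 (uint64_t op, uint32_t *lo, uint32_t *hi)
{
  *lo = (uint32_t) op;		/* adjust_address (op, SImode, 0) */
  *hi = (uint32_t) (op >> 32);	/* adjust_address (op, SImode, 4) */
}
#endif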
9021 /* Split one or more TImode RTL references into pairs of DImode
9022 references. The RTL can be REG, offsettable MEM, integer constant, or
9023 CONST_DOUBLE. "operands" is a pointer to an array of TImode RTL to
9024 split and "num" is its length. lo_half and hi_half are output arrays
9025 that parallel "operands". */
9028 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
9032 rtx op = operands[num];
9034 /* simplify_subreg refuses to split volatile memory addresses, but we
9035 still have to handle them. */
9038 lo_half[num] = adjust_address (op, DImode, 0);
9039 hi_half[num] = adjust_address (op, DImode, 8);
9043 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
9044 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
9049 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
9050 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
9051 is the expression of the binary operation. The output may either be
9052 emitted here, or returned to the caller, like all output_* functions.
9054 There is no guarantee that the operands are the same mode, as they
9055 might be within FLOAT or FLOAT_EXTEND expressions. */
9057 #ifndef SYSV386_COMPAT
9058 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
9059 wants to fix the assemblers because that causes incompatibility
9060 with gcc. No-one wants to fix gcc because that causes
9061 incompatibility with assemblers... You can use the option of
9062 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
9063 #define SYSV386_COMPAT 1
9067 output_387_binary_op (rtx insn, rtx *operands)
9069 static char buf[30];
9072 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
9074 #ifdef ENABLE_CHECKING
9075 /* Even if we do not want to check the inputs, this documents the input
9076 constraints, which helps in understanding the following code. */
9077 if (STACK_REG_P (operands[0])
9078 && ((REG_P (operands[1])
9079 && REGNO (operands[0]) == REGNO (operands[1])
9080 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
9081 || (REG_P (operands[2])
9082 && REGNO (operands[0]) == REGNO (operands[2])
9083 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
9084 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
9087 gcc_assert (is_sse);
9090 switch (GET_CODE (operands[3]))
9093 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
9094 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
9102 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
9103 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
9111 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
9112 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
9120 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
9121 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
9135 if (GET_MODE (operands[0]) == SFmode)
9136 strcat (buf, "ss\t{%2, %0|%0, %2}");
9138 strcat (buf, "sd\t{%2, %0|%0, %2}");
9143 switch (GET_CODE (operands[3]))
9147 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
9149 rtx temp = operands[2];
9150 operands[2] = operands[1];
9154 /* We know operands[0] == operands[1]. */
9156 if (MEM_P (operands[2]))
9162 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
9164 if (STACK_TOP_P (operands[0]))
9165 /* How is it that we are storing to a dead operand[2]?
9166 Well, presumably operands[1] is dead too. We can't
9167 store the result to st(0) as st(0) gets popped on this
9168 instruction. Instead store to operands[2] (which I
9169 think has to be st(1)). st(1) will be popped later.
9170 gcc <= 2.8.1 didn't have this check and generated
9171 assembly code that the Unixware assembler rejected. */
9172 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
9174 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
9178 if (STACK_TOP_P (operands[0]))
9179 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
9181 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
9186 if (MEM_P (operands[1]))
9192 if (MEM_P (operands[2]))
9198 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
9201 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
9202 derived assemblers, confusingly reverse the direction of
9203 the operation for fsub{r} and fdiv{r} when the
9204 destination register is not st(0). The Intel assembler
9205 doesn't have this brain damage. Read !SYSV386_COMPAT to
9206 figure out what the hardware really does. */
9207 if (STACK_TOP_P (operands[0]))
9208 p = "{p\t%0, %2|rp\t%2, %0}";
9210 p = "{rp\t%2, %0|p\t%0, %2}";
9212 if (STACK_TOP_P (operands[0]))
9213 /* As above for fmul/fadd, we can't store to st(0). */
9214 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
9216 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
9221 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
9224 if (STACK_TOP_P (operands[0]))
9225 p = "{rp\t%0, %1|p\t%1, %0}";
9227 p = "{p\t%1, %0|rp\t%0, %1}";
9229 if (STACK_TOP_P (operands[0]))
9230 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
9232 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
9237 if (STACK_TOP_P (operands[0]))
9239 if (STACK_TOP_P (operands[1]))
9240 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
9242 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
9245 else if (STACK_TOP_P (operands[1]))
9248 p = "{\t%1, %0|r\t%0, %1}";
9250 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
9256 p = "{r\t%2, %0|\t%0, %2}";
9258 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
9271 /* Return the needed mode for the entity in the optimize_mode_switching pass. */
9274 ix86_mode_needed (int entity, rtx insn)
9276 enum attr_i387_cw mode;
9278 /* The mode UNINITIALIZED is used to store the control word after a
9279 function call or ASM pattern. The mode ANY specifies that the function
9280 has no requirements on the control word and makes no changes in the
9281 bits we are interested in. */
9284 || (NONJUMP_INSN_P (insn)
9285 && (asm_noperands (PATTERN (insn)) >= 0
9286 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
9287 return I387_CW_UNINITIALIZED;
9289 if (recog_memoized (insn) < 0)
9292 mode = get_attr_i387_cw (insn);
9297 if (mode == I387_CW_TRUNC)
9302 if (mode == I387_CW_FLOOR)
9307 if (mode == I387_CW_CEIL)
9312 if (mode == I387_CW_MASK_PM)
9323 /* Output code to initialize the control word copies used by the trunc?f?i
9324 and rounding patterns. CURRENT_MODE is set to the current control word,
9325 while NEW_MODE is set to the new control word. */
9328 emit_i387_cw_initialization (int mode)
9330 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
9335 rtx reg = gen_reg_rtx (HImode);
9337 emit_insn (gen_x86_fnstcw_1 (stored_mode));
9338 emit_move_insn (reg, copy_rtx (stored_mode));
9340 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL || optimize_size)
9345 /* round toward zero (truncate) */
9346 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
9347 slot = SLOT_CW_TRUNC;
9351 /* round down toward -oo */
9352 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
9353 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
9354 slot = SLOT_CW_FLOOR;
9358 /* round up toward +oo */
9359 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
9360 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
9361 slot = SLOT_CW_CEIL;
9364 case I387_CW_MASK_PM:
9365 /* mask precision exception for nearbyint() */
9366 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
9367 slot = SLOT_CW_MASK_PM;
9379 /* round toward zero (truncate) */
9380 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
9381 slot = SLOT_CW_TRUNC;
9385 /* round down toward -oo */
9386 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
9387 slot = SLOT_CW_FLOOR;
9391 /* round up toward +oo */
9392 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
9393 slot = SLOT_CW_CEIL;
9396 case I387_CW_MASK_PM:
9397 /* mask precision exception for nearbyint() */
9398 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
9399 slot = SLOT_CW_MASK_PM;
9407 gcc_assert (slot < MAX_386_STACK_LOCALS);
9409 new_mode = assign_386_stack_local (HImode, slot);
9410 emit_move_insn (new_mode, reg);
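/* For reference, the bits edited above live in the x87 control word:
   bits 10-11 are the rounding-control field (00 = to nearest, 01 = down,
   10 = up, 11 = toward zero/truncate) and bit 5 masks the precision
   exception. A scalar model of the edits (the function and its mode
   encoding are hypothetical; the masks match the GEN_INTs above and the
   IA-32 manuals):  */
#if 0	/* illustrative sketch only, not built */
#include <stdint.h>

enum cw_mode { CW_TRUNC, CW_FLOOR, CW_CEIL, CW_MASK_PM };

static uint16_t
cw_for_mode (uint16_t cw, enum cw_mode mode)
{
  switch (mode)
    {
    case CW_TRUNC:   return cw | 0x0c00;		/* RC = 11 */
    case CW_FLOOR:   return (cw & ~0x0c00) | 0x0400;	/* RC = 01 */
    case CW_CEIL:    return (cw & ~0x0c00) | 0x0800;	/* RC = 10 */
    case CW_MASK_PM: return cw | 0x0020;		/* PM = 1  */
    }
  return cw;
}
#endif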
9413 /* Output code for INSN to convert a float to a signed int. OPERANDS
9414 are the insn operands. The output may be [HSD]Imode and the input
9415 operand may be [SDX]Fmode. */
9418 output_fix_trunc (rtx insn, rtx *operands, int fisttp)
9420 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
9421 int dimode_p = GET_MODE (operands[0]) == DImode;
9422 int round_mode = get_attr_i387_cw (insn);
9424 /* Jump through a hoop or two for DImode, since the hardware has no
9425 non-popping instruction. We used to do this a different way, but
9426 that was somewhat fragile and broke with post-reload splitters. */
9427 if ((dimode_p || fisttp) && !stack_top_dies)
9428 output_asm_insn ("fld\t%y1", operands);
9430 gcc_assert (STACK_TOP_P (operands[1]));
9431 gcc_assert (MEM_P (operands[0]));
9432 gcc_assert (GET_MODE (operands[1]) != TFmode);
9435 output_asm_insn ("fisttp%z0\t%0", operands);
9438 if (round_mode != I387_CW_ANY)
9439 output_asm_insn ("fldcw\t%3", operands);
9440 if (stack_top_dies || dimode_p)
9441 output_asm_insn ("fistp%z0\t%0", operands);
9443 output_asm_insn ("fist%z0\t%0", operands);
9444 if (round_mode != I387_CW_ANY)
9445 output_asm_insn ("fldcw\t%2", operands);
9451 /* Output code for x87 ffreep insn. The OPNO argument, which may only
9452 have the values zero or one, indicates the ffreep insn's operand
9453 from the OPERANDS array. */
9456 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
9458 if (TARGET_USE_FFREEP)
9459 #if HAVE_AS_IX86_FFREEP
9460 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
9463 static char retval[] = ".word\t0xc_df";
9464 int regno = REGNO (operands[opno]);
9466 gcc_assert (FP_REGNO_P (regno));
9468 retval[9] = '0' + (regno - FIRST_STACK_REG);
9473 return opno ? "fstp\t%y1" : "fstp\t%y0";
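/* ffreep %st(i) encodes as the two bytes 0xdf, 0xc0+i. Emitted through a
   little-endian .word the bytes appear swapped, which is why the template
   patched above reads "0xc_df": st(3), say, becomes ".word 0xc3df".
   A model of the patching (assumes 0 <= i < 8):  */
#if 0	/* illustrative sketch only, not built */
#include <stdio.h>

static void
emit_ffreep (FILE *f, int i)
{
  fprintf (f, "\t.word\t0x%02xdf\n", 0xc0 + i);
}
#endif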
9477 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
9478 should be used. UNORDERED_P is true when fucom should be used. */
9481 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
9484 rtx cmp_op0, cmp_op1;
9485 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
9489 cmp_op0 = operands[0];
9490 cmp_op1 = operands[1];
9494 cmp_op0 = operands[1];
9495 cmp_op1 = operands[2];
9500 if (GET_MODE (operands[0]) == SFmode)
9502 return "ucomiss\t{%1, %0|%0, %1}";
9504 return "comiss\t{%1, %0|%0, %1}";
9507 return "ucomisd\t{%1, %0|%0, %1}";
9509 return "comisd\t{%1, %0|%0, %1}";
9512 gcc_assert (STACK_TOP_P (cmp_op0));
9514 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
9516 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
9520 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
9521 return output_387_ffreep (operands, 1);
9524 return "ftst\n\tfnstsw\t%0";
9527 if (STACK_REG_P (cmp_op1)
9529 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
9530 && REGNO (cmp_op1) != FIRST_STACK_REG)
9532 /* If the top of the 387 stack dies, and the other operand
9533 is also a stack register that dies, then this must be a
9534 `fcompp' float compare. */
9538 /* There is no double-popping fcomi variant. Fortunately,
9539 eflags is immune to the fstp's cc clobbering. */
9541 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
9543 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
9544 return output_387_ffreep (operands, 0);
9549 return "fucompp\n\tfnstsw\t%0";
9551 return "fcompp\n\tfnstsw\t%0";
9556 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
9558 static const char * const alt[16] =
9560 "fcom%z2\t%y2\n\tfnstsw\t%0",
9561 "fcomp%z2\t%y2\n\tfnstsw\t%0",
9562 "fucom%z2\t%y2\n\tfnstsw\t%0",
9563 "fucomp%z2\t%y2\n\tfnstsw\t%0",
9565 "ficom%z2\t%y2\n\tfnstsw\t%0",
9566 "ficomp%z2\t%y2\n\tfnstsw\t%0",
9570 "fcomi\t{%y1, %0|%0, %y1}",
9571 "fcomip\t{%y1, %0|%0, %y1}",
9572 "fucomi\t{%y1, %0|%0, %y1}",
9573 "fucomip\t{%y1, %0|%0, %y1}",
9584 mask = eflags_p << 3;
9585 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
9586 mask |= unordered_p << 1;
9587 mask |= stack_top_dies;
9589 gcc_assert (mask < 16);
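/* So the table index packs four predicates into one 4-bit value:
   bit 3 = fcomi-style (eflags) compare, bit 2 = integer memory operand,
   bit 1 = unordered compare, bit 0 = top-of-stack dies (use a popping
   form). E.g. a register fucomip whose source dies is 8|2|1 = 11.
   A model of the packing (function name hypothetical):  */
#if 0	/* illustrative sketch only, not built */
static int
fp_compare_alt_index (int eflags_p, int int_op_p, int unordered_p,
		      int stack_top_dies)
{
  return (eflags_p << 3) | (int_op_p << 2)
	 | (unordered_p << 1) | stack_top_dies;		/* 0 .. 15 */
}
#endif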
9598 ix86_output_addr_vec_elt (FILE *file, int value)
9600 const char *directive = ASM_LONG;
9604 directive = ASM_QUAD;
9606 gcc_assert (!TARGET_64BIT);
9609 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
9613 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
9615 const char *directive = ASM_LONG;
9618 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
9619 directive = ASM_QUAD;
9621 gcc_assert (!TARGET_64BIT);
9623 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
9624 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
9625 fprintf (file, "%s%s%d-%s%d\n",
9626 directive, LPREFIX, value, LPREFIX, rel);
9627 else if (HAVE_AS_GOTOFF_IN_DATA)
9628 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
9630 else if (TARGET_MACHO)
9632 fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
9633 machopic_output_function_base_name (file);
9634 fprintf(file, "\n");
9638 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
9639 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
9642 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate. */
9646 ix86_expand_clear (rtx dest)
9650 /* We play register width games, which are only valid after reload. */
9651 gcc_assert (reload_completed);
9653 /* Avoid HImode and its attendant prefix byte. */
9654 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
9655 dest = gen_rtx_REG (SImode, REGNO (dest));
9656 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
9658 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
9659 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
9661 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
9662 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
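/* The size difference that motivates the xor form, in encoded bytes
   (a sketch of the two alternatives; xor additionally breaks any false
   dependency on the old register value but clobbers the flags, hence the
   CLOBBER attached above):  */
#if 0	/* illustrative sketch only, not built */
static const unsigned char mov0_eax[] = { 0xb8, 0, 0, 0, 0 };	/* movl $0, %eax: 5 bytes */
static const unsigned char xor_eax[]  = { 0x31, 0xc0 };		/* xorl %eax, %eax: 2 bytes */
#endif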
9668 /* X is an unchanging MEM. If it is a constant pool reference, return
9669 the constant pool rtx, else NULL. */
9672 maybe_get_pool_constant (rtx x)
9674 x = ix86_delegitimize_address (XEXP (x, 0));
9676 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
9677 return get_pool_constant (x);
9683 ix86_expand_move (enum machine_mode mode, rtx operands[])
9685 int strict = (reload_in_progress || reload_completed);
9687 enum tls_model model;
9692 if (GET_CODE (op1) == SYMBOL_REF)
9694 model = SYMBOL_REF_TLS_MODEL (op1);
9697 op1 = legitimize_tls_address (op1, model, true);
9698 op1 = force_operand (op1, op0);
9702 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
9703 && SYMBOL_REF_DLLIMPORT_P (op1))
9704 op1 = legitimize_dllimport_symbol (op1, false);
9706 else if (GET_CODE (op1) == CONST
9707 && GET_CODE (XEXP (op1, 0)) == PLUS
9708 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
9710 rtx addend = XEXP (XEXP (op1, 0), 1);
9711 rtx symbol = XEXP (XEXP (op1, 0), 0);
9714 model = SYMBOL_REF_TLS_MODEL (symbol);
9716 tmp = legitimize_tls_address (symbol, model, true);
9717 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
9718 && SYMBOL_REF_DLLIMPORT_P (symbol))
9719 tmp = legitimize_dllimport_symbol (symbol, true);
9723 tmp = force_operand (tmp, NULL);
9724 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
9725 op0, 1, OPTAB_DIRECT);
9731 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
9733 if (TARGET_MACHO && !TARGET_64BIT)
9738 rtx temp = ((reload_in_progress
9739 || ((op0 && REG_P (op0))
9741 ? op0 : gen_reg_rtx (Pmode));
9742 op1 = machopic_indirect_data_reference (op1, temp);
9743 op1 = machopic_legitimize_pic_address (op1, mode,
9744 temp == op1 ? 0 : temp);
9746 else if (MACHOPIC_INDIRECT)
9747 op1 = machopic_indirect_data_reference (op1, 0);
9755 op1 = force_reg (Pmode, op1);
9756 else if (!TARGET_64BIT || !x86_64_movabs_operand (op1, Pmode))
9758 rtx reg = no_new_pseudos ? op0 : NULL_RTX;
9759 op1 = legitimize_pic_address (op1, reg);
9768 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
9769 || !push_operand (op0, mode))
9771 op1 = force_reg (mode, op1);
9773 if (push_operand (op0, mode)
9774 && ! general_no_elim_operand (op1, mode))
9775 op1 = copy_to_mode_reg (mode, op1);
9777 /* Force large constants in 64-bit compilation into a register
9778 to get them CSEed. */
9779 if (TARGET_64BIT && mode == DImode
9780 && immediate_operand (op1, mode)
9781 && !x86_64_zext_immediate_operand (op1, VOIDmode)
9782 && !register_operand (op0, mode)
9783 && optimize && !reload_completed && !reload_in_progress)
9784 op1 = copy_to_mode_reg (mode, op1);
9786 if (FLOAT_MODE_P (mode))
9788 /* If we are loading a floating point constant to a register,
9789 force the value to memory now, since we'll get better code
9790 out of the back end. */
9794 else if (GET_CODE (op1) == CONST_DOUBLE)
9796 op1 = validize_mem (force_const_mem (mode, op1));
9797 if (!register_operand (op0, mode))
9799 rtx temp = gen_reg_rtx (mode);
9800 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
9801 emit_move_insn (op0, temp);
9808 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
9812 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
9814 rtx op0 = operands[0], op1 = operands[1];
9815 unsigned int align = GET_MODE_ALIGNMENT (mode);
9817 /* Force constants other than zero into memory. We do not know how
9818 the instructions used to build constants modify the upper 64 bits
9819 of the register; once we have that information we may be able
9820 to handle some of them more efficiently. */
9821 if ((reload_in_progress | reload_completed) == 0
9822 && register_operand (op0, mode)
9823 && (CONSTANT_P (op1)
9824 || (GET_CODE (op1) == SUBREG
9825 && CONSTANT_P (SUBREG_REG (op1))))
9826 && standard_sse_constant_p (op1) <= 0)
9827 op1 = validize_mem (force_const_mem (mode, op1));
9829 /* TDmode values are passed as TImode on the stack. TImode values
9830 are moved via xmm registers, and moving them to the stack can result in
9831 unaligned memory access. Use ix86_expand_vector_move_misalign()
9832 if the memory operand is not aligned correctly. */
9834 && (mode == TImode) && !TARGET_64BIT
9835 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
9836 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
9840 /* ix86_expand_vector_move_misalign() does not like constants ... */
9841 if (CONSTANT_P (op1)
9842 || (GET_CODE (op1) == SUBREG
9843 && CONSTANT_P (SUBREG_REG (op1))))
9844 op1 = validize_mem (force_const_mem (mode, op1));
9846 /* ... nor both arguments in memory. */
9847 if (!register_operand (op0, mode)
9848 && !register_operand (op1, mode))
9849 op1 = force_reg (mode, op1);
9851 tmp[0] = op0; tmp[1] = op1;
9852 ix86_expand_vector_move_misalign (mode, tmp);
9856 /* Make operand1 a register if it isn't already. */
9858 && !register_operand (op0, mode)
9859 && !register_operand (op1, mode))
9861 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
9865 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
9868 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
9869 straight to ix86_expand_vector_move. */
9870 /* Code generation for scalar reg-reg moves of single and double precision data:
9871 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
9875 if (x86_sse_partial_reg_dependency == true)
9880 Code generation for scalar loads of double precision data:
9881 if (x86_sse_split_regs == true)
9882 movlpd mem, reg (gas syntax)
9886 Code generation for unaligned packed loads of single precision data
9887 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
9888 if (x86_sse_unaligned_move_optimal)
9891 if (x86_sse_partial_reg_dependency == true)
9903 Code generation for unaligned packed loads of double precision data
9904 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
9905 if (x86_sse_unaligned_move_optimal)
9908 if (x86_sse_split_regs == true)
9921 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
9930 /* If we're optimizing for size, movups is the smallest. */
9933 op0 = gen_lowpart (V4SFmode, op0);
9934 op1 = gen_lowpart (V4SFmode, op1);
9935 emit_insn (gen_sse_movups (op0, op1));
9939 /* ??? If we have typed data, then it would appear that using
9940 movdqu is the only way to get unaligned data loaded with integer type. */
9942 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
9944 op0 = gen_lowpart (V16QImode, op0);
9945 op1 = gen_lowpart (V16QImode, op1);
9946 emit_insn (gen_sse2_movdqu (op0, op1));
9950 if (TARGET_SSE2 && mode == V2DFmode)
9954 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
9956 op0 = gen_lowpart (V2DFmode, op0);
9957 op1 = gen_lowpart (V2DFmode, op1);
9958 emit_insn (gen_sse2_movupd (op0, op1));
9962 /* When SSE registers are split into halves, we can avoid
9963 writing to the top half twice. */
9964 if (TARGET_SSE_SPLIT_REGS)
9966 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
9971 /* ??? Not sure about the best option for the Intel chips.
9972 The following would seem to satisfy; the register is
9973 entirely cleared, breaking the dependency chain. We
9974 then store to the upper half, with a dependency depth
9975 of one. A rumor has it that Intel recommends two movsd
9976 followed by an unpacklpd, but this is unconfirmed. And
9977 given that the dependency depth of the unpacklpd would
9978 still be one, I'm not sure why this would be better. */
9979 zero = CONST0_RTX (V2DFmode);
9982 m = adjust_address (op1, DFmode, 0);
9983 emit_insn (gen_sse2_loadlpd (op0, zero, m));
9984 m = adjust_address (op1, DFmode, 8);
9985 emit_insn (gen_sse2_loadhpd (op0, op0, m));
9989 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
9991 op0 = gen_lowpart (V4SFmode, op0);
9992 op1 = gen_lowpart (V4SFmode, op1);
9993 emit_insn (gen_sse_movups (op0, op1));
9997 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
9998 emit_move_insn (op0, CONST0_RTX (mode));
10000 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
10002 if (mode != V4SFmode)
10003 op0 = gen_lowpart (V4SFmode, op0);
10004 m = adjust_address (op1, V2SFmode, 0);
10005 emit_insn (gen_sse_loadlps (op0, op0, m));
10006 m = adjust_address (op1, V2SFmode, 8);
10007 emit_insn (gen_sse_loadhps (op0, op0, m));
10010 else if (MEM_P (op0))
10012 /* If we're optimizing for size, movups is the smallest. */
10015 op0 = gen_lowpart (V4SFmode, op0);
10016 op1 = gen_lowpart (V4SFmode, op1);
10017 emit_insn (gen_sse_movups (op0, op1));
10021 /* ??? Similar to above, only less clear because of quote
10022 typeless stores unquote. */
10023 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
10024 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
10026 op0 = gen_lowpart (V16QImode, op0);
10027 op1 = gen_lowpart (V16QImode, op1);
10028 emit_insn (gen_sse2_movdqu (op0, op1));
10032 if (TARGET_SSE2 && mode == V2DFmode)
10034 m = adjust_address (op0, DFmode, 0);
10035 emit_insn (gen_sse2_storelpd (m, op1));
10036 m = adjust_address (op0, DFmode, 8);
10037 emit_insn (gen_sse2_storehpd (m, op1));
10041 if (mode != V4SFmode)
10042 op1 = gen_lowpart (V4SFmode, op1);
10043 m = adjust_address (op0, V2SFmode, 0);
10044 emit_insn (gen_sse_storelps (m, op1));
10045 m = adjust_address (op0, V2SFmode, 8);
10046 emit_insn (gen_sse_storehps (m, op1));
10050 gcc_unreachable ();
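/* The same alternatives, sketched with SSE2 intrinsics for the V2DF load
   case (assumes <emmintrin.h>; which path the expander actually picks
   depends on the tuning flags tested above):  */
#if 0	/* illustrative sketch only, not built */
#include <emmintrin.h>

static __m128d
load_v2df_unaligned (const double *p, int split_regs)
{
  if (!split_regs)
    return _mm_loadu_pd (p);		/* movupd: one unaligned load */
  /* Avoid writing the full register twice: build it from two halves.  */
  __m128d x = _mm_load_sd (p);		/* movsd: low half, upper zeroed */
  return _mm_loadh_pd (x, p + 1);	/* movhpd: high half */
}
#endif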
10053 /* Expand a push in MODE. This is some mode for which we do not support
10054 proper push instructions, at least from the registers that we expect
10055 the value to live in. */
10058 ix86_expand_push (enum machine_mode mode, rtx x)
10062 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
10063 GEN_INT (-GET_MODE_SIZE (mode)),
10064 stack_pointer_rtx, 1, OPTAB_DIRECT);
10065 if (tmp != stack_pointer_rtx)
10066 emit_move_insn (stack_pointer_rtx, tmp);
10068 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
10069 emit_move_insn (tmp, x);
10072 /* Helper function of ix86_fixup_binary_operands to canonicalize
10073 operand order. Returns true if the operands should be swapped. */
10076 ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
10079 rtx dst = operands[0];
10080 rtx src1 = operands[1];
10081 rtx src2 = operands[2];
10083 /* If the operation is not commutative, we can't do anything. */
10084 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
10087 /* Highest priority is that src1 should match dst. */
10088 if (rtx_equal_p (dst, src1))
10090 if (rtx_equal_p (dst, src2))
10093 /* Next highest priority is that immediate constants come second. */
10094 if (immediate_operand (src2, mode))
10096 if (immediate_operand (src1, mode))
10099 /* Lowest priority is that memory references should come second. */
10109 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
10110 destination to use for the operation. If different from the true
10111 destination in operands[0], a copy operation will be required. */
10114 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
10117 rtx dst = operands[0];
10118 rtx src1 = operands[1];
10119 rtx src2 = operands[2];
10121 /* Canonicalize operand order. */
10122 if (ix86_swap_binary_operands_p (code, mode, operands))
10129 /* Both source operands cannot be in memory. */
10130 if (MEM_P (src1) && MEM_P (src2))
10132 /* Optimization: Only read from memory once. */
10133 if (rtx_equal_p (src1, src2))
10135 src2 = force_reg (mode, src2);
10139 src2 = force_reg (mode, src2);
10142 /* If the destination is memory, and we do not have matching source
10143 operands, do things in registers. */
10144 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
10145 dst = gen_reg_rtx (mode);
10147 /* Source 1 cannot be a constant. */
10148 if (CONSTANT_P (src1))
10149 src1 = force_reg (mode, src1);
10151 /* Source 1 cannot be a non-matching memory. */
10152 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
10153 src1 = force_reg (mode, src1);
10155 operands[1] = src1;
10156 operands[2] = src2;
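/* A condensed model of the priorities applied above for a commutative
   operation dst = src1 op src2 (hypothetical helper; the real code also
   re-reads the possibly-swapped operands afterwards):  */
#if 0	/* illustrative sketch only, not built */
static void
canonicalize_commutative (rtx dst, rtx *src1, rtx *src2)
{
  int swap = 0;
  if (rtx_equal_p (dst, *src1))
    swap = 0;				/* highest priority: already matching */
  else if (rtx_equal_p (dst, *src2))
    swap = 1;				/* make src1 match dst */
  else if (immediate_operand (*src1, VOIDmode))
    swap = 1;				/* immediates come second */
  else if (MEM_P (*src1) && !MEM_P (*src2))
    swap = 1;				/* memory references come second */
  if (swap)
    {
      rtx tmp = *src1;
      *src1 = *src2;
      *src2 = tmp;
    }
}
#endif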
10160 /* Similarly, but assume that the destination has already been
10161 set up properly. */
10164 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
10165 enum machine_mode mode, rtx operands[])
10167 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
10168 gcc_assert (dst == operands[0]);
10171 /* Attempt to expand a binary operator. Make the expansion closer to the
10172 actual machine than just general_operand, which will allow 3 separate
10173 memory references (one output, two input) in a single insn. */
10176 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
10179 rtx src1, src2, dst, op, clob;
10181 dst = ix86_fixup_binary_operands (code, mode, operands);
10182 src1 = operands[1];
10183 src2 = operands[2];
10185 /* Emit the instruction. */
10187 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
10188 if (reload_in_progress)
10190 /* Reload doesn't know about the flags register, and doesn't know that
10191 it doesn't want to clobber it. We can only do this with PLUS. */
10192 gcc_assert (code == PLUS);
10197 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
10198 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
10201 /* Fix up the destination if needed. */
10202 if (dst != operands[0])
10203 emit_move_insn (operands[0], dst);
10206 /* Return TRUE or FALSE depending on whether the binary operator meets the
10207 appropriate constraints. */
10210 ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
10213 rtx dst = operands[0];
10214 rtx src1 = operands[1];
10215 rtx src2 = operands[2];
10217 /* Both source operands cannot be in memory. */
10218 if (MEM_P (src1) && MEM_P (src2))
10221 /* Canonicalize operand order for commutative operators. */
10222 if (ix86_swap_binary_operands_p (code, mode, operands))
10229 /* If the destination is memory, we must have a matching source operand. */
10230 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
10233 /* Source 1 cannot be a constant. */
10234 if (CONSTANT_P (src1))
10237 /* Source 1 cannot be a non-matching memory. */
10238 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
10244 /* Attempt to expand a unary operator. Make the expansion closer to the
10245 actual machine than just general_operand, which will allow 2 separate
10246 memory references (one output, one input) in a single insn. */
10249 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
10252 int matching_memory;
10253 rtx src, dst, op, clob;
10258 /* If the destination is memory, and we do not have matching source
10259 operands, do things in registers. */
10260 matching_memory = 0;
10263 if (rtx_equal_p (dst, src))
10264 matching_memory = 1;
10266 dst = gen_reg_rtx (mode);
10269 /* When source operand is memory, destination must match. */
10270 if (MEM_P (src) && !matching_memory)
10271 src = force_reg (mode, src);
10273 /* Emit the instruction. */
10275 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
10276 if (reload_in_progress || code == NOT)
10278 /* Reload doesn't know about the flags register, and doesn't know that
10279 it doesn't want to clobber it. */
10280 gcc_assert (code == NOT);
10285 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
10286 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
10289 /* Fix up the destination if needed. */
10290 if (dst != operands[0])
10291 emit_move_insn (operands[0], dst);
10294 /* Return TRUE or FALSE depending on whether the unary operator meets the
10295 appropriate constraints. */
10298 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
10299 enum machine_mode mode ATTRIBUTE_UNUSED,
10300 rtx operands[2] ATTRIBUTE_UNUSED)
10302 /* If one of operands is memory, source and destination must match. */
10303 if ((MEM_P (operands[0])
10304 || MEM_P (operands[1]))
10305 && ! rtx_equal_p (operands[0], operands[1]))
10310 /* Post-reload splitter for converting an SF or DFmode value in an
10311 SSE register into an unsigned SImode. */
10314 ix86_split_convert_uns_si_sse (rtx operands[])
10316 enum machine_mode vecmode;
10317 rtx value, large, zero_or_two31, input, two31, x;
10319 large = operands[1];
10320 zero_or_two31 = operands[2];
10321 input = operands[3];
10322 two31 = operands[4];
10323 vecmode = GET_MODE (large);
10324 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
10326 /* Load up the value into the low element. We must ensure that the other
10327 elements are valid floats -- zero is the easiest such value. */
10330 if (vecmode == V4SFmode)
10331 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
10333 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
10337 input = gen_rtx_REG (vecmode, REGNO (input));
10338 emit_move_insn (value, CONST0_RTX (vecmode));
10339 if (vecmode == V4SFmode)
10340 emit_insn (gen_sse_movss (value, value, input));
10342 emit_insn (gen_sse2_movsd (value, value, input));
10345 emit_move_insn (large, two31);
10346 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
10348 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
10349 emit_insn (gen_rtx_SET (VOIDmode, large, x));
10351 x = gen_rtx_AND (vecmode, zero_or_two31, large);
10352 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
10354 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
10355 emit_insn (gen_rtx_SET (VOIDmode, value, x));
10357 large = gen_rtx_REG (V4SImode, REGNO (large));
10358 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
10360 x = gen_rtx_REG (V4SImode, REGNO (value));
10361 if (vecmode == V4SFmode)
10362 emit_insn (gen_sse2_cvttps2dq (x, value));
10364 emit_insn (gen_sse2_cvttpd2dq (x, value));
10367 emit_insn (gen_xorv4si3 (value, value, large));
10370 /* Convert an unsigned DImode value into a DFmode, using only SSE.
10371 Expects the 64-bit DImode to be supplied in a pair of integral
10372 registers. Requires SSE2; will use SSE3 if available. For x86_32,
10373 -mfpmath=sse, !optimize_size only. */
10376 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
10378 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
10379 rtx int_xmm, fp_xmm;
10380 rtx biases, exponents;
10383 int_xmm = gen_reg_rtx (V4SImode);
10384 if (TARGET_INTER_UNIT_MOVES)
10385 emit_insn (gen_movdi_to_sse (int_xmm, input));
10386 else if (TARGET_SSE_SPLIT_REGS)
10388 emit_insn (gen_rtx_CLOBBER (VOIDmode, int_xmm));
10389 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
10393 x = gen_reg_rtx (V2DImode);
10394 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
10395 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
10398 x = gen_rtx_CONST_VECTOR (V4SImode,
10399 gen_rtvec (4, GEN_INT (0x43300000UL),
10400 GEN_INT (0x45300000UL),
10401 const0_rtx, const0_rtx));
10402 exponents = validize_mem (force_const_mem (V4SImode, x));
10404 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
10405 emit_insn (gen_sse2_punpckldq (int_xmm, int_xmm, exponents));
10407 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
10408 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
10409 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
10410 (0x1.0p84 + double(fp_value_hi_xmm)).
10411 Note these exponents differ by 32. */
10413 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
10415 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
10416 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
10417 real_ldexp (&bias_lo_rvt, &dconst1, 52);
10418 real_ldexp (&bias_hi_rvt, &dconst1, 84);
10419 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
10420 x = const_double_from_real_value (bias_hi_rvt, DFmode);
10421 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
10422 biases = validize_mem (force_const_mem (V2DFmode, biases));
10423 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
10425 /* Add the upper and lower DFmode values together. */
10427 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
10430 x = copy_to_mode_reg (V2DFmode, fp_xmm);
10431 emit_insn (gen_sse2_unpckhpd (fp_xmm, fp_xmm, fp_xmm));
10432 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
10435 ix86_expand_vector_extract (false, target, fp_xmm, 0);
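/* A scalar C model of the exponent trick above (assumes IEEE doubles and
   a little-endian host; everything but the bit patterns, which are
   exactly the `exponents' vector, is hypothetical):  */
#if 0	/* illustrative sketch only, not built */
#include <stdint.h>
#include <string.h>

static double
u64_to_double (uint64_t x)
{
  uint64_t lo_bits = (x & 0xffffffffu) | 0x4330000000000000ull; /* 2^52 + lo */
  uint64_t hi_bits = (x >> 32)         | 0x4530000000000000ull; /* 2^84 + hi*2^32 */
  double lo, hi;
  memcpy (&lo, &lo_bits, sizeof lo);
  memcpy (&hi, &hi_bits, sizeof hi);
  /* Both subtractions are exact; only the final addition rounds.  */
  return (hi - 0x1.0p84) + (lo - 0x1.0p52);
}
#endif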
10438 /* Convert an unsigned SImode value into a DFmode. Only currently used
10439 for SSE, but applicable anywhere. */
10442 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
10444 REAL_VALUE_TYPE TWO31r;
10447 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
10448 NULL, 1, OPTAB_DIRECT);
10450 fp = gen_reg_rtx (DFmode);
10451 emit_insn (gen_floatsidf2 (fp, x));
10453 real_ldexp (&TWO31r, &dconst1, 31);
10454 x = const_double_from_real_value (TWO31r, DFmode);
10456 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
10458 emit_move_insn (target, x);
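/* Scalar model of the above: shift the value into signed range, use the
   signed int->double conversion the hardware has, then add the bias back
   (exact in DFmode; assumes the usual two's-complement narrowing):  */
#if 0	/* illustrative sketch only, not built */
#include <stdint.h>

static double
u32_to_double (uint32_t x)
{
  return (double) (int32_t) (x - 0x80000000u) + 2147483648.0;
}
#endif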
10461 /* Convert a signed DImode value into a DFmode. Only used for SSE in
10462 32-bit mode; otherwise we have a direct convert instruction. */
10465 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
10467 REAL_VALUE_TYPE TWO32r;
10468 rtx fp_lo, fp_hi, x;
10470 fp_lo = gen_reg_rtx (DFmode);
10471 fp_hi = gen_reg_rtx (DFmode);
10473 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
10475 real_ldexp (&TWO32r, &dconst1, 32);
10476 x = const_double_from_real_value (TWO32r, DFmode);
10477 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
10479 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
10481 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
10484 emit_move_insn (target, x);
10487 /* Convert an unsigned SImode value into a SFmode, using only SSE.
10488 For x86_32, -mfpmath=sse, !optimize_size only. */
10490 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
10492 REAL_VALUE_TYPE ONE16r;
10493 rtx fp_hi, fp_lo, int_hi, int_lo, x;
10495 real_ldexp (&ONE16r, &dconst1, 16);
10496 x = const_double_from_real_value (ONE16r, SFmode);
10497 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
10498 NULL, 0, OPTAB_DIRECT);
10499 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
10500 NULL, 0, OPTAB_DIRECT);
10501 fp_hi = gen_reg_rtx (SFmode);
10502 fp_lo = gen_reg_rtx (SFmode);
10503 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
10504 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
10505 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
10507 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
10509 if (!rtx_equal_p (target, fp_hi))
10510 emit_move_insn (target, fp_hi);
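/* Scalar model of the above: both 16-bit halves convert to float
   exactly, and scaling the high half by 2^16 is exact as well, so the
   only rounding happens in the final addition:  */
#if 0	/* illustrative sketch only, not built */
#include <stdint.h>

static float
u32_to_float (uint32_t x)
{
  float hi = (float) (x >> 16);		/* < 2^16, exact */
  float lo = (float) (x & 0xffff);	/* < 2^16, exact */
  return hi * 65536.0f + lo;		/* one rounding, in the add */
}
#endif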
10513 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
10514 then replicate the value for all elements of the vector register. */
10518 ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
10525 v = gen_rtvec (4, value, value, value, value);
10526 return gen_rtx_CONST_VECTOR (V4SImode, v);
10530 v = gen_rtvec (2, value, value);
10531 return gen_rtx_CONST_VECTOR (V2DImode, v);
10535 v = gen_rtvec (4, value, value, value, value);
10537 v = gen_rtvec (4, value, CONST0_RTX (SFmode),
10538 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
10539 return gen_rtx_CONST_VECTOR (V4SFmode, v);
10543 v = gen_rtvec (2, value, value);
10545 v = gen_rtvec (2, value, CONST0_RTX (DFmode));
10546 return gen_rtx_CONST_VECTOR (V2DFmode, v);
10549 gcc_unreachable ();
10553 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
10554 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
10555 for an SSE register. If VECT is true, then replicate the mask for
10556 all elements of the vector register. If INVERT is true, then create
10557 a mask excluding the sign bit. */
10560 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
10562 enum machine_mode vec_mode, imode;
10563 HOST_WIDE_INT hi, lo;
10568 /* Find the sign bit, sign extended to 2*HWI. */
10574 vec_mode = (mode == SImode) ? V4SImode : V4SFmode;
10575 lo = 0x80000000, hi = lo < 0;
10581 vec_mode = (mode == DImode) ? V2DImode : V2DFmode;
10582 if (HOST_BITS_PER_WIDE_INT >= 64)
10583 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
10585 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
10589 gcc_unreachable ();
10593 lo = ~lo, hi = ~hi;
10595 /* Force this value into the low part of a fp vector constant. */
10596 mask = immed_double_const (lo, hi, imode);
10597 mask = gen_lowpart (mode, mask);
10599 v = ix86_build_const_vector (mode, vect, mask);
10600 return force_reg (vec_mode, v);
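/* What the mask buys us, in scalar form (assumes IEEE single format and
   a little-endian host): AND with the inverted mask is fabs, XOR with
   the mask itself is negation -- no FPU flags, no traps, and both
   vectorize directly:  */
#if 0	/* illustrative sketch only, not built */
#include <stdint.h>
#include <string.h>

static float
fabs_via_mask (float x)
{
  uint32_t b;
  memcpy (&b, &x, sizeof b);
  b &= 0x7fffffffu;		/* the invert=true mask: ~sign bit */
  memcpy (&x, &b, sizeof x);
  return x;
}
#endif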
10603 /* Generate code for floating point ABS or NEG. */
10606 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
10609 rtx mask, set, use, clob, dst, src;
10610 bool matching_memory;
10611 bool use_sse = false;
10612 bool vector_mode = VECTOR_MODE_P (mode);
10613 enum machine_mode elt_mode = mode;
10617 elt_mode = GET_MODE_INNER (mode);
10620 else if (TARGET_SSE_MATH)
10621 use_sse = SSE_FLOAT_MODE_P (mode);
10623 /* NEG and ABS performed with SSE use bitwise mask operations.
10624 Create the appropriate mask now. */
10626 mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS);
10633 /* If the destination is memory, and we don't have matching source
10634 operands or we're using the x87, do things in registers. */
10635 matching_memory = false;
10638 if (use_sse && rtx_equal_p (dst, src))
10639 matching_memory = true;
10641 dst = gen_reg_rtx (mode);
10643 if (MEM_P (src) && !matching_memory)
10644 src = force_reg (mode, src);
10648 set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
10649 set = gen_rtx_SET (VOIDmode, dst, set);
10654 set = gen_rtx_fmt_e (code, mode, src);
10655 set = gen_rtx_SET (VOIDmode, dst, set);
10658 use = gen_rtx_USE (VOIDmode, mask);
10659 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
10660 emit_insn (gen_rtx_PARALLEL (VOIDmode,
10661 gen_rtvec (3, set, use, clob)));
10667 if (dst != operands[0])
10668 emit_move_insn (operands[0], dst);
10671 /* Expand a copysign operation. Special case operand 0 being a constant. */
10674 ix86_expand_copysign (rtx operands[])
10676 enum machine_mode mode, vmode;
10677 rtx dest, op0, op1, mask, nmask;
10679 dest = operands[0];
10683 mode = GET_MODE (dest);
10684 vmode = mode == SFmode ? V4SFmode : V2DFmode;
10686 if (GET_CODE (op0) == CONST_DOUBLE)
10690 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
10691 op0 = simplify_unary_operation (ABS, mode, op0, mode);
10693 if (op0 == CONST0_RTX (mode))
10694 op0 = CONST0_RTX (vmode);
10697 if (mode == SFmode)
10698 v = gen_rtvec (4, op0, CONST0_RTX (SFmode),
10699 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
10701 v = gen_rtvec (2, op0, CONST0_RTX (DFmode));
10702 op0 = force_reg (vmode, gen_rtx_CONST_VECTOR (vmode, v));
10705 mask = ix86_build_signbit_mask (mode, 0, 0);
10707 if (mode == SFmode)
10708 emit_insn (gen_copysignsf3_const (dest, op0, op1, mask));
10710 emit_insn (gen_copysigndf3_const (dest, op0, op1, mask));
10714 nmask = ix86_build_signbit_mask (mode, 0, 1);
10715 mask = ix86_build_signbit_mask (mode, 0, 0);
10717 if (mode == SFmode)
10718 emit_insn (gen_copysignsf3_var (dest, NULL, op0, op1, nmask, mask));
10720 emit_insn (gen_copysigndf3_var (dest, NULL, op0, op1, nmask, mask));
10724 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
10725 be a constant, and so has already been expanded into a vector constant. */
10728 ix86_split_copysign_const (rtx operands[])
10730 enum machine_mode mode, vmode;
10731 rtx dest, op0, op1, mask, x;
10733 dest = operands[0];
10736 mask = operands[3];
10738 mode = GET_MODE (dest);
10739 vmode = GET_MODE (mask);
10741 dest = simplify_gen_subreg (vmode, dest, mode, 0);
10742 x = gen_rtx_AND (vmode, dest, mask);
10743 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10745 if (op0 != CONST0_RTX (vmode))
10747 x = gen_rtx_IOR (vmode, dest, op0);
10748 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10752 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
10753 so we have to do two masks. */
10756 ix86_split_copysign_var (rtx operands[])
10758 enum machine_mode mode, vmode;
10759 rtx dest, scratch, op0, op1, mask, nmask, x;
10761 dest = operands[0];
10762 scratch = operands[1];
10765 nmask = operands[4];
10766 mask = operands[5];
10768 mode = GET_MODE (dest);
10769 vmode = GET_MODE (mask);
10771 if (rtx_equal_p (op0, op1))
10773 /* Shouldn't happen often (it's useless, obviously), but when it does
10774 we'd generate incorrect code if we continue below. */
10775 emit_move_insn (dest, op0);
10779 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
10781 gcc_assert (REGNO (op1) == REGNO (scratch));
10783 x = gen_rtx_AND (vmode, scratch, mask);
10784 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
10787 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
10788 x = gen_rtx_NOT (vmode, dest);
10789 x = gen_rtx_AND (vmode, x, op0);
10790 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10794 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
10796 x = gen_rtx_AND (vmode, scratch, mask);
10798 else /* alternative 2,4 */
10800 gcc_assert (REGNO (mask) == REGNO (scratch));
10801 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
10802 x = gen_rtx_AND (vmode, scratch, op1);
10804 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
10806 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
10808 dest = simplify_gen_subreg (vmode, op0, mode, 0);
10809 x = gen_rtx_AND (vmode, dest, nmask);
10811 else /* alternative 3,4 */
10813 gcc_assert (REGNO (nmask) == REGNO (dest));
10815 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
10816 x = gen_rtx_AND (vmode, dest, op0);
10818 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10821 x = gen_rtx_IOR (vmode, dest, scratch);
10822 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
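/* Per element, the two-mask split above computes the classic bit recipe
   (scalar model with IEEE single bits; nmask = ~SIGN, mask = SIGN):  */
#if 0	/* illustrative sketch only, not built */
#include <stdint.h>
#include <string.h>

static float
copysign_via_masks (float mag, float sgn)
{
  uint32_t m, s;
  memcpy (&m, &mag, sizeof m);
  memcpy (&s, &sgn, sizeof s);
  m = (m & 0x7fffffffu)		/* magnitude: AND with nmask */
      | (s & 0x80000000u);	/* sign:      AND with mask, then IOR */
  memcpy (&mag, &m, sizeof m);
  return mag;
}
#endif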
10825 /* Return TRUE or FALSE depending on whether the first SET in INSN
10826 has source and destination with matching CC modes, and whether the
10827 CC mode is at least as constrained as REQ_MODE. */
10830 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
10833 enum machine_mode set_mode;
10835 set = PATTERN (insn);
10836 if (GET_CODE (set) == PARALLEL)
10837 set = XVECEXP (set, 0, 0);
10838 gcc_assert (GET_CODE (set) == SET);
10839 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
10841 set_mode = GET_MODE (SET_DEST (set));
10845 if (req_mode != CCNOmode
10846 && (req_mode != CCmode
10847 || XEXP (SET_SRC (set), 1) != const0_rtx))
10851 if (req_mode == CCGCmode)
10855 if (req_mode == CCGOCmode || req_mode == CCNOmode)
10859 if (req_mode == CCZmode)
10866 gcc_unreachable ();
10869 return (GET_MODE (SET_SRC (set)) == set_mode);
10872 /* Generate insn patterns to do an integer compare of OPERANDS. */
10875 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
10877 enum machine_mode cmpmode;
10880 cmpmode = SELECT_CC_MODE (code, op0, op1);
10881 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
10883 /* This is very simple, but making the interface the same as in the
10884 FP case makes the rest of the code easier. */
10885 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
10886 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
10888 /* Return the test that should be put into the flags user, i.e.
10889 the bcc, scc, or cmov instruction. */
10890 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
10893 /* Figure out whether to use ordered or unordered fp comparisons.
10894 Return the appropriate mode to use. */
10897 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
10899 /* ??? In order to make all comparisons reversible, we do all comparisons
10900 non-trapping when compiling for IEEE. Once gcc is able to distinguish
10901 all forms of trapping and nontrapping comparisons, we can make inequality
10902 comparisons trapping again, since it results in better code when using
10903 FCOM based compares. */
10904 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
10908 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
10910 enum machine_mode mode = GET_MODE (op0);
10912 if (SCALAR_FLOAT_MODE_P (mode))
10914 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
10915 return ix86_fp_compare_mode (code);
10920 /* Only zero flag is needed. */
10921 case EQ: /* ZF=0 */
10922 case NE: /* ZF!=0 */
10924 /* Codes needing carry flag. */
10925 case GEU: /* CF=0 */
10926 case GTU: /* CF=0 & ZF=0 */
10927 case LTU: /* CF=1 */
10928 case LEU: /* CF=1 | ZF=1 */
10930 /* Codes possibly doable only with sign flag when
10931 comparing against zero. */
10932 case GE: /* SF=OF or SF=0 */
10933 case LT: /* SF<>OF or SF=1 */
10934 if (op1 == const0_rtx)
10937 /* For other cases the Carry flag is not required. */
10939 /* Codes doable only with the sign flag when comparing
10940 against zero, but for which we lack a jump instruction,
10941 so we need to use relational tests against the overflow
10942 flag, which thus needs to be zero. */
10943 case GT: /* ZF=0 & SF=OF */
10944 case LE: /* ZF=1 | SF<>OF */
10945 if (op1 == const0_rtx)
10949 /* strcmp patterns do (use flags), and combine may ask us for the proper mode. */
10954 gcc_unreachable ();
10958 /* Return the fixed registers used for condition codes. */
10961 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
10968 /* If two condition code modes are compatible, return a condition code
10969 mode which is compatible with both. Otherwise, return VOIDmode. */
10972 static enum machine_mode
10973 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
10978 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
10981 if ((m1 == CCGCmode && m2 == CCGOCmode)
10982 || (m1 == CCGOCmode && m2 == CCGCmode))
10988 gcc_unreachable ();
11010 /* These are only compatible with themselves, which we already checked above. */
11016 /* Split comparison code CODE into comparisons we can do using branch
11017 instructions. BYPASS_CODE is the comparison code for the branch that
11018 will branch around FIRST_CODE and SECOND_CODE. If one of the branches
11019 is not required, its value is set to UNKNOWN.
11020 We never require more than two branches. */
11023 ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
11024 enum rtx_code *first_code,
11025 enum rtx_code *second_code)
11027 *first_code = code;
11028 *bypass_code = UNKNOWN;
11029 *second_code = UNKNOWN;
11031 /* The fcomi comparison sets flags as follows:

      cmp    ZF PF CF
      >      0  0  0
      <      0  0  1
      =      1  0  0
      un     1  1  1
   */

  switch (code)
    {
11041 case GT: /* GTU - CF=0 & ZF=0 */
11042 case GE: /* GEU - CF=0 */
11043 case ORDERED: /* PF=0 */
11044 case UNORDERED: /* PF=1 */
11045 case UNEQ: /* EQ - ZF=1 */
11046 case UNLT: /* LTU - CF=1 */
11047 case UNLE: /* LEU - CF=1 | ZF=1 */
11048 case LTGT: /* EQ - ZF=0 */
11050 case LT: /* LTU - CF=1 - fails on unordered */
11051 *first_code = UNLT;
11052 *bypass_code = UNORDERED;
11054 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
11055 *first_code = UNLE;
11056 *bypass_code = UNORDERED;
11058 case EQ: /* EQ - ZF=1 - fails on unordered */
11059 *first_code = UNEQ;
11060 *bypass_code = UNORDERED;
11062 case NE: /* NE - ZF=0 - fails on unordered */
11063 *first_code = LTGT;
11064 *second_code = UNORDERED;
11066 case UNGE: /* GEU - CF=0 - fails on unordered */
11068 *second_code = UNORDERED;
11070 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
11072 *second_code = UNORDERED;
11075 gcc_unreachable ();
11077 if (!TARGET_IEEE_FP)
11079 *second_code = UNKNOWN;
11080 *bypass_code = UNKNOWN;
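/* Illustrative sketch of the two-branch split above (assuming IEEE
   math, where LT gets an UNORDERED bypass): branching on (a < b)
   after an fcomi comes out roughly as

       jp   .Lskip       ; bypass: unordered operands fail the test
       jb   .Ltarget     ; UNLT maps to CF=1
   .Lskip:

   while NE instead needs a SECOND branch, since either LTGT or
   UNORDERED should reach the target.  */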
11084 /* Return the cost of a comparison done with fcom + arithmetic operations on AX.
11085 All of the following functions use the number of instructions as the cost metric.
11086 In the future this should be tweaked to compute bytes for optimize_size and
11087 take into account the performance of various instructions on various CPUs. */
11089 ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
11091 if (!TARGET_IEEE_FP)
11093 /* The cost of code output by ix86_expand_fp_compare. */
11117 gcc_unreachable ();
11121 /* Return cost of comparison done using fcomi operation.
11122 See ix86_fp_comparison_arithmetics_cost for the metrics. */
11124 ix86_fp_comparison_fcomi_cost (enum rtx_code code)
11126 enum rtx_code bypass_code, first_code, second_code;
11127 /* Return an arbitrarily high cost when the instruction is not supported -
11128 this prevents gcc from using it. */
11131 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11132 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 2;
11135 /* Return cost of comparison done using sahf operation.
11136 See ix86_fp_comparison_arithmetics_cost for the metrics. */
11138 ix86_fp_comparison_sahf_cost (enum rtx_code code)
11140 enum rtx_code bypass_code, first_code, second_code;
11141 /* Return an arbitrarily high cost when the instruction is not preferred -
11142 this prevents gcc from using it. */
11143 if (!(TARGET_SAHF && (TARGET_USE_SAHF || optimize_size)))
11145 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11146 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 3;
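/* Worked example for the two cost functions above (illustrative):
   for LT under TARGET_IEEE_FP an UNORDERED bypass is needed, so the
   fcomi variant costs (1 for the extra branch) + 2 == 3 and the sahf
   variant (1 for the extra branch) + 3 == 4; codes needing neither
   BYPASS nor SECOND cost just 2 resp. 3.  */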
11149 /* Compute cost of the comparison done using any method.
11150 See ix86_fp_comparison_arithmetics_cost for the metrics. */
11152 ix86_fp_comparison_cost (enum rtx_code code)
11154 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
11157 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
11158 sahf_cost = ix86_fp_comparison_sahf_cost (code);
11160 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
11161 if (min > sahf_cost)
11163 if (min > fcomi_cost)
11168 /* Return true if we should use an FCOMI instruction for this
   fp comparison. */
11172 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
11174 enum rtx_code swapped_code = swap_condition (code);
11176 return ((ix86_fp_comparison_cost (code)
11177 == ix86_fp_comparison_fcomi_cost (code))
11178 || (ix86_fp_comparison_cost (swapped_code)
11179 == ix86_fp_comparison_fcomi_cost (swapped_code)));
11182 /* Swap, force into registers, or otherwise massage the two operands
11183 to a fp comparison. The operands are updated in place; the new
11184 comparison code is returned. */
11186 static enum rtx_code
11187 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
11189 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
11190 rtx op0 = *pop0, op1 = *pop1;
11191 enum machine_mode op_mode = GET_MODE (op0);
11192 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
11194 /* All of the unordered compare instructions only work on registers.
11195 The same is true of the fcomi compare instructions. The XFmode
11196 compare instructions require registers except when comparing
11197 against zero or when converting operand 1 from fixed point to
   floating point. */
  if (!is_sse
11201 && (fpcmp_mode == CCFPUmode
11202 || (op_mode == XFmode
11203 && ! (standard_80387_constant_p (op0) == 1
11204 || standard_80387_constant_p (op1) == 1)
11205 && GET_CODE (op1) != FLOAT)
11206 || ix86_use_fcomi_compare (code)))
11208 op0 = force_reg (op_mode, op0);
11209 op1 = force_reg (op_mode, op1);
11213 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
11214 things around if they appear profitable, otherwise force op0
11215 into a register. */
11217 if (standard_80387_constant_p (op0) == 0
11219 && ! (standard_80387_constant_p (op1) == 0
11223 tmp = op0, op0 = op1, op1 = tmp;
11224 code = swap_condition (code);
11228 op0 = force_reg (op_mode, op0);
11230 if (CONSTANT_P (op1))
11232 int tmp = standard_80387_constant_p (op1);
11234 op1 = validize_mem (force_const_mem (op_mode, op1));
11238 op1 = force_reg (op_mode, op1);
11241 op1 = force_reg (op_mode, op1);
11245 /* Try to rearrange the comparison to make it cheaper. */
11246 if (ix86_fp_comparison_cost (code)
11247 > ix86_fp_comparison_cost (swap_condition (code))
11248 && (REG_P (op1) || !no_new_pseudos))
11251 tmp = op0, op0 = op1, op1 = tmp;
11252 code = swap_condition (code);
11254 op0 = force_reg (op_mode, op0);
11262 /* Convert comparison codes we use to represent FP comparison to integer
11263 code that will result in a proper branch. Return UNKNOWN if no such code
   is available. */
11267 ix86_fp_compare_code_to_integer (enum rtx_code code)
11296 /* Generate insn patterns to do a floating point compare of OPERANDS. */
11299 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
11300 rtx *second_test, rtx *bypass_test)
11302 enum machine_mode fpcmp_mode, intcmp_mode;
11304 int cost = ix86_fp_comparison_cost (code);
11305 enum rtx_code bypass_code, first_code, second_code;
11307 fpcmp_mode = ix86_fp_compare_mode (code);
11308 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
11311 *second_test = NULL_RTX;
11313 *bypass_test = NULL_RTX;
11315 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11317 /* Do fcomi/sahf based test when profitable. */
11318 if ((TARGET_CMOVE || TARGET_SAHF)
11319 && (bypass_code == UNKNOWN || bypass_test)
11320 && (second_code == UNKNOWN || second_test)
11321 && ix86_fp_comparison_arithmetics_cost (code) > cost)
11325 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
11326 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
11332 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
11333 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
11335 scratch = gen_reg_rtx (HImode);
11336 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
11337 emit_insn (gen_x86_sahf_1 (scratch));
11340 /* The FP codes work out to act like unsigned. */
11341 intcmp_mode = fpcmp_mode;
11343 if (bypass_code != UNKNOWN)
11344 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
11345 gen_rtx_REG (intcmp_mode, FLAGS_REG),
11347 if (second_code != UNKNOWN)
11348 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
11349 gen_rtx_REG (intcmp_mode, FLAGS_REG),
11354 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
11355 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
11356 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
11358 scratch = gen_reg_rtx (HImode);
11359 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
11361 /* In the unordered case, we have to check C2 for NaN's, which
11362 doesn't happen to work out to anything nice combination-wise.
11363 So do some bit twiddling on the value we've got in AH to come
11364 up with an appropriate set of condition codes. */
11366 intcmp_mode = CCNOmode;
11371 if (code == GT || !TARGET_IEEE_FP)
11373 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
11378 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11379 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
11380 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
11381 intcmp_mode = CCmode;
11387 if (code == LT && TARGET_IEEE_FP)
11389 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11390 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
11391 intcmp_mode = CCmode;
11396 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
11402 if (code == GE || !TARGET_IEEE_FP)
11404 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
11409 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11410 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
11417 if (code == LE && TARGET_IEEE_FP)
11419 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11420 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
11421 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
11422 intcmp_mode = CCmode;
11427 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
11433 if (code == EQ && TARGET_IEEE_FP)
11435 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11436 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
11437 intcmp_mode = CCmode;
11442 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
11449 if (code == NE && TARGET_IEEE_FP)
11451 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11452 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
11458 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
11464 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
11468 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
11473 gcc_unreachable ();
11477 /* Return the test that should be put into the flags user, i.e.
11478 the bcc, scc, or cmov instruction. */
11479 return gen_rtx_fmt_ee (code, VOIDmode,
11480 gen_rtx_REG (intcmp_mode, FLAGS_REG),
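/* Background for the magic constants above (standard x87 facts,
   restated here for illustration): after fnstsw the condition bits
   land in AH as C0 == 0x01, C2 == 0x04 and C3 == 0x40, so masking
   with 0x45 isolates all three at once.  Unordered results set all
   three bits, which is why e.g. the IEEE-safe EQ test does
   "and $0x45" and then compares against 0x40 (C3 alone) rather than
   merely testing C3.  */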
11485 ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
11488 op0 = ix86_compare_op0;
11489 op1 = ix86_compare_op1;
11492 *second_test = NULL_RTX;
11494 *bypass_test = NULL_RTX;
11496 if (ix86_compare_emitted)
11498 ret = gen_rtx_fmt_ee (code, VOIDmode, ix86_compare_emitted, const0_rtx);
11499 ix86_compare_emitted = NULL_RTX;
11501 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
11503 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
11504 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
11505 second_test, bypass_test);
11508 ret = ix86_expand_int_compare (code, op0, op1);
11513 /* Return true if the CODE will result in a nontrivial jump sequence. */
11515 ix86_fp_jump_nontrivial_p (enum rtx_code code)
11517 enum rtx_code bypass_code, first_code, second_code;
11520 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11521 return bypass_code != UNKNOWN || second_code != UNKNOWN;
11525 ix86_expand_branch (enum rtx_code code, rtx label)
11529 /* If we have emitted a compare insn, go straight to simple.
11530 ix86_expand_compare won't emit anything if ix86_compare_emitted
   is non-NULL. */
11532 if (ix86_compare_emitted)
11535 switch (GET_MODE (ix86_compare_op0))
11541 tmp = ix86_expand_compare (code, NULL, NULL);
11542 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
11543 gen_rtx_LABEL_REF (VOIDmode, label),
11545 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
11554 enum rtx_code bypass_code, first_code, second_code;
11556 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
11557 &ix86_compare_op1);
11559 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11561 /* Check whether we will use the natural sequence with one jump. If
11562 so, we can expand the jump early. Otherwise delay expansion by
11563 creating a compound insn so as not to confuse the optimizers. */
11564 if (bypass_code == UNKNOWN && second_code == UNKNOWN
11567 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
11568 gen_rtx_LABEL_REF (VOIDmode, label),
11569 pc_rtx, NULL_RTX, NULL_RTX);
11573 tmp = gen_rtx_fmt_ee (code, VOIDmode,
11574 ix86_compare_op0, ix86_compare_op1);
11575 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
11576 gen_rtx_LABEL_REF (VOIDmode, label),
11578 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
11580 use_fcomi = ix86_use_fcomi_compare (code);
11581 vec = rtvec_alloc (3 + !use_fcomi);
11582 RTVEC_ELT (vec, 0) = tmp;
11584 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
11586 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
11589 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
11591 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
11600 /* Expand DImode branch into multiple compare+branch. */
11602 rtx lo[2], hi[2], label2;
11603 enum rtx_code code1, code2, code3;
11604 enum machine_mode submode;
11606 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
11608 tmp = ix86_compare_op0;
11609 ix86_compare_op0 = ix86_compare_op1;
11610 ix86_compare_op1 = tmp;
11611 code = swap_condition (code);
11613 if (GET_MODE (ix86_compare_op0) == DImode)
11615 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
11616 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
11621 split_ti (&ix86_compare_op0, 1, lo+0, hi+0);
11622 split_ti (&ix86_compare_op1, 1, lo+1, hi+1);
11626 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
11627 avoid two branches. This costs one extra insn, so disable when
11628 optimizing for size. */
11630 if ((code == EQ || code == NE)
11632 || hi[1] == const0_rtx || lo[1] == const0_rtx))
11637 if (hi[1] != const0_rtx)
11638 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
11639 NULL_RTX, 0, OPTAB_WIDEN);
11642 if (lo[1] != const0_rtx)
11643 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
11644 NULL_RTX, 0, OPTAB_WIDEN);
11646 tmp = expand_binop (submode, ior_optab, xor1, xor0,
11647 NULL_RTX, 0, OPTAB_WIDEN);
11649 ix86_compare_op0 = tmp;
11650 ix86_compare_op1 = const0_rtx;
11651 ix86_expand_branch (code, label);
11655 /* Otherwise, if we are doing a less-than or greater-or-equal-than
11656 comparison, op1 is a constant, and the low word is zero, then we can
11657 just examine the high word. */
11659 if (CONST_INT_P (hi[1]) && lo[1] == const0_rtx)
11662 case LT: case LTU: case GE: case GEU:
11663 ix86_compare_op0 = hi[0];
11664 ix86_compare_op1 = hi[1];
11665 ix86_expand_branch (code, label);
11671 /* Otherwise, we need two or three jumps. */
11673 label2 = gen_label_rtx ();
11676 code2 = swap_condition (code);
11677 code3 = unsigned_condition (code);
11681 case LT: case GT: case LTU: case GTU:
11684 case LE: code1 = LT; code2 = GT; break;
11685 case GE: code1 = GT; code2 = LT; break;
11686 case LEU: code1 = LTU; code2 = GTU; break;
11687 case GEU: code1 = GTU; code2 = LTU; break;
11689 case EQ: code1 = UNKNOWN; code2 = NE; break;
11690 case NE: code2 = UNKNOWN; break;
11693 gcc_unreachable ();
11698 * if (hi(a) < hi(b)) goto true;
11699 * if (hi(a) > hi(b)) goto false;
11700 * if (lo(a) < lo(b)) goto true;
11704 ix86_compare_op0 = hi[0];
11705 ix86_compare_op1 = hi[1];
11707 if (code1 != UNKNOWN)
11708 ix86_expand_branch (code1, label);
11709 if (code2 != UNKNOWN)
11710 ix86_expand_branch (code2, label2);
11712 ix86_compare_op0 = lo[0];
11713 ix86_compare_op1 = lo[1];
11714 ix86_expand_branch (code3, label);
11716 if (code2 != UNKNOWN)
11717 emit_label (label2);
11722 gcc_unreachable ();
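/* Illustrative expansion of the hi/lo scheme above (not verbatim
   compiler output): on a 32-bit target, a signed 64-bit "if (a < b)"
   becomes roughly

       cmpl  hi(b), hi(a)
       jl    .Ltrue
       jg    .Lfalse
       cmpl  lo(b), lo(a)
       jb    .Ltrue          ; low words always compare unsigned
   .Lfalse:
*/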
11726 /* Split branch based on floating point condition. */
11728 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
11729 rtx target1, rtx target2, rtx tmp, rtx pushed)
11731 rtx second, bypass;
11732 rtx label = NULL_RTX;
11734 int bypass_probability = -1, second_probability = -1, probability = -1;
11737 if (target2 != pc_rtx)
11740 code = reverse_condition_maybe_unordered (code);
11745 condition = ix86_expand_fp_compare (code, op1, op2,
11746 tmp, &second, &bypass);
11748 /* Remove pushed operand from stack. */
11750 ix86_free_from_memory (GET_MODE (pushed));
11752 if (split_branch_probability >= 0)
11754 /* Distribute the probabilities across the jumps.
11755 Assume that BYPASS and SECOND always test
   for unordered. */
11757 probability = split_branch_probability;
11759 /* A value of 1 is low enough that the probability does not need
11760 to be updated. Later we may run some experiments and see
11761 if unordered values are more frequent in practice. */
11763 bypass_probability = 1;
11765 second_probability = 1;
11767 if (bypass != NULL_RTX)
11769 label = gen_label_rtx ();
11770 i = emit_jump_insn (gen_rtx_SET
11772 gen_rtx_IF_THEN_ELSE (VOIDmode,
11774 gen_rtx_LABEL_REF (VOIDmode,
11777 if (bypass_probability >= 0)
11779 = gen_rtx_EXPR_LIST (REG_BR_PROB,
11780 GEN_INT (bypass_probability),
11783 i = emit_jump_insn (gen_rtx_SET
11785 gen_rtx_IF_THEN_ELSE (VOIDmode,
11786 condition, target1, target2)));
11787 if (probability >= 0)
11789 = gen_rtx_EXPR_LIST (REG_BR_PROB,
11790 GEN_INT (probability),
11792 if (second != NULL_RTX)
11794 i = emit_jump_insn (gen_rtx_SET
11796 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
11798 if (second_probability >= 0)
11800 = gen_rtx_EXPR_LIST (REG_BR_PROB,
11801 GEN_INT (second_probability),
11804 if (label != NULL_RTX)
11805 emit_label (label);
11809 ix86_expand_setcc (enum rtx_code code, rtx dest)
11811 rtx ret, tmp, tmpreg, equiv;
11812 rtx second_test, bypass_test;
11814 if (GET_MODE (ix86_compare_op0) == (TARGET_64BIT ? TImode : DImode))
11815 return 0; /* FAIL */
11817 gcc_assert (GET_MODE (dest) == QImode);
11819 ret = ix86_expand_compare (code, &second_test, &bypass_test);
11820 PUT_MODE (ret, QImode);
11825 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
11826 if (bypass_test || second_test)
11828 rtx test = second_test;
11830 rtx tmp2 = gen_reg_rtx (QImode);
11833 gcc_assert (!second_test);
11834 test = bypass_test;
11836 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
11838 PUT_MODE (test, QImode);
11839 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
11842 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
11844 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
11847 /* Attach a REG_EQUAL note describing the comparison result. */
11848 if (ix86_compare_op0 && ix86_compare_op1)
11850 equiv = simplify_gen_relational (code, QImode,
11851 GET_MODE (ix86_compare_op0),
11852 ix86_compare_op0, ix86_compare_op1);
11853 set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
11856 return 1; /* DONE */
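/* Example of the second/bypass combination above (illustrative): an
   IEEE float "a != b" splits into LTGT plus a SECOND UNORDERED test,
   so the emitted code is roughly

       fucomip/ucomiss ...
       setne   %al        ; LTGT: ZF == 0
       setp    %dl        ; UNORDERED: PF == 1
       orb     %dl, %al

   whereas a BYPASS test is first reversed and then folded in with
   andb.  */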
11859 /* Expand comparison setting or clearing carry flag. Return true when
11860 successful and set pop for the operation. */
11862 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
11864 enum machine_mode mode =
11865 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
11867 /* Do not handle DImode compares that go through a special path.
11868 Also, we can't deal with FP compares yet; this would be possible to add. */
11869 if (mode == (TARGET_64BIT ? TImode : DImode))
11872 if (SCALAR_FLOAT_MODE_P (mode))
11874 rtx second_test = NULL, bypass_test = NULL;
11875 rtx compare_op, compare_seq;
11877 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
11879 /* Shortcut: the following common codes never translate
11880 into carry flag compares. */
11881 if (code == EQ || code == NE || code == UNEQ || code == LTGT
11882 || code == ORDERED || code == UNORDERED)
11885 /* These comparisons require the zero flag; swap the operands so they won't. */
11886 if ((code == GT || code == UNLE || code == LE || code == UNGT)
11887 && !TARGET_IEEE_FP)
11892 code = swap_condition (code);
11895 /* Try to expand the comparison and verify that we end up with a carry-flag-
11896 based comparison. This fails to be true only when we decide to expand the
11897 comparison using arithmetic, which is not too common a scenario. */
11899 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
11900 &second_test, &bypass_test);
11901 compare_seq = get_insns ();
11904 if (second_test || bypass_test)
11906 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
11907 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
11908 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
11910 code = GET_CODE (compare_op);
11911 if (code != LTU && code != GEU)
11913 emit_insn (compare_seq);
11917 if (!INTEGRAL_MODE_P (mode))
11925 /* Convert a==0 into (unsigned)a<1. */
11928 if (op1 != const0_rtx)
11931 code = (code == EQ ? LTU : GEU);
11934 /* Convert a>b into b<a or a>=b+1. */
11937 if (CONST_INT_P (op1))
11939 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
11940 /* Bail out on overflow. We could still swap the operands, but that
11941 would force loading of the constant into a register. */
11942 if (op1 == const0_rtx
11943 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
11945 code = (code == GTU ? GEU : LTU);
11952 code = (code == GTU ? LTU : GEU);
11956 /* Convert a>=0 into (unsigned)a<0x80000000. */
11959 if (mode == DImode || op1 != const0_rtx)
11961 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
11962 code = (code == LT ? GEU : LTU);
11966 if (mode == DImode || op1 != constm1_rtx)
11968 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
11969 code = (code == LE ? GEU : LTU);
11975 /* Swapping operands may cause a constant to appear as the first operand. */
11976 if (!nonimmediate_operand (op0, VOIDmode))
11978 if (no_new_pseudos)
11980 op0 = force_reg (mode, op0);
11982 ix86_compare_op0 = op0;
11983 ix86_compare_op1 = op1;
11984 *pop = ix86_expand_compare (code, NULL, NULL);
11985 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
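/* The payoff of normalizing to LTU/GEU above (illustrative): the
   caller can turn the carry flag straight into a -1/0 mask without a
   setcc, e.g. for SImode "x < y":

       cmpl  y, x
       sbbl  %eax, %eax   ; CF == 1 -> eax = -1, CF == 0 -> eax = 0
*/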
11990 ix86_expand_int_movcc (rtx operands[])
11992 enum rtx_code code = GET_CODE (operands[1]), compare_code;
11993 rtx compare_seq, compare_op;
11994 rtx second_test, bypass_test;
11995 enum machine_mode mode = GET_MODE (operands[0]);
11996 bool sign_bit_compare_p = false;
11999 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
12000 compare_seq = get_insns ();
12003 compare_code = GET_CODE (compare_op);
12005 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
12006 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
12007 sign_bit_compare_p = true;
12009 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
12010 HImode insns, we'd be swallowed in word prefix ops. */
12012 if ((mode != HImode || TARGET_FAST_PREFIX)
12013 && (mode != (TARGET_64BIT ? TImode : DImode))
12014 && CONST_INT_P (operands[2])
12015 && CONST_INT_P (operands[3]))
12017 rtx out = operands[0];
12018 HOST_WIDE_INT ct = INTVAL (operands[2]);
12019 HOST_WIDE_INT cf = INTVAL (operands[3]);
12020 HOST_WIDE_INT diff;
12023 /* Sign bit compares are better done using shifts than by using
   sbb. */
12025 if (sign_bit_compare_p
12026 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
12027 ix86_compare_op1, &compare_op))
12029 /* Detect overlap between destination and compare sources. */
12032 if (!sign_bit_compare_p)
12034 bool fpcmp = false;
12036 compare_code = GET_CODE (compare_op);
12038 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
12039 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
12042 compare_code = ix86_fp_compare_code_to_integer (compare_code);
12045 /* To simplify the rest of the code, restrict to the GEU case. */
12046 if (compare_code == LTU)
12048 HOST_WIDE_INT tmp = ct;
12051 compare_code = reverse_condition (compare_code);
12052 code = reverse_condition (code);
12057 PUT_CODE (compare_op,
12058 reverse_condition_maybe_unordered
12059 (GET_CODE (compare_op)));
12061 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
12065 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
12066 || reg_overlap_mentioned_p (out, ix86_compare_op1))
12067 tmp = gen_reg_rtx (mode);
12069 if (mode == DImode)
12070 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
12072 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
12076 if (code == GT || code == GE)
12077 code = reverse_condition (code);
12080 HOST_WIDE_INT tmp = ct;
12085 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
12086 ix86_compare_op1, VOIDmode, 0, -1);
12099 tmp = expand_simple_binop (mode, PLUS,
12101 copy_rtx (tmp), 1, OPTAB_DIRECT);
12112 tmp = expand_simple_binop (mode, IOR,
12114 copy_rtx (tmp), 1, OPTAB_DIRECT);
12116 else if (diff == -1 && ct)
12126 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
12128 tmp = expand_simple_binop (mode, PLUS,
12129 copy_rtx (tmp), GEN_INT (cf),
12130 copy_rtx (tmp), 1, OPTAB_DIRECT);
12138 * andl cf - ct, dest
12148 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
12151 tmp = expand_simple_binop (mode, AND,
12153 gen_int_mode (cf - ct, mode),
12154 copy_rtx (tmp), 1, OPTAB_DIRECT);
12156 tmp = expand_simple_binop (mode, PLUS,
12157 copy_rtx (tmp), GEN_INT (ct),
12158 copy_rtx (tmp), 1, OPTAB_DIRECT);
12161 if (!rtx_equal_p (tmp, out))
12162 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
12164 return 1; /* DONE */
12169 enum machine_mode cmp_mode = GET_MODE (ix86_compare_op0);
12172 tmp = ct, ct = cf, cf = tmp;
12175 if (SCALAR_FLOAT_MODE_P (cmp_mode))
12177 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
12179 /* We may be reversing an unordered compare to a normal compare, which
12180 is not valid in general (we may convert a non-trapping condition
12181 into a trapping one); however, on i386 we currently emit all
12182 comparisons unordered. */
12183 compare_code = reverse_condition_maybe_unordered (compare_code);
12184 code = reverse_condition_maybe_unordered (code);
12188 compare_code = reverse_condition (compare_code);
12189 code = reverse_condition (code);
12193 compare_code = UNKNOWN;
12194 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
12195 && CONST_INT_P (ix86_compare_op1))
12197 if (ix86_compare_op1 == const0_rtx
12198 && (code == LT || code == GE))
12199 compare_code = code;
12200 else if (ix86_compare_op1 == constm1_rtx)
12204 else if (code == GT)
12209 /* Optimize dest = (op0 < 0) ? -1 : cf. */
12210 if (compare_code != UNKNOWN
12211 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
12212 && (cf == -1 || ct == -1))
12214 /* If the lea code below could be used, only optimize
12215 if it results in a 2-insn sequence. */
12217 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
12218 || diff == 3 || diff == 5 || diff == 9)
12219 || (compare_code == LT && ct == -1)
12220 || (compare_code == GE && cf == -1))
12223 * notl op1 (if necessary)
12231 code = reverse_condition (code);
12234 out = emit_store_flag (out, code, ix86_compare_op0,
12235 ix86_compare_op1, VOIDmode, 0, -1);
12237 out = expand_simple_binop (mode, IOR,
12239 out, 1, OPTAB_DIRECT);
12240 if (out != operands[0])
12241 emit_move_insn (operands[0], out);
12243 return 1; /* DONE */
12248 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
12249 || diff == 3 || diff == 5 || diff == 9)
12250 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
12252 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
12258 * lea cf(dest*(ct-cf)),dest
12262 * This also catches the degenerate setcc-only case.
12268 out = emit_store_flag (out, code, ix86_compare_op0,
12269 ix86_compare_op1, VOIDmode, 0, 1);
12272 /* On x86_64 the lea instruction operates on Pmode, so we need
12273 the arithmetic done in the proper mode to match. */
12275 tmp = copy_rtx (out);
12279 out1 = copy_rtx (out);
12280 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
12284 tmp = gen_rtx_PLUS (mode, tmp, out1);
12290 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
12293 if (!rtx_equal_p (tmp, out))
12296 out = force_operand (tmp, copy_rtx (out));
12298 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
12300 if (!rtx_equal_p (out, operands[0]))
12301 emit_move_insn (operands[0], copy_rtx (out));
12303 return 1; /* DONE */
12307 * General case: Jumpful:
12308 * xorl dest,dest cmpl op1, op2
12309 * cmpl op1, op2 movl ct, dest
12310 * setcc dest jcc 1f
12311 * decl dest movl cf, dest
12312 * andl (cf-ct),dest 1:
12315 * Size 20. Size 14.
12317 * This is reasonably steep, but branch mispredict costs are
12318 * high on modern cpus, so consider failing only if optimizing
 * for size.
 */
12322 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
12323 && BRANCH_COST >= 2)
12327 enum machine_mode cmp_mode = GET_MODE (ix86_compare_op0);
12332 if (SCALAR_FLOAT_MODE_P (cmp_mode))
12334 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
12336 /* We may be reversing an unordered compare to a normal compare,
12337 which is not valid in general (we may convert a non-trapping
12338 condition into a trapping one); however, on i386 we currently
12339 emit all comparisons unordered. */
12340 code = reverse_condition_maybe_unordered (code);
12344 code = reverse_condition (code);
12345 if (compare_code != UNKNOWN)
12346 compare_code = reverse_condition (compare_code);
12350 if (compare_code != UNKNOWN)
12352 /* notl op1 (if needed)
12357 For x < 0 (resp. x <= -1) there will be no notl,
12358 so if possible swap the constants to get rid of the
   complement.
12360 True/false will be -1/0, while the code below (store flag
12361 followed by decrement) is 0/-1, so the constants need
12362 to be exchanged once more. */
12364 if (compare_code == GE || !cf)
12366 code = reverse_condition (code);
12371 HOST_WIDE_INT tmp = cf;
12376 out = emit_store_flag (out, code, ix86_compare_op0,
12377 ix86_compare_op1, VOIDmode, 0, -1);
12381 out = emit_store_flag (out, code, ix86_compare_op0,
12382 ix86_compare_op1, VOIDmode, 0, 1);
12384 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
12385 copy_rtx (out), 1, OPTAB_DIRECT);
12388 out = expand_simple_binop (mode, AND, copy_rtx (out),
12389 gen_int_mode (cf - ct, mode),
12390 copy_rtx (out), 1, OPTAB_DIRECT);
12392 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
12393 copy_rtx (out), 1, OPTAB_DIRECT);
12394 if (!rtx_equal_p (out, operands[0]))
12395 emit_move_insn (operands[0], copy_rtx (out));
12397 return 1; /* DONE */
12401 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
12403 /* Try a few more things with specific constants and a variable. */
12406 rtx var, orig_out, out, tmp;
12408 if (BRANCH_COST <= 2)
12409 return 0; /* FAIL */
12411 /* If one of the two operands is an interesting constant, load a
12412 constant with the above and mask it in with a logical operation. */
12414 if (CONST_INT_P (operands[2]))
12417 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
12418 operands[3] = constm1_rtx, op = and_optab;
12419 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
12420 operands[3] = const0_rtx, op = ior_optab;
12422 return 0; /* FAIL */
12424 else if (CONST_INT_P (operands[3]))
12427 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
12428 operands[2] = constm1_rtx, op = and_optab;
12429 else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
12430 operands[2] = const0_rtx, op = ior_optab;
12432 return 0; /* FAIL */
12435 return 0; /* FAIL */
12437 orig_out = operands[0];
12438 tmp = gen_reg_rtx (mode);
12441 /* Recurse to get the constant loaded. */
12442 if (ix86_expand_int_movcc (operands) == 0)
12443 return 0; /* FAIL */
12445 /* Mask in the interesting variable. */
12446 out = expand_binop (mode, op, var, tmp, orig_out, 0,
12448 if (!rtx_equal_p (out, orig_out))
12449 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
12451 return 1; /* DONE */
12455 * For comparison with above,
12465 if (! nonimmediate_operand (operands[2], mode))
12466 operands[2] = force_reg (mode, operands[2]);
12467 if (! nonimmediate_operand (operands[3], mode))
12468 operands[3] = force_reg (mode, operands[3]);
12470 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
12472 rtx tmp = gen_reg_rtx (mode);
12473 emit_move_insn (tmp, operands[3]);
12476 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
12478 rtx tmp = gen_reg_rtx (mode);
12479 emit_move_insn (tmp, operands[2]);
12483 if (! register_operand (operands[2], VOIDmode)
12485 || ! register_operand (operands[3], VOIDmode)))
12486 operands[2] = force_reg (mode, operands[2]);
12489 && ! register_operand (operands[3], VOIDmode))
12490 operands[3] = force_reg (mode, operands[3]);
12492 emit_insn (compare_seq);
12493 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
12494 gen_rtx_IF_THEN_ELSE (mode,
12495 compare_op, operands[2],
12498 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
12499 gen_rtx_IF_THEN_ELSE (mode,
12501 copy_rtx (operands[3]),
12502 copy_rtx (operands[0]))));
12504 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
12505 gen_rtx_IF_THEN_ELSE (mode,
12507 copy_rtx (operands[2]),
12508 copy_rtx (operands[0]))));
12510 return 1; /* DONE */
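/* Putting the constant/constant pieces together (illustrative): for
   "r = (a <u b) ? ct : cf" the carry-flag path above can produce a
   fully branchless selection of the shape

       cmpl  b, a
       sbbl  %eax, %eax          ; -1 or 0 mask
       andl  $(cf - ct), %eax
       addl  $ct, %eax

   modulo the ct/cf swap performed while canonicalizing LTU to GEU,
   with special diffs (1, -1 and lea-able values) decaying into the
   shorter add/or/lea forms handled earlier.  */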
12513 /* Swap, force into registers, or otherwise massage the two operands
12514 to an sse comparison with a mask result. Thus we differ a bit from
12515 ix86_prepare_fp_compare_args which expects to produce a flags result.
12517 The DEST operand exists to help determine whether to commute commutative
12518 operators. The POP0/POP1 operands are updated in place. The new
12519 comparison code is returned, or UNKNOWN if not implementable. */
12521 static enum rtx_code
12522 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
12523 rtx *pop0, rtx *pop1)
12531 /* We have no LTGT as an operator. We could implement it with
12532 NE & ORDERED, but this requires an extra temporary. It's
12533 not clear that it's worth it. */
12540 /* These are supported directly. */
12547 /* For commutative operators, try to canonicalize the destination
12548 operand to be first in the comparison - this helps reload to
12549 avoid extra moves. */
12550 if (!dest || !rtx_equal_p (dest, *pop1))
12558 /* These are not supported directly. Swap the comparison operands
12559 to transform into something that is supported. */
12563 code = swap_condition (code);
12567 gcc_unreachable ();
12573 /* Detect conditional moves that exactly match min/max operational
12574 semantics. Note that this is IEEE safe, as long as we don't
12575 interchange the operands.
12577 Returns FALSE if this conditional move doesn't match a MIN/MAX,
12578 and TRUE if the operation is successful and instructions are emitted. */
12581 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
12582 rtx cmp_op1, rtx if_true, rtx if_false)
12584 enum machine_mode mode;
12590 else if (code == UNGE)
12593 if_true = if_false;
12599 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
12601 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
12606 mode = GET_MODE (dest);
12608 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
12609 but MODE may be a vector mode and thus not appropriate. */
12610 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
12612 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
12615 if_true = force_reg (mode, if_true);
12616 v = gen_rtvec (2, if_true, if_false);
12617 tmp = gen_rtx_UNSPEC (mode, v, u);
12621 code = is_min ? SMIN : SMAX;
12622 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
12625 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
12629 /* Expand an sse vector comparison. Return the register with the result. */
12632 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
12633 rtx op_true, rtx op_false)
12635 enum machine_mode mode = GET_MODE (dest);
12638 cmp_op0 = force_reg (mode, cmp_op0);
12639 if (!nonimmediate_operand (cmp_op1, mode))
12640 cmp_op1 = force_reg (mode, cmp_op1);
12643 || reg_overlap_mentioned_p (dest, op_true)
12644 || reg_overlap_mentioned_p (dest, op_false))
12645 dest = gen_reg_rtx (mode);
12647 x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
12648 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
12653 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
12654 operations. This is used for both scalar and vector conditional moves. */
12657 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
12659 enum machine_mode mode = GET_MODE (dest);
12662 if (op_false == CONST0_RTX (mode))
12664 op_true = force_reg (mode, op_true);
12665 x = gen_rtx_AND (mode, cmp, op_true);
12666 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
12668 else if (op_true == CONST0_RTX (mode))
12670 op_false = force_reg (mode, op_false);
12671 x = gen_rtx_NOT (mode, cmp);
12672 x = gen_rtx_AND (mode, x, op_false);
12673 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
12677 op_true = force_reg (mode, op_true);
12678 op_false = force_reg (mode, op_false);
12680 t2 = gen_reg_rtx (mode);
12682 t3 = gen_reg_rtx (mode);
12686 x = gen_rtx_AND (mode, op_true, cmp);
12687 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
12689 x = gen_rtx_NOT (mode, cmp);
12690 x = gen_rtx_AND (mode, x, op_false);
12691 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
12693 x = gen_rtx_IOR (mode, t3, t2);
12694 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
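/* The general case above is the classic SSE select idiom; for V4SF
   it corresponds roughly to (illustrative, with pseudo register
   names)

       movaps  %cmp, %tmp
       andps   %op_true, %tmp     ; keep true values under the mask
       andnps  %op_false, %cmp    ; keep false values elsewhere
       orps    %tmp, %cmp         ; merge the two halves

   which is also why a constant-zero arm collapses to a single
   and/andn.  */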
12698 /* Expand a floating-point conditional move. Return true if successful. */
12701 ix86_expand_fp_movcc (rtx operands[])
12703 enum machine_mode mode = GET_MODE (operands[0]);
12704 enum rtx_code code = GET_CODE (operands[1]);
12705 rtx tmp, compare_op, second_test, bypass_test;
12707 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
12709 enum machine_mode cmode;
12711 /* Since we've no cmove for sse registers, don't force bad register
12712 allocation just to gain access to it. Deny movcc when the
12713 comparison mode doesn't match the move mode. */
12714 cmode = GET_MODE (ix86_compare_op0);
12715 if (cmode == VOIDmode)
12716 cmode = GET_MODE (ix86_compare_op1);
12720 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
12722 &ix86_compare_op1);
12723 if (code == UNKNOWN)
12726 if (ix86_expand_sse_fp_minmax (operands[0], code, ix86_compare_op0,
12727 ix86_compare_op1, operands[2],
12731 tmp = ix86_expand_sse_cmp (operands[0], code, ix86_compare_op0,
12732 ix86_compare_op1, operands[2], operands[3]);
12733 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
12737 /* The floating point conditional move instructions don't directly
12738 support conditions resulting from a signed integer comparison. */
12740 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
12745 if (!fcmov_comparison_operator (compare_op, VOIDmode))
12747 gcc_assert (!second_test && !bypass_test);
12748 tmp = gen_reg_rtx (QImode);
12749 ix86_expand_setcc (code, tmp);
12751 ix86_compare_op0 = tmp;
12752 ix86_compare_op1 = const0_rtx;
12753 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
12755 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
12757 tmp = gen_reg_rtx (mode);
12758 emit_move_insn (tmp, operands[3]);
12761 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
12763 tmp = gen_reg_rtx (mode);
12764 emit_move_insn (tmp, operands[2]);
12768 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
12769 gen_rtx_IF_THEN_ELSE (mode, compare_op,
12770 operands[2], operands[3])));
12772 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
12773 gen_rtx_IF_THEN_ELSE (mode, bypass_test,
12774 operands[3], operands[0])));
12776 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
12777 gen_rtx_IF_THEN_ELSE (mode, second_test,
12778 operands[2], operands[0])));
12783 /* Expand a floating-point vector conditional move; a vcond operation
12784 rather than a movcc operation. */
12787 ix86_expand_fp_vcond (rtx operands[])
12789 enum rtx_code code = GET_CODE (operands[3]);
12792 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
12793 &operands[4], &operands[5]);
12794 if (code == UNKNOWN)
12797 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
12798 operands[5], operands[1], operands[2]))
12801 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
12802 operands[1], operands[2]);
12803 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
12807 /* Expand a signed/unsigned integral vector conditional move. */
12810 ix86_expand_int_vcond (rtx operands[])
12812 enum machine_mode mode = GET_MODE (operands[0]);
12813 enum rtx_code code = GET_CODE (operands[3]);
12814 bool negate = false;
12817 cop0 = operands[4];
12818 cop1 = operands[5];
12820 /* Canonicalize the comparison to EQ, GT, GTU. */
12831 code = reverse_condition (code);
12837 code = reverse_condition (code);
12843 code = swap_condition (code);
12844 x = cop0, cop0 = cop1, cop1 = x;
12848 gcc_unreachable ();
12851 /* Only SSE4.1/SSE4.2 support V2DImode. */
12852 if (mode == V2DImode)
12857 /* SSE4.1 supports EQ. */
12858 if (!TARGET_SSE4_1)
12864 /* SSE4.2 supports GT/GTU. */
12865 if (!TARGET_SSE4_2)
12870 gcc_unreachable ();
12874 /* Unsigned parallel compare is not supported by the hardware. Play some
12875 tricks to turn this into a signed comparison against 0. */
12878 cop0 = force_reg (mode, cop0);
12887 /* Perform a parallel modulo subtraction. */
12888 t1 = gen_reg_rtx (mode);
12889 emit_insn ((mode == V4SImode
12891 : gen_subv2di3) (t1, cop0, cop1));
12893 /* Extract the original sign bit of op0. */
12894 mask = ix86_build_signbit_mask (GET_MODE_INNER (mode),
12896 t2 = gen_reg_rtx (mode);
12897 emit_insn ((mode == V4SImode
12899 : gen_andv2di3) (t2, cop0, mask));
12901 /* XOR it back into the result of the subtraction. This results
12902 in the sign bit set iff we saw unsigned underflow. */
12903 x = gen_reg_rtx (mode);
12904 emit_insn ((mode == V4SImode
12906 : gen_xorv2di3) (x, t1, t2));
12914 /* Perform a parallel unsigned saturating subtraction. */
12915 x = gen_reg_rtx (mode);
12916 emit_insn (gen_rtx_SET (VOIDmode, x,
12917 gen_rtx_US_MINUS (mode, cop0, cop1)));
12924 gcc_unreachable ();
12928 cop1 = CONST0_RTX (mode);
12931 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
12932 operands[1+negate], operands[2-negate]);
12934 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
12935 operands[2-negate]);
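/* Note on the unsigned trick above (illustrative): for V16QI/V8HI it
   relies on the identity

       a >u b   <==>   subus (a, b) != 0

   since psubusb/psubusw saturate at zero, the difference is nonzero
   exactly when a is strictly greater.  The V4SI/V2DI path has no
   saturating subtract, so it instead recovers the borrow of a plain
   subtraction from the operands' sign bits.  */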
12939 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
12940 true if we should do zero extension, else sign extension. HIGH_P is
12941 true if we want the N/2 high elements, else the low elements. */
12944 ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
12946 enum machine_mode imode = GET_MODE (operands[1]);
12947 rtx (*unpack)(rtx, rtx, rtx);
12954 unpack = gen_vec_interleave_highv16qi;
12956 unpack = gen_vec_interleave_lowv16qi;
12960 unpack = gen_vec_interleave_highv8hi;
12962 unpack = gen_vec_interleave_lowv8hi;
12966 unpack = gen_vec_interleave_highv4si;
12968 unpack = gen_vec_interleave_lowv4si;
12971 gcc_unreachable ();
12974 dest = gen_lowpart (imode, operands[0]);
12977 se = force_reg (imode, CONST0_RTX (imode));
12979 se = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
12980 operands[1], pc_rtx, pc_rtx);
12982 emit_insn (unpack (dest, operands[1], se));
12985 /* This function performs the same task as ix86_expand_sse_unpack,
12986 but with SSE4.1 instructions. */
12989 ix86_expand_sse4_unpack (rtx operands[2], bool unsigned_p, bool high_p)
12991 enum machine_mode imode = GET_MODE (operands[1]);
12992 rtx (*unpack)(rtx, rtx);
12999 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
13001 unpack = gen_sse4_1_extendv8qiv8hi2;
13005 unpack = gen_sse4_1_zero_extendv4hiv4si2;
13007 unpack = gen_sse4_1_extendv4hiv4si2;
13011 unpack = gen_sse4_1_zero_extendv2siv2di2;
13013 unpack = gen_sse4_1_extendv2siv2di2;
13016 gcc_unreachable ();
13019 dest = operands[0];
13022 /* Shift higher 8 bytes to lower 8 bytes. */
13023 src = gen_reg_rtx (imode);
13024 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, src),
13025 gen_lowpart (TImode, operands[1]),
13031 emit_insn (unpack (dest, src));
13034 /* Expand conditional increment or decrement using adc/sbb instructions.
13035 The default case using setcc followed by the conditional move can be
13036 done by generic code. */
13038 ix86_expand_int_addcc (rtx operands[])
13040 enum rtx_code code = GET_CODE (operands[1]);
13042 rtx val = const0_rtx;
13043 bool fpcmp = false;
13044 enum machine_mode mode = GET_MODE (operands[0]);
13046 if (operands[3] != const1_rtx
13047 && operands[3] != constm1_rtx)
13049 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
13050 ix86_compare_op1, &compare_op))
13052 code = GET_CODE (compare_op);
13054 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
13055 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
13058 code = ix86_fp_compare_code_to_integer (code);
13065 PUT_CODE (compare_op,
13066 reverse_condition_maybe_unordered
13067 (GET_CODE (compare_op)));
13069 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
13071 PUT_MODE (compare_op, mode);
13073 /* Construct either adc or sbb insn. */
13074 if ((code == LTU) == (operands[3] == constm1_rtx))
13076 switch (GET_MODE (operands[0]))
13079 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
13082 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
13085 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
13088 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
13091 gcc_unreachable ();
13096 switch (GET_MODE (operands[0]))
13099 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
13102 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
13105 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
13108 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
13111 gcc_unreachable ();
13114 return 1; /* DONE */
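/* Net effect of the above (illustrative): a source-level
   "x += (a <u b);" compiles down to

       cmpl  b, a
       adcl  $0, x        ; the carry feeds the increment directly

   and the mirrored cases use sbb instead, which is the point of
   bypassing the generic setcc-based expansion.  */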
13118 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
13119 works for floating point parameters and non-offsettable memories.
13120 For pushes, it returns just stack offsets; the values will be saved
13121 in the right order. At most three parts are generated. */
13124 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
13129 size = mode == XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
13131 size = (GET_MODE_SIZE (mode) + 4) / 8;
13133 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
13134 gcc_assert (size >= 2 && size <= 3);
13136 /* Optimize constant pool references to immediates. This is used by fp
13137 moves, which force all constants to memory to allow combining. */
13138 if (MEM_P (operand) && MEM_READONLY_P (operand))
13140 rtx tmp = maybe_get_pool_constant (operand);
13145 if (MEM_P (operand) && !offsettable_memref_p (operand))
13147 /* The only non-offsettable memories we handle are pushes. */
13148 int ok = push_operand (operand, VOIDmode);
13152 operand = copy_rtx (operand);
13153 PUT_MODE (operand, Pmode);
13154 parts[0] = parts[1] = parts[2] = operand;
13158 if (GET_CODE (operand) == CONST_VECTOR)
13160 enum machine_mode imode = int_mode_for_mode (mode);
13161 /* Caution: if we looked through a constant pool memory above,
13162 the operand may actually have a different mode now. That's
13163 ok, since we want to pun this all the way back to an integer. */
13164 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
13165 gcc_assert (operand != NULL);
13171 if (mode == DImode)
13172 split_di (&operand, 1, &parts[0], &parts[1]);
13175 if (REG_P (operand))
13177 gcc_assert (reload_completed);
13178 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
13179 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
13181 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
13183 else if (offsettable_memref_p (operand))
13185 operand = adjust_address (operand, SImode, 0);
13186 parts[0] = operand;
13187 parts[1] = adjust_address (operand, SImode, 4);
13189 parts[2] = adjust_address (operand, SImode, 8);
13191 else if (GET_CODE (operand) == CONST_DOUBLE)
13196 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
13200 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
13201 parts[2] = gen_int_mode (l[2], SImode);
13204 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
13207 gcc_unreachable ();
13209 parts[1] = gen_int_mode (l[1], SImode);
13210 parts[0] = gen_int_mode (l[0], SImode);
13213 gcc_unreachable ();
13218 if (mode == TImode)
13219 split_ti (&operand, 1, &parts[0], &parts[1]);
13220 if (mode == XFmode || mode == TFmode)
13222 enum machine_mode upper_mode = mode == XFmode ? SImode : DImode;
13223 if (REG_P (operand))
13225 gcc_assert (reload_completed);
13226 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
13227 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
13229 else if (offsettable_memref_p (operand))
13231 operand = adjust_address (operand, DImode, 0);
13232 parts[0] = operand;
13233 parts[1] = adjust_address (operand, upper_mode, 8);
13235 else if (GET_CODE (operand) == CONST_DOUBLE)
13240 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
13241 real_to_target (l, &r, mode);
13243 /* Do not use shift by 32 to avoid warning on 32bit systems. */
13244 if (HOST_BITS_PER_WIDE_INT >= 64)
13247 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
13248 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
13251 parts[0] = immed_double_const (l[0], l[1], DImode);
13253 if (upper_mode == SImode)
13254 parts[1] = gen_int_mode (l[2], SImode);
13255 else if (HOST_BITS_PER_WIDE_INT >= 64)
13258 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
13259 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
13262 parts[1] = immed_double_const (l[2], l[3], DImode);
13265 gcc_unreachable ();
13272 /* Emit insns to perform a move or push of DI, DF, and XF values.
13273 Return false when normal moves are needed; true when all required
13274 insns have been emitted. Operands 2-4 contain the input values
13275 in the correct order; operands 5-7 contain the output values. */
13278 ix86_split_long_move (rtx operands[])
13283 int collisions = 0;
13284 enum machine_mode mode = GET_MODE (operands[0]);
13286 /* The DFmode expanders may ask us to move a double.
13287 For a 64-bit target this is a single move. By hiding the fact
13288 here we simplify the i386.md splitters. */
13289 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
13291 /* Optimize constant pool references to immediates. This is used by
13292 fp moves, which force all constants to memory to allow combining. */
13294 if (MEM_P (operands[1])
13295 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
13296 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
13297 operands[1] = get_pool_constant (XEXP (operands[1], 0));
13298 if (push_operand (operands[0], VOIDmode))
13300 operands[0] = copy_rtx (operands[0]);
13301 PUT_MODE (operands[0], Pmode);
13304 operands[0] = gen_lowpart (DImode, operands[0]);
13305 operands[1] = gen_lowpart (DImode, operands[1]);
13306 emit_move_insn (operands[0], operands[1]);
13310 /* The only non-offsettable memory we handle is push. */
13311 if (push_operand (operands[0], VOIDmode))
13314 gcc_assert (!MEM_P (operands[0])
13315 || offsettable_memref_p (operands[0]));
13317 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
13318 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
13320 /* When emitting a push, take care with source operands on the stack. */
13321 if (push && MEM_P (operands[1])
13322 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
13325 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
13326 XEXP (part[1][2], 0));
13327 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
13328 XEXP (part[1][1], 0));
13331 /* We need to do the copy in the right order in case an address register
13332 of the source overlaps the destination. */
13333 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
13335 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
13337 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
13340 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
13343 /* A collision in the middle part can be handled by reordering. */
13344 if (collisions == 1 && nparts == 3
13345 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
13348 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
13349 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
13352 /* If there are more collisions, we can't handle them by reordering.
13353 Do an lea to the last part and use only one colliding move. */
13354 else if (collisions > 1)
13360 base = part[0][nparts - 1];
13362 /* Handle the case when the last part isn't valid for lea.
13363 Happens in 64-bit mode storing the 12-byte XFmode. */
13364 if (GET_MODE (base) != Pmode)
13365 base = gen_rtx_REG (Pmode, REGNO (base));
13367 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
13368 part[1][0] = replace_equiv_address (part[1][0], base);
13369 part[1][1] = replace_equiv_address (part[1][1],
13370 plus_constant (base, UNITS_PER_WORD));
13372 part[1][2] = replace_equiv_address (part[1][2],
13373 plus_constant (base, 8));
13383 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
13384 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
13385 emit_move_insn (part[0][2], part[1][2]);
13390 /* In 64-bit mode we don't have a 32-bit push available. In case this is
13391 a register, it is OK - we will just use the larger counterpart. We also
13392 retype memory - this comes from an attempt to avoid the REX prefix on
13393 moving the second half of a TFmode value. */
13394 if (GET_MODE (part[1][1]) == SImode)
13396 switch (GET_CODE (part[1][1]))
13399 part[1][1] = adjust_address (part[1][1], DImode, 0);
13403 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
13407 gcc_unreachable ();
13410 if (GET_MODE (part[1][0]) == SImode)
13411 part[1][0] = part[1][1];
13414 emit_move_insn (part[0][1], part[1][1]);
13415 emit_move_insn (part[0][0], part[1][0]);
13419 /* Choose the correct order so as not to overwrite the source before it is copied. */
13420 if ((REG_P (part[0][0])
13421 && REG_P (part[1][1])
13422 && (REGNO (part[0][0]) == REGNO (part[1][1])
13424 && REGNO (part[0][0]) == REGNO (part[1][2]))))
13426 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
13430 operands[2] = part[0][2];
13431 operands[3] = part[0][1];
13432 operands[4] = part[0][0];
13433 operands[5] = part[1][2];
13434 operands[6] = part[1][1];
13435 operands[7] = part[1][0];
13439 operands[2] = part[0][1];
13440 operands[3] = part[0][0];
13441 operands[5] = part[1][1];
13442 operands[6] = part[1][0];
13449 operands[2] = part[0][0];
13450 operands[3] = part[0][1];
13451 operands[4] = part[0][2];
13452 operands[5] = part[1][0];
13453 operands[6] = part[1][1];
13454 operands[7] = part[1][2];
13458 operands[2] = part[0][0];
13459 operands[3] = part[0][1];
13460 operands[5] = part[1][0];
13461 operands[6] = part[1][1];
13465 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
13468 if (CONST_INT_P (operands[5])
13469 && operands[5] != const0_rtx
13470 && REG_P (operands[2]))
13472 if (CONST_INT_P (operands[6])
13473 && INTVAL (operands[6]) == INTVAL (operands[5]))
13474 operands[6] = operands[2];
13477 && CONST_INT_P (operands[7])
13478 && INTVAL (operands[7]) == INTVAL (operands[5]))
13479 operands[7] = operands[2];
13483 && CONST_INT_P (operands[6])
13484 && operands[6] != const0_rtx
13485 && REG_P (operands[3])
13486 && CONST_INT_P (operands[7])
13487 && INTVAL (operands[7]) == INTVAL (operands[6]))
13488 operands[7] = operands[3];
13491 emit_move_insn (operands[2], operands[5]);
13492 emit_move_insn (operands[3], operands[6]);
13494 emit_move_insn (operands[4], operands[7]);
13499 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
13500 left shift by a constant, either using a single shift or
13501 a sequence of add instructions. */
13504 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
13508 emit_insn ((mode == DImode
13510 : gen_adddi3) (operand, operand, operand));
13512 else if (!optimize_size
13513 && count * ix86_cost->add <= ix86_cost->shift_const)
13516 for (i = 0; i < count; i++)
13518 emit_insn ((mode == DImode
13520 : gen_adddi3) (operand, operand, operand));
13524 emit_insn ((mode == DImode
13526 : gen_ashldi3) (operand, operand, GEN_INT (count)));
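/* Cost trade-off example for the helper above (illustrative):
   "x <<= 2" may be emitted either as

       addl  %eax, %eax
       addl  %eax, %eax      ; 2 * ix86_cost->add

   or as the single "sall $2, %eax" (ix86_cost->shift_const); the add
   chain is chosen only when the active cost table says it is no more
   expensive and we are not optimizing for size.  */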
13530 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
13532 rtx low[2], high[2];
13534 const int single_width = mode == DImode ? 32 : 64;
13536 if (CONST_INT_P (operands[2]))
13538 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
13539 count = INTVAL (operands[2]) & (single_width * 2 - 1);
13541 if (count >= single_width)
13543 emit_move_insn (high[0], low[1]);
13544 emit_move_insn (low[0], const0_rtx);
13546 if (count > single_width)
13547 ix86_expand_ashl_const (high[0], count - single_width, mode);
13551 if (!rtx_equal_p (operands[0], operands[1]))
13552 emit_move_insn (operands[0], operands[1]);
13553 emit_insn ((mode == DImode
13555 : gen_x86_64_shld) (high[0], low[0], GEN_INT (count)));
13556 ix86_expand_ashl_const (low[0], count, mode);
13561 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
13563 if (operands[1] == const1_rtx)
13565 /* Assuming we've chosen QImode-capable registers, 1 << N
13566 can be done with two 32/64-bit shifts, no branches, no cmoves. */
13567 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
13569 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
13571 ix86_expand_clear (low[0]);
13572 ix86_expand_clear (high[0]);
13573 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (single_width)));
13575 d = gen_lowpart (QImode, low[0]);
13576 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
13577 s = gen_rtx_EQ (QImode, flags, const0_rtx);
13578 emit_insn (gen_rtx_SET (VOIDmode, d, s));
13580 d = gen_lowpart (QImode, high[0]);
13581 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
13582 s = gen_rtx_NE (QImode, flags, const0_rtx);
13583 emit_insn (gen_rtx_SET (VOIDmode, d, s));
13586 /* Otherwise, we can get the same results by manually performing
13587 a bit extract operation on bit 5/6, and then performing the two
13588 shifts. The two methods of getting 0/1 into low/high are exactly
13589 the same size. Avoiding the shift in the bit extract case helps
13590 pentium4 a bit; no one else seems to care much either way. */
13595 if (TARGET_PARTIAL_REG_STALL && !optimize_size)
13596 x = gen_rtx_ZERO_EXTEND (mode == DImode ? SImode : DImode, operands[2]);
13598 x = gen_lowpart (mode == DImode ? SImode : DImode, operands[2]);
13599 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
13601 emit_insn ((mode == DImode
13603 : gen_lshrdi3) (high[0], high[0], GEN_INT (mode == DImode ? 5 : 6)));
13604 emit_insn ((mode == DImode
13606 : gen_anddi3) (high[0], high[0], GEN_INT (1)));
13607 emit_move_insn (low[0], high[0]);
13608 emit_insn ((mode == DImode
13610 : gen_xordi3) (low[0], low[0], GEN_INT (1)));
13613 emit_insn ((mode == DImode
13615 : gen_ashldi3) (low[0], low[0], operands[2]));
13616 emit_insn ((mode == DImode
13618 : gen_ashldi3) (high[0], high[0], operands[2]));
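/* Worked example of the bit-extract path above, for DImode split into
   32-bit halves: for 1 << N we compute high = (N >> 5) & 1 and
   low = high ^ 1, then shift both halves left by N (the hardware masks
   the count to 0..31).  For N == 35, bit 5 of the count is set, so
   high = 1 and low = 0, and the final shifts by 35 & 31 == 3 leave
   high:low == 0x00000008:0x00000000, i.e. 1 << 35.  */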
13622 if (operands[1] == constm1_rtx)
13624 /* For -1 << N, we can avoid the shld instruction, because we
13625 know that we're shifting 0...31/63 ones into a -1. */
13626 emit_move_insn (low[0], constm1_rtx);
13628 emit_move_insn (high[0], low[0]);
13630 emit_move_insn (high[0], constm1_rtx);
13634 if (!rtx_equal_p (operands[0], operands[1]))
13635 emit_move_insn (operands[0], operands[1]);
13637 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
13638 emit_insn ((mode == DImode
13640 : gen_x86_64_shld) (high[0], low[0], operands[2]));
13643 emit_insn ((mode == DImode ? gen_ashlsi3
13644 : gen_ashldi3) (low[0], low[0], operands[2]));
13645 if (TARGET_CMOVE && scratch)
13647 ix86_expand_clear (scratch);
13648 emit_insn ((mode == DImode
13649 ? gen_x86_shift_adj_1
13650 : gen_x86_64_shift_adj) (high[0], low[0], operands[2], scratch));
13653 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
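/* The variable-count path above relies on the hardware masking shift
   counts to the half width: shld/shl give the correct result for
   counts 0..31 (0..63 for TImode), and the shift_adj pattern then
   tests bit 5 (resp. bit 6) of the count and, via cmov or a branch,
   moves LOW into HIGH and clears LOW when the count is 32 (resp. 64)
   or more.  */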
13657 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
13659 rtx low[2], high[2];
13661 const int single_width = mode == DImode ? 32 : 64;
13663 if (CONST_INT_P (operands[2]))
13665 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
13666 count = INTVAL (operands[2]) & (single_width * 2 - 1);
13668 if (count == single_width * 2 - 1)
13670 emit_move_insn (high[0], high[1]);
13671 emit_insn ((mode == DImode
13673 : gen_ashrdi3) (high[0], high[0],
13674 GEN_INT (single_width - 1)));
13675 emit_move_insn (low[0], high[0]);
13678 else if (count >= single_width)
13680 emit_move_insn (low[0], high[1]);
13681 emit_move_insn (high[0], low[0]);
13682 emit_insn ((mode == DImode
13684 : gen_ashrdi3) (high[0], high[0],
13685 GEN_INT (single_width - 1)));
13686 if (count > single_width)
13687 emit_insn ((mode == DImode
13689 : gen_ashrdi3) (low[0], low[0],
13690 GEN_INT (count - single_width)));
13694 if (!rtx_equal_p (operands[0], operands[1]))
13695 emit_move_insn (operands[0], operands[1]);
13696 emit_insn ((mode == DImode
13698 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
13699 emit_insn ((mode == DImode
13701 : gen_ashrdi3) (high[0], high[0], GEN_INT (count)));
13706 if (!rtx_equal_p (operands[0], operands[1]))
13707 emit_move_insn (operands[0], operands[1]);
13709 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
13711 emit_insn ((mode == DImode
13713 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
13714 emit_insn ((mode == DImode
13716 : gen_ashrdi3) (high[0], high[0], operands[2]));
13718 if (TARGET_CMOVE && scratch)
13720 emit_move_insn (scratch, high[0]);
13721 emit_insn ((mode == DImode
13723 : gen_ashrdi3) (scratch, scratch,
13724 GEN_INT (single_width - 1)));
13725 emit_insn ((mode == DImode
13726 ? gen_x86_shift_adj_1
13727 : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
13731 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
13736 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
13738 rtx low[2], high[2];
13740 const int single_width = mode == DImode ? 32 : 64;
13742 if (CONST_INT_P (operands[2]))
13744 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
13745 count = INTVAL (operands[2]) & (single_width * 2 - 1);
13747 if (count >= single_width)
13749 emit_move_insn (low[0], high[1]);
13750 ix86_expand_clear (high[0]);
13752 if (count > single_width)
13753 emit_insn ((mode == DImode
13755 : gen_lshrdi3) (low[0], low[0],
13756 GEN_INT (count - single_width)));
13760 if (!rtx_equal_p (operands[0], operands[1]))
13761 emit_move_insn (operands[0], operands[1]);
13762 emit_insn ((mode == DImode
13764 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
13765 emit_insn ((mode == DImode
13767 : gen_lshrdi3) (high[0], high[0], GEN_INT (count)));
13772 if (!rtx_equal_p (operands[0], operands[1]))
13773 emit_move_insn (operands[0], operands[1]);
13775 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
13777 emit_insn ((mode == DImode
13779 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
13780 emit_insn ((mode == DImode
13782 : gen_lshrdi3) (high[0], high[0], operands[2]));
13784 /* Heh. By reversing the arguments, we can reuse this pattern. */
13785 if (TARGET_CMOVE && scratch)
13787 ix86_expand_clear (scratch);
13788 emit_insn ((mode == DImode
13789 ? gen_x86_shift_adj_1
13790 : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
13794 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
13798 /* Predict the just-emitted jump instruction to be taken with probability PROB. */
13800 predict_jump (int prob)
13802 rtx insn = get_last_insn ();
13803 gcc_assert (JUMP_P (insn));
13805 = gen_rtx_EXPR_LIST (REG_BR_PROB,
13810 /* Helper function for the string operations below. Test whether VARIABLE
13811 is aligned to VALUE bytes. If so, jump to the label. */
13813 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
13815 rtx label = gen_label_rtx ();
13816 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
13817 if (GET_MODE (variable) == DImode)
13818 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
13820 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
13821 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
13824 predict_jump (REG_BR_PROB_BASE * 50 / 100);
13826 predict_jump (REG_BR_PROB_BASE * 90 / 100);
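/* Illustrative use: ix86_expand_aligntest (count, 4, true) emits
   roughly "testl $4, %count; jz .Lskip", skipping the 4-byte chunk of
   an epilogue when bit 2 of the residual count is clear.
   REG_BR_PROB_BASE is 10000, so the 50% and 90% predictions above
   attach probabilities 5000 and 9000 to the jump.  */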
13830 /* Adjust COUNTREG by VALUE. */
13832 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
13834 if (GET_MODE (countreg) == DImode)
13835 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
13837 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
13840 /* Zero-extend the possibly-SImode EXP to a Pmode register. */
13842 ix86_zero_extend_to_Pmode (rtx exp)
13845 if (GET_MODE (exp) == VOIDmode)
13846 return force_reg (Pmode, exp);
13847 if (GET_MODE (exp) == Pmode)
13848 return copy_to_mode_reg (Pmode, exp);
13849 r = gen_reg_rtx (Pmode);
13850 emit_insn (gen_zero_extendsidi2 (r, exp));
13854 /* Divide COUNTREG by SCALE. */
13856 scale_counter (rtx countreg, int scale)
13859 rtx piece_size_mask;
13863 if (CONST_INT_P (countreg))
13864 return GEN_INT (INTVAL (countreg) / scale);
13865 gcc_assert (REG_P (countreg));
13867 piece_size_mask = GEN_INT (scale - 1);
13868 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
13869 GEN_INT (exact_log2 (scale)),
13870 NULL, 1, OPTAB_DIRECT);
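/* E.g. scale_counter (countreg, 4) either folds the division for a
   constant count or emits "shrl $2, %countreg", converting a byte
   count into a count of SImode chunks for rep movsl/stosl.  */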
13874 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
13875 DImode for constant loop counts. */
13877 static enum machine_mode
13878 counter_mode (rtx count_exp)
13880 if (GET_MODE (count_exp) != VOIDmode)
13881 return GET_MODE (count_exp);
13882 if (GET_CODE (count_exp) != CONST_INT)
13884 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
13889 /* When SRCPTR is non-NULL, output a simple loop to move memory
13890 from SRCPTR to DESTPTR via chunks of MODE, unrolled UNROLL times;
13891 the overall size is COUNT, specified in bytes. When SRCPTR is NULL, output the
13892 equivalent loop to set memory to VALUE (supposed to be in MODE).
13894 The size is rounded down to a whole number of chunks moved at once.
13895 SRCMEM and DESTMEM provide MEM rtx to supply proper aliasing info. */
13899 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
13900 rtx destptr, rtx srcptr, rtx value,
13901 rtx count, enum machine_mode mode, int unroll,
13904 rtx out_label, top_label, iter, tmp;
13905 enum machine_mode iter_mode = counter_mode (count);
13906 rtx piece_size = GEN_INT (GET_MODE_SIZE (mode) * unroll);
13907 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
13913 top_label = gen_label_rtx ();
13914 out_label = gen_label_rtx ();
13915 iter = gen_reg_rtx (iter_mode);
13917 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
13918 NULL, 1, OPTAB_DIRECT);
13919 /* Those two should combine. */
13920 if (piece_size == const1_rtx)
13922 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
13924 predict_jump (REG_BR_PROB_BASE * 10 / 100);
13926 emit_move_insn (iter, const0_rtx);
13928 emit_label (top_label);
13930 tmp = convert_modes (Pmode, iter_mode, iter, true);
13931 x_addr = gen_rtx_PLUS (Pmode, destptr, tmp);
13932 destmem = change_address (destmem, mode, x_addr);
13936 y_addr = gen_rtx_PLUS (Pmode, srcptr, copy_rtx (tmp));
13937 srcmem = change_address (srcmem, mode, y_addr);
13939 /* When unrolling for chips that reorder memory reads and writes,
13940 we can save registers by using a single temporary.
13941 Also, using 4 temporaries is overkill in 32-bit mode. */
13942 if (!TARGET_64BIT && 0)
13944 for (i = 0; i < unroll; i++)
13949 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
13951 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
13953 emit_move_insn (destmem, srcmem);
13959 gcc_assert (unroll <= 4);
13960 for (i = 0; i < unroll; i++)
13962 tmpreg[i] = gen_reg_rtx (mode);
13966 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
13968 emit_move_insn (tmpreg[i], srcmem);
13970 for (i = 0; i < unroll; i++)
13975 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
13977 emit_move_insn (destmem, tmpreg[i]);
13982 for (i = 0; i < unroll; i++)
13986 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
13987 emit_move_insn (destmem, value);
13990 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
13991 true, OPTAB_LIB_WIDEN);
13993 emit_move_insn (iter, tmp);
13995 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
13997 if (expected_size != -1)
13999 expected_size /= GET_MODE_SIZE (mode) * unroll;
14000 if (expected_size == 0)
14002 else if (expected_size > REG_BR_PROB_BASE)
14003 predict_jump (REG_BR_PROB_BASE - 1);
14005 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
14008 predict_jump (REG_BR_PROB_BASE * 80 / 100);
14009 iter = ix86_zero_extend_to_Pmode (iter);
14010 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
14011 true, OPTAB_LIB_WIDEN);
14012 if (tmp != destptr)
14013 emit_move_insn (destptr, tmp);
14016 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
14017 true, OPTAB_LIB_WIDEN);
14019 emit_move_insn (srcptr, tmp);
14021 emit_label (out_label);
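/* Shape of the loop emitted above (a sketch; names are illustrative):

       size = count & ~(piece_size - 1);
       if (size == 0) goto out;     // guard emitted only when piece_size == 1
       iter = 0;
     top:
       copy (or store VALUE into) UNROLL chunks of MODE at dst+iter, src+iter;
       iter += piece_size;
       if (iter < size) goto top;
       dst += iter;  src += iter;
     out:
   */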
14024 /* Output a "rep; mov" instruction.
14025 Arguments have the same meaning as for the previous function. */
14027 expand_movmem_via_rep_mov (rtx destmem, rtx srcmem,
14028 rtx destptr, rtx srcptr,
14030 enum machine_mode mode)
14036 /* If the size is known, it is shorter to use rep movs. */
14037 if (mode == QImode && CONST_INT_P (count)
14038 && !(INTVAL (count) & 3))
14041 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
14042 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
14043 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
14044 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
14045 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
14046 if (mode != QImode)
14048 destexp = gen_rtx_ASHIFT (Pmode, countreg,
14049 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
14050 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
14051 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
14052 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
14053 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
14057 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
14058 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
14060 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
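/* For example, with mode == SImode the sequence emitted here boils
   down to "rep movsl" with the scaled count in %ecx, and DESTEXP and
   SRCEXP describe the final pointer values (destptr + (countreg << 2)
   and srcptr + (countreg << 2)) so that the side effects of the
   rep_mov pattern on the pointer registers are visible to the RTL
   dataflow.  */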
14064 /* Output a "rep; stos" instruction.
14065 Arguments have the same meaning as for the previous function. */
14067 expand_setmem_via_rep_stos (rtx destmem, rtx destptr, rtx value,
14069 enum machine_mode mode)
14074 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
14075 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
14076 value = force_reg (mode, gen_lowpart (mode, value));
14077 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
14078 if (mode != QImode)
14080 destexp = gen_rtx_ASHIFT (Pmode, countreg,
14081 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
14082 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
14085 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
14086 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
14090 emit_strmov (rtx destmem, rtx srcmem,
14091 rtx destptr, rtx srcptr, enum machine_mode mode, int offset)
14093 rtx src = adjust_automodify_address_nv (srcmem, mode, srcptr, offset);
14094 rtx dest = adjust_automodify_address_nv (destmem, mode, destptr, offset);
14095 emit_insn (gen_strmov (destptr, dest, srcptr, src));
14098 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
14100 expand_movmem_epilogue (rtx destmem, rtx srcmem,
14101 rtx destptr, rtx srcptr, rtx count, int max_size)
14104 if (CONST_INT_P (count))
14106 HOST_WIDE_INT countval = INTVAL (count);
14109 if ((countval & 0x10) && max_size > 16)
14113 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
14114 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset + 8);
14117 gcc_unreachable ();
14120 if ((countval & 0x08) && max_size > 8)
14123 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
14126 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
14127 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset + 4);
14131 if ((countval & 0x04) && max_size > 4)
14133 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
14136 if ((countval & 0x02) && max_size > 2)
14138 emit_strmov (destmem, srcmem, destptr, srcptr, HImode, offset);
14141 if ((countval & 0x01) && max_size > 1)
14143 emit_strmov (destmem, srcmem, destptr, srcptr, QImode, offset);
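/* Worked example for the constant path above: countval == 13 with
   max_size == 16 on a 64-bit target copies 8 + 4 + 1 bytes, i.e. one
   DImode, one SImode and one QImode strmov, with no tests or branches
   at all.  */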
14150 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
14151 count, 1, OPTAB_DIRECT);
14152 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
14153 count, QImode, 1, 4);
14157 /* When there are stringops, we can cheaply increase dest and src pointers.
14158 Otherwise we save code size by maintaining offset (zero is readily
14159 available from the preceding rep operation) and using x86 addressing modes.
14161 if (TARGET_SINGLE_STRINGOP)
14165 rtx label = ix86_expand_aligntest (count, 4, true);
14166 src = change_address (srcmem, SImode, srcptr);
14167 dest = change_address (destmem, SImode, destptr);
14168 emit_insn (gen_strmov (destptr, dest, srcptr, src));
14169 emit_label (label);
14170 LABEL_NUSES (label) = 1;
14174 rtx label = ix86_expand_aligntest (count, 2, true);
14175 src = change_address (srcmem, HImode, srcptr);
14176 dest = change_address (destmem, HImode, destptr);
14177 emit_insn (gen_strmov (destptr, dest, srcptr, src));
14178 emit_label (label);
14179 LABEL_NUSES (label) = 1;
14183 rtx label = ix86_expand_aligntest (count, 1, true);
14184 src = change_address (srcmem, QImode, srcptr);
14185 dest = change_address (destmem, QImode, destptr);
14186 emit_insn (gen_strmov (destptr, dest, srcptr, src));
14187 emit_label (label);
14188 LABEL_NUSES (label) = 1;
14193 rtx offset = force_reg (Pmode, const0_rtx);
14198 rtx label = ix86_expand_aligntest (count, 4, true);
14199 src = change_address (srcmem, SImode, srcptr);
14200 dest = change_address (destmem, SImode, destptr);
14201 emit_move_insn (dest, src);
14202 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
14203 true, OPTAB_LIB_WIDEN);
14205 emit_move_insn (offset, tmp);
14206 emit_label (label);
14207 LABEL_NUSES (label) = 1;
14211 rtx label = ix86_expand_aligntest (count, 2, true);
14212 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
14213 src = change_address (srcmem, HImode, tmp);
14214 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
14215 dest = change_address (destmem, HImode, tmp);
14216 emit_move_insn (dest, src);
14217 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
14218 true, OPTAB_LIB_WIDEN);
14220 emit_move_insn (offset, tmp);
14221 emit_label (label);
14222 LABEL_NUSES (label) = 1;
14226 rtx label = ix86_expand_aligntest (count, 1, true);
14227 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
14228 src = change_address (srcmem, QImode, tmp);
14229 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
14230 dest = change_address (destmem, QImode, tmp);
14231 emit_move_insn (dest, src);
14232 emit_label (label);
14233 LABEL_NUSES (label) = 1;
14238 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
14240 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
14241 rtx count, int max_size)
14244 expand_simple_binop (counter_mode (count), AND, count,
14245 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
14246 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
14247 gen_lowpart (QImode, value), count, QImode,
14251 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
14253 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx count, int max_size)
14257 if (CONST_INT_P (count))
14259 HOST_WIDE_INT countval = INTVAL (count);
14262 if ((countval & 0x10) && max_size > 16)
14266 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
14267 emit_insn (gen_strset (destptr, dest, value));
14268 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset + 8);
14269 emit_insn (gen_strset (destptr, dest, value));
14272 gcc_unreachable ();
14275 if ((countval & 0x08) && max_size > 8)
14279 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
14280 emit_insn (gen_strset (destptr, dest, value));
14284 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
14285 emit_insn (gen_strset (destptr, dest, value));
14286 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset + 4);
14287 emit_insn (gen_strset (destptr, dest, value));
14291 if ((countval & 0x04) && max_size > 4)
14293 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
14294 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
14297 if ((countval & 0x02) && max_size > 2)
14299 dest = adjust_automodify_address_nv (destmem, HImode, destptr, offset);
14300 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
14303 if ((countval & 0x01) && max_size > 1)
14305 dest = adjust_automodify_address_nv (destmem, QImode, destptr, offset);
14306 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
14313 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
14318 rtx label = ix86_expand_aligntest (count, 16, true);
14321 dest = change_address (destmem, DImode, destptr);
14322 emit_insn (gen_strset (destptr, dest, value));
14323 emit_insn (gen_strset (destptr, dest, value));
14327 dest = change_address (destmem, SImode, destptr);
14328 emit_insn (gen_strset (destptr, dest, value));
14329 emit_insn (gen_strset (destptr, dest, value));
14330 emit_insn (gen_strset (destptr, dest, value));
14331 emit_insn (gen_strset (destptr, dest, value));
14333 emit_label (label);
14334 LABEL_NUSES (label) = 1;
14338 rtx label = ix86_expand_aligntest (count, 8, true);
14341 dest = change_address (destmem, DImode, destptr);
14342 emit_insn (gen_strset (destptr, dest, value));
14346 dest = change_address (destmem, SImode, destptr);
14347 emit_insn (gen_strset (destptr, dest, value));
14348 emit_insn (gen_strset (destptr, dest, value));
14350 emit_label (label);
14351 LABEL_NUSES (label) = 1;
14355 rtx label = ix86_expand_aligntest (count, 4, true);
14356 dest = change_address (destmem, SImode, destptr);
14357 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
14358 emit_label (label);
14359 LABEL_NUSES (label) = 1;
14363 rtx label = ix86_expand_aligntest (count, 2, true);
14364 dest = change_address (destmem, HImode, destptr);
14365 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
14366 emit_label (label);
14367 LABEL_NUSES (label) = 1;
14371 rtx label = ix86_expand_aligntest (count, 1, true);
14372 dest = change_address (destmem, QImode, destptr);
14373 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
14374 emit_label (label);
14375 LABEL_NUSES (label) = 1;
14379 /* Copy enough from SRC to DEST to align DEST, known to be aligned by ALIGN, to
14380 DESIRED_ALIGNMENT. */
14382 expand_movmem_prologue (rtx destmem, rtx srcmem,
14383 rtx destptr, rtx srcptr, rtx count,
14384 int align, int desired_alignment)
14386 if (align <= 1 && desired_alignment > 1)
14388 rtx label = ix86_expand_aligntest (destptr, 1, false);
14389 srcmem = change_address (srcmem, QImode, srcptr);
14390 destmem = change_address (destmem, QImode, destptr);
14391 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
14392 ix86_adjust_counter (count, 1);
14393 emit_label (label);
14394 LABEL_NUSES (label) = 1;
14396 if (align <= 2 && desired_alignment > 2)
14398 rtx label = ix86_expand_aligntest (destptr, 2, false);
14399 srcmem = change_address (srcmem, HImode, srcptr);
14400 destmem = change_address (destmem, HImode, destptr);
14401 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
14402 ix86_adjust_counter (count, 2);
14403 emit_label (label);
14404 LABEL_NUSES (label) = 1;
14406 if (align <= 4 && desired_alignment > 4)
14408 rtx label = ix86_expand_aligntest (destptr, 4, false);
14409 srcmem = change_address (srcmem, SImode, srcptr);
14410 destmem = change_address (destmem, SImode, destptr);
14411 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
14412 ix86_adjust_counter (count, 4);
14413 emit_label (label);
14414 LABEL_NUSES (label) = 1;
14416 gcc_assert (desired_alignment <= 8);
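/* E.g. with align == 1 and desired_alignment == 8, the prologue above
   emits three conditional copies (1, 2 and 4 bytes), each guarded by a
   test of the corresponding low bit of the destination address, moving
   at most 7 bytes and decrementing COUNT accordingly.  */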
14419 /* Set enough bytes of DEST, known to be aligned by ALIGN, to align it to
14420 DESIRED_ALIGNMENT. */
14422 expand_setmem_prologue (rtx destmem, rtx destptr, rtx value, rtx count,
14423 int align, int desired_alignment)
14425 if (align <= 1 && desired_alignment > 1)
14427 rtx label = ix86_expand_aligntest (destptr, 1, false);
14428 destmem = change_address (destmem, QImode, destptr);
14429 emit_insn (gen_strset (destptr, destmem, gen_lowpart (QImode, value)));
14430 ix86_adjust_counter (count, 1);
14431 emit_label (label);
14432 LABEL_NUSES (label) = 1;
14434 if (align <= 2 && desired_alignment > 2)
14436 rtx label = ix86_expand_aligntest (destptr, 2, false);
14437 destmem = change_address (destmem, HImode, destptr);
14438 emit_insn (gen_strset (destptr, destmem, gen_lowpart (HImode, value)));
14439 ix86_adjust_counter (count, 2);
14440 emit_label (label);
14441 LABEL_NUSES (label) = 1;
14443 if (align <= 4 && desired_alignment > 4)
14445 rtx label = ix86_expand_aligntest (destptr, 4, false);
14446 destmem = change_address (destmem, SImode, destptr);
14447 emit_insn (gen_strset (destptr, destmem, gen_lowpart (SImode, value)));
14448 ix86_adjust_counter (count, 4);
14449 emit_label (label);
14450 LABEL_NUSES (label) = 1;
14452 gcc_assert (desired_alignment <= 8);
14455 /* Given COUNT and EXPECTED_SIZE, decide on the codegen of a string operation. */
14456 static enum stringop_alg
14457 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
14458 int *dynamic_check)
14460 const struct stringop_algs * algs;
14462 *dynamic_check = -1;
14464 algs = &ix86_cost->memset[TARGET_64BIT != 0];
14466 algs = &ix86_cost->memcpy[TARGET_64BIT != 0];
14467 if (stringop_alg != no_stringop)
14468 return stringop_alg;
14469 /* rep; movq or rep; movl is the smallest variant. */
14470 else if (optimize_size)
14472 if (!count || (count & 3))
14473 return rep_prefix_1_byte;
14475 return rep_prefix_4_byte;
14477 /* Very tiny blocks are best handled via the loop; REP is expensive to set up.
14479 else if (expected_size != -1 && expected_size < 4)
14480 return loop_1_byte;
14481 else if (expected_size != -1)
14484 enum stringop_alg alg = libcall;
14485 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
14487 gcc_assert (algs->size[i].max);
14488 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
14490 if (algs->size[i].alg != libcall)
14491 alg = algs->size[i].alg;
14492 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
14493 last non-libcall inline algorithm. */
14494 if (TARGET_INLINE_ALL_STRINGOPS)
14496 /* When the current size is best copied by a libcall,
14497 but we are still forced to inline, run the heuristic below
14498 that will pick code for medium-sized blocks. */
14499 if (alg != libcall)
14504 return algs->size[i].alg;
14507 gcc_assert (TARGET_INLINE_ALL_STRINGOPS);
14509 /* When asked to inline the call anyway, try to pick a meaningful choice.
14510 We look for the maximal size of a block that is faster to copy by hand, and
14511 take blocks of at most that size, guessing that the average size will
14512 be roughly half of the block.
14514 If this turns out to be bad, we might simply specify the preferred
14515 choice in ix86_costs. */
14516 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
14517 && algs->unknown_size == libcall)
14520 enum stringop_alg alg;
14523 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
14524 if (algs->size[i].alg != libcall && algs->size[i].alg)
14525 max = algs->size[i].max;
14528 alg = decide_alg (count, max / 2, memset, dynamic_check);
14529 gcc_assert (*dynamic_check == -1);
14530 gcc_assert (alg != libcall);
14531 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
14532 *dynamic_check = max;
14535 return algs->unknown_size;
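/* Hypothetical illustration of the table walk above: if the cost table
   held {{256, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}} and
   expected_size == 1000, the scan stops at the second entry and
   rep_prefix_4_byte is chosen; with -minline-all-stringops the final
   libcall entry instead falls back to the last inline algorithm
   seen.  */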
14538 /* Decide on alignment. We know that the operand is already aligned to ALIGN
14539 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
14541 decide_alignment (int align,
14542 enum stringop_alg alg,
14545 int desired_align = 0;
14549 gcc_unreachable ();
14551 case unrolled_loop:
14552 desired_align = GET_MODE_SIZE (Pmode);
14554 case rep_prefix_8_byte:
14557 case rep_prefix_4_byte:
14558 /* PentiumPro has special logic triggering for 8-byte-aligned blocks,
14559 copying a whole cache line at once. */
14560 if (TARGET_PENTIUMPRO)
14565 case rep_prefix_1_byte:
14566 /* PentiumPro has special logic triggering for 8-byte-aligned blocks,
14567 copying a whole cache line at once. */
14568 if (TARGET_PENTIUMPRO)
14582 if (desired_align < align)
14583 desired_align = align;
14584 if (expected_size != -1 && expected_size < 4)
14585 desired_align = align;
14586 return desired_align;
14589 /* Return the smallest power of 2 greater than VAL. */
14591 smallest_pow2_greater_than (int val)
14599 /* Expand string move (memcpy) operation. Use i386 string operations when
14600 profitable. ix86_expand_setmem contains similar code. The code depends upon
14601 architecture, block size and alignment, but always has the same
14604 1) Prologue guard: Conditional that jumps up to the epilogues for small
14605 blocks that can be handled by the epilogue alone. This is faster, but
14606 also needed for correctness, since the prologue assumes the block is
14607 larger than the desired alignment.
14609 An optional dynamic check for size and a libcall for large
14610 blocks are emitted here too, with -minline-stringops-dynamically.
14612 2) Prologue: copy the first few bytes in order to get the destination aligned
14613 to DESIRED_ALIGN. It is emitted only when ALIGN is less than
14614 DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be copied.
14615 We emit either a jump tree on power-of-two sized blocks, or a byte loop.
14617 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
14618 with specified algorithm.
14620 4) Epilogue: code copying tail of the block that is too small to be
14621 handled by main body (or up to size guarded by prologue guard). */
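/* For a copy of unknown size using rep_prefix_4_byte, the emitted
   shape is roughly (an illustrative sketch only):

       cmpl $4, %ecx        ; 1) small blocks go straight to the epilogue
       jb .Lepilogue
       ...                  ; 2) align the destination, adjust the count
       movl %ecx, %eax
       shrl $2, %ecx
       rep movsl            ; 3) main body
     .Lepilogue:
       andl $3, %eax        ; 4) copy the remaining 0..3 bytes
       ...
   */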
14624 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
14625 rtx expected_align_exp, rtx expected_size_exp)
14631 rtx jump_around_label = NULL;
14632 HOST_WIDE_INT align = 1;
14633 unsigned HOST_WIDE_INT count = 0;
14634 HOST_WIDE_INT expected_size = -1;
14635 int size_needed = 0, epilogue_size_needed;
14636 int desired_align = 0;
14637 enum stringop_alg alg;
14640 if (CONST_INT_P (align_exp))
14641 align = INTVAL (align_exp);
14642 /* i386 can do misaligned access at reasonably increased cost. */
14643 if (CONST_INT_P (expected_align_exp)
14644 && INTVAL (expected_align_exp) > align)
14645 align = INTVAL (expected_align_exp);
14646 if (CONST_INT_P (count_exp))
14647 count = expected_size = INTVAL (count_exp);
14648 if (CONST_INT_P (expected_size_exp) && count == 0)
14649 expected_size = INTVAL (expected_size_exp);
14651 /* Step 0: Decide on preferred algorithm, desired alignment and
14652 size of chunks to be copied by main loop. */
14654 alg = decide_alg (count, expected_size, false, &dynamic_check);
14655 desired_align = decide_alignment (align, alg, expected_size);
14657 if (!TARGET_ALIGN_STRINGOPS)
14658 align = desired_align;
14660 if (alg == libcall)
14662 gcc_assert (alg != no_stringop);
14664 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
14665 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
14666 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
14671 gcc_unreachable ();
14673 size_needed = GET_MODE_SIZE (Pmode);
14675 case unrolled_loop:
14676 size_needed = GET_MODE_SIZE (Pmode) * (TARGET_64BIT ? 4 : 2);
14678 case rep_prefix_8_byte:
14681 case rep_prefix_4_byte:
14684 case rep_prefix_1_byte:
14690 epilogue_size_needed = size_needed;
14692 /* Step 1: Prologue guard. */
14694 /* Alignment code needs count to be in register. */
14695 if (CONST_INT_P (count_exp) && desired_align > align)
14697 enum machine_mode mode = SImode;
14698 if (TARGET_64BIT && (count & ~0xffffffff))
14700 count_exp = force_reg (mode, count_exp);
14702 gcc_assert (desired_align >= 1 && align >= 1);
14704 /* Ensure that alignment prologue won't copy past end of block. */
14705 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
14707 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
14708 /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
14709 Make sure it is a power of 2. */
14710 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
14712 label = gen_label_rtx ();
14713 emit_cmp_and_jump_insns (count_exp,
14714 GEN_INT (epilogue_size_needed),
14715 LTU, 0, counter_mode (count_exp), 1, label);
14716 if (GET_CODE (count_exp) == CONST_INT)
14718 else if (expected_size == -1 || expected_size < epilogue_size_needed)
14719 predict_jump (REG_BR_PROB_BASE * 60 / 100);
14721 predict_jump (REG_BR_PROB_BASE * 20 / 100);
14723 /* Emit code to decide at runtime whether a library call or inline code should be
14725 if (dynamic_check != -1)
14727 rtx hot_label = gen_label_rtx ();
14728 jump_around_label = gen_label_rtx ();
14729 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
14730 LEU, 0, GET_MODE (count_exp), 1, hot_label);
14731 predict_jump (REG_BR_PROB_BASE * 90 / 100);
14732 emit_block_move_via_libcall (dst, src, count_exp, false);
14733 emit_jump (jump_around_label);
14734 emit_label (hot_label);
14737 /* Step 2: Alignment prologue. */
14739 if (desired_align > align)
14741 /* Except for the first move in the epilogue, we no longer know
14742 the constant offset in the aliasing info. It doesn't seem worth
14743 the pain to maintain it for the first move, so throw away
14745 src = change_address (src, BLKmode, srcreg);
14746 dst = change_address (dst, BLKmode, destreg);
14747 expand_movmem_prologue (dst, src, destreg, srcreg, count_exp, align,
14750 if (label && size_needed == 1)
14752 emit_label (label);
14753 LABEL_NUSES (label) = 1;
14757 /* Step 3: Main loop. */
14763 gcc_unreachable ();
14765 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
14766 count_exp, QImode, 1, expected_size);
14769 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
14770 count_exp, Pmode, 1, expected_size);
14772 case unrolled_loop:
14773 /* Unroll only by a factor of 2 in 32-bit mode, since we don't have enough
14774 registers for 4 temporaries anyway. */
14775 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
14776 count_exp, Pmode, TARGET_64BIT ? 4 : 2,
14779 case rep_prefix_8_byte:
14780 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
14783 case rep_prefix_4_byte:
14784 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
14787 case rep_prefix_1_byte:
14788 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
14792 /* Properly adjust the offset of the src and dest memory for aliasing. */
14793 if (CONST_INT_P (count_exp))
14795 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
14796 (count / size_needed) * size_needed);
14797 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
14798 (count / size_needed) * size_needed);
14802 src = change_address (src, BLKmode, srcreg);
14803 dst = change_address (dst, BLKmode, destreg);
14806 /* Step 4: Epilogue to copy the remaining bytes. */
14810 /* When the main loop is done, COUNT_EXP might hold the original count,
14811 while we want to copy only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
14812 Epilogue code will actually copy COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
14813 bytes. Compensate if needed. */
14815 if (size_needed < epilogue_size_needed)
14818 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
14819 GEN_INT (size_needed - 1), count_exp, 1,
14821 if (tmp != count_exp)
14822 emit_move_insn (count_exp, tmp);
14824 emit_label (label);
14825 LABEL_NUSES (label) = 1;
14828 if (count_exp != const0_rtx && epilogue_size_needed > 1)
14829 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
14830 epilogue_size_needed);
14831 if (jump_around_label)
14832 emit_label (jump_around_label);
14836 /* Helper function for memset; for QImode value 0xXY produce
14837 0xXYXYXYXY of the width specified by MODE. This is essentially
14838 a * 0x01010101, but we can do slightly better than
14839 synth_mult by unwinding the sequence by hand on CPUs with
14842 promote_duplicated_reg (enum machine_mode mode, rtx val)
14844 enum machine_mode valmode = GET_MODE (val);
14846 int nops = mode == DImode ? 3 : 2;
14848 gcc_assert (mode == SImode || mode == DImode);
14849 if (val == const0_rtx)
14850 return copy_to_mode_reg (mode, const0_rtx);
14851 if (CONST_INT_P (val))
14853 HOST_WIDE_INT v = INTVAL (val) & 255;
14857 if (mode == DImode)
14858 v |= (v << 16) << 16;
14859 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
14862 if (valmode == VOIDmode)
14864 if (valmode != QImode)
14865 val = gen_lowpart (QImode, val);
14866 if (mode == QImode)
14868 if (!TARGET_PARTIAL_REG_STALL)
14870 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
14871 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
14872 <= (ix86_cost->shift_const + ix86_cost->add) * nops
14873 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
14875 rtx reg = convert_modes (mode, QImode, val, true);
14876 tmp = promote_duplicated_reg (mode, const1_rtx);
14877 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
14882 rtx reg = convert_modes (mode, QImode, val, true);
14884 if (!TARGET_PARTIAL_REG_STALL)
14885 if (mode == SImode)
14886 emit_insn (gen_movsi_insv_1 (reg, reg));
14888 emit_insn (gen_movdi_insv_1_rex64 (reg, reg));
14891 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
14892 NULL, 1, OPTAB_DIRECT);
14894 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
14896 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
14897 NULL, 1, OPTAB_DIRECT);
14898 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
14899 if (mode == SImode)
14901 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
14902 NULL, 1, OPTAB_DIRECT);
14903 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
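/* Worked example of the duplication above: promoting val == 0xAB to
   SImode first widens it to 0xABAB (via the insv pattern, or the
   shift-by-8 and IOR pair), then "reg |= reg << 16" yields 0xABABABAB
   -- a few cheap ALU operations instead of a multiply by
   0x01010101.  */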
14908 /* Duplicate value VAL using promote_duplicated_reg into the maximal size that will
14909 be needed by the main loop copying SIZE_NEEDED chunks and by the prologue getting
14910 the alignment from ALIGN to DESIRED_ALIGN. */
14912 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align, int align)
14917 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
14918 promoted_val = promote_duplicated_reg (DImode, val);
14919 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
14920 promoted_val = promote_duplicated_reg (SImode, val);
14921 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
14922 promoted_val = promote_duplicated_reg (HImode, val);
14924 promoted_val = val;
14926 return promoted_val;
14929 /* Expand string set (memset) operation. Use i386 string operations when
14930 profitable. See the ix86_expand_movmem comment for an explanation of the
14931 individual steps performed. */
14933 ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
14934 rtx expected_align_exp, rtx expected_size_exp)
14939 rtx jump_around_label = NULL;
14940 HOST_WIDE_INT align = 1;
14941 unsigned HOST_WIDE_INT count = 0;
14942 HOST_WIDE_INT expected_size = -1;
14943 int size_needed = 0, epilogue_size_needed;
14944 int desired_align = 0;
14945 enum stringop_alg alg;
14946 rtx promoted_val = NULL;
14947 bool force_loopy_epilogue = false;
14950 if (CONST_INT_P (align_exp))
14951 align = INTVAL (align_exp);
14952 /* i386 can do misaligned access at reasonably increased cost. */
14953 if (CONST_INT_P (expected_align_exp)
14954 && INTVAL (expected_align_exp) > align)
14955 align = INTVAL (expected_align_exp);
14956 if (CONST_INT_P (count_exp))
14957 count = expected_size = INTVAL (count_exp);
14958 if (CONST_INT_P (expected_size_exp) && count == 0)
14959 expected_size = INTVAL (expected_size_exp);
14961 /* Step 0: Decide on preferred algorithm, desired alignment and
14962 size of chunks to be copied by main loop. */
14964 alg = decide_alg (count, expected_size, true, &dynamic_check);
14965 desired_align = decide_alignment (align, alg, expected_size);
14967 if (!TARGET_ALIGN_STRINGOPS)
14968 align = desired_align;
14970 if (alg == libcall)
14972 gcc_assert (alg != no_stringop);
14974 count_exp = copy_to_mode_reg (counter_mode (count_exp), count_exp);
14975 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
14980 gcc_unreachable ();
14982 size_needed = GET_MODE_SIZE (Pmode);
14984 case unrolled_loop:
14985 size_needed = GET_MODE_SIZE (Pmode) * 4;
14987 case rep_prefix_8_byte:
14990 case rep_prefix_4_byte:
14993 case rep_prefix_1_byte:
14998 epilogue_size_needed = size_needed;
15000 /* Step 1: Prologue guard. */
15002 /* Alignment code needs count to be in register. */
15003 if (CONST_INT_P (count_exp) && desired_align > align)
15005 enum machine_mode mode = SImode;
15006 if (TARGET_64BIT && (count & ~0xffffffff))
15008 count_exp = force_reg (mode, count_exp);
15010 /* Do the cheap promotion to allow better CSE across the
15011 main loop and epilogue (i.e. one load of the big constant in
15012 front of all the code). */
15013 if (CONST_INT_P (val_exp))
15014 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
15015 desired_align, align);
15016 /* Ensure that alignment prologue won't copy past end of block. */
15017 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
15019 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
15020 /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
15021 Make sure it is a power of 2. */
15022 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
15024 /* To improve performance of small blocks, we jump around the VAL
15025 promoting code. This means that if the promoted VAL is not constant,
15026 we might not use it in the epilogue and have to use the byte
15028 if (epilogue_size_needed > 2 && !promoted_val)
15029 force_loopy_epilogue = true;
15030 label = gen_label_rtx ();
15031 emit_cmp_and_jump_insns (count_exp,
15032 GEN_INT (epilogue_size_needed),
15033 LTU, 0, counter_mode (count_exp), 1, label);
15034 if (GET_CODE (count_exp) == CONST_INT)
15036 else if (expected_size == -1 || expected_size <= epilogue_size_needed)
15037 predict_jump (REG_BR_PROB_BASE * 60 / 100);
15039 predict_jump (REG_BR_PROB_BASE * 20 / 100);
15041 if (dynamic_check != -1)
15043 rtx hot_label = gen_label_rtx ();
15044 jump_around_label = gen_label_rtx ();
15045 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
15046 LEU, 0, counter_mode (count_exp), 1, hot_label);
15047 predict_jump (REG_BR_PROB_BASE * 90 / 100);
15048 set_storage_via_libcall (dst, count_exp, val_exp, false);
15049 emit_jump (jump_around_label);
15050 emit_label (hot_label);
15053 /* Step 2: Alignment prologue. */
15055 /* Do the expensive promotion once we branched off the small blocks. */
15057 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
15058 desired_align, align);
15059 gcc_assert (desired_align >= 1 && align >= 1);
15061 if (desired_align > align)
15063 /* Except for the first move in the epilogue, we no longer know
15064 the constant offset in the aliasing info. It doesn't seem worth
15065 the pain to maintain it for the first move, so throw away
15067 dst = change_address (dst, BLKmode, destreg);
15068 expand_setmem_prologue (dst, destreg, promoted_val, count_exp, align,
15071 if (label && size_needed == 1)
15073 emit_label (label);
15074 LABEL_NUSES (label) = 1;
15078 /* Step 3: Main loop. */
15084 gcc_unreachable ();
15086 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
15087 count_exp, QImode, 1, expected_size);
15090 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
15091 count_exp, Pmode, 1, expected_size);
15093 case unrolled_loop:
15094 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
15095 count_exp, Pmode, 4, expected_size);
15097 case rep_prefix_8_byte:
15098 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
15101 case rep_prefix_4_byte:
15102 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
15105 case rep_prefix_1_byte:
15106 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
15110 /* Properly adjust the offset of the src and dest memory for aliasing. */
15111 if (CONST_INT_P (count_exp))
15112 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
15113 (count / size_needed) * size_needed);
15115 dst = change_address (dst, BLKmode, destreg);
15117 /* Step 4: Epilogue to copy the remaining bytes. */
15121 /* When the main loop is done, COUNT_EXP might hold the original count,
15122 while we want to copy only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
15123 Epilogue code will actually copy COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
15124 bytes. Compensate if needed. */
15126 if (size_needed < desired_align - align)
15129 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
15130 GEN_INT (size_needed - 1), count_exp, 1,
15132 size_needed = desired_align - align + 1;
15133 if (tmp != count_exp)
15134 emit_move_insn (count_exp, tmp);
15136 emit_label (label);
15137 LABEL_NUSES (label) = 1;
15139 if (count_exp != const0_rtx && epilogue_size_needed > 1)
15141 if (force_loopy_epilogue)
15142 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
15145 expand_setmem_epilogue (dst, destreg, promoted_val, count_exp,
15148 if (jump_around_label)
15149 emit_label (jump_around_label);
15153 /* Expand the appropriate insns for doing strlen if not just doing
15156 out = result, initialized with the start address
15157 align_rtx = alignment of the address.
15158 scratch = scratch register, initialized with the start address when
15159 not aligned, otherwise undefined
15161 This is just the body. It needs the initializations mentioned above and
15162 some address computing at the end. These things are done in i386.md. */
15165 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
15169 rtx align_2_label = NULL_RTX;
15170 rtx align_3_label = NULL_RTX;
15171 rtx align_4_label = gen_label_rtx ();
15172 rtx end_0_label = gen_label_rtx ();
15174 rtx tmpreg = gen_reg_rtx (SImode);
15175 rtx scratch = gen_reg_rtx (SImode);
15179 if (CONST_INT_P (align_rtx))
15180 align = INTVAL (align_rtx);
15182 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
15184 /* Is there a known alignment and is it less than 4? */
15187 rtx scratch1 = gen_reg_rtx (Pmode);
15188 emit_move_insn (scratch1, out);
15189 /* Is there a known alignment and is it not 2? */
15192 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
15193 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
15195 /* Leave just the 3 lower bits. */
15196 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
15197 NULL_RTX, 0, OPTAB_WIDEN);
15199 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
15200 Pmode, 1, align_4_label);
15201 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
15202 Pmode, 1, align_2_label);
15203 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
15204 Pmode, 1, align_3_label);
15208 /* Since the alignment is 2, we have to check 2 or 0 bytes;
15209 check whether it is aligned to a 4-byte boundary. */
15211 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
15212 NULL_RTX, 0, OPTAB_WIDEN);
15214 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
15215 Pmode, 1, align_4_label);
15218 mem = change_address (src, QImode, out);
15220 /* Now compare the bytes. */
15222 /* Compare the first n unaligned bytes on a byte-by-byte basis. */
15223 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
15224 QImode, 1, end_0_label);
15226 /* Increment the address. */
15228 emit_insn (gen_adddi3 (out, out, const1_rtx));
15230 emit_insn (gen_addsi3 (out, out, const1_rtx));
15232 /* Not needed with an alignment of 2 */
15235 emit_label (align_2_label);
15237 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
15241 emit_insn (gen_adddi3 (out, out, const1_rtx));
15243 emit_insn (gen_addsi3 (out, out, const1_rtx));
15245 emit_label (align_3_label);
15248 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
15252 emit_insn (gen_adddi3 (out, out, const1_rtx));
15254 emit_insn (gen_addsi3 (out, out, const1_rtx));
15257 /* Generate the loop to check 4 bytes at a time. It is not a good idea to
15258 align this loop; it only makes programs bigger, and does not help to
15260 emit_label (align_4_label);
15262 mem = change_address (src, SImode, out);
15263 emit_move_insn (scratch, mem);
15265 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
15267 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
15269 /* This formula yields a nonzero result iff one of the bytes is zero.
15270 This saves three branches inside the loop and many cycles. */
15272 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
15273 emit_insn (gen_one_cmplsi2 (scratch, scratch));
15274 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
15275 emit_insn (gen_andsi3 (tmpreg, tmpreg,
15276 gen_int_mode (0x80808080, SImode)));
15277 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
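/* Worked example of the formula (x - 0x01010101) & ~x & 0x80808080:
   for x == 0x41420041 (its second-lowest byte is zero) it yields
   0x4040FF40 & 0xBEBDFFBE & 0x80808080 == 0x00008000, nonzero as
   required; for x == 0x41414141 (no zero byte) every 0x80 bit is
   cleared and the loop continues.  */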
15282 rtx reg = gen_reg_rtx (SImode);
15283 rtx reg2 = gen_reg_rtx (Pmode);
15284 emit_move_insn (reg, tmpreg);
15285 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
15287 /* If zero is not in the first two bytes, move two bytes forward. */
15288 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
15289 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
15290 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
15291 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
15292 gen_rtx_IF_THEN_ELSE (SImode, tmp,
15295 /* Emit lea manually to avoid clobbering of flags. */
15296 emit_insn (gen_rtx_SET (SImode, reg2,
15297 gen_rtx_PLUS (Pmode, out, const2_rtx)));
15299 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
15300 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
15301 emit_insn (gen_rtx_SET (VOIDmode, out,
15302 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
15309 rtx end_2_label = gen_label_rtx ();
15310 /* Is zero in the first two bytes? */
15312 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
15313 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
15314 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
15315 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
15316 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
15318 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
15319 JUMP_LABEL (tmp) = end_2_label;
15321 /* Not in the first two. Move two bytes forward. */
15322 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
15324 emit_insn (gen_adddi3 (out, out, const2_rtx));
15326 emit_insn (gen_addsi3 (out, out, const2_rtx));
15328 emit_label (end_2_label);
15332 /* Avoid branch in fixing the byte. */
15333 tmpreg = gen_lowpart (QImode, tmpreg);
15334 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
15335 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, FLAGS_REG), const0_rtx);
15337 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
15339 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));
15341 emit_label (end_0_label);
15344 /* Expand strlen. */
15347 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
15349 rtx addr, scratch1, scratch2, scratch3, scratch4;
15351 /* The generic case of the strlen expander is long. Avoid its
15352 expansion unless TARGET_INLINE_ALL_STRINGOPS. */
15354 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
15355 && !TARGET_INLINE_ALL_STRINGOPS
15357 && (!CONST_INT_P (align) || INTVAL (align) < 4))
15360 addr = force_reg (Pmode, XEXP (src, 0));
15361 scratch1 = gen_reg_rtx (Pmode);
15363 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
15366 /* Well, it seems that some optimizer does not combine a call like
15367 foo(strlen(bar), strlen(bar));
15368 when the move and the subtraction are done here. It does calculate
15369 the length just once when these instructions are done inside of
15370 output_strlen_unroll(). But I think that since &bar[strlen(bar)] is
15371 often used and I use one fewer register for the lifetime of
15372 output_strlen_unroll(), this is better. */
15374 emit_move_insn (out, addr);
15376 ix86_expand_strlensi_unroll_1 (out, src, align);
15378 /* strlensi_unroll_1 returns the address of the zero at the end of
15379 the string, like memchr(), so compute the length by subtracting
15380 the start address. */
15382 emit_insn (gen_subdi3 (out, out, addr));
15384 emit_insn (gen_subsi3 (out, out, addr));
15389 scratch2 = gen_reg_rtx (Pmode);
15390 scratch3 = gen_reg_rtx (Pmode);
15391 scratch4 = force_reg (Pmode, constm1_rtx);
15393 emit_move_insn (scratch3, addr);
15394 eoschar = force_reg (QImode, eoschar);
15396 src = replace_equiv_address_nv (src, scratch3);
15398 /* If .md starts supporting :P, this can be done in .md. */
15399 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
15400 scratch4), UNSPEC_SCAS);
15401 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
15404 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
15405 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
15409 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
15410 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
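/* The arithmetic above recovers the length from the scasb scan:
   "repnz scasb" starts with -1 in the count register and decrements it
   once per byte scanned, including the terminator, so it ends as
   -(len + 2); then out = ~count + (-1) == len.  */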
15416 /* For a given symbol (function), construct code to compute the address of its PLT
15417 entry in the large x86-64 PIC model. */
15419 construct_plt_address (rtx symbol)
15421 rtx tmp = gen_reg_rtx (Pmode);
15422 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
15424 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
15425 gcc_assert (ix86_cmodel == CM_LARGE_PIC);
15427 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
15428 emit_insn (gen_adddi3 (tmp, tmp, pic_offset_table_rtx));
15433 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
15434 rtx callarg2 ATTRIBUTE_UNUSED,
15435 rtx pop, int sibcall)
15437 rtx use = NULL, call;
15439 if (pop == const0_rtx)
15441 gcc_assert (!TARGET_64BIT || !pop);
15443 if (TARGET_MACHO && !TARGET_64BIT)
15446 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
15447 fnaddr = machopic_indirect_call_target (fnaddr);
15452 /* Static functions and indirect calls don't need the pic register. */
15453 if (flag_pic && (!TARGET_64BIT || ix86_cmodel == CM_LARGE_PIC)
15454 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
15455 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
15456 use_reg (&use, pic_offset_table_rtx);
15459 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
15461 rtx al = gen_rtx_REG (QImode, 0);
15462 emit_move_insn (al, callarg2);
15463 use_reg (&use, al);
15466 if (ix86_cmodel == CM_LARGE_PIC
15467 && GET_CODE (fnaddr) == MEM
15468 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
15469 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
15470 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
15471 else if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
15473 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
15474 fnaddr = gen_rtx_MEM (QImode, fnaddr);
15476 if (sibcall && TARGET_64BIT
15477 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
15480 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
15481 fnaddr = gen_rtx_REG (Pmode, R11_REG);
15482 emit_move_insn (fnaddr, addr);
15483 fnaddr = gen_rtx_MEM (QImode, fnaddr);
15486 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
15488 call = gen_rtx_SET (VOIDmode, retval, call);
15491 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
15492 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
15493 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
15496 call = emit_call_insn (call);
15498 CALL_INSN_FUNCTION_USAGE (call) = use;
15502 /* Clear stack slot assignments remembered from previous functions.
15503 This is called from INIT_EXPANDERS once before RTL is emitted for each
15506 static struct machine_function *
15507 ix86_init_machine_status (void)
15509 struct machine_function *f;
15511 f = ggc_alloc_cleared (sizeof (struct machine_function));
15512 f->use_fast_prologue_epilogue_nregs = -1;
15513 f->tls_descriptor_call_expanded_p = 0;
15518 /* Return a MEM corresponding to a stack slot with mode MODE.
15519 Allocate a new slot if necessary.
15521 The RTL for a function can have several slots available: N is
15522 which slot to use. */
15525 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
15527 struct stack_local_entry *s;
15529 gcc_assert (n < MAX_386_STACK_LOCALS);
15531 for (s = ix86_stack_locals; s; s = s->next)
15532 if (s->mode == mode && s->n == n)
15533 return copy_rtx (s->rtl);
15535 s = (struct stack_local_entry *)
15536 ggc_alloc (sizeof (struct stack_local_entry));
15539 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
15541 s->next = ix86_stack_locals;
15542 ix86_stack_locals = s;
15546 /* Construct the SYMBOL_REF for the tls_get_addr function. */
15548 static GTY(()) rtx ix86_tls_symbol;
15550 ix86_tls_get_addr (void)
15553 if (!ix86_tls_symbol)
15555 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
15556 (TARGET_ANY_GNU_TLS
15558 ? "___tls_get_addr"
15559 : "__tls_get_addr");
15562 return ix86_tls_symbol;
15565 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
15567 static GTY(()) rtx ix86_tls_module_base_symbol;
15569 ix86_tls_module_base (void)
15572 if (!ix86_tls_module_base_symbol)
15574 ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode,
15575 "_TLS_MODULE_BASE_");
15576 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
15577 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
15580 return ix86_tls_module_base_symbol;
15583 /* Calculate the length of the memory address in the instruction
15584 encoding. Does not include the one-byte modrm, opcode, or prefix. */
15587 memory_address_length (rtx addr)
15589 struct ix86_address parts;
15590 rtx base, index, disp;
15594 if (GET_CODE (addr) == PRE_DEC
15595 || GET_CODE (addr) == POST_INC
15596 || GET_CODE (addr) == PRE_MODIFY
15597 || GET_CODE (addr) == POST_MODIFY)
15600 ok = ix86_decompose_address (addr, &parts);
15603 if (parts.base && GET_CODE (parts.base) == SUBREG)
15604 parts.base = SUBREG_REG (parts.base);
15605 if (parts.index && GET_CODE (parts.index) == SUBREG)
15606 parts.index = SUBREG_REG (parts.index);
15609 index = parts.index;
15614 - esp as the base always wants an index,
15615 - ebp as the base always wants a displacement. */
15617 /* Register Indirect. */
15618 if (base && !index && !disp)
15620 /* esp (for its index) and ebp (for its displacement) need
15621 the two-byte modrm form. */
15622 if (addr == stack_pointer_rtx
15623 || addr == arg_pointer_rtx
15624 || addr == frame_pointer_rtx
15625 || addr == hard_frame_pointer_rtx)
15629 /* Direct Addressing. */
15630 else if (disp && !base && !index)
15635 /* Find the length of the displacement constant. */
15638 if (base && satisfies_constraint_K (disp))
15643 /* ebp always wants a displacement. */
15644 else if (base == hard_frame_pointer_rtx)
15647 /* An index requires the two-byte modrm form.... */
15649 /* ...like esp, which always wants an index. */
15650 || base == stack_pointer_rtx
15651 || base == arg_pointer_rtx
15652 || base == frame_pointer_rtx)
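/* Worked examples (editorial) of the lengths computed above; only
   bytes beyond the one-byte modrm are counted:

     (%eax)          -> 0   plain register indirect
     (%esp)          -> 1   esp needs a SIB byte
     8(%ebp)         -> 1   disp8 (ebp always takes a displacement)
     8(%eax,%ebx,4)  -> 2   SIB byte + disp8
     foo             -> 4   disp32 for direct addressing  */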
15659 /* Compute default value for "length_immediate" attribute. When SHORTFORM
15660 is set, expect that the insn has an 8-bit immediate alternative. */
15662 ix86_attr_length_immediate_default (rtx insn, int shortform)
15666 extract_insn_cached (insn);
15667 for (i = recog_data.n_operands - 1; i >= 0; --i)
15668 if (CONSTANT_P (recog_data.operand[i]))
15671 if (shortform && satisfies_constraint_K (recog_data.operand[i]))
15675 switch (get_attr_mode (insn))
15686 /* Immediates for DImode instructions are encoded as 32-bit sign-extended values. */
15691 fatal_insn ("unknown insn mode", insn);
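/* Editorial example: with SHORTFORM set, "addl $3, %eax" can use the
   sign-extended 8-bit immediate form (constraint K, -128..127) and so
   costs 1 byte of immediate, while "addl $300, %eax" needs the full
   4 bytes.  A DImode add likewise uses at most a 4-byte immediate,
   sign-extended to 64 bits by the hardware.  */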
15697 /* Compute default value for "length_address" attribute. */
15699 ix86_attr_length_address_default (rtx insn)
15703 if (get_attr_type (insn) == TYPE_LEA)
15705 rtx set = PATTERN (insn);
15707 if (GET_CODE (set) == PARALLEL)
15708 set = XVECEXP (set, 0, 0);
15710 gcc_assert (GET_CODE (set) == SET);
15712 return memory_address_length (SET_SRC (set));
15715 extract_insn_cached (insn);
15716 for (i = recog_data.n_operands - 1; i >= 0; --i)
15717 if (MEM_P (recog_data.operand[i]))
15719 return memory_address_length (XEXP (recog_data.operand[i], 0));
15725 /* Return the maximum number of instructions a CPU can issue. */
15728 ix86_issue_rate (void)
15732 case PROCESSOR_PENTIUM:
15736 case PROCESSOR_PENTIUMPRO:
15737 case PROCESSOR_PENTIUM4:
15738 case PROCESSOR_ATHLON:
15740 case PROCESSOR_AMDFAM10:
15741 case PROCESSOR_NOCONA:
15742 case PROCESSOR_GENERIC32:
15743 case PROCESSOR_GENERIC64:
15746 case PROCESSOR_CORE2:
15754 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
15755 by DEP_INSN and nothing else set by DEP_INSN. */
15758 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
15762 /* Simplify the test for uninteresting insns. */
15763 if (insn_type != TYPE_SETCC
15764 && insn_type != TYPE_ICMOV
15765 && insn_type != TYPE_FCMOV
15766 && insn_type != TYPE_IBR)
15769 if ((set = single_set (dep_insn)) != 0)
15771 set = SET_DEST (set);
15774 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
15775 && XVECLEN (PATTERN (dep_insn), 0) == 2
15776 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
15777 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
15779 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
15780 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
15785 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
15788 /* This test is true if the dependent insn reads the flags but
15789 not any other potentially set register. */
15790 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
15793 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
15799 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
15800 address with operands set by DEP_INSN. */
15803 ix86_agi_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
15807 if (insn_type == TYPE_LEA
15810 addr = PATTERN (insn);
15812 if (GET_CODE (addr) == PARALLEL)
15813 addr = XVECEXP (addr, 0, 0);
15815 gcc_assert (GET_CODE (addr) == SET);
15817 addr = SET_SRC (addr);
15822 extract_insn_cached (insn);
15823 for (i = recog_data.n_operands - 1; i >= 0; --i)
15824 if (MEM_P (recog_data.operand[i]))
15826 addr = XEXP (recog_data.operand[i], 0);
15833 return modified_in_p (addr, dep_insn);
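/* Editorial example of an address generation interlock on Pentium:

     movl %ebx, %eax
     movl (%eax), %ecx

   The load's address depends on the register written by the previous
   insn, so modified_in_p (addr, dep_insn) is true and the scheduler
   will be charged an extra cycle of latency.  */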
15837 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
15839 enum attr_type insn_type, dep_insn_type;
15840 enum attr_memory memory;
15842 int dep_insn_code_number;
15844 /* Anti and output dependencies have zero cost on all CPUs. */
15845 if (REG_NOTE_KIND (link) != 0)
15848 dep_insn_code_number = recog_memoized (dep_insn);
15850 /* If we can't recognize the insns, we can't really do anything. */
15851 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
15854 insn_type = get_attr_type (insn);
15855 dep_insn_type = get_attr_type (dep_insn);
15859 case PROCESSOR_PENTIUM:
15860 /* Address Generation Interlock adds a cycle of latency. */
15861 if (ix86_agi_dependent (insn, dep_insn, insn_type))
15864 /* ??? Compares pair with jump/setcc. */
15865 if (ix86_flags_dependent (insn, dep_insn, insn_type))
15868 /* Floating point stores require value to be ready one cycle earlier. */
15869 if (insn_type == TYPE_FMOV
15870 && get_attr_memory (insn) == MEMORY_STORE
15871 && !ix86_agi_dependent (insn, dep_insn, insn_type))
15875 case PROCESSOR_PENTIUMPRO:
15876 memory = get_attr_memory (insn);
15878 /* INT->FP conversion is expensive. */
15879 if (get_attr_fp_int_src (dep_insn))
15882 /* There is one cycle extra latency between an FP op and a store. */
15883 if (insn_type == TYPE_FMOV
15884 && (set = single_set (dep_insn)) != NULL_RTX
15885 && (set2 = single_set (insn)) != NULL_RTX
15886 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
15887 && MEM_P (SET_DEST (set2)))
15890 /* Show the reorder buffer's ability to hide the latency of a load by
15891 executing it in parallel with the previous instruction when the
15892 previous instruction is not needed to compute the address. */
15893 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
15894 && !ix86_agi_dependent (insn, dep_insn, insn_type))
15896 /* Claim that moves take one cycle, as the core can issue one load
15897 at a time and the next load can start a cycle later. */
15898 if (dep_insn_type == TYPE_IMOV
15899 || dep_insn_type == TYPE_FMOV)
15907 memory = get_attr_memory (insn);
15909 /* The esp dependency is resolved before the instruction is really finished. */
15911 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
15912 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
15915 /* INT->FP conversion is expensive. */
15916 if (get_attr_fp_int_src (dep_insn))
15919 /* Show the reorder buffer's ability to hide the latency of a load by
15920 executing it in parallel with the previous instruction when the
15921 previous instruction is not needed to compute the address. */
15922 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
15923 && !ix86_agi_dependent (insn, dep_insn, insn_type))
15925 /* Claim that moves take one cycle, as the core can issue one load
15926 at a time and the next load can start a cycle later. */
15927 if (dep_insn_type == TYPE_IMOV
15928 || dep_insn_type == TYPE_FMOV)
15937 case PROCESSOR_ATHLON:
15939 case PROCESSOR_AMDFAM10:
15940 case PROCESSOR_GENERIC32:
15941 case PROCESSOR_GENERIC64:
15942 memory = get_attr_memory (insn);
15944 /* Show the reorder buffer's ability to hide the latency of a load by
15945 executing it in parallel with the previous instruction when the
15946 previous instruction is not needed to compute the address. */
15947 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
15948 && !ix86_agi_dependent (insn, dep_insn, insn_type))
15950 enum attr_unit unit = get_attr_unit (insn);
15953 /* Because the integer and floating-point units differ in the length of
15954 their pipeline preparation stages, memory operands are cheaper for
15955 floating point.
15957 ??? For Athlon the difference is most probably 2. */
15958 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
15961 loadcost = TARGET_ATHLON ? 2 : 0;
15963 if (cost >= loadcost)
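/* Editorial example of the adjustment above: a dependent integer op
   with cost 5 behind a load pays loadcost 3 (the integer-unit value in
   the full source), giving 5 - 3 = 2; anything below loadcost clamps
   to 0.  For floating-point consumers loadcost is 2 on Athlon and 0
   elsewhere, reflecting the longer FP pipeline preparation stage.  */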
15976 /* How many alternative schedules to try. This should be as wide as the
15977 scheduling freedom in the DFA, but no wider. Making this value too
15978 large results in extra work for the scheduler. */
15981 ia32_multipass_dfa_lookahead (void)
15983 if (ix86_tune == PROCESSOR_PENTIUM)
15986 if (ix86_tune == PROCESSOR_PENTIUMPRO
15987 || ix86_tune == PROCESSOR_K6)
15995 /* Compute the alignment given to a constant that is being placed in memory.
15996 EXP is the constant and ALIGN is the alignment that the object would ordinarily have.
15998 The value of this function is used instead of that alignment to align the object. */
16002 ix86_constant_alignment (tree exp, int align)
16004 if (TREE_CODE (exp) == REAL_CST)
16006 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
16008 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
16011 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
16012 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
16013 return BITS_PER_WORD;
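/* Editorial example: a DFmode constant that would ordinarily get
   32-bit alignment is bumped to 64 bits so the FPU can load it in one
   access, and a string constant of 31 or more bytes is aligned to
   BITS_PER_WORD to speed up block copies, unless optimizing for
   size.  */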
16018 /* Compute the alignment for a static variable.
16019 TYPE is the data type, and ALIGN is the alignment that
16020 the object would ordinarily have. The value of this function is used
16021 instead of that alignment to align the object. */
16024 ix86_data_alignment (tree type, int align)
16026 int max_align = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
16028 if (AGGREGATE_TYPE_P (type)
16029 && TYPE_SIZE (type)
16030 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
16031 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
16032 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
16033 && align < max_align)
16036 /* The x86-64 ABI requires arrays of 16 bytes or more to be aligned
16037 to a 16-byte boundary. */
16040 if (AGGREGATE_TYPE_P (type)
16041 && TYPE_SIZE (type)
16042 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
16043 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
16044 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
16048 if (TREE_CODE (type) == ARRAY_TYPE)
16050 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
16052 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
16055 else if (TREE_CODE (type) == COMPLEX_TYPE)
16058 if (TYPE_MODE (type) == DCmode && align < 64)
16060 if (TYPE_MODE (type) == XCmode && align < 128)
16063 else if ((TREE_CODE (type) == RECORD_TYPE
16064 || TREE_CODE (type) == UNION_TYPE
16065 || TREE_CODE (type) == QUAL_UNION_TYPE)
16066 && TYPE_FIELDS (type))
16068 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
16070 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
16073 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
16074 || TREE_CODE (type) == INTEGER_TYPE)
16076 if (TYPE_MODE (type) == DFmode && align < 64)
16078 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
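/* Editorial examples of the rules above: "static double d" is raised
   to 64-bit alignment; on x86-64, "static char buf[16]" (128 bits) is
   raised to a 128-bit boundary by the ABI rule for large arrays; and
   an aggregate of max_align bits or more is raised to max_align (256
   bits when not optimizing for size) to help vectorized copies.  */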
16085 /* Compute the alignment for a local variable.
16086 TYPE is the data type, and ALIGN is the alignment that
16087 the object would ordinarily have. The value of this macro is used
16088 instead of that alignment to align the object. */
16091 ix86_local_alignment (tree type, int align)
16093 /* The x86-64 ABI requires arrays of 16 bytes or more to be aligned
16094 to a 16-byte boundary. */
16097 if (AGGREGATE_TYPE_P (type)
16098 && TYPE_SIZE (type)
16099 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
16100 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
16101 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
16104 if (TREE_CODE (type) == ARRAY_TYPE)
16106 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
16108 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
16111 else if (TREE_CODE (type) == COMPLEX_TYPE)
16113 if (TYPE_MODE (type) == DCmode && align < 64)
16115 if (TYPE_MODE (type) == XCmode && align < 128)
16118 else if ((TREE_CODE (type) == RECORD_TYPE
16119 || TREE_CODE (type) == UNION_TYPE
16120 || TREE_CODE (type) == QUAL_UNION_TYPE)
16121 && TYPE_FIELDS (type))
16123 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
16125 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
16128 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
16129 || TREE_CODE (type) == INTEGER_TYPE)
16132 if (TYPE_MODE (type) == DFmode && align < 64)
16134 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
16140 /* Emit RTL insns to initialize the variable parts of a trampoline.
16141 FNADDR is an RTX for the address of the function's pure code.
16142 CXT is an RTX for the static chain value for the function. */
16144 x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
16148 /* Compute offset from the end of the jmp to the target function. */
16149 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
16150 plus_constant (tramp, 10),
16151 NULL_RTX, 1, OPTAB_DIRECT);
16152 emit_move_insn (gen_rtx_MEM (QImode, tramp),
16153 gen_int_mode (0xb9, QImode));
16154 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
16155 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
16156 gen_int_mode (0xe9, QImode));
16157 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
16162 /* Try to load address using shorter movl instead of movabs.
16163 We may want to support movq for kernel mode, but the kernel does not use
16164 trampolines at the moment. */
16165 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
16167 fnaddr = copy_to_mode_reg (DImode, fnaddr);
16168 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
16169 gen_int_mode (0xbb41, HImode));
16170 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
16171 gen_lowpart (SImode, fnaddr));
16176 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
16177 gen_int_mode (0xbb49, HImode));
16178 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
16182 /* Load static chain using movabs to r10. */
16183 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
16184 gen_int_mode (0xba49, HImode));
16185 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
16188 /* Jump to r11. */
16189 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
16190 gen_int_mode (0xff49, HImode));
16191 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
16192 gen_int_mode (0xe3, QImode));
16194 gcc_assert (offset <= TRAMPOLINE_SIZE);
16197 #ifdef ENABLE_EXECUTE_STACK
16198 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
16199 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
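/* Editorial sketch of the 32-bit trampoline emitted above (10 bytes):

     offset 0:  b9 <cxt:4>    movl $cxt, %ecx   ; static chain
     offset 5:  e9 <disp:4>   jmp  fnaddr       ; disp relative to
                                                ; the end at offset 10

   The 64-bit variant instead materializes FNADDR in %r11 (opcode
   bytes 41 bb for movl, or 49 bb for movabs), the static chain in
   %r10 (49 ba), and finishes with an indirect jmp *%r11 (ff e3 after
   the REX prefix).  */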
16203 /* Codes for all the SSE/MMX builtins. */
16206 IX86_BUILTIN_ADDPS,
16207 IX86_BUILTIN_ADDSS,
16208 IX86_BUILTIN_DIVPS,
16209 IX86_BUILTIN_DIVSS,
16210 IX86_BUILTIN_MULPS,
16211 IX86_BUILTIN_MULSS,
16212 IX86_BUILTIN_SUBPS,
16213 IX86_BUILTIN_SUBSS,
16215 IX86_BUILTIN_CMPEQPS,
16216 IX86_BUILTIN_CMPLTPS,
16217 IX86_BUILTIN_CMPLEPS,
16218 IX86_BUILTIN_CMPGTPS,
16219 IX86_BUILTIN_CMPGEPS,
16220 IX86_BUILTIN_CMPNEQPS,
16221 IX86_BUILTIN_CMPNLTPS,
16222 IX86_BUILTIN_CMPNLEPS,
16223 IX86_BUILTIN_CMPNGTPS,
16224 IX86_BUILTIN_CMPNGEPS,
16225 IX86_BUILTIN_CMPORDPS,
16226 IX86_BUILTIN_CMPUNORDPS,
16227 IX86_BUILTIN_CMPEQSS,
16228 IX86_BUILTIN_CMPLTSS,
16229 IX86_BUILTIN_CMPLESS,
16230 IX86_BUILTIN_CMPNEQSS,
16231 IX86_BUILTIN_CMPNLTSS,
16232 IX86_BUILTIN_CMPNLESS,
16233 IX86_BUILTIN_CMPNGTSS,
16234 IX86_BUILTIN_CMPNGESS,
16235 IX86_BUILTIN_CMPORDSS,
16236 IX86_BUILTIN_CMPUNORDSS,
16238 IX86_BUILTIN_COMIEQSS,
16239 IX86_BUILTIN_COMILTSS,
16240 IX86_BUILTIN_COMILESS,
16241 IX86_BUILTIN_COMIGTSS,
16242 IX86_BUILTIN_COMIGESS,
16243 IX86_BUILTIN_COMINEQSS,
16244 IX86_BUILTIN_UCOMIEQSS,
16245 IX86_BUILTIN_UCOMILTSS,
16246 IX86_BUILTIN_UCOMILESS,
16247 IX86_BUILTIN_UCOMIGTSS,
16248 IX86_BUILTIN_UCOMIGESS,
16249 IX86_BUILTIN_UCOMINEQSS,
16251 IX86_BUILTIN_CVTPI2PS,
16252 IX86_BUILTIN_CVTPS2PI,
16253 IX86_BUILTIN_CVTSI2SS,
16254 IX86_BUILTIN_CVTSI642SS,
16255 IX86_BUILTIN_CVTSS2SI,
16256 IX86_BUILTIN_CVTSS2SI64,
16257 IX86_BUILTIN_CVTTPS2PI,
16258 IX86_BUILTIN_CVTTSS2SI,
16259 IX86_BUILTIN_CVTTSS2SI64,
16261 IX86_BUILTIN_MAXPS,
16262 IX86_BUILTIN_MAXSS,
16263 IX86_BUILTIN_MINPS,
16264 IX86_BUILTIN_MINSS,
16266 IX86_BUILTIN_LOADUPS,
16267 IX86_BUILTIN_STOREUPS,
16268 IX86_BUILTIN_MOVSS,
16270 IX86_BUILTIN_MOVHLPS,
16271 IX86_BUILTIN_MOVLHPS,
16272 IX86_BUILTIN_LOADHPS,
16273 IX86_BUILTIN_LOADLPS,
16274 IX86_BUILTIN_STOREHPS,
16275 IX86_BUILTIN_STORELPS,
16277 IX86_BUILTIN_MASKMOVQ,
16278 IX86_BUILTIN_MOVMSKPS,
16279 IX86_BUILTIN_PMOVMSKB,
16281 IX86_BUILTIN_MOVNTPS,
16282 IX86_BUILTIN_MOVNTQ,
16284 IX86_BUILTIN_LOADDQU,
16285 IX86_BUILTIN_STOREDQU,
16287 IX86_BUILTIN_PACKSSWB,
16288 IX86_BUILTIN_PACKSSDW,
16289 IX86_BUILTIN_PACKUSWB,
16291 IX86_BUILTIN_PADDB,
16292 IX86_BUILTIN_PADDW,
16293 IX86_BUILTIN_PADDD,
16294 IX86_BUILTIN_PADDQ,
16295 IX86_BUILTIN_PADDSB,
16296 IX86_BUILTIN_PADDSW,
16297 IX86_BUILTIN_PADDUSB,
16298 IX86_BUILTIN_PADDUSW,
16299 IX86_BUILTIN_PSUBB,
16300 IX86_BUILTIN_PSUBW,
16301 IX86_BUILTIN_PSUBD,
16302 IX86_BUILTIN_PSUBQ,
16303 IX86_BUILTIN_PSUBSB,
16304 IX86_BUILTIN_PSUBSW,
16305 IX86_BUILTIN_PSUBUSB,
16306 IX86_BUILTIN_PSUBUSW,
16309 IX86_BUILTIN_PANDN,
16313 IX86_BUILTIN_PAVGB,
16314 IX86_BUILTIN_PAVGW,
16316 IX86_BUILTIN_PCMPEQB,
16317 IX86_BUILTIN_PCMPEQW,
16318 IX86_BUILTIN_PCMPEQD,
16319 IX86_BUILTIN_PCMPGTB,
16320 IX86_BUILTIN_PCMPGTW,
16321 IX86_BUILTIN_PCMPGTD,
16323 IX86_BUILTIN_PMADDWD,
16325 IX86_BUILTIN_PMAXSW,
16326 IX86_BUILTIN_PMAXUB,
16327 IX86_BUILTIN_PMINSW,
16328 IX86_BUILTIN_PMINUB,
16330 IX86_BUILTIN_PMULHUW,
16331 IX86_BUILTIN_PMULHW,
16332 IX86_BUILTIN_PMULLW,
16334 IX86_BUILTIN_PSADBW,
16335 IX86_BUILTIN_PSHUFW,
16337 IX86_BUILTIN_PSLLW,
16338 IX86_BUILTIN_PSLLD,
16339 IX86_BUILTIN_PSLLQ,
16340 IX86_BUILTIN_PSRAW,
16341 IX86_BUILTIN_PSRAD,
16342 IX86_BUILTIN_PSRLW,
16343 IX86_BUILTIN_PSRLD,
16344 IX86_BUILTIN_PSRLQ,
16345 IX86_BUILTIN_PSLLWI,
16346 IX86_BUILTIN_PSLLDI,
16347 IX86_BUILTIN_PSLLQI,
16348 IX86_BUILTIN_PSRAWI,
16349 IX86_BUILTIN_PSRADI,
16350 IX86_BUILTIN_PSRLWI,
16351 IX86_BUILTIN_PSRLDI,
16352 IX86_BUILTIN_PSRLQI,
16354 IX86_BUILTIN_PUNPCKHBW,
16355 IX86_BUILTIN_PUNPCKHWD,
16356 IX86_BUILTIN_PUNPCKHDQ,
16357 IX86_BUILTIN_PUNPCKLBW,
16358 IX86_BUILTIN_PUNPCKLWD,
16359 IX86_BUILTIN_PUNPCKLDQ,
16361 IX86_BUILTIN_SHUFPS,
16363 IX86_BUILTIN_RCPPS,
16364 IX86_BUILTIN_RCPSS,
16365 IX86_BUILTIN_RSQRTPS,
16366 IX86_BUILTIN_RSQRTSS,
16367 IX86_BUILTIN_SQRTPS,
16368 IX86_BUILTIN_SQRTSS,
16370 IX86_BUILTIN_UNPCKHPS,
16371 IX86_BUILTIN_UNPCKLPS,
16373 IX86_BUILTIN_ANDPS,
16374 IX86_BUILTIN_ANDNPS,
16376 IX86_BUILTIN_XORPS,
16379 IX86_BUILTIN_LDMXCSR,
16380 IX86_BUILTIN_STMXCSR,
16381 IX86_BUILTIN_SFENCE,
16383 /* 3DNow! Original */
16384 IX86_BUILTIN_FEMMS,
16385 IX86_BUILTIN_PAVGUSB,
16386 IX86_BUILTIN_PF2ID,
16387 IX86_BUILTIN_PFACC,
16388 IX86_BUILTIN_PFADD,
16389 IX86_BUILTIN_PFCMPEQ,
16390 IX86_BUILTIN_PFCMPGE,
16391 IX86_BUILTIN_PFCMPGT,
16392 IX86_BUILTIN_PFMAX,
16393 IX86_BUILTIN_PFMIN,
16394 IX86_BUILTIN_PFMUL,
16395 IX86_BUILTIN_PFRCP,
16396 IX86_BUILTIN_PFRCPIT1,
16397 IX86_BUILTIN_PFRCPIT2,
16398 IX86_BUILTIN_PFRSQIT1,
16399 IX86_BUILTIN_PFRSQRT,
16400 IX86_BUILTIN_PFSUB,
16401 IX86_BUILTIN_PFSUBR,
16402 IX86_BUILTIN_PI2FD,
16403 IX86_BUILTIN_PMULHRW,
16405 /* 3DNow! Athlon Extensions */
16406 IX86_BUILTIN_PF2IW,
16407 IX86_BUILTIN_PFNACC,
16408 IX86_BUILTIN_PFPNACC,
16409 IX86_BUILTIN_PI2FW,
16410 IX86_BUILTIN_PSWAPDSI,
16411 IX86_BUILTIN_PSWAPDSF,
16414 IX86_BUILTIN_ADDPD,
16415 IX86_BUILTIN_ADDSD,
16416 IX86_BUILTIN_DIVPD,
16417 IX86_BUILTIN_DIVSD,
16418 IX86_BUILTIN_MULPD,
16419 IX86_BUILTIN_MULSD,
16420 IX86_BUILTIN_SUBPD,
16421 IX86_BUILTIN_SUBSD,
16423 IX86_BUILTIN_CMPEQPD,
16424 IX86_BUILTIN_CMPLTPD,
16425 IX86_BUILTIN_CMPLEPD,
16426 IX86_BUILTIN_CMPGTPD,
16427 IX86_BUILTIN_CMPGEPD,
16428 IX86_BUILTIN_CMPNEQPD,
16429 IX86_BUILTIN_CMPNLTPD,
16430 IX86_BUILTIN_CMPNLEPD,
16431 IX86_BUILTIN_CMPNGTPD,
16432 IX86_BUILTIN_CMPNGEPD,
16433 IX86_BUILTIN_CMPORDPD,
16434 IX86_BUILTIN_CMPUNORDPD,
16435 IX86_BUILTIN_CMPEQSD,
16436 IX86_BUILTIN_CMPLTSD,
16437 IX86_BUILTIN_CMPLESD,
16438 IX86_BUILTIN_CMPNEQSD,
16439 IX86_BUILTIN_CMPNLTSD,
16440 IX86_BUILTIN_CMPNLESD,
16441 IX86_BUILTIN_CMPORDSD,
16442 IX86_BUILTIN_CMPUNORDSD,
16444 IX86_BUILTIN_COMIEQSD,
16445 IX86_BUILTIN_COMILTSD,
16446 IX86_BUILTIN_COMILESD,
16447 IX86_BUILTIN_COMIGTSD,
16448 IX86_BUILTIN_COMIGESD,
16449 IX86_BUILTIN_COMINEQSD,
16450 IX86_BUILTIN_UCOMIEQSD,
16451 IX86_BUILTIN_UCOMILTSD,
16452 IX86_BUILTIN_UCOMILESD,
16453 IX86_BUILTIN_UCOMIGTSD,
16454 IX86_BUILTIN_UCOMIGESD,
16455 IX86_BUILTIN_UCOMINEQSD,
16457 IX86_BUILTIN_MAXPD,
16458 IX86_BUILTIN_MAXSD,
16459 IX86_BUILTIN_MINPD,
16460 IX86_BUILTIN_MINSD,
16462 IX86_BUILTIN_ANDPD,
16463 IX86_BUILTIN_ANDNPD,
16465 IX86_BUILTIN_XORPD,
16467 IX86_BUILTIN_SQRTPD,
16468 IX86_BUILTIN_SQRTSD,
16470 IX86_BUILTIN_UNPCKHPD,
16471 IX86_BUILTIN_UNPCKLPD,
16473 IX86_BUILTIN_SHUFPD,
16475 IX86_BUILTIN_LOADUPD,
16476 IX86_BUILTIN_STOREUPD,
16477 IX86_BUILTIN_MOVSD,
16479 IX86_BUILTIN_LOADHPD,
16480 IX86_BUILTIN_LOADLPD,
16482 IX86_BUILTIN_CVTDQ2PD,
16483 IX86_BUILTIN_CVTDQ2PS,
16485 IX86_BUILTIN_CVTPD2DQ,
16486 IX86_BUILTIN_CVTPD2PI,
16487 IX86_BUILTIN_CVTPD2PS,
16488 IX86_BUILTIN_CVTTPD2DQ,
16489 IX86_BUILTIN_CVTTPD2PI,
16491 IX86_BUILTIN_CVTPI2PD,
16492 IX86_BUILTIN_CVTSI2SD,
16493 IX86_BUILTIN_CVTSI642SD,
16495 IX86_BUILTIN_CVTSD2SI,
16496 IX86_BUILTIN_CVTSD2SI64,
16497 IX86_BUILTIN_CVTSD2SS,
16498 IX86_BUILTIN_CVTSS2SD,
16499 IX86_BUILTIN_CVTTSD2SI,
16500 IX86_BUILTIN_CVTTSD2SI64,
16502 IX86_BUILTIN_CVTPS2DQ,
16503 IX86_BUILTIN_CVTPS2PD,
16504 IX86_BUILTIN_CVTTPS2DQ,
16506 IX86_BUILTIN_MOVNTI,
16507 IX86_BUILTIN_MOVNTPD,
16508 IX86_BUILTIN_MOVNTDQ,
16511 IX86_BUILTIN_MASKMOVDQU,
16512 IX86_BUILTIN_MOVMSKPD,
16513 IX86_BUILTIN_PMOVMSKB128,
16515 IX86_BUILTIN_PACKSSWB128,
16516 IX86_BUILTIN_PACKSSDW128,
16517 IX86_BUILTIN_PACKUSWB128,
16519 IX86_BUILTIN_PADDB128,
16520 IX86_BUILTIN_PADDW128,
16521 IX86_BUILTIN_PADDD128,
16522 IX86_BUILTIN_PADDQ128,
16523 IX86_BUILTIN_PADDSB128,
16524 IX86_BUILTIN_PADDSW128,
16525 IX86_BUILTIN_PADDUSB128,
16526 IX86_BUILTIN_PADDUSW128,
16527 IX86_BUILTIN_PSUBB128,
16528 IX86_BUILTIN_PSUBW128,
16529 IX86_BUILTIN_PSUBD128,
16530 IX86_BUILTIN_PSUBQ128,
16531 IX86_BUILTIN_PSUBSB128,
16532 IX86_BUILTIN_PSUBSW128,
16533 IX86_BUILTIN_PSUBUSB128,
16534 IX86_BUILTIN_PSUBUSW128,
16536 IX86_BUILTIN_PAND128,
16537 IX86_BUILTIN_PANDN128,
16538 IX86_BUILTIN_POR128,
16539 IX86_BUILTIN_PXOR128,
16541 IX86_BUILTIN_PAVGB128,
16542 IX86_BUILTIN_PAVGW128,
16544 IX86_BUILTIN_PCMPEQB128,
16545 IX86_BUILTIN_PCMPEQW128,
16546 IX86_BUILTIN_PCMPEQD128,
16547 IX86_BUILTIN_PCMPGTB128,
16548 IX86_BUILTIN_PCMPGTW128,
16549 IX86_BUILTIN_PCMPGTD128,
16551 IX86_BUILTIN_PMADDWD128,
16553 IX86_BUILTIN_PMAXSW128,
16554 IX86_BUILTIN_PMAXUB128,
16555 IX86_BUILTIN_PMINSW128,
16556 IX86_BUILTIN_PMINUB128,
16558 IX86_BUILTIN_PMULUDQ,
16559 IX86_BUILTIN_PMULUDQ128,
16560 IX86_BUILTIN_PMULHUW128,
16561 IX86_BUILTIN_PMULHW128,
16562 IX86_BUILTIN_PMULLW128,
16564 IX86_BUILTIN_PSADBW128,
16565 IX86_BUILTIN_PSHUFHW,
16566 IX86_BUILTIN_PSHUFLW,
16567 IX86_BUILTIN_PSHUFD,
16569 IX86_BUILTIN_PSLLDQI128,
16570 IX86_BUILTIN_PSLLWI128,
16571 IX86_BUILTIN_PSLLDI128,
16572 IX86_BUILTIN_PSLLQI128,
16573 IX86_BUILTIN_PSRAWI128,
16574 IX86_BUILTIN_PSRADI128,
16575 IX86_BUILTIN_PSRLDQI128,
16576 IX86_BUILTIN_PSRLWI128,
16577 IX86_BUILTIN_PSRLDI128,
16578 IX86_BUILTIN_PSRLQI128,
16580 IX86_BUILTIN_PSLLDQ128,
16581 IX86_BUILTIN_PSLLW128,
16582 IX86_BUILTIN_PSLLD128,
16583 IX86_BUILTIN_PSLLQ128,
16584 IX86_BUILTIN_PSRAW128,
16585 IX86_BUILTIN_PSRAD128,
16586 IX86_BUILTIN_PSRLW128,
16587 IX86_BUILTIN_PSRLD128,
16588 IX86_BUILTIN_PSRLQ128,
16590 IX86_BUILTIN_PUNPCKHBW128,
16591 IX86_BUILTIN_PUNPCKHWD128,
16592 IX86_BUILTIN_PUNPCKHDQ128,
16593 IX86_BUILTIN_PUNPCKHQDQ128,
16594 IX86_BUILTIN_PUNPCKLBW128,
16595 IX86_BUILTIN_PUNPCKLWD128,
16596 IX86_BUILTIN_PUNPCKLDQ128,
16597 IX86_BUILTIN_PUNPCKLQDQ128,
16599 IX86_BUILTIN_CLFLUSH,
16600 IX86_BUILTIN_MFENCE,
16601 IX86_BUILTIN_LFENCE,
16603 /* Prescott New Instructions. */
16604 IX86_BUILTIN_ADDSUBPS,
16605 IX86_BUILTIN_HADDPS,
16606 IX86_BUILTIN_HSUBPS,
16607 IX86_BUILTIN_MOVSHDUP,
16608 IX86_BUILTIN_MOVSLDUP,
16609 IX86_BUILTIN_ADDSUBPD,
16610 IX86_BUILTIN_HADDPD,
16611 IX86_BUILTIN_HSUBPD,
16612 IX86_BUILTIN_LDDQU,
16614 IX86_BUILTIN_MONITOR,
16615 IX86_BUILTIN_MWAIT,
16618 IX86_BUILTIN_PHADDW,
16619 IX86_BUILTIN_PHADDD,
16620 IX86_BUILTIN_PHADDSW,
16621 IX86_BUILTIN_PHSUBW,
16622 IX86_BUILTIN_PHSUBD,
16623 IX86_BUILTIN_PHSUBSW,
16624 IX86_BUILTIN_PMADDUBSW,
16625 IX86_BUILTIN_PMULHRSW,
16626 IX86_BUILTIN_PSHUFB,
16627 IX86_BUILTIN_PSIGNB,
16628 IX86_BUILTIN_PSIGNW,
16629 IX86_BUILTIN_PSIGND,
16630 IX86_BUILTIN_PALIGNR,
16631 IX86_BUILTIN_PABSB,
16632 IX86_BUILTIN_PABSW,
16633 IX86_BUILTIN_PABSD,
16635 IX86_BUILTIN_PHADDW128,
16636 IX86_BUILTIN_PHADDD128,
16637 IX86_BUILTIN_PHADDSW128,
16638 IX86_BUILTIN_PHSUBW128,
16639 IX86_BUILTIN_PHSUBD128,
16640 IX86_BUILTIN_PHSUBSW128,
16641 IX86_BUILTIN_PMADDUBSW128,
16642 IX86_BUILTIN_PMULHRSW128,
16643 IX86_BUILTIN_PSHUFB128,
16644 IX86_BUILTIN_PSIGNB128,
16645 IX86_BUILTIN_PSIGNW128,
16646 IX86_BUILTIN_PSIGND128,
16647 IX86_BUILTIN_PALIGNR128,
16648 IX86_BUILTIN_PABSB128,
16649 IX86_BUILTIN_PABSW128,
16650 IX86_BUILTIN_PABSD128,
16652 /* AMDFAM10 - SSE4A New Instructions. */
16653 IX86_BUILTIN_MOVNTSD,
16654 IX86_BUILTIN_MOVNTSS,
16655 IX86_BUILTIN_EXTRQI,
16656 IX86_BUILTIN_EXTRQ,
16657 IX86_BUILTIN_INSERTQI,
16658 IX86_BUILTIN_INSERTQ,
16661 IX86_BUILTIN_BLENDPD,
16662 IX86_BUILTIN_BLENDPS,
16663 IX86_BUILTIN_BLENDVPD,
16664 IX86_BUILTIN_BLENDVPS,
16665 IX86_BUILTIN_PBLENDVB128,
16666 IX86_BUILTIN_PBLENDW128,
16671 IX86_BUILTIN_INSERTPS128,
16673 IX86_BUILTIN_MOVNTDQA,
16674 IX86_BUILTIN_MPSADBW128,
16675 IX86_BUILTIN_PACKUSDW128,
16676 IX86_BUILTIN_PCMPEQQ,
16677 IX86_BUILTIN_PHMINPOSUW128,
16679 IX86_BUILTIN_PMAXSB128,
16680 IX86_BUILTIN_PMAXSD128,
16681 IX86_BUILTIN_PMAXUD128,
16682 IX86_BUILTIN_PMAXUW128,
16684 IX86_BUILTIN_PMINSB128,
16685 IX86_BUILTIN_PMINSD128,
16686 IX86_BUILTIN_PMINUD128,
16687 IX86_BUILTIN_PMINUW128,
16689 IX86_BUILTIN_PMOVSXBW128,
16690 IX86_BUILTIN_PMOVSXBD128,
16691 IX86_BUILTIN_PMOVSXBQ128,
16692 IX86_BUILTIN_PMOVSXWD128,
16693 IX86_BUILTIN_PMOVSXWQ128,
16694 IX86_BUILTIN_PMOVSXDQ128,
16696 IX86_BUILTIN_PMOVZXBW128,
16697 IX86_BUILTIN_PMOVZXBD128,
16698 IX86_BUILTIN_PMOVZXBQ128,
16699 IX86_BUILTIN_PMOVZXWD128,
16700 IX86_BUILTIN_PMOVZXWQ128,
16701 IX86_BUILTIN_PMOVZXDQ128,
16703 IX86_BUILTIN_PMULDQ128,
16704 IX86_BUILTIN_PMULLD128,
16706 IX86_BUILTIN_ROUNDPD,
16707 IX86_BUILTIN_ROUNDPS,
16708 IX86_BUILTIN_ROUNDSD,
16709 IX86_BUILTIN_ROUNDSS,
16711 IX86_BUILTIN_PTESTZ,
16712 IX86_BUILTIN_PTESTC,
16713 IX86_BUILTIN_PTESTNZC,
16715 IX86_BUILTIN_VEC_INIT_V2SI,
16716 IX86_BUILTIN_VEC_INIT_V4HI,
16717 IX86_BUILTIN_VEC_INIT_V8QI,
16718 IX86_BUILTIN_VEC_EXT_V2DF,
16719 IX86_BUILTIN_VEC_EXT_V2DI,
16720 IX86_BUILTIN_VEC_EXT_V4SF,
16721 IX86_BUILTIN_VEC_EXT_V4SI,
16722 IX86_BUILTIN_VEC_EXT_V8HI,
16723 IX86_BUILTIN_VEC_EXT_V2SI,
16724 IX86_BUILTIN_VEC_EXT_V4HI,
16725 IX86_BUILTIN_VEC_EXT_V16QI,
16726 IX86_BUILTIN_VEC_SET_V2DI,
16727 IX86_BUILTIN_VEC_SET_V4SF,
16728 IX86_BUILTIN_VEC_SET_V4SI,
16729 IX86_BUILTIN_VEC_SET_V8HI,
16730 IX86_BUILTIN_VEC_SET_V4HI,
16731 IX86_BUILTIN_VEC_SET_V16QI,
16734 IX86_BUILTIN_CRC32QI,
16735 IX86_BUILTIN_CRC32HI,
16736 IX86_BUILTIN_CRC32SI,
16737 IX86_BUILTIN_CRC32DI,
16739 IX86_BUILTIN_PCMPGTQ,
16744 /* Table for the ix86 builtin decls. */
16745 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
16747 /* Add an ix86 target builtin function with CODE, NAME and TYPE. Do so
16748 * only if MASK matches the enabled ISA flags. Stores the function decl
16749 * in the ix86_builtins array.
16750 * Returns the function decl, or NULL_TREE if the builtin was not added. */
16753 def_builtin (int mask, const char *name, tree type, enum ix86_builtins code)
16755 tree decl = NULL_TREE;
16757 if (mask & ix86_isa_flags
16758 && (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT))
16760 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
16762 ix86_builtins[(int) code] = decl;
16768 /* Like def_builtin, but also marks the function decl "const". */
16771 def_builtin_const (int mask, const char *name, tree type,
16772 enum ix86_builtins code)
16774 tree decl = def_builtin (mask, name, type, code);
16776 TREE_READONLY (decl) = 1;
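/* Illustrative use (editorial; the type node v4sf_ftype_v4sf_v4sf is
   built later in this file):

     def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_addps",
                        v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ADDPS);

   This registers the builtin only when SSE is enabled and marks it
   free of side effects via TREE_READONLY.  */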
16780 /* Bits for builtin_description.flag. */
16782 /* Set when we don't support the comparison natively, and should
16783 swap_comparison in order to support it. */
16784 #define BUILTIN_DESC_SWAP_OPERANDS 1
16786 struct builtin_description
16788 const unsigned int mask;
16789 const enum insn_code icode;
16790 const char *const name;
16791 const enum ix86_builtins code;
16792 const enum rtx_code comparison;
16793 const unsigned int flag;
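/* Editorial note: each table entry below binds an ISA mask, an insn
   pattern, the builtin's name and code, and (for compares) an rtx
   comparison code.  For example, cmpgtps has no native pattern, so its
   entry pairs LT with BUILTIN_DESC_SWAP_OPERANDS: a > b is emitted as
   b < a.  */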
16796 static const struct builtin_description bdesc_comi[] =
16798 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
16799 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
16800 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
16801 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
16802 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
16803 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
16804 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
16805 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
16806 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
16807 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
16808 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
16809 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
16810 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
16811 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
16812 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
16813 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
16814 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
16815 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
16816 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
16817 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
16818 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
16819 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
16820 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
16821 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
16824 static const struct builtin_description bdesc_ptest[] =
16827 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, 0 },
16828 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, 0 },
16829 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, 0 },
16832 static const struct builtin_description bdesc_crc32[] =
16835 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32qi, 0, IX86_BUILTIN_CRC32QI, 0, 0 },
16836 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32hi, 0, IX86_BUILTIN_CRC32HI, 0, 0 },
16837 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32si, 0, IX86_BUILTIN_CRC32SI, 0, 0 },
16838 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32di, 0, IX86_BUILTIN_CRC32DI, 0, 0 },
16841 /* SSE builtins with 3 arguments whose last argument must be an 8-bit
16842 constant or xmm0. */
16843 static const struct builtin_description bdesc_sse_3arg[] =
16846 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, 0, 0 },
16847 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, 0, 0 },
16848 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, 0, 0 },
16849 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, 0, 0 },
16850 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, 0, 0 },
16851 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, 0, 0 },
16852 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, 0, 0 },
16853 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, 0, 0 },
16854 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, 0, 0 },
16855 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, 0, 0 },
16856 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_roundsd, 0, IX86_BUILTIN_ROUNDSD, 0, 0 },
16857 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_roundss, 0, IX86_BUILTIN_ROUNDSS, 0, 0 },
16860 static const struct builtin_description bdesc_2arg[] =
16863 { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
16864 { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
16865 { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
16866 { OPTION_MASK_ISA_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
16867 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
16868 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
16869 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
16870 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
16872 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
16873 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
16874 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
16875 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, BUILTIN_DESC_SWAP_OPERANDS },
16876 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, BUILTIN_DESC_SWAP_OPERANDS },
16877 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
16878 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, 0 },
16879 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, 0 },
16880 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, 0 },
16881 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, BUILTIN_DESC_SWAP_OPERANDS },
16882 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, BUILTIN_DESC_SWAP_OPERANDS },
16883 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, 0 },
16884 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
16885 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
16886 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
16887 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
16888 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, 0 },
16889 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, 0 },
16890 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, 0 },
16891 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE, BUILTIN_DESC_SWAP_OPERANDS },
16892 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT, BUILTIN_DESC_SWAP_OPERANDS },
16893 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, 0 },
16895 { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
16896 { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
16897 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
16898 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
16900 { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
16901 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
16902 { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
16903 { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
16905 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
16906 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
16907 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
16908 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
16909 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
16912 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
16913 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
16914 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
16915 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, 0, 0 },
16916 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
16917 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
16918 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
16919 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, 0, 0 },
16921 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
16922 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
16923 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
16924 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
16925 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
16926 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
16927 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
16928 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
16930 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
16931 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
16932 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
16934 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
16935 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_nandv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
16936 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
16937 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
16939 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
16940 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
16942 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
16943 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
16944 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
16945 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
16946 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
16947 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
16949 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
16950 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
16951 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
16952 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
16954 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
16955 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
16956 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
16957 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
16958 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
16959 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
16962 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
16963 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
16964 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
16966 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
16967 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
16968 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 },
16970 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
16971 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
16972 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
16973 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
16974 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
16975 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
16977 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
16978 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
16979 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
16980 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
16981 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
16982 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
16984 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
16985 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
16986 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
16987 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
16989 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
16990 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
16993 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
16994 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
16995 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
16996 { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
16997 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
16998 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
16999 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
17000 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
17002 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
17003 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
17004 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
17005 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, BUILTIN_DESC_SWAP_OPERANDS },
17006 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, BUILTIN_DESC_SWAP_OPERANDS },
17007 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
17008 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, 0 },
17009 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, 0 },
17010 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, 0 },
17011 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, BUILTIN_DESC_SWAP_OPERANDS },
17012 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, BUILTIN_DESC_SWAP_OPERANDS },
17013 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, 0 },
17014 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
17015 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
17016 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
17017 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
17018 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, 0 },
17019 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, 0 },
17020 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, 0 },
17021 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, 0 },
17023 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
17024 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
17025 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
17026 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
17028 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
17029 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
17030 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
17031 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
17033 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
17034 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
17035 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
17038 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
17039 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
17040 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
17041 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
17042 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
17043 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
17044 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
17045 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
17047 { OPTION_MASK_ISA_MMX, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
17048 { OPTION_MASK_ISA_MMX, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
17049 { OPTION_MASK_ISA_MMX, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
17050 { OPTION_MASK_ISA_MMX, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
17051 { OPTION_MASK_ISA_MMX, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
17052 { OPTION_MASK_ISA_MMX, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
17053 { OPTION_MASK_ISA_MMX, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
17054 { OPTION_MASK_ISA_MMX, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
17056 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
17057 { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
17059 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
17060 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
17061 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
17062 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulsidi3, 0, IX86_BUILTIN_PMULUDQ, 0, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv2siv2di3, 0, IX86_BUILTIN_PMULUDQ128, 0, 0 },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
  { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 },

  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, 0, 0 },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, 0, 0 },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, 0, 0 },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, 0, 0 },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, 0, 0 },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, 0, 0 },

  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, 0, 0 },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, 0, 0 },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, 0, 0 },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, 0, 0 },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, 0, 0 },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, 0, 0 },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, 0, 0 },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, 0, 0 },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, 0, 0 },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, 0, 0 },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, 0, 0 },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, 0, 0 },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubswv8hi3, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, 0, 0 },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubswv4hi3, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, 0, 0 },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, 0, 0 },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, 0, 0 },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, 0, 0 },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, 0, 0 },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, 0, 0 },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, 0, 0 },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, 0, 0 },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, 0, 0 },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, 0, 0 },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, 0, 0 },

  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, 0, 0 },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, 0, 0 },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, 0, 0 },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, 0, 0 },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, 0, 0 },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, 0, 0 },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, 0, 0 },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, 0, 0 },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, 0, 0 },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, 0, 0 },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, 0, IX86_BUILTIN_PMULDQ128, 0, 0 },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, 0, 0 },

  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, 0, 0 },
};
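
/* Illustrative note (not part of the original table): each entry above
   binds a named insn pattern to a user-visible builtin.  For example,
   the IX86_BUILTIN_PAVGB128 row is what lets <emmintrin.h> implement
   _mm_avg_epu8 as, roughly:

     __m128i _mm_avg_epu8 (__m128i __A, __m128i __B)
     {
       return (__m128i) __builtin_ia32_pavgb128 ((__v16qi) __A,
                                                 (__v16qi) __B);
     }

   The init loop below derives each builtin's C prototype from the
   modes of its insn pattern, so none is spelled out here.  */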

static const struct builtin_description bdesc_1arg[] =
{
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
  { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 },
  { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },

  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, 0, 0 },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, 0, 0 },

  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, 0, 0 },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, 0, 0 },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, 0, 0 },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, 0, 0 },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, 0, 0 },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, 0, 0 },

  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv8qiv8hi2, 0, IX86_BUILTIN_PMOVSXBW128, 0, 0 },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv4qiv4si2, 0, IX86_BUILTIN_PMOVSXBD128, 0, 0 },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2qiv2di2, 0, IX86_BUILTIN_PMOVSXBQ128, 0, 0 },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv4hiv4si2, 0, IX86_BUILTIN_PMOVSXWD128, 0, 0 },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2hiv2di2, 0, IX86_BUILTIN_PMOVSXWQ128, 0, 0 },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2siv2di2, 0, IX86_BUILTIN_PMOVSXDQ128, 0, 0 },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, 0, IX86_BUILTIN_PMOVZXBW128, 0, 0 },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, 0, IX86_BUILTIN_PMOVZXBD128, 0, 0 },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, 0, IX86_BUILTIN_PMOVZXBQ128, 0, 0 },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, 0, IX86_BUILTIN_PMOVZXWD128, 0, 0 },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, 0, IX86_BUILTIN_PMOVZXWQ128, 0, 0 },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, 0, IX86_BUILTIN_PMOVZXDQ128, 0, 0 },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, 0, 0 },
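
  /* Illustrative note: one-operand entries work the same way.  The
     IX86_BUILTIN_PABSB128 row above backs the SSSE3 intrinsic
     _mm_abs_epi8 from <tmmintrin.h>, approximately:

       __m128i _mm_abs_epi8 (__m128i __X)
       {
         return (__m128i) __builtin_ia32_pabsb128 ((__v16qi) __X);
       }
  */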

  /* Fake 1 arg builtins with a constant smaller than 8 bits as the 2nd arg.  */
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_roundpd, 0, IX86_BUILTIN_ROUNDPD, 0, 0 },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_roundps, 0, IX86_BUILTIN_ROUNDPS, 0, 0 },
};

/* Set up all the MMX/SSE builtins.  This is not called if TARGET_MMX
   is zero.  Otherwise, if TARGET_SSE is not set, only expand the MMX
   part of the machinery.  */
static void
ix86_init_mmx_sse_builtins (void)
{
  const struct builtin_description * d;
  size_t i;

  tree V16QI_type_node = build_vector_type_for_mode (char_type_node, V16QImode);
  tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
  tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
  tree V2DI_type_node
    = build_vector_type_for_mode (long_long_integer_type_node, V2DImode);
  tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
  tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
  tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
  tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
  tree V8QI_type_node = build_vector_type_for_mode (char_type_node, V8QImode);
  tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);

  tree pchar_type_node = build_pointer_type (char_type_node);
  tree pcchar_type_node = build_pointer_type (
			     build_type_variant (char_type_node, 1, 0));
  tree pfloat_type_node = build_pointer_type (float_type_node);
  tree pcfloat_type_node = build_pointer_type (
			     build_type_variant (float_type_node, 1, 0));
  tree pv2si_type_node = build_pointer_type (V2SI_type_node);
  tree pv2di_type_node = build_pointer_type (V2DI_type_node);
  tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);

  /* Comparisons.  */
  tree int_ftype_v4sf_v4sf
    = build_function_type_list (integer_type_node,
				V4SF_type_node, V4SF_type_node, NULL_TREE);
  tree v4si_ftype_v4sf_v4sf
    = build_function_type_list (V4SI_type_node,
				V4SF_type_node, V4SF_type_node, NULL_TREE);
  /* MMX/SSE/integer conversions.  */
  tree int_ftype_v4sf
    = build_function_type_list (integer_type_node,
				V4SF_type_node, NULL_TREE);
  tree int64_ftype_v4sf
    = build_function_type_list (long_long_integer_type_node,
				V4SF_type_node, NULL_TREE);
  tree int_ftype_v8qi
    = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
  tree v4sf_ftype_v4sf_int
    = build_function_type_list (V4SF_type_node,
				V4SF_type_node, integer_type_node, NULL_TREE);
  tree v4sf_ftype_v4sf_int64
    = build_function_type_list (V4SF_type_node,
				V4SF_type_node, long_long_integer_type_node,
				NULL_TREE);
  tree v4sf_ftype_v4sf_v2si
    = build_function_type_list (V4SF_type_node,
				V4SF_type_node, V2SI_type_node, NULL_TREE);

  /* Miscellaneous.  */
  tree v8qi_ftype_v4hi_v4hi
    = build_function_type_list (V8QI_type_node,
				V4HI_type_node, V4HI_type_node, NULL_TREE);
  tree v4hi_ftype_v2si_v2si
    = build_function_type_list (V4HI_type_node,
				V2SI_type_node, V2SI_type_node, NULL_TREE);
  tree v4sf_ftype_v4sf_v4sf_int
    = build_function_type_list (V4SF_type_node,
				V4SF_type_node, V4SF_type_node,
				integer_type_node, NULL_TREE);
  tree v2si_ftype_v4hi_v4hi
    = build_function_type_list (V2SI_type_node,
				V4HI_type_node, V4HI_type_node, NULL_TREE);
  tree v4hi_ftype_v4hi_int
    = build_function_type_list (V4HI_type_node,
				V4HI_type_node, integer_type_node, NULL_TREE);
  tree v4hi_ftype_v4hi_di
    = build_function_type_list (V4HI_type_node,
				V4HI_type_node, long_long_unsigned_type_node,
				NULL_TREE);
  tree v2si_ftype_v2si_di
    = build_function_type_list (V2SI_type_node,
				V2SI_type_node, long_long_unsigned_type_node,
				NULL_TREE);
  tree void_ftype_void
    = build_function_type (void_type_node, void_list_node);
  tree void_ftype_unsigned
    = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
  tree void_ftype_unsigned_unsigned
    = build_function_type_list (void_type_node, unsigned_type_node,
				unsigned_type_node, NULL_TREE);
  tree void_ftype_pcvoid_unsigned_unsigned
    = build_function_type_list (void_type_node, const_ptr_type_node,
				unsigned_type_node, unsigned_type_node,
				NULL_TREE);
  tree unsigned_ftype_void
    = build_function_type (unsigned_type_node, void_list_node);
  tree v2si_ftype_v4sf
    = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
  /* Loads/stores.  */
  tree void_ftype_v8qi_v8qi_pchar
    = build_function_type_list (void_type_node,
				V8QI_type_node, V8QI_type_node,
				pchar_type_node, NULL_TREE);
  tree v4sf_ftype_pcfloat
    = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
  /* @@@ the type is bogus */
  tree v4sf_ftype_v4sf_pv2si
    = build_function_type_list (V4SF_type_node,
				V4SF_type_node, pv2si_type_node, NULL_TREE);
  tree void_ftype_pv2si_v4sf
    = build_function_type_list (void_type_node,
				pv2si_type_node, V4SF_type_node, NULL_TREE);
  tree void_ftype_pfloat_v4sf
    = build_function_type_list (void_type_node,
				pfloat_type_node, V4SF_type_node, NULL_TREE);
  tree void_ftype_pdi_di
    = build_function_type_list (void_type_node,
				pdi_type_node, long_long_unsigned_type_node,
				NULL_TREE);
  tree void_ftype_pv2di_v2di
    = build_function_type_list (void_type_node,
				pv2di_type_node, V2DI_type_node, NULL_TREE);
  /* Normal vector unops.  */
  tree v4sf_ftype_v4sf
    = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
  tree v16qi_ftype_v16qi
    = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE);
  tree v8hi_ftype_v8hi
    = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE);
  tree v4si_ftype_v4si
    = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
  tree v8qi_ftype_v8qi
    = build_function_type_list (V8QI_type_node, V8QI_type_node, NULL_TREE);
  tree v4hi_ftype_v4hi
    = build_function_type_list (V4HI_type_node, V4HI_type_node, NULL_TREE);

  /* Normal vector binops.  */
  tree v4sf_ftype_v4sf_v4sf
    = build_function_type_list (V4SF_type_node,
				V4SF_type_node, V4SF_type_node, NULL_TREE);
  tree v8qi_ftype_v8qi_v8qi
    = build_function_type_list (V8QI_type_node,
				V8QI_type_node, V8QI_type_node, NULL_TREE);
  tree v4hi_ftype_v4hi_v4hi
    = build_function_type_list (V4HI_type_node,
				V4HI_type_node, V4HI_type_node, NULL_TREE);
  tree v2si_ftype_v2si_v2si
    = build_function_type_list (V2SI_type_node,
				V2SI_type_node, V2SI_type_node, NULL_TREE);
  tree di_ftype_di_di
    = build_function_type_list (long_long_unsigned_type_node,
				long_long_unsigned_type_node,
				long_long_unsigned_type_node, NULL_TREE);

  tree di_ftype_di_di_int
    = build_function_type_list (long_long_unsigned_type_node,
				long_long_unsigned_type_node,
				long_long_unsigned_type_node,
				integer_type_node, NULL_TREE);

  tree v2si_ftype_v2sf
    = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
  tree v2sf_ftype_v2si
    = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
  tree v2si_ftype_v2si
    = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
  tree v2sf_ftype_v2sf
    = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
  tree v2sf_ftype_v2sf_v2sf
    = build_function_type_list (V2SF_type_node,
				V2SF_type_node, V2SF_type_node, NULL_TREE);
  tree v2si_ftype_v2sf_v2sf
    = build_function_type_list (V2SI_type_node,
				V2SF_type_node, V2SF_type_node, NULL_TREE);
  tree pint_type_node = build_pointer_type (integer_type_node);
  tree pdouble_type_node = build_pointer_type (double_type_node);
  tree pcdouble_type_node = build_pointer_type (
			      build_type_variant (double_type_node, 1, 0));
  tree int_ftype_v2df_v2df
    = build_function_type_list (integer_type_node,
				V2DF_type_node, V2DF_type_node, NULL_TREE);

  tree void_ftype_pcvoid
    = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
  tree v4sf_ftype_v4si
    = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
  tree v4si_ftype_v4sf
    = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
  tree v2df_ftype_v4si
    = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
  tree v4si_ftype_v2df
    = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
  tree v2si_ftype_v2df
    = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
  tree v4sf_ftype_v2df
    = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
  tree v2df_ftype_v2si
    = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
  tree v2df_ftype_v4sf
    = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
  tree int_ftype_v2df
    = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
  tree int64_ftype_v2df
    = build_function_type_list (long_long_integer_type_node,
				V2DF_type_node, NULL_TREE);
  tree v2df_ftype_v2df_int
    = build_function_type_list (V2DF_type_node,
				V2DF_type_node, integer_type_node, NULL_TREE);
  tree v2df_ftype_v2df_int64
    = build_function_type_list (V2DF_type_node,
				V2DF_type_node, long_long_integer_type_node,
				NULL_TREE);
  tree v4sf_ftype_v4sf_v2df
    = build_function_type_list (V4SF_type_node,
				V4SF_type_node, V2DF_type_node, NULL_TREE);
  tree v2df_ftype_v2df_v4sf
    = build_function_type_list (V2DF_type_node,
				V2DF_type_node, V4SF_type_node, NULL_TREE);
  tree v2df_ftype_v2df_v2df_int
    = build_function_type_list (V2DF_type_node,
				V2DF_type_node, V2DF_type_node,
				integer_type_node, NULL_TREE);
  tree v2df_ftype_v2df_pcdouble
    = build_function_type_list (V2DF_type_node,
				V2DF_type_node, pcdouble_type_node, NULL_TREE);
  tree void_ftype_pdouble_v2df
    = build_function_type_list (void_type_node,
				pdouble_type_node, V2DF_type_node, NULL_TREE);
  tree void_ftype_pint_int
    = build_function_type_list (void_type_node,
				pint_type_node, integer_type_node, NULL_TREE);
  tree void_ftype_v16qi_v16qi_pchar
    = build_function_type_list (void_type_node,
				V16QI_type_node, V16QI_type_node,
				pchar_type_node, NULL_TREE);
  tree v2df_ftype_pcdouble
    = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
  tree v2df_ftype_v2df_v2df
    = build_function_type_list (V2DF_type_node,
				V2DF_type_node, V2DF_type_node, NULL_TREE);
  tree v16qi_ftype_v16qi_v16qi
    = build_function_type_list (V16QI_type_node,
				V16QI_type_node, V16QI_type_node, NULL_TREE);
  tree v8hi_ftype_v8hi_v8hi
    = build_function_type_list (V8HI_type_node,
				V8HI_type_node, V8HI_type_node, NULL_TREE);
  tree v4si_ftype_v4si_v4si
    = build_function_type_list (V4SI_type_node,
				V4SI_type_node, V4SI_type_node, NULL_TREE);
  tree v2di_ftype_v2di_v2di
    = build_function_type_list (V2DI_type_node,
				V2DI_type_node, V2DI_type_node, NULL_TREE);
  tree v2di_ftype_v2df_v2df
    = build_function_type_list (V2DI_type_node,
				V2DF_type_node, V2DF_type_node, NULL_TREE);
  tree v2df_ftype_v2df
    = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
  tree v2di_ftype_v2di_int
    = build_function_type_list (V2DI_type_node,
				V2DI_type_node, integer_type_node, NULL_TREE);
  tree v2di_ftype_v2di_v2di_int
    = build_function_type_list (V2DI_type_node, V2DI_type_node,
				V2DI_type_node, integer_type_node, NULL_TREE);
  tree v4si_ftype_v4si_int
    = build_function_type_list (V4SI_type_node,
				V4SI_type_node, integer_type_node, NULL_TREE);
  tree v8hi_ftype_v8hi_int
    = build_function_type_list (V8HI_type_node,
				V8HI_type_node, integer_type_node, NULL_TREE);
  tree v4si_ftype_v8hi_v8hi
    = build_function_type_list (V4SI_type_node,
				V8HI_type_node, V8HI_type_node, NULL_TREE);
  tree di_ftype_v8qi_v8qi
    = build_function_type_list (long_long_unsigned_type_node,
				V8QI_type_node, V8QI_type_node, NULL_TREE);
  tree di_ftype_v2si_v2si
    = build_function_type_list (long_long_unsigned_type_node,
				V2SI_type_node, V2SI_type_node, NULL_TREE);
  tree v2di_ftype_v16qi_v16qi
    = build_function_type_list (V2DI_type_node,
				V16QI_type_node, V16QI_type_node, NULL_TREE);
  tree v2di_ftype_v4si_v4si
    = build_function_type_list (V2DI_type_node,
				V4SI_type_node, V4SI_type_node, NULL_TREE);
  tree int_ftype_v16qi
    = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
  tree v16qi_ftype_pcchar
    = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
  tree void_ftype_pchar_v16qi
    = build_function_type_list (void_type_node,
				pchar_type_node, V16QI_type_node, NULL_TREE);

  tree v2di_ftype_v2di_unsigned_unsigned
    = build_function_type_list (V2DI_type_node, V2DI_type_node,
				unsigned_type_node, unsigned_type_node,
				NULL_TREE);
  tree v2di_ftype_v2di_v2di_unsigned_unsigned
    = build_function_type_list (V2DI_type_node, V2DI_type_node, V2DI_type_node,
				unsigned_type_node, unsigned_type_node,
				NULL_TREE);
  tree v2di_ftype_v2di_v16qi
    = build_function_type_list (V2DI_type_node, V2DI_type_node, V16QI_type_node,
				NULL_TREE);
  tree v2df_ftype_v2df_v2df_v2df
    = build_function_type_list (V2DF_type_node,
				V2DF_type_node, V2DF_type_node,
				V2DF_type_node, NULL_TREE);
  tree v4sf_ftype_v4sf_v4sf_v4sf
    = build_function_type_list (V4SF_type_node,
				V4SF_type_node, V4SF_type_node,
				V4SF_type_node, NULL_TREE);
  tree v8hi_ftype_v16qi
    = build_function_type_list (V8HI_type_node, V16QI_type_node,
				NULL_TREE);
  tree v4si_ftype_v16qi
    = build_function_type_list (V4SI_type_node, V16QI_type_node,
				NULL_TREE);
  tree v2di_ftype_v16qi
    = build_function_type_list (V2DI_type_node, V16QI_type_node,
				NULL_TREE);
  tree v4si_ftype_v8hi
    = build_function_type_list (V4SI_type_node, V8HI_type_node,
				NULL_TREE);
  tree v2di_ftype_v8hi
    = build_function_type_list (V2DI_type_node, V8HI_type_node,
				NULL_TREE);
  tree v2di_ftype_v4si
    = build_function_type_list (V2DI_type_node, V4SI_type_node,
				NULL_TREE);
  tree v2di_ftype_pv2di
    = build_function_type_list (V2DI_type_node, pv2di_type_node,
				NULL_TREE);
  tree v16qi_ftype_v16qi_v16qi_int
    = build_function_type_list (V16QI_type_node, V16QI_type_node,
				V16QI_type_node, integer_type_node,
				NULL_TREE);
  tree v16qi_ftype_v16qi_v16qi_v16qi
    = build_function_type_list (V16QI_type_node, V16QI_type_node,
				V16QI_type_node, V16QI_type_node,
				NULL_TREE);
  tree v8hi_ftype_v8hi_v8hi_int
    = build_function_type_list (V8HI_type_node, V8HI_type_node,
				V8HI_type_node, integer_type_node,
				NULL_TREE);
  tree v4si_ftype_v4si_v4si_int
    = build_function_type_list (V4SI_type_node, V4SI_type_node,
				V4SI_type_node, integer_type_node,
				NULL_TREE);
  tree int_ftype_v2di_v2di
    = build_function_type_list (integer_type_node,
				V2DI_type_node, V2DI_type_node,
				NULL_TREE);

  tree float80_type;
  tree float128_type;
  tree ftype;

  /* The __float80 type.  */
  if (TYPE_MODE (long_double_type_node) == XFmode)
    (*lang_hooks.types.register_builtin_type) (long_double_type_node,
					       "__float80");
  else
    {
      /* The __float80 type.  */
      float80_type = make_node (REAL_TYPE);
      TYPE_PRECISION (float80_type) = 80;
      layout_type (float80_type);
      (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
    }

  if (TARGET_64BIT)
    {
      float128_type = make_node (REAL_TYPE);
      TYPE_PRECISION (float128_type) = 128;
      layout_type (float128_type);
      (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
    }
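
  /* Illustrative only (not part of this function): once registered,
     the new type names are directly usable from C on this target, e.g.

       __float80 ext = 1.0L;     x87 80-bit extended precision
       __float128 quad = 2.0;    available only when TARGET_64BIT, per
				 the guard above.  */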

  /* Add all SSE builtins that are more or less simple operations on
     three operands.  */
  for (i = 0, d = bdesc_sse_3arg;
       i < ARRAY_SIZE (bdesc_sse_3arg);
       i++, d++)
    {
      /* Use one of the operands; the target can have a different mode for
	 mask-generating compares.  */
      enum machine_mode mode;
      tree type;

      if (d->name == 0)
	continue;

      mode = insn_data[d->icode].operand[1].mode;

      switch (mode)
	{
	case V16QImode:
	  type = v16qi_ftype_v16qi_v16qi_int;
	  break;
	case V8HImode:
	  type = v8hi_ftype_v8hi_v8hi_int;
	  break;
	case V4SImode:
	  type = v4si_ftype_v4si_v4si_int;
	  break;
	case V2DImode:
	  type = v2di_ftype_v2di_v2di_int;
	  break;
	case V2DFmode:
	  type = v2df_ftype_v2df_v2df_int;
	  break;
	case V4SFmode:
	  type = v4sf_ftype_v4sf_v4sf_int;
	  break;
	default:
	  gcc_unreachable ();
	}

      /* Override for variable blends.  */
      switch (d->icode)
	{
	case CODE_FOR_sse4_1_blendvpd:
	  type = v2df_ftype_v2df_v2df_v2df;
	  break;
	case CODE_FOR_sse4_1_blendvps:
	  type = v4sf_ftype_v4sf_v4sf_v4sf;
	  break;
	case CODE_FOR_sse4_1_pblendvb:
	  type = v16qi_ftype_v16qi_v16qi_v16qi;
	  break;
	default:
	  break;
	}

      def_builtin (d->mask, d->name, type, d->code);
    }

  /* Add all builtins that are more or less simple operations on two
     operands.  */
  for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
    {
      /* Use one of the operands; the target can have a different mode for
	 mask-generating compares.  */
      enum machine_mode mode;
      tree type;

      if (d->name == 0)
	continue;

      mode = insn_data[d->icode].operand[1].mode;

      switch (mode)
	{
	case V16QImode:
	  type = v16qi_ftype_v16qi_v16qi;
	  break;
	case V8HImode:
	  type = v8hi_ftype_v8hi_v8hi;
	  break;
	case V4SImode:
	  type = v4si_ftype_v4si_v4si;
	  break;
	case V2DImode:
	  type = v2di_ftype_v2di_v2di;
	  break;
	case V2DFmode:
	  type = v2df_ftype_v2df_v2df;
	  break;
	case V4SFmode:
	  type = v4sf_ftype_v4sf_v4sf;
	  break;
	case V8QImode:
	  type = v8qi_ftype_v8qi_v8qi;
	  break;
	case V4HImode:
	  type = v4hi_ftype_v4hi_v4hi;
	  break;
	case V2SImode:
	  type = v2si_ftype_v2si_v2si;
	  break;
	case DImode:
	  type = di_ftype_di_di;
	  break;
	default:
	  gcc_unreachable ();
	}

      /* Override for comparisons.  */
      if (d->icode == CODE_FOR_sse_maskcmpv4sf3
	  || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3)
	type = v4si_ftype_v4sf_v4sf;

      if (d->icode == CODE_FOR_sse2_maskcmpv2df3
	  || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
	type = v2di_ftype_v2df_v2df;

      def_builtin (d->mask, d->name, type, d->code);
    }

  /* Add all builtins that are more or less simple operations on 1 operand.  */
  for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
    {
      enum machine_mode mode;
      tree type;

      if (d->name == 0)
	continue;

      mode = insn_data[d->icode].operand[1].mode;

      switch (mode)
	{
	case V16QImode:
	  type = v16qi_ftype_v16qi;
	  break;
	case V8HImode:
	  type = v8hi_ftype_v8hi;
	  break;
	case V4SImode:
	  type = v4si_ftype_v4si;
	  break;
	case V2DFmode:
	  type = v2df_ftype_v2df;
	  break;
	case V4SFmode:
	  type = v4sf_ftype_v4sf;
	  break;
	case V8QImode:
	  type = v8qi_ftype_v8qi;
	  break;
	case V4HImode:
	  type = v4hi_ftype_v4hi;
	  break;
	case V2SImode:
	  type = v2si_ftype_v2si;
	  break;
	default:
	  gcc_unreachable ();
	}

      def_builtin (d->mask, d->name, type, d->code);
    }

  /* Add the remaining MMX insns with somewhat more complicated types.  */
  def_builtin (OPTION_MASK_ISA_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
  def_builtin (OPTION_MASK_ISA_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
  def_builtin (OPTION_MASK_ISA_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
  def_builtin (OPTION_MASK_ISA_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);

  def_builtin (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
  def_builtin (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
  def_builtin (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);

  def_builtin (OPTION_MASK_ISA_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
  def_builtin (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);

  def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
  def_builtin (OPTION_MASK_ISA_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);

  /* comi/ucomi insns.  */
  for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
    if (d->mask == OPTION_MASK_ISA_SSE2)
      def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
    else
      def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);

  /* ptest insns.  */
  for (i = 0, d = bdesc_ptest; i < ARRAY_SIZE (bdesc_ptest); i++, d++)
    def_builtin (d->mask, d->name, int_ftype_v2di_v2di, d->code);
17811 def_builtin (OPTION_MASK_ISA_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
17812 def_builtin (OPTION_MASK_ISA_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
17813 def_builtin (OPTION_MASK_ISA_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
17815 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
17816 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
17817 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
17818 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
17819 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
17820 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
17821 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
17822 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
17823 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
17824 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
17825 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);
17827 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
17829 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
17830 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
17832 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
17833 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
17834 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
17835 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
17837 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
17838 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
17839 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
17840 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
17842 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
17844 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
17846 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
17847 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
17848 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
17849 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
17850 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
17851 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
17853 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);

  /* Original 3DNow!  */
  def_builtin (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
  def_builtin (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
  def_builtin (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
  def_builtin (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
  def_builtin (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
  def_builtin (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
  def_builtin (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
  def_builtin (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
  def_builtin (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
  def_builtin (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
  def_builtin (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
  def_builtin (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
  def_builtin (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
  def_builtin (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
  def_builtin (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
  def_builtin (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
  def_builtin (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
  def_builtin (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
  def_builtin (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
  def_builtin (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);

  /* 3DNow! extension as used in the Athlon CPU.  */
  def_builtin (OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
  def_builtin (OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
  def_builtin (OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
  def_builtin (OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
  def_builtin (OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
  def_builtin (OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
17886 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
17888 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
17889 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
17891 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADHPD);
17892 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADLPD);
17894 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
17895 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
17896 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
17897 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
17898 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
17900 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
17901 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
17902 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
17903 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
17905 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
17906 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
17908 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
17910 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
17911 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
17913 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
17914 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
17915 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
17916 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
17917 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
17919 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
17921 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
17922 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
17923 def_builtin_const (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
17924 def_builtin_const (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);
17926 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
17927 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
17928 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
17930 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
17931 def_builtin_const (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
17932 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
17933 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
17935 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
17936 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
17937 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
17939 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
17940 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
17942 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pmuludq", di_ftype_v2si_v2si, IX86_BUILTIN_PMULUDQ);
17943 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pmuludq128", v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULUDQ128);
17945 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
17946 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
17947 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
17948 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
17949 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSLLW128);
17950 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSLLD128);
17951 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
17953 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
17954 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
17955 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
17956 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
17957 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSRLW128);
17958 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSRLD128);
17959 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
17961 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
17962 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
17963 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSRAW128);
17964 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSRAD128);
17966 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);

  /* Prescott New Instructions.  */
  def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor", void_ftype_pcvoid_unsigned_unsigned, IX86_BUILTIN_MONITOR);
  def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait", void_ftype_unsigned_unsigned, IX86_BUILTIN_MWAIT);
  def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_lddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);
17974 def_builtin (OPTION_MASK_ISA_SSSE3, "__builtin_ia32_palignr128", v2di_ftype_v2di_v2di_int, IX86_BUILTIN_PALIGNR128);
17975 def_builtin (OPTION_MASK_ISA_SSSE3, "__builtin_ia32_palignr", di_ftype_di_di_int, IX86_BUILTIN_PALIGNR);
17978 def_builtin (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_movntdqa", v2di_ftype_pv2di, IX86_BUILTIN_MOVNTDQA);
17979 def_builtin (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovsxbw128", v8hi_ftype_v16qi, IX86_BUILTIN_PMOVSXBW128);
17980 def_builtin (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovsxbd128", v4si_ftype_v16qi, IX86_BUILTIN_PMOVSXBD128);
17981 def_builtin (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovsxbq128", v2di_ftype_v16qi, IX86_BUILTIN_PMOVSXBQ128);
17982 def_builtin (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovsxwd128", v4si_ftype_v8hi, IX86_BUILTIN_PMOVSXWD128);
17983 def_builtin (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovsxwq128", v2di_ftype_v8hi, IX86_BUILTIN_PMOVSXWQ128);
17984 def_builtin (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovsxdq128", v2di_ftype_v4si, IX86_BUILTIN_PMOVSXDQ128);
17985 def_builtin (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovzxbw128", v8hi_ftype_v16qi, IX86_BUILTIN_PMOVZXBW128);
17986 def_builtin (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovzxbd128", v4si_ftype_v16qi, IX86_BUILTIN_PMOVZXBD128);
17987 def_builtin (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovzxbq128", v2di_ftype_v16qi, IX86_BUILTIN_PMOVZXBQ128);
17988 def_builtin (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovzxwd128", v4si_ftype_v8hi, IX86_BUILTIN_PMOVZXWD128);
17989 def_builtin (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovzxwq128", v2di_ftype_v8hi, IX86_BUILTIN_PMOVZXWQ128);
17990 def_builtin (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovzxdq128", v2di_ftype_v4si, IX86_BUILTIN_PMOVZXDQ128);
17991 def_builtin (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmuldq128", v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULDQ128);
17992 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_roundpd", v2df_ftype_v2df_int, IX86_BUILTIN_ROUNDPD);
17993 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_roundps", v4sf_ftype_v4sf_int, IX86_BUILTIN_ROUNDPS);
17994 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_roundsd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_ROUNDSD);
17995 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_roundss", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_ROUNDSS);

  ftype = build_function_type_list (unsigned_type_node,
				    unsigned_type_node,
				    unsigned_char_type_node,
				    NULL_TREE);
  def_builtin (OPTION_MASK_ISA_SSE4_2, "__builtin_ia32_crc32qi", ftype, IX86_BUILTIN_CRC32QI);
  ftype = build_function_type_list (unsigned_type_node,
				    unsigned_type_node,
				    unsigned_type_node,
				    NULL_TREE);
  def_builtin (OPTION_MASK_ISA_SSE4_2, "__builtin_ia32_crc32si", ftype, IX86_BUILTIN_CRC32SI);
  ftype = build_function_type_list (unsigned_type_node,
				    unsigned_type_node,
				    short_unsigned_type_node,
				    NULL_TREE);
  def_builtin (OPTION_MASK_ISA_SSE4_2, "__builtin_ia32_crc32hi", ftype, IX86_BUILTIN_CRC32HI);
  ftype = build_function_type_list (long_long_unsigned_type_node,
				    long_long_unsigned_type_node,
				    long_long_unsigned_type_node,
				    NULL_TREE);
  def_builtin (OPTION_MASK_ISA_SSE4_2, "__builtin_ia32_crc32di", ftype, IX86_BUILTIN_CRC32DI);

  /* AMDFAM10 SSE4A New built-ins.  */
  def_builtin (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_movntsd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTSD);
  def_builtin (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_movntss", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTSS);
  def_builtin (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_extrqi", v2di_ftype_v2di_unsigned_unsigned, IX86_BUILTIN_EXTRQI);
  def_builtin (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_extrq", v2di_ftype_v2di_v16qi, IX86_BUILTIN_EXTRQ);
  def_builtin (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_insertqi", v2di_ftype_v2di_v2di_unsigned_unsigned, IX86_BUILTIN_INSERTQI);
  def_builtin (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_insertq", v2di_ftype_v2di_v2di, IX86_BUILTIN_INSERTQ);

  /* Access to the vec_init patterns.  */
  ftype = build_function_type_list (V2SI_type_node, integer_type_node,
				    integer_type_node, NULL_TREE);
  def_builtin (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si", ftype, IX86_BUILTIN_VEC_INIT_V2SI);

  ftype = build_function_type_list (V4HI_type_node, short_integer_type_node,
				    short_integer_type_node,
				    short_integer_type_node,
				    short_integer_type_node, NULL_TREE);
  def_builtin (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi", ftype, IX86_BUILTIN_VEC_INIT_V4HI);

  ftype = build_function_type_list (V8QI_type_node, char_type_node,
				    char_type_node, char_type_node,
				    char_type_node, char_type_node,
				    char_type_node, char_type_node,
				    char_type_node, NULL_TREE);
  def_builtin (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi", ftype, IX86_BUILTIN_VEC_INIT_V8QI);

  /* Access to the vec_extract patterns.  */
  ftype = build_function_type_list (double_type_node, V2DF_type_node,
				    integer_type_node, NULL_TREE);
  def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df", ftype, IX86_BUILTIN_VEC_EXT_V2DF);

  ftype = build_function_type_list (long_long_integer_type_node,
				    V2DI_type_node, integer_type_node,
				    NULL_TREE);
  def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di", ftype, IX86_BUILTIN_VEC_EXT_V2DI);

  ftype = build_function_type_list (float_type_node, V4SF_type_node,
				    integer_type_node, NULL_TREE);
  def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf", ftype, IX86_BUILTIN_VEC_EXT_V4SF);

  ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
				    integer_type_node, NULL_TREE);
  def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si", ftype, IX86_BUILTIN_VEC_EXT_V4SI);

  ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
				    integer_type_node, NULL_TREE);
  def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi", ftype, IX86_BUILTIN_VEC_EXT_V8HI);

  ftype = build_function_type_list (intHI_type_node, V4HI_type_node,
				    integer_type_node, NULL_TREE);
  def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_vec_ext_v4hi", ftype, IX86_BUILTIN_VEC_EXT_V4HI);

  ftype = build_function_type_list (intSI_type_node, V2SI_type_node,
				    integer_type_node, NULL_TREE);
  def_builtin (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si", ftype, IX86_BUILTIN_VEC_EXT_V2SI);

  ftype = build_function_type_list (intQI_type_node, V16QI_type_node,
				    integer_type_node, NULL_TREE);
  def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi", ftype, IX86_BUILTIN_VEC_EXT_V16QI);

  /* Access to the vec_set patterns.  */
  ftype = build_function_type_list (V2DI_type_node, V2DI_type_node,
				    long_long_integer_type_node,
				    integer_type_node, NULL_TREE);
  def_builtin (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT, "__builtin_ia32_vec_set_v2di", ftype, IX86_BUILTIN_VEC_SET_V2DI);

  ftype = build_function_type_list (V4SF_type_node, V4SF_type_node,
				    float_type_node,
				    integer_type_node, NULL_TREE);
  def_builtin (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf", ftype, IX86_BUILTIN_VEC_SET_V4SF);

  ftype = build_function_type_list (V4SI_type_node, V4SI_type_node,
				    intSI_type_node,
				    integer_type_node, NULL_TREE);
  def_builtin (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si", ftype, IX86_BUILTIN_VEC_SET_V4SI);

  ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
				    intHI_type_node,
				    integer_type_node, NULL_TREE);
  def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi", ftype, IX86_BUILTIN_VEC_SET_V8HI);

  ftype = build_function_type_list (V4HI_type_node, V4HI_type_node,
				    intHI_type_node,
				    integer_type_node, NULL_TREE);
  def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_vec_set_v4hi", ftype, IX86_BUILTIN_VEC_SET_V4HI);

  ftype = build_function_type_list (V16QI_type_node, V16QI_type_node,
				    intQI_type_node,
				    integer_type_node, NULL_TREE);
  def_builtin (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi", ftype, IX86_BUILTIN_VEC_SET_V16QI);
}

static void
ix86_init_builtins (void)
{
  if (TARGET_MMX)
    ix86_init_mmx_sse_builtins ();
}

/* Errors in the source file can cause expand_expr to return const0_rtx
   where we expect a vector.  To avoid crashing, use one of the vector
   clear instructions.  */
static rtx
safe_vector_operand (rtx x, enum machine_mode mode)
{
  if (x == const0_rtx)
    x = CONST0_RTX (mode);
  return x;
}
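
/* Illustrative only: e.g. safe_vector_operand (const0_rtx, V4SImode)
   hands back CONST0_RTX (V4SImode), a zero vector of the mode the insn
   actually expects, so expansion can proceed after a front-end error.  */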

/* Subroutine of ix86_expand_builtin to take care of SSE insns with
   4 operands.  The third argument must be a constant smaller than 8
   bits or xmm0.  */

static rtx
ix86_expand_sse_4_operands_builtin (enum insn_code icode, tree exp,
				    rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  tree arg2 = CALL_EXPR_ARG (exp, 2);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  rtx op2 = expand_normal (arg2);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[icode].operand[2].mode;
  enum machine_mode mode2;
  rtx xmm0;

  if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if ((optimize && !register_operand (op1, mode1))
      || !(*insn_data[icode].operand[2].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  switch (icode)
    {
    case CODE_FOR_sse4_1_blendvpd:
    case CODE_FOR_sse4_1_blendvps:
    case CODE_FOR_sse4_1_pblendvb:
      /* The third argument of variable blends must be xmm0.  */
      xmm0 = gen_rtx_REG (tmode, FIRST_SSE_REG);
      emit_move_insn (xmm0, op2);
      op2 = xmm0;
      break;

    default:
      mode2 = insn_data[icode].operand[2].mode;
      if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
	switch (icode)
	  {
	  case CODE_FOR_sse4_1_roundsd:
	  case CODE_FOR_sse4_1_roundss:
	    error ("the third argument must be a 4-bit immediate");
	    return const0_rtx;
	  default:
	    error ("the third argument must be an 8-bit immediate");
	    return const0_rtx;
	  }
    }

  if (optimize
      || target == 0
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);
  pat = GEN_FCN (icode) (target, op0, op1, op2);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
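
/* Illustrative only: at the source level the variable-blend case above
   corresponds to intrinsics like _mm_blendv_epi8 in <smmintrin.h>,
   approximately

     __m128i _mm_blendv_epi8 (__m128i __X, __m128i __Y, __m128i __M)
     {
       return (__m128i) __builtin_ia32_pblendvb128 ((__v16qi) __X,
						    (__v16qi) __Y,
						    (__v16qi) __M);
     }

   whose mask operand is forced into xmm0 by the expander.  */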

/* Subroutine of ix86_expand_builtin to take care of crc32 insns.  */

static rtx
ix86_expand_crc32 (enum insn_code icode, tree exp, rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[icode].operand[2].mode;

  if (optimize
      || !target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
    {
      op1 = copy_to_reg (op1);
      op1 = simplify_gen_subreg (mode1, op1, GET_MODE (op1), 0);
    }

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
18232 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
18235 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
18238 tree arg0 = CALL_EXPR_ARG (exp, 0);
18239 tree arg1 = CALL_EXPR_ARG (exp, 1);
18240 rtx op0 = expand_normal (arg0);
18241 rtx op1 = expand_normal (arg1);
18242 enum machine_mode tmode = insn_data[icode].operand[0].mode;
18243 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
18244 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
18246 if (VECTOR_MODE_P (mode0))
18247 op0 = safe_vector_operand (op0, mode0);
18248 if (VECTOR_MODE_P (mode1))
18249 op1 = safe_vector_operand (op1, mode1);
18251 if (optimize || !target
18252 || GET_MODE (target) != tmode
18253 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18254 target = gen_reg_rtx (tmode);
18256 if (GET_MODE (op1) == SImode && mode1 == TImode)
18258 rtx x = gen_reg_rtx (V4SImode);
18259 emit_insn (gen_sse2_loadd (x, op1));
18260 op1 = gen_lowpart (TImode, x);
18263 /* The insn must want input operands in the same modes as the result.  */
18265 gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
18266 && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode));
18268 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
18269 op0 = copy_to_mode_reg (mode0, op0);
18270 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
18271 op1 = copy_to_mode_reg (mode1, op1);
18273 /* ??? Using ix86_fixup_binary_operands is problematic when
18274 we've got mismatched modes. Fake it. */
18280 if (tmode == mode0 && tmode == mode1)
18282 target = ix86_fixup_binary_operands (UNKNOWN, tmode, xops);
18286 else if (optimize || !ix86_binary_operator_ok (UNKNOWN, tmode, xops))
18288 op0 = force_reg (mode0, op0);
18289 op1 = force_reg (mode1, op1);
18290 target = gen_reg_rtx (tmode);
18293 pat = GEN_FCN (icode) (target, op0, op1);
18300 /* Subroutine of ix86_expand_builtin to take care of stores. */
18303 ix86_expand_store_builtin (enum insn_code icode, tree exp)
18306 tree arg0 = CALL_EXPR_ARG (exp, 0);
18307 tree arg1 = CALL_EXPR_ARG (exp, 1);
18308 rtx op0 = expand_normal (arg0);
18309 rtx op1 = expand_normal (arg1);
18310 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
18311 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
18313 if (VECTOR_MODE_P (mode1))
18314 op1 = safe_vector_operand (op1, mode1);
18316 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
18317 op1 = copy_to_mode_reg (mode1, op1);
18319 pat = GEN_FCN (icode) (op0, op1);
18325 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
18328 ix86_expand_unop_builtin (enum insn_code icode, tree exp,
18329 rtx target, int do_load)
18332 tree arg0 = CALL_EXPR_ARG (exp, 0);
18333 rtx op0 = expand_normal (arg0);
18334 enum machine_mode tmode = insn_data[icode].operand[0].mode;
18335 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
18337 if (optimize || !target
18338 || GET_MODE (target) != tmode
18339 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18340 target = gen_reg_rtx (tmode);
18342 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
18345 if (VECTOR_MODE_P (mode0))
18346 op0 = safe_vector_operand (op0, mode0);
18348 if ((optimize && !register_operand (op0, mode0))
18349 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
18350 op0 = copy_to_mode_reg (mode0, op0);
18355 case CODE_FOR_sse4_1_roundpd:
18356 case CODE_FOR_sse4_1_roundps:
18358 tree arg1 = CALL_EXPR_ARG (exp, 1);
18359 rtx op1 = expand_normal (arg1);
18360 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
18362 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
18364 error ("the second argument must be a 4-bit immediate");
18367 pat = GEN_FCN (icode) (target, op0, op1);
18371 pat = GEN_FCN (icode) (target, op0);
18381 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
18382 sqrtss, rsqrtss, rcpss. */
18385 ix86_expand_unop1_builtin (enum insn_code icode, tree exp, rtx target)
18388 tree arg0 = CALL_EXPR_ARG (exp, 0);
18389 rtx op1, op0 = expand_normal (arg0);
18390 enum machine_mode tmode = insn_data[icode].operand[0].mode;
18391 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
18393 if (optimize || !target
18394 || GET_MODE (target) != tmode
18395 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18396 target = gen_reg_rtx (tmode);
18398 if (VECTOR_MODE_P (mode0))
18399 op0 = safe_vector_operand (op0, mode0);
18401 if ((optimize && !register_operand (op0, mode0))
18402 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
18403 op0 = copy_to_mode_reg (mode0, op0);
18406 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
18407 op1 = copy_to_mode_reg (mode0, op1);
18409 pat = GEN_FCN (icode) (target, op0, op1);
18416 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
18419 ix86_expand_sse_compare (const struct builtin_description *d, tree exp,
18423 tree arg0 = CALL_EXPR_ARG (exp, 0);
18424 tree arg1 = CALL_EXPR_ARG (exp, 1);
18425 rtx op0 = expand_normal (arg0);
18426 rtx op1 = expand_normal (arg1);
18428 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
18429 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
18430 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
18431 enum rtx_code comparison = d->comparison;
18433 if (VECTOR_MODE_P (mode0))
18434 op0 = safe_vector_operand (op0, mode0);
18435 if (VECTOR_MODE_P (mode1))
18436 op1 = safe_vector_operand (op1, mode1);
18438 /* Swap operands if we have a comparison that isn't available in hardware.  */
18440 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
18442 rtx tmp = gen_reg_rtx (mode1);
18443 emit_move_insn (tmp, op1);
18448 if (optimize || !target
18449 || GET_MODE (target) != tmode
18450 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
18451 target = gen_reg_rtx (tmode);
18453 if ((optimize && !register_operand (op0, mode0))
18454 || ! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
18455 op0 = copy_to_mode_reg (mode0, op0);
18456 if ((optimize && !register_operand (op1, mode1))
18457 || ! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
18458 op1 = copy_to_mode_reg (mode1, op1);
18460 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
18461 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
18468 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
18471 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
18475 tree arg0 = CALL_EXPR_ARG (exp, 0);
18476 tree arg1 = CALL_EXPR_ARG (exp, 1);
18477 rtx op0 = expand_normal (arg0);
18478 rtx op1 = expand_normal (arg1);
18479 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
18480 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
18481 enum rtx_code comparison = d->comparison;
18483 if (VECTOR_MODE_P (mode0))
18484 op0 = safe_vector_operand (op0, mode0);
18485 if (VECTOR_MODE_P (mode1))
18486 op1 = safe_vector_operand (op1, mode1);
18488 /* Swap operands if we have a comparison that isn't available in hardware.  */
18490 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
18497 target = gen_reg_rtx (SImode);
18498 emit_move_insn (target, const0_rtx);
18499 target = gen_rtx_SUBREG (QImode, target, 0);
18501 if ((optimize && !register_operand (op0, mode0))
18502 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
18503 op0 = copy_to_mode_reg (mode0, op0);
18504 if ((optimize && !register_operand (op1, mode1))
18505 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
18506 op1 = copy_to_mode_reg (mode1, op1);
18508 pat = GEN_FCN (d->icode) (op0, op1);
18512 emit_insn (gen_rtx_SET (VOIDmode,
18513 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
18514 gen_rtx_fmt_ee (comparison, QImode,
18518 return SUBREG_REG (target);
18521 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
18524 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
18528 tree arg0 = CALL_EXPR_ARG (exp, 0);
18529 tree arg1 = CALL_EXPR_ARG (exp, 1);
18530 rtx op0 = expand_normal (arg0);
18531 rtx op1 = expand_normal (arg1);
18532 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
18533 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
18534 enum rtx_code comparison = d->comparison;
18536 if (VECTOR_MODE_P (mode0))
18537 op0 = safe_vector_operand (op0, mode0);
18538 if (VECTOR_MODE_P (mode1))
18539 op1 = safe_vector_operand (op1, mode1);
18541 target = gen_reg_rtx (SImode);
18542 emit_move_insn (target, const0_rtx);
18543 target = gen_rtx_SUBREG (QImode, target, 0);
18545 if ((optimize && !register_operand (op0, mode0))
18546 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
18547 op0 = copy_to_mode_reg (mode0, op0);
18548 if ((optimize && !register_operand (op1, mode1))
18549 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
18550 op1 = copy_to_mode_reg (mode1, op1);
18552 pat = GEN_FCN (d->icode) (op0, op1);
18556 emit_insn (gen_rtx_SET (VOIDmode,
18557 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
18558 gen_rtx_fmt_ee (comparison, QImode,
18562 return SUBREG_REG (target);
18565 /* Return the integer constant in ARG. Constrain it to be in the range
18566 of the subparts of VEC_TYPE; issue an error if not. */
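/* Illustrative case (hypothetical values): for a vector type with four
   subparts, MAX is 3, so a selector argument of 5 triggers the
   "selector must be an integer constant in the range 0..3" diagnostic
   and the caller proceeds with a safe default element number.  */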
18569 get_element_number (tree vec_type, tree arg)
18571 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
18573 if (!host_integerp (arg, 1)
18574 || (elt = tree_low_cst (arg, 1), elt > max))
18576 error ("selector must be an integer constant in the range 0..%wi", max);
18583 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
18584 ix86_expand_vector_init. We DO have language-level syntax for this, in
18585 the form of (type){ init-list }. Except that since we can't place emms
18586 instructions from inside the compiler, we can't allow the use of MMX
18587 registers unless the user explicitly asks for it. So we do *not* define
18588 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
18589 we have builtins invoked by mmintrin.h that give us license to emit
18590 these sorts of instructions. */
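/* As an illustration (the mmintrin.h mapping is assumed here, not shown
   in this file): a user-level call such as

       __m64 v = (__m64) __builtin_ia32_vec_init_v4hi (w0, w1, w2, w3);

   reaches ix86_expand_vec_init_builtin below and is expanded through
   ix86_expand_vector_init.  */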
18593 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
18595 enum machine_mode tmode = TYPE_MODE (type);
18596 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
18597 int i, n_elt = GET_MODE_NUNITS (tmode);
18598 rtvec v = rtvec_alloc (n_elt);
18600 gcc_assert (VECTOR_MODE_P (tmode));
18601 gcc_assert (call_expr_nargs (exp) == n_elt);
18603 for (i = 0; i < n_elt; ++i)
18605 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
18606 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
18609 if (!target || !register_operand (target, tmode))
18610 target = gen_reg_rtx (tmode);
18612 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
18616 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
18617 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
18618 had a language-level syntax for referencing vector elements. */
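/* Sketch of a use (hypothetical values): extracting lane 2 of a V4SF
   vector,

       float f = __builtin_ia32_vec_ext_v4sf (v, 2);

   arrives here with get_element_number yielding elt == 2.  */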
18621 ix86_expand_vec_ext_builtin (tree exp, rtx target)
18623 enum machine_mode tmode, mode0;
18628 arg0 = CALL_EXPR_ARG (exp, 0);
18629 arg1 = CALL_EXPR_ARG (exp, 1);
18631 op0 = expand_normal (arg0);
18632 elt = get_element_number (TREE_TYPE (arg0), arg1);
18634 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
18635 mode0 = TYPE_MODE (TREE_TYPE (arg0));
18636 gcc_assert (VECTOR_MODE_P (mode0));
18638 op0 = force_reg (mode0, op0);
18640 if (optimize || !target || !register_operand (target, tmode))
18641 target = gen_reg_rtx (tmode);
18643 ix86_expand_vector_extract (true, target, op0, elt);
18648 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
18649 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
18650 a language-level syntax for referencing vector elements. */
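/* E.g. (hypothetical values) __builtin_ia32_vec_set_v8hi (v, 99, 3)
   yields a copy of V with element 3 replaced by 99; as the comment in
   the function body notes, the input vector itself is left intact.  */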
18653 ix86_expand_vec_set_builtin (tree exp)
18655 enum machine_mode tmode, mode1;
18656 tree arg0, arg1, arg2;
18658 rtx op0, op1, target;
18660 arg0 = CALL_EXPR_ARG (exp, 0);
18661 arg1 = CALL_EXPR_ARG (exp, 1);
18662 arg2 = CALL_EXPR_ARG (exp, 2);
18664 tmode = TYPE_MODE (TREE_TYPE (arg0));
18665 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
18666 gcc_assert (VECTOR_MODE_P (tmode));
18668 op0 = expand_expr (arg0, NULL_RTX, tmode, 0);
18669 op1 = expand_expr (arg1, NULL_RTX, mode1, 0);
18670 elt = get_element_number (TREE_TYPE (arg0), arg2);
18672 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
18673 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
18675 op0 = force_reg (tmode, op0);
18676 op1 = force_reg (mode1, op1);
18678 /* OP0 is the source of these builtin functions and shouldn't be
18679 modified. Create a copy, use it and return it as target. */
18680 target = gen_reg_rtx (tmode);
18681 emit_move_insn (target, op0);
18682 ix86_expand_vector_set (true, target, op1, elt);
18687 /* Expand an expression EXP that calls a built-in function,
18688 with result going to TARGET if that's convenient
18689 (and in mode MODE if that's convenient).
18690 SUBTARGET may be used as the target for computing one of EXP's operands.
18691 IGNORE is nonzero if the value is to be ignored. */
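/* For instance, a user-level __builtin_ia32_emms () arrives here with
   fcode == IX86_BUILTIN_EMMS and expands to a single mmx_emms insn;
   builtins that take operands are dispatched to the helper expanders
   defined above.  */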
18694 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
18695 enum machine_mode mode ATTRIBUTE_UNUSED,
18696 int ignore ATTRIBUTE_UNUSED)
18698 const struct builtin_description *d;
18700 enum insn_code icode;
18701 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
18702 tree arg0, arg1, arg2, arg3;
18703 rtx op0, op1, op2, op3, pat;
18704 enum machine_mode tmode, mode0, mode1, mode2, mode3, mode4;
18705 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
18709 case IX86_BUILTIN_EMMS:
18710 emit_insn (gen_mmx_emms ());
18713 case IX86_BUILTIN_SFENCE:
18714 emit_insn (gen_sse_sfence ());
18717 case IX86_BUILTIN_MASKMOVQ:
18718 case IX86_BUILTIN_MASKMOVDQU:
18719 icode = (fcode == IX86_BUILTIN_MASKMOVQ
18720 ? CODE_FOR_mmx_maskmovq
18721 : CODE_FOR_sse2_maskmovdqu);
18722 /* Note the arg order is different from the operand order. */
18723 arg1 = CALL_EXPR_ARG (exp, 0);
18724 arg2 = CALL_EXPR_ARG (exp, 1);
18725 arg0 = CALL_EXPR_ARG (exp, 2);
18726 op0 = expand_normal (arg0);
18727 op1 = expand_normal (arg1);
18728 op2 = expand_normal (arg2);
18729 mode0 = insn_data[icode].operand[0].mode;
18730 mode1 = insn_data[icode].operand[1].mode;
18731 mode2 = insn_data[icode].operand[2].mode;
18733 op0 = force_reg (Pmode, op0);
18734 op0 = gen_rtx_MEM (mode1, op0);
18736 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
18737 op0 = copy_to_mode_reg (mode0, op0);
18738 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
18739 op1 = copy_to_mode_reg (mode1, op1);
18740 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
18741 op2 = copy_to_mode_reg (mode2, op2);
18742 pat = GEN_FCN (icode) (op0, op1, op2);
18748 case IX86_BUILTIN_SQRTSS:
18749 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmsqrtv4sf2, exp, target);
18750 case IX86_BUILTIN_RSQRTSS:
18751 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrsqrtv4sf2, exp, target);
18752 case IX86_BUILTIN_RCPSS:
18753 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrcpv4sf2, exp, target);
18755 case IX86_BUILTIN_LOADUPS:
18756 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, exp, target, 1);
18758 case IX86_BUILTIN_STOREUPS:
18759 return ix86_expand_store_builtin (CODE_FOR_sse_movups, exp);
18761 case IX86_BUILTIN_LOADHPS:
18762 case IX86_BUILTIN_LOADLPS:
18763 case IX86_BUILTIN_LOADHPD:
18764 case IX86_BUILTIN_LOADLPD:
18765 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_loadhps
18766 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_loadlps
18767 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_loadhpd
18768 : CODE_FOR_sse2_loadlpd);
18769 arg0 = CALL_EXPR_ARG (exp, 0);
18770 arg1 = CALL_EXPR_ARG (exp, 1);
18771 op0 = expand_normal (arg0);
18772 op1 = expand_normal (arg1);
18773 tmode = insn_data[icode].operand[0].mode;
18774 mode0 = insn_data[icode].operand[1].mode;
18775 mode1 = insn_data[icode].operand[2].mode;
18777 op0 = force_reg (mode0, op0);
18778 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
18779 if (optimize || target == 0
18780 || GET_MODE (target) != tmode
18781 || !register_operand (target, tmode))
18782 target = gen_reg_rtx (tmode);
18783 pat = GEN_FCN (icode) (target, op0, op1);
18789 case IX86_BUILTIN_STOREHPS:
18790 case IX86_BUILTIN_STORELPS:
18791 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_storehps
18792 : CODE_FOR_sse_storelps);
18793 arg0 = CALL_EXPR_ARG (exp, 0);
18794 arg1 = CALL_EXPR_ARG (exp, 1);
18795 op0 = expand_normal (arg0);
18796 op1 = expand_normal (arg1);
18797 mode0 = insn_data[icode].operand[0].mode;
18798 mode1 = insn_data[icode].operand[1].mode;
18800 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
18801 op1 = force_reg (mode1, op1);
18803 pat = GEN_FCN (icode) (op0, op1);
18809 case IX86_BUILTIN_MOVNTPS:
18810 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, exp);
18811 case IX86_BUILTIN_MOVNTQ:
18812 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, exp);
18814 case IX86_BUILTIN_LDMXCSR:
18815 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
18816 target = assign_386_stack_local (SImode, SLOT_TEMP);
18817 emit_move_insn (target, op0);
18818 emit_insn (gen_sse_ldmxcsr (target));
18821 case IX86_BUILTIN_STMXCSR:
18822 target = assign_386_stack_local (SImode, SLOT_TEMP);
18823 emit_insn (gen_sse_stmxcsr (target));
18824 return copy_to_mode_reg (SImode, target);
18826 case IX86_BUILTIN_SHUFPS:
18827 case IX86_BUILTIN_SHUFPD:
18828 icode = (fcode == IX86_BUILTIN_SHUFPS
18829 ? CODE_FOR_sse_shufps
18830 : CODE_FOR_sse2_shufpd);
18831 arg0 = CALL_EXPR_ARG (exp, 0);
18832 arg1 = CALL_EXPR_ARG (exp, 1);
18833 arg2 = CALL_EXPR_ARG (exp, 2);
18834 op0 = expand_normal (arg0);
18835 op1 = expand_normal (arg1);
18836 op2 = expand_normal (arg2);
18837 tmode = insn_data[icode].operand[0].mode;
18838 mode0 = insn_data[icode].operand[1].mode;
18839 mode1 = insn_data[icode].operand[2].mode;
18840 mode2 = insn_data[icode].operand[3].mode;
18842 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
18843 op0 = copy_to_mode_reg (mode0, op0);
18844 if ((optimize && !register_operand (op1, mode1))
18845 || !(*insn_data[icode].operand[2].predicate) (op1, mode1))
18846 op1 = copy_to_mode_reg (mode1, op1);
18847 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
18849 /* @@@ better error message */
18850 error ("mask must be an immediate");
18851 return gen_reg_rtx (tmode);
18853 if (optimize || target == 0
18854 || GET_MODE (target) != tmode
18855 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18856 target = gen_reg_rtx (tmode);
18857 pat = GEN_FCN (icode) (target, op0, op1, op2);
18863 case IX86_BUILTIN_PSHUFW:
18864 case IX86_BUILTIN_PSHUFD:
18865 case IX86_BUILTIN_PSHUFHW:
18866 case IX86_BUILTIN_PSHUFLW:
18867 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
18868 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
18869 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
18870 : CODE_FOR_mmx_pshufw);
18871 arg0 = CALL_EXPR_ARG (exp, 0);
18872 arg1 = CALL_EXPR_ARG (exp, 1);
18873 op0 = expand_normal (arg0);
18874 op1 = expand_normal (arg1);
18875 tmode = insn_data[icode].operand[0].mode;
18876 mode1 = insn_data[icode].operand[1].mode;
18877 mode2 = insn_data[icode].operand[2].mode;
18879 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
18880 op0 = copy_to_mode_reg (mode1, op0);
18881 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
18883 /* @@@ better error message */
18884 error ("mask must be an immediate");
18888 || GET_MODE (target) != tmode
18889 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18890 target = gen_reg_rtx (tmode);
18891 pat = GEN_FCN (icode) (target, op0, op1);
18897 case IX86_BUILTIN_PSLLWI128:
18898 icode = CODE_FOR_ashlv8hi3;
18900 case IX86_BUILTIN_PSLLDI128:
18901 icode = CODE_FOR_ashlv4si3;
18903 case IX86_BUILTIN_PSLLQI128:
18904 icode = CODE_FOR_ashlv2di3;
18906 case IX86_BUILTIN_PSRAWI128:
18907 icode = CODE_FOR_ashrv8hi3;
18909 case IX86_BUILTIN_PSRADI128:
18910 icode = CODE_FOR_ashrv4si3;
18912 case IX86_BUILTIN_PSRLWI128:
18913 icode = CODE_FOR_lshrv8hi3;
18915 case IX86_BUILTIN_PSRLDI128:
18916 icode = CODE_FOR_lshrv4si3;
18918 case IX86_BUILTIN_PSRLQI128:
18919 icode = CODE_FOR_lshrv2di3;
18922 arg0 = CALL_EXPR_ARG (exp, 0);
18923 arg1 = CALL_EXPR_ARG (exp, 1);
18924 op0 = expand_normal (arg0);
18925 op1 = expand_normal (arg1);
18927 if (!CONST_INT_P (op1))
18929 error ("shift must be an immediate");
18932 if (INTVAL (op1) < 0 || INTVAL (op1) > 255)
18933 op1 = GEN_INT (255);
18935 tmode = insn_data[icode].operand[0].mode;
18936 mode1 = insn_data[icode].operand[1].mode;
18937 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
18938 op0 = copy_to_reg (op0);
18940 target = gen_reg_rtx (tmode);
18941 pat = GEN_FCN (icode) (target, op0, op1);
18947 case IX86_BUILTIN_PSLLW128:
18948 icode = CODE_FOR_ashlv8hi3;
18950 case IX86_BUILTIN_PSLLD128:
18951 icode = CODE_FOR_ashlv4si3;
18953 case IX86_BUILTIN_PSLLQ128:
18954 icode = CODE_FOR_ashlv2di3;
18956 case IX86_BUILTIN_PSRAW128:
18957 icode = CODE_FOR_ashrv8hi3;
18959 case IX86_BUILTIN_PSRAD128:
18960 icode = CODE_FOR_ashrv4si3;
18962 case IX86_BUILTIN_PSRLW128:
18963 icode = CODE_FOR_lshrv8hi3;
18965 case IX86_BUILTIN_PSRLD128:
18966 icode = CODE_FOR_lshrv4si3;
18968 case IX86_BUILTIN_PSRLQ128:
18969 icode = CODE_FOR_lshrv2di3;
18972 arg0 = CALL_EXPR_ARG (exp, 0);
18973 arg1 = CALL_EXPR_ARG (exp, 1);
18974 op0 = expand_normal (arg0);
18975 op1 = expand_normal (arg1);
18977 tmode = insn_data[icode].operand[0].mode;
18978 mode1 = insn_data[icode].operand[1].mode;
18980 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
18981 op0 = copy_to_reg (op0);
18983 op1 = simplify_gen_subreg (TImode, op1, GET_MODE (op1), 0);
18984 if (! (*insn_data[icode].operand[2].predicate) (op1, TImode))
18985 op1 = copy_to_reg (op1);
18987 target = gen_reg_rtx (tmode);
18988 pat = GEN_FCN (icode) (target, op0, op1);
18994 case IX86_BUILTIN_PSLLDQI128:
18995 case IX86_BUILTIN_PSRLDQI128:
18996 icode = (fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
18997 : CODE_FOR_sse2_lshrti3);
18998 arg0 = CALL_EXPR_ARG (exp, 0);
18999 arg1 = CALL_EXPR_ARG (exp, 1);
19000 op0 = expand_normal (arg0);
19001 op1 = expand_normal (arg1);
19002 tmode = insn_data[icode].operand[0].mode;
19003 mode1 = insn_data[icode].operand[1].mode;
19004 mode2 = insn_data[icode].operand[2].mode;
19006 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
19008 op0 = copy_to_reg (op0);
19009 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
19011 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
19013 error ("shift must be an immediate");
19016 target = gen_reg_rtx (V2DImode);
19017 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0),
19024 case IX86_BUILTIN_FEMMS:
19025 emit_insn (gen_mmx_femms ());
19028 case IX86_BUILTIN_PAVGUSB:
19029 return ix86_expand_binop_builtin (CODE_FOR_mmx_uavgv8qi3, exp, target);
19031 case IX86_BUILTIN_PF2ID:
19032 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2id, exp, target, 0);
19034 case IX86_BUILTIN_PFACC:
19035 return ix86_expand_binop_builtin (CODE_FOR_mmx_haddv2sf3, exp, target);
19037 case IX86_BUILTIN_PFADD:
19038 return ix86_expand_binop_builtin (CODE_FOR_mmx_addv2sf3, exp, target);
19040 case IX86_BUILTIN_PFCMPEQ:
19041 return ix86_expand_binop_builtin (CODE_FOR_mmx_eqv2sf3, exp, target);
19043 case IX86_BUILTIN_PFCMPGE:
19044 return ix86_expand_binop_builtin (CODE_FOR_mmx_gev2sf3, exp, target);
19046 case IX86_BUILTIN_PFCMPGT:
19047 return ix86_expand_binop_builtin (CODE_FOR_mmx_gtv2sf3, exp, target);
19049 case IX86_BUILTIN_PFMAX:
19050 return ix86_expand_binop_builtin (CODE_FOR_mmx_smaxv2sf3, exp, target);
19052 case IX86_BUILTIN_PFMIN:
19053 return ix86_expand_binop_builtin (CODE_FOR_mmx_sminv2sf3, exp, target);
19055 case IX86_BUILTIN_PFMUL:
19056 return ix86_expand_binop_builtin (CODE_FOR_mmx_mulv2sf3, exp, target);
19058 case IX86_BUILTIN_PFRCP:
19059 return ix86_expand_unop_builtin (CODE_FOR_mmx_rcpv2sf2, exp, target, 0);
19061 case IX86_BUILTIN_PFRCPIT1:
19062 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit1v2sf3, exp, target);
19064 case IX86_BUILTIN_PFRCPIT2:
19065 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit2v2sf3, exp, target);
19067 case IX86_BUILTIN_PFRSQIT1:
19068 return ix86_expand_binop_builtin (CODE_FOR_mmx_rsqit1v2sf3, exp, target);
19070 case IX86_BUILTIN_PFRSQRT:
19071 return ix86_expand_unop_builtin (CODE_FOR_mmx_rsqrtv2sf2, exp, target, 0);
19073 case IX86_BUILTIN_PFSUB:
19074 return ix86_expand_binop_builtin (CODE_FOR_mmx_subv2sf3, exp, target);
19076 case IX86_BUILTIN_PFSUBR:
19077 return ix86_expand_binop_builtin (CODE_FOR_mmx_subrv2sf3, exp, target);
19079 case IX86_BUILTIN_PI2FD:
19080 return ix86_expand_unop_builtin (CODE_FOR_mmx_floatv2si2, exp, target, 0);
19082 case IX86_BUILTIN_PMULHRW:
19083 return ix86_expand_binop_builtin (CODE_FOR_mmx_pmulhrwv4hi3, exp, target);
19085 case IX86_BUILTIN_PF2IW:
19086 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2iw, exp, target, 0);
19088 case IX86_BUILTIN_PFNACC:
19089 return ix86_expand_binop_builtin (CODE_FOR_mmx_hsubv2sf3, exp, target);
19091 case IX86_BUILTIN_PFPNACC:
19092 return ix86_expand_binop_builtin (CODE_FOR_mmx_addsubv2sf3, exp, target);
19094 case IX86_BUILTIN_PI2FW:
19095 return ix86_expand_unop_builtin (CODE_FOR_mmx_pi2fw, exp, target, 0);
19097 case IX86_BUILTIN_PSWAPDSI:
19098 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2si2, exp, target, 0);
19100 case IX86_BUILTIN_PSWAPDSF:
19101 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2sf2, exp, target, 0);
19103 case IX86_BUILTIN_SQRTSD:
19104 return ix86_expand_unop1_builtin (CODE_FOR_sse2_vmsqrtv2df2, exp, target);
19105 case IX86_BUILTIN_LOADUPD:
19106 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, exp, target, 1);
19107 case IX86_BUILTIN_STOREUPD:
19108 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, exp);
19110 case IX86_BUILTIN_MFENCE:
19111 emit_insn (gen_sse2_mfence ());
19113 case IX86_BUILTIN_LFENCE:
19114 emit_insn (gen_sse2_lfence ());
19117 case IX86_BUILTIN_CLFLUSH:
19118 arg0 = CALL_EXPR_ARG (exp, 0);
19119 op0 = expand_normal (arg0);
19120 icode = CODE_FOR_sse2_clflush;
19121 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
19122 op0 = copy_to_mode_reg (Pmode, op0);
19124 emit_insn (gen_sse2_clflush (op0));
19127 case IX86_BUILTIN_MOVNTPD:
19128 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, exp);
19129 case IX86_BUILTIN_MOVNTDQ:
19130 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, exp);
19131 case IX86_BUILTIN_MOVNTI:
19132 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, exp);
19134 case IX86_BUILTIN_LOADDQU:
19135 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, exp, target, 1);
19136 case IX86_BUILTIN_STOREDQU:
19137 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, exp);
19139 case IX86_BUILTIN_MONITOR:
19140 arg0 = CALL_EXPR_ARG (exp, 0);
19141 arg1 = CALL_EXPR_ARG (exp, 1);
19142 arg2 = CALL_EXPR_ARG (exp, 2);
19143 op0 = expand_normal (arg0);
19144 op1 = expand_normal (arg1);
19145 op2 = expand_normal (arg2);
19147 op0 = copy_to_mode_reg (Pmode, op0);
19149 op1 = copy_to_mode_reg (SImode, op1);
19151 op2 = copy_to_mode_reg (SImode, op2);
19153 emit_insn (gen_sse3_monitor (op0, op1, op2));
19155 emit_insn (gen_sse3_monitor64 (op0, op1, op2));
19158 case IX86_BUILTIN_MWAIT:
19159 arg0 = CALL_EXPR_ARG (exp, 0);
19160 arg1 = CALL_EXPR_ARG (exp, 1);
19161 op0 = expand_normal (arg0);
19162 op1 = expand_normal (arg1);
19164 op0 = copy_to_mode_reg (SImode, op0);
19166 op1 = copy_to_mode_reg (SImode, op1);
19167 emit_insn (gen_sse3_mwait (op0, op1));
19170 case IX86_BUILTIN_LDDQU:
19171 return ix86_expand_unop_builtin (CODE_FOR_sse3_lddqu, exp,
19174 case IX86_BUILTIN_PALIGNR:
19175 case IX86_BUILTIN_PALIGNR128:
19176 if (fcode == IX86_BUILTIN_PALIGNR)
19178 icode = CODE_FOR_ssse3_palignrdi;
19183 icode = CODE_FOR_ssse3_palignrti;
19186 arg0 = CALL_EXPR_ARG (exp, 0);
19187 arg1 = CALL_EXPR_ARG (exp, 1);
19188 arg2 = CALL_EXPR_ARG (exp, 2);
19189 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
19190 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
19191 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
19192 tmode = insn_data[icode].operand[0].mode;
19193 mode1 = insn_data[icode].operand[1].mode;
19194 mode2 = insn_data[icode].operand[2].mode;
19195 mode3 = insn_data[icode].operand[3].mode;
19197 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
19199 op0 = copy_to_reg (op0);
19200 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
19202 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
19204 op1 = copy_to_reg (op1);
19205 op1 = simplify_gen_subreg (mode2, op1, GET_MODE (op1), 0);
19207 if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
19209 error ("shift must be an immediate");
19212 target = gen_reg_rtx (mode);
19213 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, mode, 0),
19220 case IX86_BUILTIN_MOVNTDQA:
19221 return ix86_expand_unop_builtin (CODE_FOR_sse4_1_movntdqa, exp,
19224 case IX86_BUILTIN_MOVNTSD:
19225 return ix86_expand_store_builtin (CODE_FOR_sse4a_vmmovntv2df, exp);
19227 case IX86_BUILTIN_MOVNTSS:
19228 return ix86_expand_store_builtin (CODE_FOR_sse4a_vmmovntv4sf, exp);
19230 case IX86_BUILTIN_INSERTQ:
19231 case IX86_BUILTIN_EXTRQ:
19232 icode = (fcode == IX86_BUILTIN_EXTRQ
19233 ? CODE_FOR_sse4a_extrq
19234 : CODE_FOR_sse4a_insertq);
19235 arg0 = CALL_EXPR_ARG (exp, 0);
19236 arg1 = CALL_EXPR_ARG (exp, 1);
19237 op0 = expand_normal (arg0);
19238 op1 = expand_normal (arg1);
19239 tmode = insn_data[icode].operand[0].mode;
19240 mode1 = insn_data[icode].operand[1].mode;
19241 mode2 = insn_data[icode].operand[2].mode;
19242 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
19243 op0 = copy_to_mode_reg (mode1, op0);
19244 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
19245 op1 = copy_to_mode_reg (mode2, op1);
19246 if (optimize || target == 0
19247 || GET_MODE (target) != tmode
19248 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19249 target = gen_reg_rtx (tmode);
19250 pat = GEN_FCN (icode) (target, op0, op1);
19256 case IX86_BUILTIN_EXTRQI:
19257 icode = CODE_FOR_sse4a_extrqi;
19258 arg0 = CALL_EXPR_ARG (exp, 0);
19259 arg1 = CALL_EXPR_ARG (exp, 1);
19260 arg2 = CALL_EXPR_ARG (exp, 2);
19261 op0 = expand_normal (arg0);
19262 op1 = expand_normal (arg1);
19263 op2 = expand_normal (arg2);
19264 tmode = insn_data[icode].operand[0].mode;
19265 mode1 = insn_data[icode].operand[1].mode;
19266 mode2 = insn_data[icode].operand[2].mode;
19267 mode3 = insn_data[icode].operand[3].mode;
19268 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
19269 op0 = copy_to_mode_reg (mode1, op0);
19270 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
19272 error ("index mask must be an immediate");
19273 return gen_reg_rtx (tmode);
19275 if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
19277 error ("length mask must be an immediate");
19278 return gen_reg_rtx (tmode);
19280 if (optimize || target == 0
19281 || GET_MODE (target) != tmode
19282 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19283 target = gen_reg_rtx (tmode);
19284 pat = GEN_FCN (icode) (target, op0, op1, op2);
19290 case IX86_BUILTIN_INSERTQI:
19291 icode = CODE_FOR_sse4a_insertqi;
19292 arg0 = CALL_EXPR_ARG (exp, 0);
19293 arg1 = CALL_EXPR_ARG (exp, 1);
19294 arg2 = CALL_EXPR_ARG (exp, 2);
19295 arg3 = CALL_EXPR_ARG (exp, 3);
19296 op0 = expand_normal (arg0);
19297 op1 = expand_normal (arg1);
19298 op2 = expand_normal (arg2);
19299 op3 = expand_normal (arg3);
19300 tmode = insn_data[icode].operand[0].mode;
19301 mode1 = insn_data[icode].operand[1].mode;
19302 mode2 = insn_data[icode].operand[2].mode;
19303 mode3 = insn_data[icode].operand[3].mode;
19304 mode4 = insn_data[icode].operand[4].mode;
19306 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
19307 op0 = copy_to_mode_reg (mode1, op0);
19309 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
19310 op1 = copy_to_mode_reg (mode2, op1);
19312 if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
19314 error ("index mask must be an immediate");
19315 return gen_reg_rtx (tmode);
19317 if (! (*insn_data[icode].operand[4].predicate) (op3, mode4))
19319 error ("length mask must be an immediate");
19320 return gen_reg_rtx (tmode);
19322 if (optimize || target == 0
19323 || GET_MODE (target) != tmode
19324 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19325 target = gen_reg_rtx (tmode);
19326 pat = GEN_FCN (icode) (target, op0, op1, op2, op3);
19332 case IX86_BUILTIN_VEC_INIT_V2SI:
19333 case IX86_BUILTIN_VEC_INIT_V4HI:
19334 case IX86_BUILTIN_VEC_INIT_V8QI:
19335 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
19337 case IX86_BUILTIN_VEC_EXT_V2DF:
19338 case IX86_BUILTIN_VEC_EXT_V2DI:
19339 case IX86_BUILTIN_VEC_EXT_V4SF:
19340 case IX86_BUILTIN_VEC_EXT_V4SI:
19341 case IX86_BUILTIN_VEC_EXT_V8HI:
19342 case IX86_BUILTIN_VEC_EXT_V2SI:
19343 case IX86_BUILTIN_VEC_EXT_V4HI:
19344 case IX86_BUILTIN_VEC_EXT_V16QI:
19345 return ix86_expand_vec_ext_builtin (exp, target);
19347 case IX86_BUILTIN_VEC_SET_V2DI:
19348 case IX86_BUILTIN_VEC_SET_V4SF:
19349 case IX86_BUILTIN_VEC_SET_V4SI:
19350 case IX86_BUILTIN_VEC_SET_V8HI:
19351 case IX86_BUILTIN_VEC_SET_V4HI:
19352 case IX86_BUILTIN_VEC_SET_V16QI:
19353 return ix86_expand_vec_set_builtin (exp);
19359 for (i = 0, d = bdesc_sse_3arg;
19360 i < ARRAY_SIZE (bdesc_sse_3arg);
19362 if (d->code == fcode)
19363 return ix86_expand_sse_4_operands_builtin (d->icode, exp,
19366 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
19367 if (d->code == fcode)
19369 /* Compares are treated specially. */
19370 if (d->icode == CODE_FOR_sse_maskcmpv4sf3
19371 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3
19372 || d->icode == CODE_FOR_sse2_maskcmpv2df3
19373 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
19374 return ix86_expand_sse_compare (d, exp, target);
19376 return ix86_expand_binop_builtin (d->icode, exp, target);
19379 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
19380 if (d->code == fcode)
19381 return ix86_expand_unop_builtin (d->icode, exp, target, 0);
19383 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
19384 if (d->code == fcode)
19385 return ix86_expand_sse_comi (d, exp, target);
19387 for (i = 0, d = bdesc_ptest; i < ARRAY_SIZE (bdesc_ptest); i++, d++)
19388 if (d->code == fcode)
19389 return ix86_expand_sse_ptest (d, exp, target);
19391 for (i = 0, d = bdesc_crc32; i < ARRAY_SIZE (bdesc_crc32); i++, d++)
19392 if (d->code == fcode)
19393 return ix86_expand_crc32 (d->icode, exp, target);
19395 gcc_unreachable ();
19398 /* Returns a function decl for a vectorized version of the builtin function
19399 with builtin function code FN and the result vector type TYPE, or NULL_TREE
19400 if it is not available. */
19403 ix86_builtin_vectorized_function (enum built_in_function fn, tree type_out,
19406 enum machine_mode in_mode, out_mode;
19409 if (TREE_CODE (type_out) != VECTOR_TYPE
19410 || TREE_CODE (type_in) != VECTOR_TYPE)
19413 out_mode = TYPE_MODE (TREE_TYPE (type_out));
19414 out_n = TYPE_VECTOR_SUBPARTS (type_out);
19415 in_mode = TYPE_MODE (TREE_TYPE (type_in));
19416 in_n = TYPE_VECTOR_SUBPARTS (type_in);
19420 case BUILT_IN_SQRT:
19421 if (out_mode == DFmode && out_n == 2
19422 && in_mode == DFmode && in_n == 2)
19423 return ix86_builtins[IX86_BUILTIN_SQRTPD];
19426 case BUILT_IN_SQRTF:
19427 if (out_mode == SFmode && out_n == 4
19428 && in_mode == SFmode && in_n == 4)
19429 return ix86_builtins[IX86_BUILTIN_SQRTPS];
19432 case BUILT_IN_LRINTF:
19433 if (out_mode == SImode && out_n == 4
19434 && in_mode == SFmode && in_n == 4)
19435 return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
19445 /* Returns a decl of a function that implements conversion of the
19446 input vector of type TYPE, or NULL_TREE if it is not available. */
19449 ix86_builtin_conversion (enum tree_code code, tree type)
19451 if (TREE_CODE (type) != VECTOR_TYPE)
19457 switch (TYPE_MODE (type))
19460 return ix86_builtins[IX86_BUILTIN_CVTDQ2PS];
19465 case FIX_TRUNC_EXPR:
19466 switch (TYPE_MODE (type))
19469 return ix86_builtins[IX86_BUILTIN_CVTTPS2DQ];
19479 /* Store OPERAND to memory after reload has completed.  This means
19480 that we can't easily use assign_stack_local. */
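/* Rough shape of the two strategies (a sketch; the exact RTL is built
   below): with a red zone we simply store below the stack pointer,
   roughly

       mov %reg, -RED_ZONE_SIZE(%sp)

   whereas without one we predecrement, i.e. push the value and then
   address it at the new stack pointer.  */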
19482 ix86_force_to_memory (enum machine_mode mode, rtx operand)
19486 gcc_assert (reload_completed);
19487 if (TARGET_RED_ZONE)
19489 result = gen_rtx_MEM (mode,
19490 gen_rtx_PLUS (Pmode,
19492 GEN_INT (-RED_ZONE_SIZE)));
19493 emit_move_insn (result, operand);
19495 else if (!TARGET_RED_ZONE && TARGET_64BIT)
19501 operand = gen_lowpart (DImode, operand);
19505 gen_rtx_SET (VOIDmode,
19506 gen_rtx_MEM (DImode,
19507 gen_rtx_PRE_DEC (DImode,
19508 stack_pointer_rtx)),
19512 gcc_unreachable ();
19514 result = gen_rtx_MEM (mode, stack_pointer_rtx);
19523 split_di (&operand, 1, operands, operands + 1);
19525 gen_rtx_SET (VOIDmode,
19526 gen_rtx_MEM (SImode,
19527 gen_rtx_PRE_DEC (Pmode,
19528 stack_pointer_rtx)),
19531 gen_rtx_SET (VOIDmode,
19532 gen_rtx_MEM (SImode,
19533 gen_rtx_PRE_DEC (Pmode,
19534 stack_pointer_rtx)),
19539 /* Store HImodes as SImodes. */
19540 operand = gen_lowpart (SImode, operand);
19544 gen_rtx_SET (VOIDmode,
19545 gen_rtx_MEM (GET_MODE (operand),
19546 gen_rtx_PRE_DEC (SImode,
19547 stack_pointer_rtx)),
19551 gcc_unreachable ();
19553 result = gen_rtx_MEM (mode, stack_pointer_rtx);
19558 /* Free the operand from memory.  */
19560 ix86_free_from_memory (enum machine_mode mode)
19562 if (!TARGET_RED_ZONE)
19566 if (mode == DImode || TARGET_64BIT)
19570 /* Use LEA to deallocate stack space. In peephole2 it will be converted
19571 to a pop or add instruction if registers are available.  */
19572 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
19573 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
19578 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
19579 QImode must go into class Q_REGS.
19580 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
19581 movdf to do mem-to-mem moves through integer regs. */
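/* One concrete consequence (an illustrative case): reloading a nonzero
   SFmode constant into an SSE class yields NO_REGS below, so reload
   spills the constant to the constant pool and loads it from memory,
   since SSE has no load-immediate instruction.  */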
19583 ix86_preferred_reload_class (rtx x, enum reg_class class)
19585 enum machine_mode mode = GET_MODE (x);
19587 /* We're only allowed to return a subclass of CLASS. Many of the
19588 following checks fail for NO_REGS, so eliminate that early. */
19589 if (class == NO_REGS)
19592 /* All classes can load zeros. */
19593 if (x == CONST0_RTX (mode))
19596 /* Force constants into memory if we are loading a (nonzero) constant into
19597 an MMX or SSE register. This is because there are no MMX/SSE instructions
19598 to load from a constant. */
19600 && (MAYBE_MMX_CLASS_P (class) || MAYBE_SSE_CLASS_P (class)))
19603 /* Prefer SSE regs only, if we can use them for math. */
19604 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
19605 return SSE_CLASS_P (class) ? class : NO_REGS;
19607 /* Floating-point constants need more complex checks. */
19608 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
19610 /* General regs can load everything. */
19611 if (reg_class_subset_p (class, GENERAL_REGS))
19614 /* Floats can load 0 and 1 plus some others. Note that we eliminated
19615 zero above. We only want to wind up preferring 80387 registers if
19616 we plan on doing computation with them. */
19618 && standard_80387_constant_p (x))
19620 /* Limit class to non-sse. */
19621 if (class == FLOAT_SSE_REGS)
19623 if (class == FP_TOP_SSE_REGS)
19625 if (class == FP_SECOND_SSE_REGS)
19626 return FP_SECOND_REG;
19627 if (class == FLOAT_INT_REGS || class == FLOAT_REGS)
19634 /* Generally when we see PLUS here, it's the function invariant
19635 (plus soft-fp const_int), which can only be computed into general regs.  */
19637 if (GET_CODE (x) == PLUS)
19638 return reg_class_subset_p (class, GENERAL_REGS) ? class : NO_REGS;
19640 /* QImode constants are easy to load, but non-constant QImode data
19641 must go into Q_REGS. */
19642 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
19644 if (reg_class_subset_p (class, Q_REGS))
19646 if (reg_class_subset_p (Q_REGS, class))
19654 /* Discourage putting floating-point values in SSE registers unless
19655 SSE math is being used, and likewise for the 387 registers. */
19657 ix86_preferred_output_reload_class (rtx x, enum reg_class class)
19659 enum machine_mode mode = GET_MODE (x);
19661 /* Restrict the output reload class to the register bank that we are doing
19662 math on. If we would like not to return a subset of CLASS, reject this
19663 alternative: if reload cannot do this, it will still use its choice. */
19664 mode = GET_MODE (x);
19665 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
19666 return MAYBE_SSE_CLASS_P (class) ? SSE_REGS : NO_REGS;
19668 if (X87_FLOAT_MODE_P (mode))
19670 if (class == FP_TOP_SSE_REGS)
19672 else if (class == FP_SECOND_SSE_REGS)
19673 return FP_SECOND_REG;
19675 return FLOAT_CLASS_P (class) ? class : NO_REGS;
19681 /* If we are copying between general and FP registers, we need a memory
19682 location. The same is true for SSE and MMX registers.
19684 The macro can't work reliably when one of the CLASSES is a class containing
19685 registers from multiple units (SSE, MMX, integer).  We avoid this by never
19686 combining those units in a single alternative in the machine description.
19687 Ensure that this constraint holds to avoid unexpected surprises.
19689 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
19690 enforce these sanity checks. */
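/* Example (illustrative): on ia32 a DFmode move between FLOAT_REGS and
   GENERAL_REGS has no direct instruction, so this function returns
   nonzero and the move is staged through a stack slot.  */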
19693 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
19694 enum machine_mode mode, int strict)
19696 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
19697 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
19698 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
19699 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
19700 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
19701 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
19703 gcc_assert (!strict);
19707 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
19710 /* ??? This is a lie.  We do have moves between mmx/general and between
19711 mmx/sse2.  But by saying we need secondary memory we discourage the
19712 register allocator from using the mmx registers unless needed. */
19713 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
19716 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
19718 /* SSE1 doesn't have any direct moves from other classes. */
19722 /* If the target says that inter-unit moves are more expensive
19723 than moving through memory, then don't generate them. */
19724 if (!TARGET_INTER_UNIT_MOVES)
19727 /* Between SSE and general, we have moves no larger than word size. */
19728 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
19735 /* Return true if the registers in CLASS cannot represent the change from
19736 modes FROM to TO. */
19739 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
19740 enum reg_class class)
19745 /* x87 registers can't do subreg at all, as all values are reformatted
19746 to extended precision. */
19747 if (MAYBE_FLOAT_CLASS_P (class))
19750 if (MAYBE_SSE_CLASS_P (class) || MAYBE_MMX_CLASS_P (class))
19752 /* Vector registers do not support QI or HImode loads. If we don't
19753 disallow a change to these modes, reload will assume it's ok to
19754 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
19755 the vec_dupv4hi pattern. */
19756 if (GET_MODE_SIZE (from) < 4)
19759 /* Vector registers do not support subreg with nonzero offsets, which
19760 are otherwise valid for integer registers. Since we can't see
19761 whether we have a nonzero offset from here, prohibit all
19762 nonparadoxical subregs changing size. */
19763 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
19770 /* Return the cost of moving data from a register in class CLASS1 to
19771 one in class CLASS2.
19773 It is not required that the cost always equal 2 when FROM is the same as TO;
19774 on some machines it is expensive to move between registers if they are not
19775 general registers. */
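/* A worked case (numbers depend on the active cost table): when a move
   needs secondary memory, the cost computed below is the worse of the
   store/load MEMORY_MOVE_COSTs for each class summed together, plus a
   flat 20 when CLASS1 needs more hard registers than CLASS2.  */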
19778 ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
19779 enum reg_class class2)
19781 /* In case we require secondary memory, compute the cost of the store
19782 followed by the load.  To avoid bad register allocation choices, we need
19783 this to be *at least* as high as the symmetric MEMORY_MOVE_COST.  */
19785 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
19789 cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
19790 MEMORY_MOVE_COST (mode, class1, 1));
19791 cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
19792 MEMORY_MOVE_COST (mode, class2, 1));
19794 /* In the case of copying from a general purpose register we may emit
19795 multiple stores followed by a single load, causing a memory size
19796 mismatch stall.  Count this as an arbitrarily high cost of 20.  */
19797 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
19800 /* In the case of FP/MMX moves, the registers actually overlap, and we
19801 have to switch modes in order to treat them differently. */
19802 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
19803 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
19809 /* Moves between SSE/MMX and the integer unit are expensive.  */
19810 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
19811 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
19812 return ix86_cost->mmxsse_to_integer;
19813 if (MAYBE_FLOAT_CLASS_P (class1))
19814 return ix86_cost->fp_move;
19815 if (MAYBE_SSE_CLASS_P (class1))
19816 return ix86_cost->sse_move;
19817 if (MAYBE_MMX_CLASS_P (class1))
19818 return ix86_cost->mmx_move;
19822 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
19825 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
19827 /* Flags, and only flags, can hold CCmode values.  */
19828 if (CC_REGNO_P (regno))
19829 return GET_MODE_CLASS (mode) == MODE_CC;
19830 if (GET_MODE_CLASS (mode) == MODE_CC
19831 || GET_MODE_CLASS (mode) == MODE_RANDOM
19832 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
19834 if (FP_REGNO_P (regno))
19835 return VALID_FP_MODE_P (mode);
19836 if (SSE_REGNO_P (regno))
19838 /* We implement the move patterns for all vector modes into and
19839 out of SSE registers, even when no operation instructions are available.  */
19841 return (VALID_SSE_REG_MODE (mode)
19842 || VALID_SSE2_REG_MODE (mode)
19843 || VALID_MMX_REG_MODE (mode)
19844 || VALID_MMX_REG_MODE_3DNOW (mode));
19846 if (MMX_REGNO_P (regno))
19848 /* We implement the move patterns for 3DNOW modes even in MMX mode,
19849 so if the register is available at all, then we can move data of
19850 the given mode into or out of it. */
19851 return (VALID_MMX_REG_MODE (mode)
19852 || VALID_MMX_REG_MODE_3DNOW (mode));
19855 if (mode == QImode)
19857 /* Take care of QImode values: they can live in non-QI regs,
19858 but then they cause partial register stalls.  */
19859 if (regno < 4 || TARGET_64BIT)
19861 if (!TARGET_PARTIAL_REG_STALL)
19863 return reload_in_progress || reload_completed;
19865 /* We handle both integers and floats in the general purpose registers.  */
19866 else if (VALID_INT_MODE_P (mode))
19868 else if (VALID_FP_MODE_P (mode))
19870 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
19871 on to use that value in smaller contexts, this can easily force a
19872 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
19873 supporting DImode, allow it. */
19874 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
19880 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
19881 tieable integer mode. */
19884 ix86_tieable_integer_mode_p (enum machine_mode mode)
19893 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
19896 return TARGET_64BIT;
19903 /* Return true if MODE1 is accessible in a register that can hold MODE2
19904 without copying. That is, all register classes that can hold MODE2
19905 can also hold MODE1. */
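/* For example, ix86_modes_tieable_p (SFmode, XFmode) is true: every
   register class that can hold XFmode (x87 stack or general regs) can
   also hold SFmode; tying with TFmode is deliberately refused.  */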
19908 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
19910 if (mode1 == mode2)
19913 if (ix86_tieable_integer_mode_p (mode1)
19914 && ix86_tieable_integer_mode_p (mode2))
19917 /* MODE2 being XFmode implies fp stack or general regs, which means we
19918 can tie any smaller floating point modes to it. Note that we do not
19919 tie this with TFmode. */
19920 if (mode2 == XFmode)
19921 return mode1 == SFmode || mode1 == DFmode;
19923 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
19924 that we can tie it with SFmode. */
19925 if (mode2 == DFmode)
19926 return mode1 == SFmode;
19928 /* If MODE2 is only appropriate for an SSE register, then tie with
19929 any other mode acceptable to SSE registers. */
19930 if (GET_MODE_SIZE (mode2) == 16
19931 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
19932 return (GET_MODE_SIZE (mode1) == 16
19933 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
19935 /* If MODE2 is appropriate for an MMX register, then tie
19936 with any other mode acceptable to MMX registers. */
19937 if (GET_MODE_SIZE (mode2) == 8
19938 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
19939 return (GET_MODE_SIZE (mode1) == 8
19940 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
19945 /* Return the cost of moving data of mode M between a
19946 register and memory. A value of 2 is the default; this cost is
19947 relative to those in `REGISTER_MOVE_COST'.
19949 If moving between registers and memory is more expensive than
19950 between two registers, you should define this macro to express the relative cost.
19953 Also model the increased cost of moving QImode registers in non Q_REGS classes.  */
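/* Rough shape of the result (a sketch over the cost tables): FP, SSE
   and MMX classes index their load/store tables by operand size;
   integer classes use int_load/int_store, with a movzbl-based penalty
   for QImode outside Q_REGS; anything larger pays one 32-bit move per
   word, as the fallback at the end of the function shows.  */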
19957 ix86_memory_move_cost (enum machine_mode mode, enum reg_class class, int in)
19959 if (FLOAT_CLASS_P (class))
19976 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
19978 if (SSE_CLASS_P (class))
19981 switch (GET_MODE_SIZE (mode))
19995 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
19997 if (MMX_CLASS_P (class))
20000 switch (GET_MODE_SIZE (mode))
20011 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
20013 switch (GET_MODE_SIZE (mode))
20017 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
20018 : ix86_cost->movzbl_load);
20020 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
20021 : ix86_cost->int_store[0] + 4);
20024 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
20026 /* Compute the number of 32-bit moves needed.  TFmode is moved as XFmode.  */
20027 if (mode == TFmode)
20029 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
20030 * (((int) GET_MODE_SIZE (mode)
20031 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
20035 /* Compute a (partial) cost for rtx X. Return true if the complete
20036 cost has been computed, and false if subexpressions should be
20037 scanned. In either case, *TOTAL contains the cost result. */
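/* Worked example (using the cost fields referenced below): a multiply
   by the constant 10 (binary 1010, two bits set) in SImode gives
   nbits == 2 in the MULT case, so the estimate is

       mult_init[MODE_INDEX (SImode)] + 2 * mult_bit

   plus the recursive costs of the two operands.  */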
20040 ix86_rtx_costs (rtx x, int code, int outer_code, int *total)
20042 enum machine_mode mode = GET_MODE (x);
20050 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
20052 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
20054 else if (flag_pic && SYMBOLIC_CONST (x)
20056 || (GET_CODE (x) != LABEL_REF
20057 && (GET_CODE (x) != SYMBOL_REF
20058 || !SYMBOL_REF_LOCAL_P (x)))))
20065 if (mode == VOIDmode)
20068 switch (standard_80387_constant_p (x))
20073 default: /* Other constants */
20078 /* Start with (MEM (SYMBOL_REF)), since that's where
20079 it'll probably end up. Add a penalty for size. */
20080 *total = (COSTS_N_INSNS (1)
20081 + (flag_pic != 0 && !TARGET_64BIT)
20082 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
20088 /* Zero extension is often completely free on x86_64, so make
20089 it as cheap as possible. */
20090 if (TARGET_64BIT && mode == DImode
20091 && GET_MODE (XEXP (x, 0)) == SImode)
20093 else if (TARGET_ZERO_EXTEND_WITH_AND)
20094 *total = ix86_cost->add;
20096 *total = ix86_cost->movzx;
20100 *total = ix86_cost->movsx;
20104 if (CONST_INT_P (XEXP (x, 1))
20105 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
20107 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
20110 *total = ix86_cost->add;
20113 if ((value == 2 || value == 3)
20114 && ix86_cost->lea <= ix86_cost->shift_const)
20116 *total = ix86_cost->lea;
20126 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
20128 if (CONST_INT_P (XEXP (x, 1)))
20130 if (INTVAL (XEXP (x, 1)) > 32)
20131 *total = ix86_cost->shift_const + COSTS_N_INSNS (2);
20133 *total = ix86_cost->shift_const * 2;
20137 if (GET_CODE (XEXP (x, 1)) == AND)
20138 *total = ix86_cost->shift_var * 2;
20140 *total = ix86_cost->shift_var * 6 + COSTS_N_INSNS (2);
20145 if (CONST_INT_P (XEXP (x, 1)))
20146 *total = ix86_cost->shift_const;
20148 *total = ix86_cost->shift_var;
20153 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
20155 /* ??? SSE scalar cost should be used here. */
20156 *total = ix86_cost->fmul;
20159 else if (X87_FLOAT_MODE_P (mode))
20161 *total = ix86_cost->fmul;
20164 else if (FLOAT_MODE_P (mode))
20166 /* ??? SSE vector cost should be used here. */
20167 *total = ix86_cost->fmul;
20172 rtx op0 = XEXP (x, 0);
20173 rtx op1 = XEXP (x, 1);
20175 if (CONST_INT_P (XEXP (x, 1)))
20177 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
20178 for (nbits = 0; value != 0; value &= value - 1)
20182 /* This is arbitrary. */
20185 /* Compute costs correctly for widening multiplication. */
20186 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op1) == ZERO_EXTEND)
20187 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
20188 == GET_MODE_SIZE (mode))
20190 int is_mulwiden = 0;
20191 enum machine_mode inner_mode = GET_MODE (op0);
20193 if (GET_CODE (op0) == GET_CODE (op1))
20194 is_mulwiden = 1, op1 = XEXP (op1, 0);
20195 else if (CONST_INT_P (op1))
20197 if (GET_CODE (op0) == SIGN_EXTEND)
20198 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
20201 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
20205 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
20208 *total = (ix86_cost->mult_init[MODE_INDEX (mode)]
20209 + nbits * ix86_cost->mult_bit
20210 + rtx_cost (op0, outer_code) + rtx_cost (op1, outer_code));
20219 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
20220 /* ??? SSE cost should be used here. */
20221 *total = ix86_cost->fdiv;
20222 else if (X87_FLOAT_MODE_P (mode))
20223 *total = ix86_cost->fdiv;
20224 else if (FLOAT_MODE_P (mode))
20225 /* ??? SSE vector cost should be used here. */
20226 *total = ix86_cost->fdiv;
20228 *total = ix86_cost->divide[MODE_INDEX (mode)];
20232 if (GET_MODE_CLASS (mode) == MODE_INT
20233 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
20235 if (GET_CODE (XEXP (x, 0)) == PLUS
20236 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
20237 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
20238 && CONSTANT_P (XEXP (x, 1)))
20240 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
20241 if (val == 2 || val == 4 || val == 8)
20243 *total = ix86_cost->lea;
20244 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
20245 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
20247 *total += rtx_cost (XEXP (x, 1), outer_code);
20251 else if (GET_CODE (XEXP (x, 0)) == MULT
20252 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
20254 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
20255 if (val == 2 || val == 4 || val == 8)
20257 *total = ix86_cost->lea;
20258 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
20259 *total += rtx_cost (XEXP (x, 1), outer_code);
20263 else if (GET_CODE (XEXP (x, 0)) == PLUS)
20265 *total = ix86_cost->lea;
20266 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
20267 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
20268 *total += rtx_cost (XEXP (x, 1), outer_code);
20275 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
20277 /* ??? SSE cost should be used here. */
20278 *total = ix86_cost->fadd;
20281 else if (X87_FLOAT_MODE_P (mode))
20283 *total = ix86_cost->fadd;
20286 else if (FLOAT_MODE_P (mode))
20288 /* ??? SSE vector cost should be used here. */
20289 *total = ix86_cost->fadd;
20297 if (!TARGET_64BIT && mode == DImode)
20299 *total = (ix86_cost->add * 2
20300 + (rtx_cost (XEXP (x, 0), outer_code)
20301 << (GET_MODE (XEXP (x, 0)) != DImode))
20302 + (rtx_cost (XEXP (x, 1), outer_code)
20303 << (GET_MODE (XEXP (x, 1)) != DImode)));
20309 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
20311 /* ??? SSE cost should be used here. */
20312 *total = ix86_cost->fchs;
20315 else if (X87_FLOAT_MODE_P (mode))
20317 *total = ix86_cost->fchs;
20320 else if (FLOAT_MODE_P (mode))
20322 /* ??? SSE vector cost should be used here. */
20323 *total = ix86_cost->fchs;
20329 if (!TARGET_64BIT && mode == DImode)
20330 *total = ix86_cost->add * 2;
20332 *total = ix86_cost->add;
20336 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
20337 && XEXP (XEXP (x, 0), 1) == const1_rtx
20338 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
20339 && XEXP (x, 1) == const0_rtx)
20341 /* This kind of construct is implemented using test[bwl].
20342 Treat it as if we had an AND. */
20343 *total = (ix86_cost->add
20344 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code)
20345 + rtx_cost (const1_rtx, outer_code));
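/* Illustrative example: a test such as "if (x & (1 << 5))" matches
   the ZERO_EXTRACT shape handled above and assembles to a single
   "testb $32, %al"-style instruction, so pricing it like an AND of
   the operand with a constant reflects the code actually emitted.  */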
20351 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
20356 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
20357 /* ??? SSE cost should be used here. */
20358 *total = ix86_cost->fabs;
20359 else if (X87_FLOAT_MODE_P (mode))
20360 *total = ix86_cost->fabs;
20361 else if (FLOAT_MODE_P (mode))
20362 /* ??? SSE vector cost should be used here. */
20363 *total = ix86_cost->fabs;
20367 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
20368 /* ??? SSE cost should be used here. */
20369 *total = ix86_cost->fsqrt;
20370 else if (X87_FLOAT_MODE_P (mode))
20371 *total = ix86_cost->fsqrt;
20372 else if (FLOAT_MODE_P (mode))
20373 /* ??? SSE vector cost should be used here. */
20374 *total = ix86_cost->fsqrt;
20378 if (XINT (x, 1) == UNSPEC_TP)
20389 static int current_machopic_label_num;
20391 /* Given a symbol name and its associated stub, write out the
20392 definition of the stub. */
20395 machopic_output_stub (FILE *file, const char *symb, const char *stub)
20397 unsigned int length;
20398 char *binder_name, *symbol_name, lazy_ptr_name[32];
20399 int label = ++current_machopic_label_num;
20401 /* For 64-bit we shouldn't get here. */
20402 gcc_assert (!TARGET_64BIT);
20404 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
20405 symb = (*targetm.strip_name_encoding) (symb);
20407 length = strlen (stub);
20408 binder_name = alloca (length + 32);
20409 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
20411 length = strlen (symb);
20412 symbol_name = alloca (length + 32);
20413 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
20415 sprintf (lazy_ptr_name, "L%d$lz", label);
20418 switch_to_section (darwin_sections[machopic_picsymbol_stub_section]);
20420 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
20422 fprintf (file, "%s:\n", stub);
20423 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
20427 fprintf (file, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label, label);
20428 fprintf (file, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
20429 fprintf (file, "\tjmp\t*%%edx\n");
20432 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
20434 fprintf (file, "%s:\n", binder_name);
20438 fprintf (file, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
20439 fprintf (file, "\tpushl\t%%eax\n");
20442 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
20444 fprintf (file, "\tjmp\tdyld_stub_binding_helper\n");
20446 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
20447 fprintf (file, "%s:\n", lazy_ptr_name);
20448 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
20449 fprintf (file, "\t.long %s\n", binder_name);
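/* For reference, the non-PIC shape of the stub emitted above looks
   roughly like this (label names are illustrative; the real ones come
   from the stub counter and the GEN_*_NAME_FOR_* macros):

	Lfoo$stub:
		.indirect_symbol _foo
		jmp	*Lfoo$lazy_ptr
	Lfoo$binder:
		pushl	$Lfoo$lazy_ptr
		jmp	dyld_stub_binding_helper
	Lfoo$lazy_ptr:
		.indirect_symbol _foo
		.long	Lfoo$binder

   The lazy pointer initially targets the binder; dyld resolves _foo on
   the first call and patches the pointer for subsequent calls.  */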
20453 darwin_x86_file_end (void)
20455 darwin_file_end ();
20458 #endif /* TARGET_MACHO */
20460 /* Order the registers for the register allocator.  */
20462 void
20463 x86_order_regs_for_local_alloc (void)
20468 /* First allocate the local general purpose registers. */
20469 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
20470 if (GENERAL_REGNO_P (i) && call_used_regs[i])
20471 reg_alloc_order [pos++] = i;
20473 /* Global general purpose registers. */
20474 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
20475 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
20476 reg_alloc_order [pos++] = i;
20478 /* x87 registers come first in case we are doing FP math
20479 using them.  */
20480 if (!TARGET_SSE_MATH)
20481 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
20482 reg_alloc_order [pos++] = i;
20484 /* SSE registers. */
20485 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
20486 reg_alloc_order [pos++] = i;
20487 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
20488 reg_alloc_order [pos++] = i;
20490 /* x87 registers. */
20491 if (TARGET_SSE_MATH)
20492 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
20493 reg_alloc_order [pos++] = i;
20495 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
20496 reg_alloc_order [pos++] = i;
20498 /* Initialize the rest of array as we do not allocate some registers
20499 at all.  */
20500 while (pos < FIRST_PSEUDO_REGISTER)
20501 reg_alloc_order [pos++] = 0;
20504 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
20505 struct attribute_spec.handler. */
20507 ix86_handle_struct_attribute (tree *node, tree name,
20508 tree args ATTRIBUTE_UNUSED,
20509 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
20512 if (DECL_P (*node))
20514 if (TREE_CODE (*node) == TYPE_DECL)
20515 type = &TREE_TYPE (*node);
20520 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
20521 || TREE_CODE (*type) == UNION_TYPE)))
20523 warning (OPT_Wattributes, "%qs attribute ignored",
20524 IDENTIFIER_POINTER (name));
20525 *no_add_attrs = true;
20528 else if ((is_attribute_p ("ms_struct", name)
20529 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
20530 || ((is_attribute_p ("gcc_struct", name)
20531 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
20533 warning (OPT_Wattributes, "%qs incompatible attribute ignored",
20534 IDENTIFIER_POINTER (name));
20535 *no_add_attrs = true;
20542 ix86_ms_bitfield_layout_p (tree record_type)
20544 return ((TARGET_MS_BITFIELD_LAYOUT
20545 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
20546 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
20549 /* Returns an expression indicating where the this parameter is
20550 located on entry to the FUNCTION. */
20553 x86_this_parameter (tree function)
20555 tree type = TREE_TYPE (function);
20556 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
20560 const int *parm_regs;
20562 if (TARGET_64BIT_MS_ABI)
20563 parm_regs = x86_64_ms_abi_int_parameter_registers;
20565 parm_regs = x86_64_int_parameter_registers;
20566 return gen_rtx_REG (DImode, parm_regs[aggr]);
20569 if (ix86_function_regparm (type, function) > 0
20570 && !type_has_variadic_args_p (type))
20573 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
20575 return gen_rtx_REG (SImode, regno);
20578 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, aggr ? 8 : 4));
20581 /* Determine whether x86_output_mi_thunk can succeed. */
20584 x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED,
20585 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
20586 HOST_WIDE_INT vcall_offset, tree function)
20588 /* 64-bit can handle anything. */
20592 /* For 32-bit, everything's fine if we have one free register. */
20593 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
20596 /* Need a free register for vcall_offset. */
20600 /* Need a free register for GOT references. */
20601 if (flag_pic && !(*targetm.binds_local_p) (function))
20604 /* Otherwise ok. */
20608 /* Output the assembler code for a thunk function. THUNK_DECL is the
20609 declaration for the thunk function itself, FUNCTION is the decl for
20610 the target function. DELTA is an immediate constant offset to be
20611 added to THIS. If VCALL_OFFSET is nonzero, the word at
20612 *(*this + vcall_offset) should be added to THIS. */
20615 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
20616 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
20617 HOST_WIDE_INT vcall_offset, tree function)
20620 rtx this = x86_this_parameter (function);
20623 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
20624 pull it in now and let DELTA benefit. */
20627 else if (vcall_offset)
20629 /* Put the this parameter into %eax. */
20631 xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
20632 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
20635 this_reg = NULL_RTX;
20637 /* Adjust the this parameter by a fixed constant. */
20640 xops[0] = GEN_INT (delta);
20641 xops[1] = this_reg ? this_reg : this;
20644 if (!x86_64_general_operand (xops[0], DImode))
20646 tmp = gen_rtx_REG (DImode, R10_REG);
20648 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
20652 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
20655 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
20658 /* Adjust the this parameter by a value stored in the vtable. */
20662 tmp = gen_rtx_REG (DImode, R10_REG);
20665 int tmp_regno = 2 /* ECX */;
20666 if (lookup_attribute ("fastcall",
20667 TYPE_ATTRIBUTES (TREE_TYPE (function))))
20668 tmp_regno = 0 /* EAX */;
20669 tmp = gen_rtx_REG (SImode, tmp_regno);
20672 xops[0] = gen_rtx_MEM (Pmode, this_reg);
20675 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
20677 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
20679 /* Adjust the this parameter. */
20680 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
20681 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
20683 rtx tmp2 = gen_rtx_REG (DImode, R11_REG);
20684 xops[0] = GEN_INT (vcall_offset);
20686 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
20687 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
20689 xops[1] = this_reg;
20691 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
20693 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
20696 /* If necessary, drop THIS back to its stack slot. */
20697 if (this_reg && this_reg != this)
20699 xops[0] = this_reg;
20701 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
20704 xops[0] = XEXP (DECL_RTL (function), 0);
20707 if (!flag_pic || (*targetm.binds_local_p) (function))
20708 output_asm_insn ("jmp\t%P0", xops);
20709 /* All thunks should be in the same object as their target,
20710 and thus binds_local_p should be true. */
20711 else if (TARGET_64BIT_MS_ABI)
20712 gcc_unreachable ();
20715 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
20716 tmp = gen_rtx_CONST (Pmode, tmp);
20717 tmp = gen_rtx_MEM (QImode, tmp);
20719 output_asm_insn ("jmp\t%A0", xops);
20724 if (!flag_pic || (*targetm.binds_local_p) (function))
20725 output_asm_insn ("jmp\t%P0", xops);
20730 rtx sym_ref = XEXP (DECL_RTL (function), 0);
20731 tmp = (gen_rtx_SYMBOL_REF
20733 machopic_indirection_name (sym_ref, /*stub_p=*/true)));
20734 tmp = gen_rtx_MEM (QImode, tmp);
20736 output_asm_insn ("jmp\t%0", xops);
20739 #endif /* TARGET_MACHO */
20741 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
20742 output_set_got (tmp, NULL_RTX);
20745 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
20746 output_asm_insn ("jmp\t{*}%1", xops);
20752 x86_file_start (void)
20754 default_file_start ();
20756 darwin_file_start ();
20758 if (X86_FILE_START_VERSION_DIRECTIVE)
20759 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
20760 if (X86_FILE_START_FLTUSED)
20761 fputs ("\t.global\t__fltused\n", asm_out_file);
20762 if (ix86_asm_dialect == ASM_INTEL)
20763 fputs ("\t.intel_syntax\n", asm_out_file);
20767 x86_field_alignment (tree field, int computed)
20769 enum machine_mode mode;
20770 tree type = TREE_TYPE (field);
20772 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
20774 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
20775 ? get_inner_array_type (type) : type);
20776 if (mode == DFmode || mode == DCmode
20777 || GET_MODE_CLASS (mode) == MODE_INT
20778 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
20779 return MIN (32, computed);
20783 /* Output assembler code to FILE to increment profiler label # LABELNO
20784 for profiling a function entry. */
20786 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
20790 #ifndef NO_PROFILE_COUNTERS
20791 fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
20794 if (!TARGET_64BIT_MS_ABI && flag_pic)
20795 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
20797 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
20801 #ifndef NO_PROFILE_COUNTERS
20802 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
20803 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
20805 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
20809 #ifndef NO_PROFILE_COUNTERS
20810 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
20811 PROFILE_COUNT_REGISTER);
20813 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
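/* Sketch of the plain 32-bit, non-PIC output above, assuming
   PROFILE_COUNT_REGISTER is "edx" and MCOUNT_NAME is "mcount"
   (both are target-configurable):

	movl	$LP0, %edx
	call	mcount

   The PIC variants differ only in how the counter label and the
   mcount symbol are reached (@GOTOFF and @GOT via %ebx, or
   @GOTPCREL via %rip for 64-bit).  */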
20817 /* We don't have exact information about the insn sizes, but we may assume
20818 quite safely that we are informed about all 1 byte insns and memory
20819 address sizes. This is enough to eliminate unnecessary padding in
20820 99% of cases. */
20822 static int
20823 min_insn_size (rtx insn)
20827 if (!INSN_P (insn) || !active_insn_p (insn))
20830 /* Discard alignments we've emitted, and jump instructions. */
20831 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
20832 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
20835 && (GET_CODE (PATTERN (insn)) == ADDR_VEC
20836 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
20839 /* Important case - calls are always 5 bytes (opcode 0xe8 plus a
20840 32-bit displacement). It is common to have many calls in a row. */
20842 && symbolic_reference_mentioned_p (PATTERN (insn))
20843 && !SIBLING_CALL_P (insn))
20845 if (get_attr_length (insn) <= 1)
20848 /* For normal instructions we may rely on the sizes of addresses
20849 and the presence of symbol to require 4 bytes of encoding.
20850 This is not the case for jumps where references are PC relative. */
20851 if (!JUMP_P (insn))
20853 l = get_attr_length_address (insn);
20854 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
20863 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
20864 window.  */
20866 static void
20867 ix86_avoid_jump_misspredicts (void)
20869 rtx insn, start = get_insns ();
20870 int nbytes = 0, njumps = 0;
20873 /* Look for all minimal intervals of instructions containing 4 jumps.
20874 The intervals are bounded by START and INSN. NBYTES is the total
20875 size of instructions in the interval including INSN and not including
20876 START. When the NBYTES is smaller than 16 bytes, it is possible
20877 that the end of START and INSN ends up in the same 16byte page.
20879 The smallest offset in the page INSN can start is the case where START
20880 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
20881 We add p2align to 16byte window with maxskip 17 - NBYTES + sizeof (INSN).
20883 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
20886 nbytes += min_insn_size (insn);
20888 fprintf(dump_file, "Insn %i estimated to %i bytes\n",
20889 INSN_UID (insn), min_insn_size (insn));
20891 && GET_CODE (PATTERN (insn)) != ADDR_VEC
20892 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
20900 start = NEXT_INSN (start);
20901 if ((JUMP_P (start)
20902 && GET_CODE (PATTERN (start)) != ADDR_VEC
20903 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
20905 njumps--, isjump = 1;
20908 nbytes -= min_insn_size (start);
20910 gcc_assert (njumps >= 0);
20912 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
20913 INSN_UID (start), INSN_UID (insn), nbytes);
20915 if (njumps == 3 && isjump && nbytes < 16)
20917 int padsize = 15 - nbytes + min_insn_size (insn);
20920 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
20921 INSN_UID (insn), padsize);
20922 emit_insn_before (gen_align (GEN_INT (padsize)), insn);
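/* Worked example of the computation above (illustrative): if the
   window already holds three jumps, NBYTES == 12 and INSN is a 2-byte
   jump, then padsize = 15 - 12 + 2 = 5, and a p2align allowing up to
   5 skipped bytes guarantees INSN cannot share a 16-byte window with
   all three preceding jumps.  */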
20927 /* AMD Athlon works faster
20928 when RET is not destination of conditional jump or directly preceded
20929 by other jump instruction. We avoid the penalty by inserting NOP just
20930 before the RET instructions in such cases. */
20932 ix86_pad_returns (void)
20937 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
20939 basic_block bb = e->src;
20940 rtx ret = BB_END (bb);
20942 bool replace = false;
20944 if (!JUMP_P (ret) || GET_CODE (PATTERN (ret)) != RETURN
20945 || !maybe_hot_bb_p (bb))
20947 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
20948 if (active_insn_p (prev) || LABEL_P (prev))
20950 if (prev && LABEL_P (prev))
20955 FOR_EACH_EDGE (e, ei, bb->preds)
20956 if (EDGE_FREQUENCY (e) && e->src->index >= 0
20957 && !(e->flags & EDGE_FALLTHRU))
20962 prev = prev_active_insn (ret);
20964 && ((JUMP_P (prev) && any_condjump_p (prev))
20967 /* Empty functions get branch mispredict even when the jump destination
20968 is not visible to us. */
20969 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
20974 emit_insn_before (gen_return_internal_long (), ret);
20980 /* Implement machine specific optimizations. We implement padding of returns
20981 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
20985 if (TARGET_PAD_RETURNS && optimize && !optimize_size)
20986 ix86_pad_returns ();
20987 if (TARGET_FOUR_JUMP_LIMIT && optimize && !optimize_size)
20988 ix86_avoid_jump_misspredicts ();
20991 /* Return nonzero when QImode register that must be represented via REX prefix
20992 is used.  */
20993 bool
20994 x86_extended_QIreg_mentioned_p (rtx insn)
20997 extract_insn_cached (insn);
20998 for (i = 0; i < recog_data.n_operands; i++)
20999 if (REG_P (recog_data.operand[i])
21000 && REGNO (recog_data.operand[i]) >= 4)
21005 /* Return nonzero when P points to register encoded via REX prefix.
21006 Called via for_each_rtx. */
21008 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
21010 unsigned int regno;
21013 regno = REGNO (*p);
21014 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
21017 /* Return true when INSN mentions register that must be encoded using REX
21018 prefix.  */
21019 bool
21020 x86_extended_reg_mentioned_p (rtx insn)
21022 return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
21025 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
21026 optabs would emit if we didn't have TFmode patterns. */
21029 x86_emit_floatuns (rtx operands[2])
21031 rtx neglab, donelab, i0, i1, f0, in, out;
21032 enum machine_mode mode, inmode;
21034 inmode = GET_MODE (operands[1]);
21035 gcc_assert (inmode == SImode || inmode == DImode);
21038 in = force_reg (inmode, operands[1]);
21039 mode = GET_MODE (out);
21040 neglab = gen_label_rtx ();
21041 donelab = gen_label_rtx ();
21042 f0 = gen_reg_rtx (mode);
21044 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
21046 expand_float (out, in, 0);
21048 emit_jump_insn (gen_jump (donelab));
21051 emit_label (neglab);
21053 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
21055 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
21057 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
21059 expand_float (f0, i0, 0);
21061 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
21063 emit_label (donelab);
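/* The expansion above corresponds to this well-known C idiom, shown
   for a DImode input (illustrative sketch only):

	double u64_to_double (unsigned long long u)
	{
	  if ((long long) u >= 0)
	    return (double) (long long) u;
	  long long i = (long long) ((u >> 1) | (u & 1));
	  double f = (double) i;
	  return f + f;
	}

   ORing the low bit back into the halved value keeps the final
   rounding correct for odd inputs.  */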
21066 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
21067 with all elements equal to VAR. Return true if successful. */
21070 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
21071 rtx target, rtx val)
21073 enum machine_mode smode, wsmode, wvmode;
21088 val = force_reg (GET_MODE_INNER (mode), val);
21089 x = gen_rtx_VEC_DUPLICATE (mode, val);
21090 emit_insn (gen_rtx_SET (VOIDmode, target, x));
21096 if (TARGET_SSE || TARGET_3DNOW_A)
21098 val = gen_lowpart (SImode, val);
21099 x = gen_rtx_TRUNCATE (HImode, val);
21100 x = gen_rtx_VEC_DUPLICATE (mode, x);
21101 emit_insn (gen_rtx_SET (VOIDmode, target, x));
21123 /* Extend HImode to SImode using a paradoxical SUBREG. */
21124 tmp1 = gen_reg_rtx (SImode);
21125 emit_move_insn (tmp1, gen_lowpart (SImode, val));
21126 /* Insert the SImode value as low element of V4SImode vector. */
21127 tmp2 = gen_reg_rtx (V4SImode);
21128 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
21129 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
21130 CONST0_RTX (V4SImode),
21132 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
21133 /* Cast the V4SImode vector back to a V8HImode vector. */
21134 tmp1 = gen_reg_rtx (V8HImode);
21135 emit_move_insn (tmp1, gen_lowpart (V8HImode, tmp2));
21136 /* Duplicate the low short through the whole low SImode word. */
21137 emit_insn (gen_sse2_punpcklwd (tmp1, tmp1, tmp1));
21138 /* Cast the V8HImode vector back to a V4SImode vector. */
21139 tmp2 = gen_reg_rtx (V4SImode);
21140 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
21141 /* Replicate the low element of the V4SImode vector. */
21142 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
21143 /* Cast the V4SImode vector back to V8HImode, and store in target. */
21144 emit_move_insn (target, gen_lowpart (V8HImode, tmp2));
21155 /* Extend QImode to SImode using a paradoxical SUBREG. */
21156 tmp1 = gen_reg_rtx (SImode);
21157 emit_move_insn (tmp1, gen_lowpart (SImode, val));
21158 /* Insert the SImode value as low element of V4SImode vector. */
21159 tmp2 = gen_reg_rtx (V4SImode);
21160 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
21161 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
21162 CONST0_RTX (V4SImode),
21164 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
21165 /* Cast the V4SImode vector back to a V16QImode vector. */
21166 tmp1 = gen_reg_rtx (V16QImode);
21167 emit_move_insn (tmp1, gen_lowpart (V16QImode, tmp2));
21168 /* Duplicate the low byte through the whole low SImode word. */
21169 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
21170 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
21171 /* Cast the V16QImode vector back to a V4SImode vector. */
21172 tmp2 = gen_reg_rtx (V4SImode);
21173 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
21174 /* Replicate the low element of the V4SImode vector. */
21175 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
21176 /* Cast the V4SImode vector back to V16QImode, and store in target. */
21177 emit_move_insn (target, gen_lowpart (V16QImode, tmp2));
21185 /* Replicate the value once into the next wider mode and recurse. */
21186 val = convert_modes (wsmode, smode, val, true);
21187 x = expand_simple_binop (wsmode, ASHIFT, val,
21188 GEN_INT (GET_MODE_BITSIZE (smode)),
21189 NULL_RTX, 1, OPTAB_LIB_WIDEN);
21190 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
21192 x = gen_reg_rtx (wvmode);
21193 if (!ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val))
21194 gcc_unreachable ();
21195 emit_move_insn (target, gen_lowpart (mode, x));
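/* Walk-through of the widening step above (illustrative): to
   broadcast the QImode value 0xab into V8QImode, the value is first
   widened to the HImode value 0xabab via (0xab << 8) | 0xab and the
   problem is re-posed as a V4HImode broadcast; one more round yields
   0xabababab replicated across V2SImode.  */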
21203 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
21204 whose ONE_VAR element is VAR, and other elements are zero. Return true
21205 if successful.  */
21207 static bool
21208 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
21209 rtx target, rtx var, int one_var)
21211 enum machine_mode vsimode;
21227 var = force_reg (GET_MODE_INNER (mode), var);
21228 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
21229 emit_insn (gen_rtx_SET (VOIDmode, target, x));
21234 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
21235 new_target = gen_reg_rtx (mode);
21237 new_target = target;
21238 var = force_reg (GET_MODE_INNER (mode), var);
21239 x = gen_rtx_VEC_DUPLICATE (mode, var);
21240 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
21241 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
21244 /* We need to shuffle the value to the correct position, so
21245 create a new pseudo to store the intermediate result. */
21247 /* With SSE2, we can use the integer shuffle insns. */
21248 if (mode != V4SFmode && TARGET_SSE2)
21250 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
21252 GEN_INT (one_var == 1 ? 0 : 1),
21253 GEN_INT (one_var == 2 ? 0 : 1),
21254 GEN_INT (one_var == 3 ? 0 : 1)));
21255 if (target != new_target)
21256 emit_move_insn (target, new_target);
21260 /* Otherwise convert the intermediate result to V4SFmode and
21261 use the SSE1 shuffle instructions. */
21262 if (mode != V4SFmode)
21264 tmp = gen_reg_rtx (V4SFmode);
21265 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
21270 emit_insn (gen_sse_shufps_1 (tmp, tmp, tmp,
21272 GEN_INT (one_var == 1 ? 0 : 1),
21273 GEN_INT (one_var == 2 ? 0+4 : 1+4),
21274 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
21276 if (mode != V4SFmode)
21277 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
21278 else if (tmp != target)
21279 emit_move_insn (target, tmp);
21281 else if (target != new_target)
21282 emit_move_insn (target, new_target);
21287 vsimode = V4SImode;
21293 vsimode = V2SImode;
21299 /* Zero extend the variable element to SImode and recurse. */
21300 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
21302 x = gen_reg_rtx (vsimode);
21303 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
21305 gcc_unreachable ();
21307 emit_move_insn (target, gen_lowpart (mode, x));
21315 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
21316 consisting of the values in VALS. It is known that all elements
21317 except ONE_VAR are constants. Return true if successful. */
21320 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
21321 rtx target, rtx vals, int one_var)
21323 rtx var = XVECEXP (vals, 0, one_var);
21324 enum machine_mode wmode;
21327 const_vec = copy_rtx (vals);
21328 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
21329 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
21337 /* For the two element vectors, it's just as easy to use
21338 the general case. */
21354 /* There's no way to set one QImode entry easily. Combine
21355 the variable value with its adjacent constant value, and
21356 promote to an HImode set. */
21357 x = XVECEXP (vals, 0, one_var ^ 1);
21360 var = convert_modes (HImode, QImode, var, true);
21361 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
21362 NULL_RTX, 1, OPTAB_LIB_WIDEN);
21363 x = GEN_INT (INTVAL (x) & 0xff);
21367 var = convert_modes (HImode, QImode, var, true);
21368 x = gen_int_mode (INTVAL (x) << 8, HImode);
21370 if (x != const0_rtx)
21371 var = expand_simple_binop (HImode, IOR, var, x, var,
21372 1, OPTAB_LIB_WIDEN);
21374 x = gen_reg_rtx (wmode);
21375 emit_move_insn (x, gen_lowpart (wmode, const_vec));
21376 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
21378 emit_move_insn (target, gen_lowpart (mode, x));
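/* Illustrative example of the pairing trick above: for V8QImode with
   one_var == 3 and a neighbouring constant element of 7, the two
   bytes are fused into the single HImode element (var << 8) | 7,
   which is then inserted at HImode index 1 (== 3 >> 1).  */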
21385 emit_move_insn (target, const_vec);
21386 ix86_expand_vector_set (mmx_ok, target, var, one_var);
21390 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
21391 all values variable, and none identical. */
21394 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
21395 rtx target, rtx vals)
21397 enum machine_mode half_mode = GET_MODE_INNER (mode);
21398 rtx op0 = NULL, op1 = NULL;
21399 bool use_vec_concat = false;
21405 if (!mmx_ok && !TARGET_SSE)
21411 /* For the two element vectors, we always implement VEC_CONCAT. */
21412 op0 = XVECEXP (vals, 0, 0);
21413 op1 = XVECEXP (vals, 0, 1);
21414 use_vec_concat = true;
21418 half_mode = V2SFmode;
21421 half_mode = V2SImode;
21427 /* For V4SF and V4SI, we implement a concat of two V2 vectors.
21428 Recurse to load the two halves. */
21430 op0 = gen_reg_rtx (half_mode);
21431 v = gen_rtvec (2, XVECEXP (vals, 0, 0), XVECEXP (vals, 0, 1));
21432 ix86_expand_vector_init (false, op0, gen_rtx_PARALLEL (half_mode, v));
21434 op1 = gen_reg_rtx (half_mode);
21435 v = gen_rtvec (2, XVECEXP (vals, 0, 2), XVECEXP (vals, 0, 3));
21436 ix86_expand_vector_init (false, op1, gen_rtx_PARALLEL (half_mode, v));
21438 use_vec_concat = true;
21449 gcc_unreachable ();
21452 if (use_vec_concat)
21454 if (!register_operand (op0, half_mode))
21455 op0 = force_reg (half_mode, op0);
21456 if (!register_operand (op1, half_mode))
21457 op1 = force_reg (half_mode, op1);
21459 emit_insn (gen_rtx_SET (VOIDmode, target,
21460 gen_rtx_VEC_CONCAT (mode, op0, op1)));
21464 int i, j, n_elts, n_words, n_elt_per_word;
21465 enum machine_mode inner_mode;
21466 rtx words[4], shift;
21468 inner_mode = GET_MODE_INNER (mode);
21469 n_elts = GET_MODE_NUNITS (mode);
21470 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
21471 n_elt_per_word = n_elts / n_words;
21472 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
21474 for (i = 0; i < n_words; ++i)
21476 rtx word = NULL_RTX;
21478 for (j = 0; j < n_elt_per_word; ++j)
21480 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
21481 elt = convert_modes (word_mode, inner_mode, elt, true);
21487 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
21488 word, 1, OPTAB_LIB_WIDEN);
21489 word = expand_simple_binop (word_mode, IOR, word, elt,
21490 word, 1, OPTAB_LIB_WIDEN);
21498 emit_move_insn (target, gen_lowpart (mode, words[0]));
21499 else if (n_words == 2)
21501 rtx tmp = gen_reg_rtx (mode);
21502 emit_insn (gen_rtx_CLOBBER (VOIDmode, tmp));
21503 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
21504 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
21505 emit_move_insn (target, tmp);
21507 else if (n_words == 4)
21509 rtx tmp = gen_reg_rtx (V4SImode);
21510 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
21511 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
21512 emit_move_insn (target, gen_lowpart (mode, tmp));
21515 gcc_unreachable ();
21519 /* Initialize vector TARGET via VALS. Suppress the use of MMX
21520 instructions unless MMX_OK is true. */
21523 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
21525 enum machine_mode mode = GET_MODE (target);
21526 enum machine_mode inner_mode = GET_MODE_INNER (mode);
21527 int n_elts = GET_MODE_NUNITS (mode);
21528 int n_var = 0, one_var = -1;
21529 bool all_same = true, all_const_zero = true;
21533 for (i = 0; i < n_elts; ++i)
21535 x = XVECEXP (vals, 0, i);
21536 if (!CONSTANT_P (x))
21537 n_var++, one_var = i;
21538 else if (x != CONST0_RTX (inner_mode))
21539 all_const_zero = false;
21540 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
21541 all_same = false;
21544 /* Constants are best loaded from the constant pool. */
21547 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
21551 /* If all values are identical, broadcast the value. */
21553 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
21554 XVECEXP (vals, 0, 0)))
21557 /* Values where only one field is non-constant are best loaded from
21558 the pool and overwritten via move later. */
21562 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
21563 XVECEXP (vals, 0, one_var),
21567 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
21571 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
21575 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
21577 enum machine_mode mode = GET_MODE (target);
21578 enum machine_mode inner_mode = GET_MODE_INNER (mode);
21579 bool use_vec_merge = false;
21588 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
21589 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
21591 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
21593 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
21594 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
21600 use_vec_merge = TARGET_SSE4_1;
21608 /* For the two element vectors, we implement a VEC_CONCAT with
21609 the extraction of the other element. */
21611 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
21612 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
21615 op0 = val, op1 = tmp;
21617 op0 = tmp, op1 = val;
21619 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
21620 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
21625 use_vec_merge = TARGET_SSE4_1;
21632 use_vec_merge = true;
21636 /* tmp = target = A B C D */
21637 tmp = copy_to_reg (target);
21638 /* target = A A B B */
21639 emit_insn (gen_sse_unpcklps (target, target, target));
21640 /* target = X A B B */
21641 ix86_expand_vector_set (false, target, val, 0);
21642 /* target = A X C D */
21643 emit_insn (gen_sse_shufps_1 (target, target, tmp,
21644 GEN_INT (1), GEN_INT (0),
21645 GEN_INT (2+4), GEN_INT (3+4)));
21649 /* tmp = target = A B C D */
21650 tmp = copy_to_reg (target);
21651 /* tmp = X B C D */
21652 ix86_expand_vector_set (false, tmp, val, 0);
21653 /* target = A B X D */
21654 emit_insn (gen_sse_shufps_1 (target, target, tmp,
21655 GEN_INT (0), GEN_INT (1),
21656 GEN_INT (0+4), GEN_INT (3+4)));
21660 /* tmp = target = A B C D */
21661 tmp = copy_to_reg (target);
21662 /* tmp = X B C D */
21663 ix86_expand_vector_set (false, tmp, val, 0);
21664 /* target = A B C X */
21665 emit_insn (gen_sse_shufps_1 (target, target, tmp,
21666 GEN_INT (0), GEN_INT (1),
21667 GEN_INT (2+4), GEN_INT (0+4)));
21671 gcc_unreachable ();
21676 use_vec_merge = TARGET_SSE4_1;
21680 /* Element 0 handled by vec_merge below. */
21683 use_vec_merge = true;
21689 /* With SSE2, use integer shuffles to swap element 0 and ELT,
21690 store into element 0, then shuffle them back. */
21694 order[0] = GEN_INT (elt);
21695 order[1] = const1_rtx;
21696 order[2] = const2_rtx;
21697 order[3] = GEN_INT (3);
21698 order[elt] = const0_rtx;
21700 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
21701 order[1], order[2], order[3]));
21703 ix86_expand_vector_set (false, target, val, 0);
21705 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
21706 order[1], order[2], order[3]));
21710 /* For SSE1, we have to reuse the V4SF code. */
21711 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
21712 gen_lowpart (SFmode, val), elt);
21717 use_vec_merge = TARGET_SSE2;
21720 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
21724 use_vec_merge = TARGET_SSE4_1;
21734 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
21735 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
21736 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
21740 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
21742 emit_move_insn (mem, target);
21744 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
21745 emit_move_insn (tmp, val);
21747 emit_move_insn (target, mem);
21752 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
21754 enum machine_mode mode = GET_MODE (vec);
21755 enum machine_mode inner_mode = GET_MODE_INNER (mode);
21756 bool use_vec_extr = false;
21769 use_vec_extr = true;
21773 use_vec_extr = TARGET_SSE4_1;
21785 tmp = gen_reg_rtx (mode);
21786 emit_insn (gen_sse_shufps_1 (tmp, vec, vec,
21787 GEN_INT (elt), GEN_INT (elt),
21788 GEN_INT (elt+4), GEN_INT (elt+4)));
21792 tmp = gen_reg_rtx (mode);
21793 emit_insn (gen_sse_unpckhps (tmp, vec, vec));
21797 gcc_unreachable ();
21800 use_vec_extr = true;
21805 use_vec_extr = TARGET_SSE4_1;
21819 tmp = gen_reg_rtx (mode);
21820 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
21821 GEN_INT (elt), GEN_INT (elt),
21822 GEN_INT (elt), GEN_INT (elt)));
21826 tmp = gen_reg_rtx (mode);
21827 emit_insn (gen_sse2_punpckhdq (tmp, vec, vec));
21831 gcc_unreachable ();
21834 use_vec_extr = true;
21839 /* For SSE1, we have to reuse the V4SF code. */
21840 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
21841 gen_lowpart (V4SFmode, vec), elt);
21847 use_vec_extr = TARGET_SSE2;
21850 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
21854 use_vec_extr = TARGET_SSE4_1;
21858 /* ??? Could extract the appropriate HImode element and shift. */
21865 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
21866 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
21868 /* Let the rtl optimizers know about the zero extension performed. */
21869 if (inner_mode == QImode || inner_mode == HImode)
21871 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
21872 target = gen_lowpart (SImode, target);
21875 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
21879 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
21881 emit_move_insn (mem, vec);
21883 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
21884 emit_move_insn (target, tmp);
21888 /* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
21889 pattern to reduce; DEST is the destination; IN is the input vector. */
21892 ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
21894 rtx tmp1, tmp2, tmp3;
21896 tmp1 = gen_reg_rtx (V4SFmode);
21897 tmp2 = gen_reg_rtx (V4SFmode);
21898 tmp3 = gen_reg_rtx (V4SFmode);
21900 emit_insn (gen_sse_movhlps (tmp1, in, in));
21901 emit_insn (fn (tmp2, tmp1, in));
21903 emit_insn (gen_sse_shufps_1 (tmp3, tmp2, tmp2,
21904 GEN_INT (1), GEN_INT (1),
21905 GEN_INT (1+4), GEN_INT (1+4)));
21906 emit_insn (fn (dest, tmp2, tmp3));
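/* Data-flow sketch for FN == addv4sf3 and IN == {a, b, c, d}
   (illustrative; elements marked ? are don't-cares):

	tmp1 = { c,   d,   ?,   ?   }	movhlps
	tmp2 = { a+c, b+d, ?,   ?   }	FN
	tmp3 = { b+d, b+d, b+d, b+d }	shufps
	dest = { a+b+c+d, ?, ?, ?   }	FN

   Only element 0 of DEST carries the reduction result.  */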
21909 /* Target hook for scalar_mode_supported_p. */
21911 ix86_scalar_mode_supported_p (enum machine_mode mode)
21913 if (DECIMAL_FLOAT_MODE_P (mode))
21915 else if (mode == TFmode)
21916 return TARGET_64BIT;
21918 return default_scalar_mode_supported_p (mode);
21921 /* Implements target hook vector_mode_supported_p. */
21923 ix86_vector_mode_supported_p (enum machine_mode mode)
21925 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
21927 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
21929 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
21931 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
21936 /* Worker function for TARGET_MD_ASM_CLOBBERS.
21938 We do this in the new i386 backend to maintain source compatibility
21939 with the old cc0-based compiler. */
21942 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
21943 tree inputs ATTRIBUTE_UNUSED,
21946 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
21948 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
21953 /* Implements target vector targetm.asm.encode_section_info. This
21954 is not used by netware. */
21956 static void ATTRIBUTE_UNUSED
21957 ix86_encode_section_info (tree decl, rtx rtl, int first)
21959 default_encode_section_info (decl, rtl, first);
21961 if (TREE_CODE (decl) == VAR_DECL
21962 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
21963 && ix86_in_large_data_p (decl))
21964 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
21967 /* Worker function for REVERSE_CONDITION. */
21970 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
21972 return (mode != CCFPmode && mode != CCFPUmode
21973 ? reverse_condition (code)
21974 : reverse_condition_maybe_unordered (code));
21977 /* Output code to perform an x87 FP register move, from OPERANDS[1]
21978 to OPERANDS[0].  */
21980 const char *
21981 output_387_reg_move (rtx insn, rtx *operands)
21983 if (REG_P (operands[0]))
21985 if (REG_P (operands[1])
21986 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
21988 if (REGNO (operands[0]) == FIRST_STACK_REG)
21989 return output_387_ffreep (operands, 0);
21990 return "fstp\t%y0";
21992 if (STACK_TOP_P (operands[0]))
21993 return "fld%z1\t%y1";
21996 else if (MEM_P (operands[0]))
21998 gcc_assert (REG_P (operands[1]));
21999 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
22000 return "fstp%z0\t%y0";
22003 /* There is no non-popping store to memory for XFmode.
22004 So if we need one, follow the store with a load. */
22005 if (GET_MODE (operands[0]) == XFmode)
22006 return "fstp%z0\t%y0\n\tfld%z0\t%y0";
22008 return "fst%z0\t%y0";
22015 /* Output code to perform a conditional jump to LABEL, if C2 flag in
22016 FP status register is set. */
22019 ix86_emit_fp_unordered_jump (rtx label)
22021 rtx reg = gen_reg_rtx (HImode);
22024 emit_insn (gen_x86_fnstsw_1 (reg));
22026 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_size))
22028 emit_insn (gen_x86_sahf_1 (reg));
22030 temp = gen_rtx_REG (CCmode, FLAGS_REG);
22031 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
22035 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
22037 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
22038 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
22041 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
22042 gen_rtx_LABEL_REF (VOIDmode, label),
22044 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
22046 emit_jump_insn (temp);
22047 predict_jump (REG_BR_PROB_BASE * 10 / 100);
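/* Schematically, the two sequences produced above are (illustrative):

	fnstsw	%ax			fnstsw	%ax
	sahf				testb	$4, %ah
	jp	label			jne	label

   C2 is bit 2 of the upper byte of the FP status word; sahf copies it
   into PF, which is why the first form branches on parity.  */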
22050 /* Output code to perform a log1p XFmode calculation. */
22052 void ix86_emit_i387_log1p (rtx op0, rtx op1)
22054 rtx label1 = gen_label_rtx ();
22055 rtx label2 = gen_label_rtx ();
22057 rtx tmp = gen_reg_rtx (XFmode);
22058 rtx tmp2 = gen_reg_rtx (XFmode);
22060 emit_insn (gen_absxf2 (tmp, op1));
22061 emit_insn (gen_cmpxf (tmp,
22062 CONST_DOUBLE_FROM_REAL_VALUE (
22063 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
22065 emit_jump_insn (gen_bge (label1));
22067 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
22068 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
22069 emit_jump (label2);
22071 emit_label (label1);
22072 emit_move_insn (tmp, CONST1_RTX (XFmode));
22073 emit_insn (gen_addxf3 (tmp, op1, tmp));
22074 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
22075 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
22077 emit_label (label2);
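/* Background note on the threshold used above: 0.29289321881... is
   1 - sqrt(2)/2, the bound within which the x87 fyl2xp1 instruction
   is specified to operate; arguments with a larger magnitude take the
   fyl2x path on 1 + x instead.  */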
22080 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
22082 static void ATTRIBUTE_UNUSED
22083 i386_solaris_elf_named_section (const char *name, unsigned int flags,
22086 /* With Binutils 2.15, the "@unwind" marker must be specified on
22087 every occurrence of the ".eh_frame" section, not just the first
22088 one.  */
22089 if (TARGET_64BIT
22090 && strcmp (name, ".eh_frame") == 0)
22092 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
22093 flags & SECTION_WRITE ? "aw" : "a");
22096 default_elf_asm_named_section (name, flags, decl);
22099 /* Return the mangling of TYPE if it is an extended fundamental type. */
22101 static const char *
22102 ix86_mangle_fundamental_type (tree type)
22104 switch (TYPE_MODE (type))
22107 /* __float128 is "g". */
22110 /* "long double" or __float80 is "e". */
22117 /* For 32-bit code we can save PIC register setup by using
22118 __stack_chk_fail_local hidden function instead of calling
22119 __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC
22120 register, so it is better to call __stack_chk_fail directly. */
22123 ix86_stack_protect_fail (void)
22125 return TARGET_64BIT
22126 ? default_external_stack_protect_fail ()
22127 : default_hidden_stack_protect_fail ();
22130 /* Select a format to encode pointers in exception handling data. CODE
22131 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
22132 true if the symbol may be affected by dynamic relocations.
22134 ??? All x86 object file formats are capable of representing this.
22135 After all, the relocation needed is the same as for the call insn.
22136 Whether or not a particular assembler allows us to enter such, I
22137 guess we'll have to see. */
22139 asm_preferred_eh_data_format (int code, int global)
22143 int type = DW_EH_PE_sdata8;
22145 || ix86_cmodel == CM_SMALL_PIC
22146 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
22147 type = DW_EH_PE_sdata4;
22148 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
22150 if (ix86_cmodel == CM_SMALL
22151 || (ix86_cmodel == CM_MEDIUM && code))
22152 return DW_EH_PE_udata4;
22153 return DW_EH_PE_absptr;
22156 /* Expand copysign from SIGN to the positive value ABS_VALUE
22157 storing in RESULT. If MASK is non-null, it shall be a mask to mask out
22158 the sign bit.  */
22160 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
22162 enum machine_mode mode = GET_MODE (sign);
22163 rtx sgn = gen_reg_rtx (mode);
22164 if (mask == NULL_RTX)
22166 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), false);
22167 if (!VECTOR_MODE_P (mode))
22169 /* We need to generate a scalar mode mask in this case. */
22170 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
22171 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
22172 mask = gen_reg_rtx (mode);
22173 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
22177 mask = gen_rtx_NOT (mode, mask);
22178 emit_insn (gen_rtx_SET (VOIDmode, sgn,
22179 gen_rtx_AND (mode, mask, sign)));
22180 emit_insn (gen_rtx_SET (VOIDmode, result,
22181 gen_rtx_IOR (mode, abs_value, sgn)));
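/* Net effect, as a scalar sketch (illustrative):

	sgn    = sign & SIGNBIT_MASK;
	result = abs_value | sgn;

   i.e. the classic bitwise copysign.  A caller-supplied MASK is the
   complement of the sign-bit mask (as produced by
   ix86_expand_sse_fabs), hence the NOT applied above.  */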
22184 /* Expand fabs (OP0) and return a new rtx that holds the result. The
22185 mask for masking out the sign-bit is stored in *SMASK, if that is
22186 non-null.  */
22188 ix86_expand_sse_fabs (rtx op0, rtx *smask)
22190 enum machine_mode mode = GET_MODE (op0);
22193 xa = gen_reg_rtx (mode);
22194 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), true);
22195 if (!VECTOR_MODE_P (mode))
22197 /* We need to generate a scalar mode mask in this case. */
22198 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
22199 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
22200 mask = gen_reg_rtx (mode);
22201 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
22203 emit_insn (gen_rtx_SET (VOIDmode, xa,
22204 gen_rtx_AND (mode, op0, mask)));
22212 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
22213 swapping the operands if SWAP_OPERANDS is true. The expanded
22214 code is a forward jump to a newly created label in case the
22215 comparison is true. The generated label rtx is returned. */
22217 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
22218 bool swap_operands)
22229 label = gen_label_rtx ();
22230 tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
22231 emit_insn (gen_rtx_SET (VOIDmode, tmp,
22232 gen_rtx_COMPARE (CCFPUmode, op0, op1)));
22233 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
22234 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
22235 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
22236 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
22237 JUMP_LABEL (tmp) = label;
22242 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
22243 using comparison code CODE. Operands are swapped for the comparison if
22244 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
22246 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
22247 bool swap_operands)
22249 enum machine_mode mode = GET_MODE (op0);
22250 rtx mask = gen_reg_rtx (mode);
22259 if (mode == DFmode)
22260 emit_insn (gen_sse2_maskcmpdf3 (mask, op0, op1,
22261 gen_rtx_fmt_ee (code, mode, op0, op1)));
22263 emit_insn (gen_sse_maskcmpsf3 (mask, op0, op1,
22264 gen_rtx_fmt_ee (code, mode, op0, op1)));
22269 /* Generate and return a rtx of mode MODE for 2**n where n is the number
22270 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
22272 ix86_gen_TWO52 (enum machine_mode mode)
22274 REAL_VALUE_TYPE TWO52r;
22277 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
22278 TWO52 = const_double_from_real_value (TWO52r, mode);
22279 TWO52 = force_reg (mode, TWO52);
22281 return TWO52;
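/* Background (well-known IEEE trick exploited by the expanders
   below): for a nonnegative double x < 2**52, the sum x + 2**52 has
   no fraction bits left, so (x + 2**52) - 2**52 is x rounded to an
   integer in the current rounding mode; 2**23 plays the same role
   for SFmode.  */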
22284 /* Expand SSE sequence for computing lround from OP1 storing
22285 into OP0.  */
22286 void
22287 ix86_expand_lround (rtx op0, rtx op1)
22289 /* C code for the stuff we're doing below:
22290 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
22291 return (long)tmp;
22292 */
22293 enum machine_mode mode = GET_MODE (op1);
22294 const struct real_format *fmt;
22295 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
22298 /* load nextafter (0.5, 0.0) */
22299 fmt = REAL_MODE_FORMAT (mode);
22300 real_2expN (&half_minus_pred_half, -(fmt->p) - 1);
22301 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
22303 /* adj = copysign (0.5, op1) */
22304 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
22305 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
22307 /* adj = op1 + adj */
22308 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
22310 /* op0 = (imode)adj */
22311 expand_fix (op0, adj, 0);
22314 /* Expand SSE2 sequence for computing lfloor or lceil from OP1
22315 storing into OP0.  */
22316 void
22317 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
22319 /* C code for the stuff we're doing below (for do_floor):
22320 xi = (long)op1;
22321 xi -= (double)xi > op1 ? 1 : 0;
22322 return xi;
22323 */
22324 enum machine_mode fmode = GET_MODE (op1);
22325 enum machine_mode imode = GET_MODE (op0);
22326 rtx ireg, freg, label, tmp;
22328 /* reg = (long)op1 */
22329 ireg = gen_reg_rtx (imode);
22330 expand_fix (ireg, op1, 0);
22332 /* freg = (double)reg */
22333 freg = gen_reg_rtx (fmode);
22334 expand_float (freg, ireg, 0);
22336 /* ireg = (freg > op1) ? ireg - 1 : ireg */
22337 label = ix86_expand_sse_compare_and_jump (UNLE,
22338 freg, op1, !do_floor);
22339 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
22340 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
22341 emit_move_insn (ireg, tmp);
22343 emit_label (label);
22344 LABEL_NUSES (label) = 1;
22346 emit_move_insn (op0, ireg);
22349 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
22350 result in OPERAND0. */
22352 ix86_expand_rint (rtx operand0, rtx operand1)
22354 /* C code for the stuff we're doing below:
22355 xa = fabs (operand1);
22356 if (!isless (xa, 2**52))
22357 return operand1;
22358 xa = xa + 2**52 - 2**52;
22359 return copysign (xa, operand1);
22361 enum machine_mode mode = GET_MODE (operand0);
22362 rtx res, xa, label, TWO52, mask;
22364 res = gen_reg_rtx (mode);
22365 emit_move_insn (res, operand1);
22367 /* xa = abs (operand1) */
22368 xa = ix86_expand_sse_fabs (res, &mask);
22370 /* if (!isless (xa, TWO52)) goto label; */
22371 TWO52 = ix86_gen_TWO52 (mode);
22372 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
22374 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
22375 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
22377 ix86_sse_copysign_to_positive (res, xa, res, mask);
22379 emit_label (label);
22380 LABEL_NUSES (label) = 1;
22382 emit_move_insn (operand0, res);
22385 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
22386 into OPERAND0.  */
22387 void
22388 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
22390 /* C code for the stuff we expand below.
22391 double xa = fabs (x), x2;
22392 if (!isless (xa, TWO52))
22393 return x;
22394 xa = xa + TWO52 - TWO52;
22395 x2 = copysign (xa, x);
22396 Compensate. Floor:
22397 if (x2 > x)
22398 x2 -= 1;
22399 Compensate. Ceil:
22400 if (x2 < x)
22401 x2 += 1;
22402 return x2;
22403 */
22404 enum machine_mode mode = GET_MODE (operand0);
22405 rtx xa, TWO52, tmp, label, one, res, mask;
22407 TWO52 = ix86_gen_TWO52 (mode);
22409 /* Temporary for holding the result, initialized to the input
22410 operand to ease control flow. */
22411 res = gen_reg_rtx (mode);
22412 emit_move_insn (res, operand1);
22414 /* xa = abs (operand1) */
22415 xa = ix86_expand_sse_fabs (res, &mask);
22417 /* if (!isless (xa, TWO52)) goto label; */
22418 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
22420 /* xa = xa + TWO52 - TWO52; */
22421 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
22422 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
22424 /* xa = copysign (xa, operand1) */
22425 ix86_sse_copysign_to_positive (xa, xa, res, mask);
22427 /* generate 1.0 or -1.0 */
22428 one = force_reg (mode,
22429 const_double_from_real_value (do_floor
22430 ? dconst1 : dconstm1, mode));
22432 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
22433 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
22434 emit_insn (gen_rtx_SET (VOIDmode, tmp,
22435 gen_rtx_AND (mode, one, tmp)));
22436 /* We always need to subtract here to preserve signed zero. */
22437 tmp = expand_simple_binop (mode, MINUS,
22438 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
22439 emit_move_insn (res, tmp);
22441 emit_label (label);
22442 LABEL_NUSES (label) = 1;
22444 emit_move_insn (operand0, res);
22447 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
22448 into OPERAND0.  */
22449 void
22450 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
22452 /* C code for the stuff we expand below.
22453 double xa = fabs (x), x2;
22454 if (!isless (xa, TWO52))
22455 return x;
22456 x2 = (double)(long)x;
22457 Compensate. Floor:
22458 if (x2 > x)
22459 x2 -= 1;
22460 Compensate. Ceil:
22461 if (x2 < x)
22462 x2 += 1;
22463 if (HONOR_SIGNED_ZEROS (mode))
22464 return copysign (x2, x);
22465 return x2;
22466 */
22467 enum machine_mode mode = GET_MODE (operand0);
22468 rtx xa, xi, TWO52, tmp, label, one, res, mask;
22470 TWO52 = ix86_gen_TWO52 (mode);
22472 /* Temporary for holding the result, initialized to the input
22473 operand to ease control flow. */
22474 res = gen_reg_rtx (mode);
22475 emit_move_insn (res, operand1);
22477 /* xa = abs (operand1) */
22478 xa = ix86_expand_sse_fabs (res, &mask);
22480 /* if (!isless (xa, TWO52)) goto label; */
22481 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
22483 /* xa = (double)(long)x */
22484 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
22485 expand_fix (xi, res, 0);
22486 expand_float (xa, xi, 0);
22489 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
22491 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
22492 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
22493 emit_insn (gen_rtx_SET (VOIDmode, tmp,
22494 gen_rtx_AND (mode, one, tmp)));
22495 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
22496 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
22497 emit_move_insn (res, tmp);
22499 if (HONOR_SIGNED_ZEROS (mode))
22500 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
22502 emit_label (label);
22503 LABEL_NUSES (label) = 1;
22505 emit_move_insn (operand0, res);
22508 /* Expand SSE sequence for computing round from OPERAND1 storing
22509 into OPERAND0. Sequence that works without relying on DImode truncation
22510 via cvttsd2siq that is only available on 64bit targets. */
22511 void
22512 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
22514 /* C code for the stuff we expand below.
22515 double xa = fabs (x), xa2, x2;
22516 if (!isless (xa, TWO52))
22517 return x;
22518 Using the absolute value and copying back sign makes
22519 -0.0 -> -0.0 correct.
22520 xa2 = xa + TWO52 - TWO52;
22521 Compensate.
22522 dxa = xa2 - xa;
22523 if (dxa <= -0.5)
22524 xa2 += 1;
22525 else if (dxa > 0.5)
22526 xa2 -= 1;
22527 x2 = copysign (xa2, x);
22528 return x2;
22529 */
22530 enum machine_mode mode = GET_MODE (operand0);
22531 rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;
22533 TWO52 = ix86_gen_TWO52 (mode);
22535 /* Temporary for holding the result, initialized to the input
22536 operand to ease control flow. */
22537 res = gen_reg_rtx (mode);
22538 emit_move_insn (res, operand1);
22540 /* xa = abs (operand1) */
22541 xa = ix86_expand_sse_fabs (res, &mask);
22543 /* if (!isless (xa, TWO52)) goto label; */
22544 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
22546 /* xa2 = xa + TWO52 - TWO52; */
22547 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
22548 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
22550 /* dxa = xa2 - xa; */
22551 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
22553 /* generate 0.5, 1.0 and -0.5 */
22554 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
22555 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
22556 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
22560 tmp = gen_reg_rtx (mode);
22561 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
22562 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
22563 emit_insn (gen_rtx_SET (VOIDmode, tmp,
22564 gen_rtx_AND (mode, one, tmp)));
22565 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
22566 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
22567 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
22568 emit_insn (gen_rtx_SET (VOIDmode, tmp,
22569 gen_rtx_AND (mode, one, tmp)));
22570 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
22572 /* res = copysign (xa2, operand1) */
22573 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
22575 emit_label (label);
22576 LABEL_NUSES (label) = 1;
22578 emit_move_insn (operand0, res);
22581 /* Expand SSE sequence for computing trunc from OPERAND1 storing
22582 into OPERAND0.  */
22583 void
22584 ix86_expand_trunc (rtx operand0, rtx operand1)
22586 /* C code for SSE variant we expand below.
22587 double xa = fabs (x), x2;
22588 if (!isless (xa, TWO52))
22589 return x;
22590 x2 = (double)(long)x;
22591 if (HONOR_SIGNED_ZEROS (mode))
22592 return copysign (x2, x);
22593 return x2;
22594 */
22595 enum machine_mode mode = GET_MODE (operand0);
22596 rtx xa, xi, TWO52, label, res, mask;
22598 TWO52 = ix86_gen_TWO52 (mode);
22600 /* Temporary for holding the result, initialized to the input
22601 operand to ease control flow. */
22602 res = gen_reg_rtx (mode);
22603 emit_move_insn (res, operand1);
22605 /* xa = abs (operand1) */
22606 xa = ix86_expand_sse_fabs (res, &mask);
22608 /* if (!isless (xa, TWO52)) goto label; */
22609 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
22611 /* x = (double)(long)x */
22612 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
22613 expand_fix (xi, res, 0);
22614 expand_float (res, xi, 0);
22616 if (HONOR_SIGNED_ZEROS (mode))
22617 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
22619 emit_label (label);
22620 LABEL_NUSES (label) = 1;
22622 emit_move_insn (operand0, res);
22625 /* Expand SSE sequence for computing trunc from OPERAND1 storing
22626 into OPERAND0.  */
22627 void
22628 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
22630 enum machine_mode mode = GET_MODE (operand0);
22631 rtx xa, mask, TWO52, label, one, res, smask, tmp;
22633 /* C code for SSE variant we expand below.
22634 double xa = fabs (x), x2;
22635 if (!isless (xa, TWO52))
22636 return x;
22637 xa2 = xa + TWO52 - TWO52;
22638 Compensate:
22639 if (xa2 > xa)
22640 xa2 -= 1.0;
22641 x2 = copysign (xa2, x);
22642 return x2;
22643 */
22645 TWO52 = ix86_gen_TWO52 (mode);
22647 /* Temporary for holding the result, initialized to the input
22648 operand to ease control flow. */
22649 res = gen_reg_rtx (mode);
22650 emit_move_insn (res, operand1);
22652 /* xa = abs (operand1) */
22653 xa = ix86_expand_sse_fabs (res, &smask);
22655 /* if (!isless (xa, TWO52)) goto label; */
22656 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  /* res = xa + TWO52 - TWO52; */
  tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
  tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
  emit_move_insn (res, tmp);

  /* Generate 1.0.  */
  one = force_reg (mode, const_double_from_real_value (dconst1, mode));

  /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
  mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
  emit_insn (gen_rtx_SET (VOIDmode, mask,
			  gen_rtx_AND (mode, mask, one)));
  tmp = expand_simple_binop (mode, MINUS,
			     res, mask, NULL_RTX, 0, OPTAB_DIRECT);
  emit_move_insn (res, tmp);

  /* res = copysign (res, operand1) */
  ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
}

/* Expand SSE sequence for computing round from OPERAND1 storing
   into OPERAND0.  */
void
ix86_expand_round (rtx operand0, rtx operand1)
{
  /* C code for the stuff we're doing below:
	double xa = fabs (x);
	if (!isless (xa, TWO52))
	  return x;
	xa = (double)(long)(xa + nextafter (0.5, 0.0));
	return copysign (xa, x);
   */
  enum machine_mode mode = GET_MODE (operand0);
  rtx res, TWO52, xa, label, xi, half, mask;
  const struct real_format *fmt;
  REAL_VALUE_TYPE pred_half, half_minus_pred_half;

  /* Temporary for holding the result, initialized to the input
     operand to ease control flow.  */
  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  TWO52 = ix86_gen_TWO52 (mode);
  xa = ix86_expand_sse_fabs (res, &mask);
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
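
  /* Adding exactly 0.5 before truncating would be wrong for inputs
     just below 0.5: for the largest double smaller than 0.5, xa + 0.5
     rounds up to 1.0 and the result would become 1 instead of 0.
     Using nextafter (0.5, 0.0), the largest representable value below
     0.5, avoids this double-rounding problem.  */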

  /* load nextafter (0.5, 0.0) */
  fmt = REAL_MODE_FORMAT (mode);
  real_2expN (&half_minus_pred_half, -(fmt->p) - 1);
  REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);

  /* xa = xa + 0.5 */
  half = force_reg (mode, const_double_from_real_value (pred_half, mode));
  xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);

  /* xa = (double)(int64_t)xa */
  xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
  expand_fix (xi, xa, 0);
  expand_float (xa, xi, 0);

  /* res = copysign (xa, operand1) */
  ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
}

/* Table of valid machine attributes.  */
static const struct attribute_spec ix86_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
  /* Stdcall attribute says callee is responsible for popping arguments
     if they are not variable.  */
  { "stdcall",  0, 0, false, true,  true,  ix86_handle_cconv_attribute },
  /* Fastcall attribute says callee is responsible for popping arguments
     if they are not variable.  */
  { "fastcall", 0, 0, false, true,  true,  ix86_handle_cconv_attribute },
  /* Cdecl attribute says the callee is a normal C declaration.  */
  { "cdecl",    0, 0, false, true,  true,  ix86_handle_cconv_attribute },
  /* Regparm attribute specifies how many integer arguments are to be
     passed in registers.  */
  { "regparm",  1, 1, false, true,  true,  ix86_handle_cconv_attribute },
  /* Sseregparm attribute says we are using x86_64 calling conventions
     for FP arguments.  */
  { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
  /* force_align_arg_pointer says this function realigns the stack at entry.  */
  { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
    false, true,  true,  ix86_handle_cconv_attribute },
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
  { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
  { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
  { "shared",    0, 0, true,  false, false, ix86_handle_shared_attribute },
#endif
  { "ms_struct",  0, 0, false, false, false, ix86_handle_struct_attribute },
  { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
#ifdef SUBTARGET_ATTRIBUTE_TABLE
  SUBTARGET_ATTRIBUTE_TABLE,
#endif
  { NULL,       0, 0, false, false, false, NULL }
};
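
/* For reference, user code selects these attributes with GCC's
   __attribute__ syntax; an illustrative (not exhaustive) example:

     int __attribute__ ((fastcall)) f (int a, int b);
     int __attribute__ ((regparm (3))) g (int a, int b, int c);

   Both request register-based argument passing, as handled by
   ix86_handle_cconv_attribute above.  */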

/* Initialize the GCC target structure.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
#  undef TARGET_MERGE_DECL_ATTRIBUTES
#  define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
#endif

#undef TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS ix86_init_builtins
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN ix86_expand_builtin

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION ix86_builtin_vectorized_function
#undef TARGET_VECTORIZE_BUILTIN_CONVERSION
#define TARGET_VECTORIZE_BUILTIN_CONVERSION ix86_builtin_conversion

#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue

#undef TARGET_ENCODE_SECTION_INFO
#ifndef SUBTARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
#else
#define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
#endif

#undef TARGET_ASM_OPEN_PAREN
#define TARGET_ASM_OPEN_PAREN ""
#undef TARGET_ASM_CLOSE_PAREN
#define TARGET_ASM_CLOSE_PAREN ""

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
#ifdef ASM_QUAD
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
#endif

#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
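
/* The i386 data directives used for aligned output (ASM_SHORT etc.)
   do not themselves force any alignment, and the hardware tolerates
   misaligned accesses, so the unaligned hooks can simply reuse the
   aligned ones.  */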

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
  ia32_multipass_dfa_lookahead

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall

#ifdef HAVE_tls
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
#define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_rtx_true

#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address

#undef TARGET_MS_BITFIELD_LAYOUT_P
#define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p

#if TARGET_MACHO
#undef TARGET_BINDS_LOCAL_P
#define TARGET_BINDS_LOCAL_P darwin_binds_local_p
#endif
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
#undef TARGET_BINDS_LOCAL_P
#define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
#endif

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START x86_file_start

#undef TARGET_DEFAULT_TARGET_FLAGS
#define TARGET_DEFAULT_TARGET_FLAGS	\
  (TARGET_DEFAULT			\
   | TARGET_SUBTARGET_DEFAULT		\
   | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)

#undef TARGET_HANDLE_OPTION
#define TARGET_HANDLE_OPTION ix86_handle_option

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS ix86_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST ix86_address_cost

#undef TARGET_FIXED_CONDITION_CODE_REGS
#define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
#undef TARGET_CC_MODES_COMPATIBLE
#define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list

#undef TARGET_MD_ASM_CLOBBERS
#define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers

#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
#undef TARGET_INTERNAL_ARG_POINTER
#define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
#undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
#define TARGET_DWARF_HANDLE_FRAME_UNSPEC ix86_dwarf_handle_frame_unspec
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p

#ifdef HAVE_AS_TLS
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
#endif

#ifdef SUBTARGET_INSERT_ATTRIBUTES
#undef TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
#endif

#undef TARGET_MANGLE_FUNDAMENTAL_TYPE
#define TARGET_MANGLE_FUNDAMENTAL_TYPE ix86_mangle_fundamental_type

#undef TARGET_STACK_PROTECT_FAIL
#define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE ix86_function_value
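
/* TARGET_INITIALIZER comes from target-def.h; it expands to an
   aggregate initializer that picks up every TARGET_* hook macro
   (re)defined above and fills the remaining hooks with their
   defaults.  */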

struct gcc_target targetm = TARGET_INITIALIZER;

#include "gt-i386.h"