1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to
19 the Free Software Foundation, 51 Franklin Street, Fifth Floor,
20 Boston, MA 02110-1301, USA. */
24 #include "coretypes.h"
30 #include "hard-reg-set.h"
32 #include "insn-config.h"
33 #include "conditions.h"
35 #include "insn-codes.h"
36 #include "insn-attr.h"
44 #include "basic-block.h"
47 #include "target-def.h"
48 #include "langhooks.h"
50 #include "tree-gimple.h"
53 #include "tm-constrs.h"
/* Provide a default for CHECK_STACK_LIMIT when nothing included earlier
   has defined it.  The conditional is closed here so that it guards only
   this one definition and not the rest of the file.  */
#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif
/* Return index of given mode in mult and division cost tables.
   QImode, HImode, SImode and DImode select entries 0..3; every other
   mode selects entry 4, the "other" slot that closes each five-entry
   multiply/divide table below.  MODE is evaluated more than once, so
   it must not have side effects.  */
#define MODE_INDEX(mode)		\
  ((mode) == QImode ? 0			\
   : (mode) == HImode ? 1		\
   : (mode) == SImode ? 2		\
   : (mode) == DImode ? 3		\
   : 4)
68 /* Processor costs (relative to an add) */
69 /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
/* COSTS_N_BYTES (N) puts a size of N bytes on that same scale by
   multiplying by 2: a 2-byte addition yields 4, which matches the value
   the assumption above gives for one instruction.  */
70 #define COSTS_N_BYTES(N) ((N) * 2)
/* Filler stringop entry: libcall as the leading algorithm followed by a
   single {-1, libcall} row.  The tables below use it as the second
   element of a stringop pair when no dedicated second entry is given
   (presumably the 64-bit variant -- TODO confirm against the declaration
   of struct processor_costs).  */
72 #define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
/* size_cost: cost table used when tuning for size.  Entries are
   positional; the trailing comment on each row names the field.
   Instruction costs are written with COSTS_N_BYTES (code size) instead
   of COSTS_N_INSNS, and every stringop entry selects rep_prefix_1_byte.
   NOTE(review): the numbers embedded at the start of the lines skip
   (92 -> 95, 117 -> 119), no '"large" insn' row appears after movzx
   although every other table in this file has one, and no closing "};"
   is visible.  Rows seem to be missing from this copy -- confirm against
   the pristine file before relying on field positions.  */
75 struct processor_costs size_cost = { /* costs for tuning for size */
76 COSTS_N_BYTES (2), /* cost of an add instruction */
77 COSTS_N_BYTES (3), /* cost of a lea instruction */
78 COSTS_N_BYTES (2), /* variable shift costs */
79 COSTS_N_BYTES (3), /* constant shift costs */
80 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
81 COSTS_N_BYTES (3), /* HI */
82 COSTS_N_BYTES (3), /* SI */
83 COSTS_N_BYTES (3), /* DI */
84 COSTS_N_BYTES (5)}, /* other */
85 0, /* cost of multiply per each bit set */
86 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
87 COSTS_N_BYTES (3), /* HI */
88 COSTS_N_BYTES (3), /* SI */
89 COSTS_N_BYTES (3), /* DI */
90 COSTS_N_BYTES (5)}, /* other */
91 COSTS_N_BYTES (3), /* cost of movsx */
92 COSTS_N_BYTES (3), /* cost of movzx */
95 2, /* cost for loading QImode using movzbl */
96 {2, 2, 2}, /* cost of loading integer registers
97 in QImode, HImode and SImode.
98 Relative to reg-reg move (2). */
99 {2, 2, 2}, /* cost of storing integer registers */
100 2, /* cost of reg,reg fld/fst */
101 {2, 2, 2}, /* cost of loading fp registers
102 in SFmode, DFmode and XFmode */
103 {2, 2, 2}, /* cost of storing fp registers
104 in SFmode, DFmode and XFmode */
105 3, /* cost of moving MMX register */
106 {3, 3}, /* cost of loading MMX registers
107 in SImode and DImode */
108 {3, 3}, /* cost of storing MMX registers
109 in SImode and DImode */
110 3, /* cost of moving SSE register */
111 {3, 3, 3}, /* cost of loading SSE registers
112 in SImode, DImode and TImode */
113 {3, 3, 3}, /* cost of storing SSE registers
114 in SImode, DImode and TImode */
115 3, /* MMX or SSE register to integer */
116 0, /* size of prefetch block */
117 0, /* number of parallel prefetches */
119 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
120 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
121 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
122 COSTS_N_BYTES (2), /* cost of FABS instruction. */
123 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
124 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
125 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
126 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
127 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
128 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}}
131 /* Processor costs (relative to an add) */
/* i386_cost: the 386 cost table.  Entries are positional; instruction
   costs use COSTS_N_INSNS and the load/store rows are plain numbers
   relative to a reg-reg move of 2.  Both stringop pairs use
   rep_prefix_1_byte first and DUMMY_STRINGOP_ALGS second.
   NOTE(review): the embedded line numbers skip (151 -> 153, 175 -> 177)
   and no closing "};" is visible, so rows may be missing from this
   copy -- confirm against the pristine file.  */
133 struct processor_costs i386_cost = { /* 386 specific costs */
134 COSTS_N_INSNS (1), /* cost of an add instruction */
135 COSTS_N_INSNS (1), /* cost of a lea instruction */
136 COSTS_N_INSNS (3), /* variable shift costs */
137 COSTS_N_INSNS (2), /* constant shift costs */
138 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
139 COSTS_N_INSNS (6), /* HI */
140 COSTS_N_INSNS (6), /* SI */
141 COSTS_N_INSNS (6), /* DI */
142 COSTS_N_INSNS (6)}, /* other */
143 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
144 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
145 COSTS_N_INSNS (23), /* HI */
146 COSTS_N_INSNS (23), /* SI */
147 COSTS_N_INSNS (23), /* DI */
148 COSTS_N_INSNS (23)}, /* other */
149 COSTS_N_INSNS (3), /* cost of movsx */
150 COSTS_N_INSNS (2), /* cost of movzx */
151 15, /* "large" insn */
153 4, /* cost for loading QImode using movzbl */
154 {2, 4, 2}, /* cost of loading integer registers
155 in QImode, HImode and SImode.
156 Relative to reg-reg move (2). */
157 {2, 4, 2}, /* cost of storing integer registers */
158 2, /* cost of reg,reg fld/fst */
159 {8, 8, 8}, /* cost of loading fp registers
160 in SFmode, DFmode and XFmode */
161 {8, 8, 8}, /* cost of storing fp registers
162 in SFmode, DFmode and XFmode */
163 2, /* cost of moving MMX register */
164 {4, 8}, /* cost of loading MMX registers
165 in SImode and DImode */
166 {4, 8}, /* cost of storing MMX registers
167 in SImode and DImode */
168 2, /* cost of moving SSE register */
169 {4, 8, 16}, /* cost of loading SSE registers
170 in SImode, DImode and TImode */
171 {4, 8, 16}, /* cost of storing SSE registers
172 in SImode, DImode and TImode */
173 3, /* MMX or SSE register to integer */
174 0, /* size of prefetch block */
175 0, /* number of parallel prefetches */
177 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
178 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
179 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
180 COSTS_N_INSNS (22), /* cost of FABS instruction. */
181 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
182 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
183 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
184 DUMMY_STRINGOP_ALGS},
185 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
186 DUMMY_STRINGOP_ALGS},
/* i486_cost: the 486 cost table, laid out row for row like the 386
   table.  Note that "cost of multiply per each bit set" is the bare
   value 1 here, not COSTS_N_INSNS (1) as in the 386 table.
   NOTE(review): the embedded line numbers skip (208 -> 210, 232 -> 234),
   the second stringop pair has no second element, and no closing "};"
   is visible, so rows appear to be missing from this copy -- confirm
   against the pristine file.  */
190 struct processor_costs i486_cost = { /* 486 specific costs */
191 COSTS_N_INSNS (1), /* cost of an add instruction */
192 COSTS_N_INSNS (1), /* cost of a lea instruction */
193 COSTS_N_INSNS (3), /* variable shift costs */
194 COSTS_N_INSNS (2), /* constant shift costs */
195 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
196 COSTS_N_INSNS (12), /* HI */
197 COSTS_N_INSNS (12), /* SI */
198 COSTS_N_INSNS (12), /* DI */
199 COSTS_N_INSNS (12)}, /* other */
200 1, /* cost of multiply per each bit set */
201 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
202 COSTS_N_INSNS (40), /* HI */
203 COSTS_N_INSNS (40), /* SI */
204 COSTS_N_INSNS (40), /* DI */
205 COSTS_N_INSNS (40)}, /* other */
206 COSTS_N_INSNS (3), /* cost of movsx */
207 COSTS_N_INSNS (2), /* cost of movzx */
208 15, /* "large" insn */
210 4, /* cost for loading QImode using movzbl */
211 {2, 4, 2}, /* cost of loading integer registers
212 in QImode, HImode and SImode.
213 Relative to reg-reg move (2). */
214 {2, 4, 2}, /* cost of storing integer registers */
215 2, /* cost of reg,reg fld/fst */
216 {8, 8, 8}, /* cost of loading fp registers
217 in SFmode, DFmode and XFmode */
218 {8, 8, 8}, /* cost of storing fp registers
219 in SFmode, DFmode and XFmode */
220 2, /* cost of moving MMX register */
221 {4, 8}, /* cost of loading MMX registers
222 in SImode and DImode */
223 {4, 8}, /* cost of storing MMX registers
224 in SImode and DImode */
225 2, /* cost of moving SSE register */
226 {4, 8, 16}, /* cost of loading SSE registers
227 in SImode, DImode and TImode */
228 {4, 8, 16}, /* cost of storing SSE registers
229 in SImode, DImode and TImode */
230 3, /* MMX or SSE register to integer */
231 0, /* size of prefetch block */
232 0, /* number of parallel prefetches */
234 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
235 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
236 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
237 COSTS_N_INSNS (3), /* cost of FABS instruction. */
238 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
239 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
240 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
241 DUMMY_STRINGOP_ALGS},
242 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
/* pentium_cost: cost table with the same row order as the 386 and 486
   tables (presumably the Pentium tuning costs, judging by the name --
   TODO confirm where it is selected).  The first stringop pair uses
   rep_prefix_4_byte up to the 256 threshold and libcall beyond it.
   NOTE(review): the embedded line numbers skip (265 -> 267, 289 -> 291),
   the second stringop pair has no second element, and no closing "};"
   is visible, so rows appear to be missing from this copy -- confirm
   against the pristine file.  */
247 struct processor_costs pentium_cost = {
248 COSTS_N_INSNS (1), /* cost of an add instruction */
249 COSTS_N_INSNS (1), /* cost of a lea instruction */
250 COSTS_N_INSNS (4), /* variable shift costs */
251 COSTS_N_INSNS (1), /* constant shift costs */
252 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
253 COSTS_N_INSNS (11), /* HI */
254 COSTS_N_INSNS (11), /* SI */
255 COSTS_N_INSNS (11), /* DI */
256 COSTS_N_INSNS (11)}, /* other */
257 0, /* cost of multiply per each bit set */
258 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
259 COSTS_N_INSNS (25), /* HI */
260 COSTS_N_INSNS (25), /* SI */
261 COSTS_N_INSNS (25), /* DI */
262 COSTS_N_INSNS (25)}, /* other */
263 COSTS_N_INSNS (3), /* cost of movsx */
264 COSTS_N_INSNS (2), /* cost of movzx */
265 8, /* "large" insn */
267 6, /* cost for loading QImode using movzbl */
268 {2, 4, 2}, /* cost of loading integer registers
269 in QImode, HImode and SImode.
270 Relative to reg-reg move (2). */
271 {2, 4, 2}, /* cost of storing integer registers */
272 2, /* cost of reg,reg fld/fst */
273 {2, 2, 6}, /* cost of loading fp registers
274 in SFmode, DFmode and XFmode */
275 {4, 4, 6}, /* cost of storing fp registers
276 in SFmode, DFmode and XFmode */
277 8, /* cost of moving MMX register */
278 {8, 8}, /* cost of loading MMX registers
279 in SImode and DImode */
280 {8, 8}, /* cost of storing MMX registers
281 in SImode and DImode */
282 2, /* cost of moving SSE register */
283 {4, 8, 16}, /* cost of loading SSE registers
284 in SImode, DImode and TImode */
285 {4, 8, 16}, /* cost of storing SSE registers
286 in SImode, DImode and TImode */
287 3, /* MMX or SSE register to integer */
288 0, /* size of prefetch block */
289 0, /* number of parallel prefetches */
291 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
292 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
293 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
294 COSTS_N_INSNS (1), /* cost of FABS instruction. */
295 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
296 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
297 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
298 DUMMY_STRINGOP_ALGS},
299 {{libcall, {{-1, rep_prefix_4_byte}}},
/* pentiumpro_cost: cost table with the same row order as the tables
   above (presumably the PentiumPro tuning costs, judging by the name and
   by the stringop comment near the end -- TODO confirm).
   NOTE(review): the comment that starts "PentiumPro has optimized rep
   instructions" has no closing delimiter in this copy, so the stringop
   rows after it are lexically swallowed by that comment.  In addition
   the embedded line numbers skip (322 -> 324, 346 -> 348, 357 -> 359),
   the second stringop pair has no second element, and no closing "};"
   is visible.  Confirm all of this against the pristine file.  */
304 struct processor_costs pentiumpro_cost = {
305 COSTS_N_INSNS (1), /* cost of an add instruction */
306 COSTS_N_INSNS (1), /* cost of a lea instruction */
307 COSTS_N_INSNS (1), /* variable shift costs */
308 COSTS_N_INSNS (1), /* constant shift costs */
309 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
310 COSTS_N_INSNS (4), /* HI */
311 COSTS_N_INSNS (4), /* SI */
312 COSTS_N_INSNS (4), /* DI */
313 COSTS_N_INSNS (4)}, /* other */
314 0, /* cost of multiply per each bit set */
315 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
316 COSTS_N_INSNS (17), /* HI */
317 COSTS_N_INSNS (17), /* SI */
318 COSTS_N_INSNS (17), /* DI */
319 COSTS_N_INSNS (17)}, /* other */
320 COSTS_N_INSNS (1), /* cost of movsx */
321 COSTS_N_INSNS (1), /* cost of movzx */
322 8, /* "large" insn */
324 2, /* cost for loading QImode using movzbl */
325 {4, 4, 4}, /* cost of loading integer registers
326 in QImode, HImode and SImode.
327 Relative to reg-reg move (2). */
328 {2, 2, 2}, /* cost of storing integer registers */
329 2, /* cost of reg,reg fld/fst */
330 {2, 2, 6}, /* cost of loading fp registers
331 in SFmode, DFmode and XFmode */
332 {4, 4, 6}, /* cost of storing fp registers
333 in SFmode, DFmode and XFmode */
334 2, /* cost of moving MMX register */
335 {2, 2}, /* cost of loading MMX registers
336 in SImode and DImode */
337 {2, 2}, /* cost of storing MMX registers
338 in SImode and DImode */
339 2, /* cost of moving SSE register */
340 {2, 2, 8}, /* cost of loading SSE registers
341 in SImode, DImode and TImode */
342 {2, 2, 8}, /* cost of storing SSE registers
343 in SImode, DImode and TImode */
344 3, /* MMX or SSE register to integer */
345 32, /* size of prefetch block */
346 6, /* number of parallel prefetches */
348 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
349 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
350 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
351 COSTS_N_INSNS (2), /* cost of FABS instruction. */
352 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
353 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
354 /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes (we ensure
355 the alignment). For small blocks inline loop is still a noticeable win, for bigger
356 blocks either rep movsl or rep movsb is way to go. Rep movsb has apparently
357 more expensive startup time in CPU, but after 4K the difference is down in the noise.
359 {{rep_prefix_4_byte, {{128, loop}, {1024, unrolled_loop},
360 {8192, rep_prefix_4_byte}, {-1, rep_prefix_1_byte}}},
361 DUMMY_STRINGOP_ALGS},
362 {{rep_prefix_4_byte, {{1024, unrolled_loop},
363 {8192, rep_prefix_4_byte}, {-1, libcall}}},
/* geode_cost: cost table with the same row order as the tables above
   (presumably the Geode tuning costs, judging by the name -- TODO
   confirm).  Unlike the earlier tables, multiply and divide costs differ
   per mode, and the load/store rows are mostly 1.
   NOTE(review): the embedded line numbers skip (386 -> 388, 397 -> 399,
   411 -> 413), the second stringop pair has no second element, and no
   closing "};" is visible, so rows appear to be missing from this
   copy -- confirm against the pristine file.  */
368 struct processor_costs geode_cost = {
369 COSTS_N_INSNS (1), /* cost of an add instruction */
370 COSTS_N_INSNS (1), /* cost of a lea instruction */
371 COSTS_N_INSNS (2), /* variable shift costs */
372 COSTS_N_INSNS (1), /* constant shift costs */
373 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
374 COSTS_N_INSNS (4), /* HI */
375 COSTS_N_INSNS (7), /* SI */
376 COSTS_N_INSNS (7), /* DI */
377 COSTS_N_INSNS (7)}, /* other */
378 0, /* cost of multiply per each bit set */
379 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
380 COSTS_N_INSNS (23), /* HI */
381 COSTS_N_INSNS (39), /* SI */
382 COSTS_N_INSNS (39), /* DI */
383 COSTS_N_INSNS (39)}, /* other */
384 COSTS_N_INSNS (1), /* cost of movsx */
385 COSTS_N_INSNS (1), /* cost of movzx */
386 8, /* "large" insn */
388 1, /* cost for loading QImode using movzbl */
389 {1, 1, 1}, /* cost of loading integer registers
390 in QImode, HImode and SImode.
391 Relative to reg-reg move (2). */
392 {1, 1, 1}, /* cost of storing integer registers */
393 1, /* cost of reg,reg fld/fst */
394 {1, 1, 1}, /* cost of loading fp registers
395 in SFmode, DFmode and XFmode */
396 {4, 6, 6}, /* cost of storing fp registers
397 in SFmode, DFmode and XFmode */
399 1, /* cost of moving MMX register */
400 {1, 1}, /* cost of loading MMX registers
401 in SImode and DImode */
402 {1, 1}, /* cost of storing MMX registers
403 in SImode and DImode */
404 1, /* cost of moving SSE register */
405 {1, 1, 1}, /* cost of loading SSE registers
406 in SImode, DImode and TImode */
407 {1, 1, 1}, /* cost of storing SSE registers
408 in SImode, DImode and TImode */
409 1, /* MMX or SSE register to integer */
410 32, /* size of prefetch block */
411 1, /* number of parallel prefetches */
413 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
414 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
415 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
416 COSTS_N_INSNS (1), /* cost of FABS instruction. */
417 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
418 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
419 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
420 DUMMY_STRINGOP_ALGS},
421 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
/* k6_cost: cost table with the same row order as the tables above
   (presumably the K6 tuning costs, judging by the name -- TODO confirm).
   Both visible stringop entries use rep_prefix_4_byte up to the 256
   threshold and libcall beyond it.
   NOTE(review): the embedded line numbers skip (444 -> 446, 468 -> 470),
   the second stringop pair has no second element, and no closing "};"
   is visible, so rows appear to be missing from this copy -- confirm
   against the pristine file.  */
426 struct processor_costs k6_cost = {
427 COSTS_N_INSNS (1), /* cost of an add instruction */
428 COSTS_N_INSNS (2), /* cost of a lea instruction */
429 COSTS_N_INSNS (1), /* variable shift costs */
430 COSTS_N_INSNS (1), /* constant shift costs */
431 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
432 COSTS_N_INSNS (3), /* HI */
433 COSTS_N_INSNS (3), /* SI */
434 COSTS_N_INSNS (3), /* DI */
435 COSTS_N_INSNS (3)}, /* other */
436 0, /* cost of multiply per each bit set */
437 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
438 COSTS_N_INSNS (18), /* HI */
439 COSTS_N_INSNS (18), /* SI */
440 COSTS_N_INSNS (18), /* DI */
441 COSTS_N_INSNS (18)}, /* other */
442 COSTS_N_INSNS (2), /* cost of movsx */
443 COSTS_N_INSNS (2), /* cost of movzx */
444 8, /* "large" insn */
446 3, /* cost for loading QImode using movzbl */
447 {4, 5, 4}, /* cost of loading integer registers
448 in QImode, HImode and SImode.
449 Relative to reg-reg move (2). */
450 {2, 3, 2}, /* cost of storing integer registers */
451 4, /* cost of reg,reg fld/fst */
452 {6, 6, 6}, /* cost of loading fp registers
453 in SFmode, DFmode and XFmode */
454 {4, 4, 4}, /* cost of storing fp registers
455 in SFmode, DFmode and XFmode */
456 2, /* cost of moving MMX register */
457 {2, 2}, /* cost of loading MMX registers
458 in SImode and DImode */
459 {2, 2}, /* cost of storing MMX registers
460 in SImode and DImode */
461 2, /* cost of moving SSE register */
462 {2, 2, 8}, /* cost of loading SSE registers
463 in SImode, DImode and TImode */
464 {2, 2, 8}, /* cost of storing SSE registers
465 in SImode, DImode and TImode */
466 6, /* MMX or SSE register to integer */
467 32, /* size of prefetch block */
468 1, /* number of parallel prefetches */
470 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
471 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
472 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
473 COSTS_N_INSNS (2), /* cost of FABS instruction. */
474 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
475 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
476 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
477 DUMMY_STRINGOP_ALGS},
478 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
/* athlon_cost: cost table with the same row order as the tables above
   (presumably the Athlon tuning costs, judging by the name and by the
   stringop comment near the end -- TODO confirm).  Divide costs grow
   with operand width, from QI up to DI.
   NOTE(review): the embedded line numbers skip (501 -> 503, 525 -> 527),
   the second stringop pair has no second element, and no closing "};"
   is visible, so rows appear to be missing from this copy -- confirm
   against the pristine file.  */
483 struct processor_costs athlon_cost = {
484 COSTS_N_INSNS (1), /* cost of an add instruction */
485 COSTS_N_INSNS (2), /* cost of a lea instruction */
486 COSTS_N_INSNS (1), /* variable shift costs */
487 COSTS_N_INSNS (1), /* constant shift costs */
488 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
489 COSTS_N_INSNS (5), /* HI */
490 COSTS_N_INSNS (5), /* SI */
491 COSTS_N_INSNS (5), /* DI */
492 COSTS_N_INSNS (5)}, /* other */
493 0, /* cost of multiply per each bit set */
494 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
495 COSTS_N_INSNS (26), /* HI */
496 COSTS_N_INSNS (42), /* SI */
497 COSTS_N_INSNS (74), /* DI */
498 COSTS_N_INSNS (74)}, /* other */
499 COSTS_N_INSNS (1), /* cost of movsx */
500 COSTS_N_INSNS (1), /* cost of movzx */
501 8, /* "large" insn */
503 4, /* cost for loading QImode using movzbl */
504 {3, 4, 3}, /* cost of loading integer registers
505 in QImode, HImode and SImode.
506 Relative to reg-reg move (2). */
507 {3, 4, 3}, /* cost of storing integer registers */
508 4, /* cost of reg,reg fld/fst */
509 {4, 4, 12}, /* cost of loading fp registers
510 in SFmode, DFmode and XFmode */
511 {6, 6, 8}, /* cost of storing fp registers
512 in SFmode, DFmode and XFmode */
513 2, /* cost of moving MMX register */
514 {4, 4}, /* cost of loading MMX registers
515 in SImode and DImode */
516 {4, 4}, /* cost of storing MMX registers
517 in SImode and DImode */
518 2, /* cost of moving SSE register */
519 {4, 4, 6}, /* cost of loading SSE registers
520 in SImode, DImode and TImode */
521 {4, 4, 5}, /* cost of storing SSE registers
522 in SImode, DImode and TImode */
523 5, /* MMX or SSE register to integer */
524 64, /* size of prefetch block */
525 6, /* number of parallel prefetches */
527 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
528 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
529 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
530 COSTS_N_INSNS (2), /* cost of FABS instruction. */
531 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
532 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
533 /* For some reason, Athlon deals better with REP prefix (relative to loops)
534 compared to K8. Alignment becomes important after 8 bytes for memcpy and
535 128 bytes for memset. */
536 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
537 DUMMY_STRINGOP_ALGS},
538 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
/* k8_cost: cost table with the same row order as the tables above
   (presumably the K8 tuning costs, judging by the name and by the
   stringop comment near the end -- TODO confirm).  Both stringop pairs
   give a real second entry built on rep_prefix_8_byte instead of
   DUMMY_STRINGOP_ALGS.
   NOTE(review): the comment that starts "New AMD processors never drop
   prefetches" is not closed before the "100" row in this copy, so that
   value sits lexically inside the comment.  The embedded line numbers
   also skip (561 -> 563, 588 -> 590, 590 -> 592) and no closing "};"
   is visible.  Confirm against the pristine file.  */
543 struct processor_costs k8_cost = {
544 COSTS_N_INSNS (1), /* cost of an add instruction */
545 COSTS_N_INSNS (2), /* cost of a lea instruction */
546 COSTS_N_INSNS (1), /* variable shift costs */
547 COSTS_N_INSNS (1), /* constant shift costs */
548 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
549 COSTS_N_INSNS (4), /* HI */
550 COSTS_N_INSNS (3), /* SI */
551 COSTS_N_INSNS (4), /* DI */
552 COSTS_N_INSNS (5)}, /* other */
553 0, /* cost of multiply per each bit set */
554 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
555 COSTS_N_INSNS (26), /* HI */
556 COSTS_N_INSNS (42), /* SI */
557 COSTS_N_INSNS (74), /* DI */
558 COSTS_N_INSNS (74)}, /* other */
559 COSTS_N_INSNS (1), /* cost of movsx */
560 COSTS_N_INSNS (1), /* cost of movzx */
561 8, /* "large" insn */
563 4, /* cost for loading QImode using movzbl */
564 {3, 4, 3}, /* cost of loading integer registers
565 in QImode, HImode and SImode.
566 Relative to reg-reg move (2). */
567 {3, 4, 3}, /* cost of storing integer registers */
568 4, /* cost of reg,reg fld/fst */
569 {4, 4, 12}, /* cost of loading fp registers
570 in SFmode, DFmode and XFmode */
571 {6, 6, 8}, /* cost of storing fp registers
572 in SFmode, DFmode and XFmode */
573 2, /* cost of moving MMX register */
574 {3, 3}, /* cost of loading MMX registers
575 in SImode and DImode */
576 {4, 4}, /* cost of storing MMX registers
577 in SImode and DImode */
578 2, /* cost of moving SSE register */
579 {4, 3, 6}, /* cost of loading SSE registers
580 in SImode, DImode and TImode */
581 {4, 4, 5}, /* cost of storing SSE registers
582 in SImode, DImode and TImode */
583 5, /* MMX or SSE register to integer */
584 64, /* size of prefetch block */
585 /* New AMD processors never drop prefetches; if they cannot be performed
586 immediately, they are queued. We set number of simultaneous prefetches
587 to a large constant to reflect this (it probably is not a good idea not
588 to limit number of prefetches at all, as their execution also takes some
590 100, /* number of parallel prefetches */
592 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
593 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
594 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
595 COSTS_N_INSNS (2), /* cost of FABS instruction. */
596 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
597 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
598 /* K8 has optimized REP instruction for medium sized blocks, but for very small
599 blocks it is better to use loop. For large blocks, libcall can do
600 non-temporal accesses and beat inline considerably. */
601 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
602 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
603 {{libcall, {{8, loop}, {24, unrolled_loop},
604 {2048, rep_prefix_4_byte}, {-1, libcall}}},
605 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}}
/* amdfam10_cost: cost table with the same row order as the tables above
   (presumably the AMD Family 10h tuning costs, judging by the name and
   by the stringop comment near the end -- TODO confirm).  The stringop
   entries are identical to those of k8_cost.
   NOTE(review): the four "MOVD ..." lines after the MMX/SSE-to-integer
   row read like the body of a timing comment whose delimiters are
   missing in this copy.  The comment that starts "New AMD processors
   never drop prefetches" is likewise not closed before the "100" row.
   The embedded line numbers skip in several places (626 -> 628,
   648 -> 650, 655 -> 657, 661 -> 663) and no closing "};" is visible.
   Confirm against the pristine file.  */
608 struct processor_costs amdfam10_cost = {
609 COSTS_N_INSNS (1), /* cost of an add instruction */
610 COSTS_N_INSNS (2), /* cost of a lea instruction */
611 COSTS_N_INSNS (1), /* variable shift costs */
612 COSTS_N_INSNS (1), /* constant shift costs */
613 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
614 COSTS_N_INSNS (4), /* HI */
615 COSTS_N_INSNS (3), /* SI */
616 COSTS_N_INSNS (4), /* DI */
617 COSTS_N_INSNS (5)}, /* other */
618 0, /* cost of multiply per each bit set */
619 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
620 COSTS_N_INSNS (35), /* HI */
621 COSTS_N_INSNS (51), /* SI */
622 COSTS_N_INSNS (83), /* DI */
623 COSTS_N_INSNS (83)}, /* other */
624 COSTS_N_INSNS (1), /* cost of movsx */
625 COSTS_N_INSNS (1), /* cost of movzx */
626 8, /* "large" insn */
628 4, /* cost for loading QImode using movzbl */
629 {3, 4, 3}, /* cost of loading integer registers
630 in QImode, HImode and SImode.
631 Relative to reg-reg move (2). */
632 {3, 4, 3}, /* cost of storing integer registers */
633 4, /* cost of reg,reg fld/fst */
634 {4, 4, 12}, /* cost of loading fp registers
635 in SFmode, DFmode and XFmode */
636 {6, 6, 8}, /* cost of storing fp registers
637 in SFmode, DFmode and XFmode */
638 2, /* cost of moving MMX register */
639 {3, 3}, /* cost of loading MMX registers
640 in SImode and DImode */
641 {4, 4}, /* cost of storing MMX registers
642 in SImode and DImode */
643 2, /* cost of moving SSE register */
644 {4, 4, 3}, /* cost of loading SSE registers
645 in SImode, DImode and TImode */
646 {4, 4, 5}, /* cost of storing SSE registers
647 in SImode, DImode and TImode */
648 3, /* MMX or SSE register to integer */
650 MOVD reg64, xmmreg Double FSTORE 4
651 MOVD reg32, xmmreg Double FSTORE 4
653 MOVD reg64, xmmreg Double FADD 3
655 MOVD reg32, xmmreg Double FADD 3
657 64, /* size of prefetch block */
658 /* New AMD processors never drop prefetches; if they cannot be performed
659 immediately, they are queued. We set number of simultaneous prefetches
660 to a large constant to reflect this (it probably is not a good idea not
661 to limit number of prefetches at all, as their execution also takes some
663 100, /* number of parallel prefetches */
665 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
666 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
667 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
668 COSTS_N_INSNS (2), /* cost of FABS instruction. */
669 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
670 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
672 /* AMDFAM10 has optimized REP instruction for medium sized blocks, but for
673 very small blocks it is better to use loop. For large blocks, libcall can
674 do non-temporal accesses and beat inline considerably. */
675 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
676 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
677 {{libcall, {{8, loop}, {24, unrolled_loop},
678 {2048, rep_prefix_4_byte}, {-1, libcall}}},
679 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}}
/* pentium4_cost: cost table with the same row order as the tables above
   (presumably the Pentium 4 tuning costs, judging by the name -- TODO
   confirm).  The first stringop entry uses loop_1_byte up to the 12
   threshold and rep_prefix_4_byte beyond it.
   NOTE(review): the embedded line numbers skip (701 -> 703, 725 -> 727,
   735 -> 737).  The second stringop entry's size list is opened but
   never closed before DUMMY_STRINGOP_ALGS, so its final row seems to be
   missing, and no closing "};" is visible.  Confirm against the
   pristine file.  */
683 struct processor_costs pentium4_cost = {
684 COSTS_N_INSNS (1), /* cost of an add instruction */
685 COSTS_N_INSNS (3), /* cost of a lea instruction */
686 COSTS_N_INSNS (4), /* variable shift costs */
687 COSTS_N_INSNS (4), /* constant shift costs */
688 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
689 COSTS_N_INSNS (15), /* HI */
690 COSTS_N_INSNS (15), /* SI */
691 COSTS_N_INSNS (15), /* DI */
692 COSTS_N_INSNS (15)}, /* other */
693 0, /* cost of multiply per each bit set */
694 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
695 COSTS_N_INSNS (56), /* HI */
696 COSTS_N_INSNS (56), /* SI */
697 COSTS_N_INSNS (56), /* DI */
698 COSTS_N_INSNS (56)}, /* other */
699 COSTS_N_INSNS (1), /* cost of movsx */
700 COSTS_N_INSNS (1), /* cost of movzx */
701 16, /* "large" insn */
703 2, /* cost for loading QImode using movzbl */
704 {4, 5, 4}, /* cost of loading integer registers
705 in QImode, HImode and SImode.
706 Relative to reg-reg move (2). */
707 {2, 3, 2}, /* cost of storing integer registers */
708 2, /* cost of reg,reg fld/fst */
709 {2, 2, 6}, /* cost of loading fp registers
710 in SFmode, DFmode and XFmode */
711 {4, 4, 6}, /* cost of storing fp registers
712 in SFmode, DFmode and XFmode */
713 2, /* cost of moving MMX register */
714 {2, 2}, /* cost of loading MMX registers
715 in SImode and DImode */
716 {2, 2}, /* cost of storing MMX registers
717 in SImode and DImode */
718 12, /* cost of moving SSE register */
719 {12, 12, 12}, /* cost of loading SSE registers
720 in SImode, DImode and TImode */
721 {2, 2, 8}, /* cost of storing SSE registers
722 in SImode, DImode and TImode */
723 10, /* MMX or SSE register to integer */
724 64, /* size of prefetch block */
725 6, /* number of parallel prefetches */
727 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
728 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
729 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
730 COSTS_N_INSNS (2), /* cost of FABS instruction. */
731 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
732 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
733 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
734 DUMMY_STRINGOP_ALGS},
735 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
737 DUMMY_STRINGOP_ALGS},
/* nocona_cost: cost table with the same row order as the tables above
   (presumably the Nocona tuning costs, judging by the name -- TODO
   confirm).  Both stringop pairs give a real second entry built on
   rep_prefix_8_byte instead of DUMMY_STRINGOP_ALGS.
   NOTE(review): the embedded line numbers skip (759 -> 761, 783 -> 785,
   794 -> 796).  The size list of the third stringop entry is opened but
   never closed before the next entry starts, so its final row seems to
   be missing, and no closing "};" is visible.  Confirm against the
   pristine file.  */
741 struct processor_costs nocona_cost = {
742 COSTS_N_INSNS (1), /* cost of an add instruction */
743 COSTS_N_INSNS (1), /* cost of a lea instruction */
744 COSTS_N_INSNS (1), /* variable shift costs */
745 COSTS_N_INSNS (1), /* constant shift costs */
746 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
747 COSTS_N_INSNS (10), /* HI */
748 COSTS_N_INSNS (10), /* SI */
749 COSTS_N_INSNS (10), /* DI */
750 COSTS_N_INSNS (10)}, /* other */
751 0, /* cost of multiply per each bit set */
752 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
753 COSTS_N_INSNS (66), /* HI */
754 COSTS_N_INSNS (66), /* SI */
755 COSTS_N_INSNS (66), /* DI */
756 COSTS_N_INSNS (66)}, /* other */
757 COSTS_N_INSNS (1), /* cost of movsx */
758 COSTS_N_INSNS (1), /* cost of movzx */
759 16, /* "large" insn */
761 4, /* cost for loading QImode using movzbl */
762 {4, 4, 4}, /* cost of loading integer registers
763 in QImode, HImode and SImode.
764 Relative to reg-reg move (2). */
765 {4, 4, 4}, /* cost of storing integer registers */
766 3, /* cost of reg,reg fld/fst */
767 {12, 12, 12}, /* cost of loading fp registers
768 in SFmode, DFmode and XFmode */
769 {4, 4, 4}, /* cost of storing fp registers
770 in SFmode, DFmode and XFmode */
771 6, /* cost of moving MMX register */
772 {12, 12}, /* cost of loading MMX registers
773 in SImode and DImode */
774 {12, 12}, /* cost of storing MMX registers
775 in SImode and DImode */
776 6, /* cost of moving SSE register */
777 {12, 12, 12}, /* cost of loading SSE registers
778 in SImode, DImode and TImode */
779 {12, 12, 12}, /* cost of storing SSE registers
780 in SImode, DImode and TImode */
781 8, /* MMX or SSE register to integer */
782 128, /* size of prefetch block */
783 8, /* number of parallel prefetches */
785 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
786 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
787 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
788 COSTS_N_INSNS (3), /* cost of FABS instruction. */
789 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
790 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
791 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
792 {libcall, {{32, loop}, {20000, rep_prefix_8_byte},
793 {100000, unrolled_loop}, {-1, libcall}}}},
794 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
796 {libcall, {{24, loop}, {64, unrolled_loop},
797 {8192, rep_prefix_8_byte}, {-1, libcall}}}}
/* core2_cost: cost table with the same row order as the tables above
   (presumably the Core 2 tuning costs, judging by the name -- TODO
   confirm).  The lea cost is COSTS_N_INSNS (1) + 1, i.e. one unit above
   the cost of an add.
   NOTE(review): the embedded line numbers skip (819 -> 821, 842 -> 844)
   and no closing "};" is visible, so rows may be missing from this
   copy -- confirm against the pristine file.  */
801 struct processor_costs core2_cost = {
802 COSTS_N_INSNS (1), /* cost of an add instruction */
803 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
804 COSTS_N_INSNS (1), /* variable shift costs */
805 COSTS_N_INSNS (1), /* constant shift costs */
806 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
807 COSTS_N_INSNS (3), /* HI */
808 COSTS_N_INSNS (3), /* SI */
809 COSTS_N_INSNS (3), /* DI */
810 COSTS_N_INSNS (3)}, /* other */
811 0, /* cost of multiply per each bit set */
812 {COSTS_N_INSNS (22), /* cost of a divide/mod for QI */
813 COSTS_N_INSNS (22), /* HI */
814 COSTS_N_INSNS (22), /* SI */
815 COSTS_N_INSNS (22), /* DI */
816 COSTS_N_INSNS (22)}, /* other */
817 COSTS_N_INSNS (1), /* cost of movsx */
818 COSTS_N_INSNS (1), /* cost of movzx */
819 8, /* "large" insn */
821 2, /* cost for loading QImode using movzbl */
822 {6, 6, 6}, /* cost of loading integer registers
823 in QImode, HImode and SImode.
824 Relative to reg-reg move (2). */
825 {4, 4, 4}, /* cost of storing integer registers */
826 2, /* cost of reg,reg fld/fst */
827 {6, 6, 6}, /* cost of loading fp registers
828 in SFmode, DFmode and XFmode */
829 {4, 4, 4}, /* cost of storing fp registers
   in SFmode, DFmode and XFmode (this row follows the fp load row, as
   in every other table in this file) */
830 2, /* cost of moving MMX register */
831 {6, 6}, /* cost of loading MMX registers
832 in SImode and DImode */
833 {4, 4}, /* cost of storing MMX registers
834 in SImode and DImode */
835 2, /* cost of moving SSE register */
836 {6, 6, 6}, /* cost of loading SSE registers
837 in SImode, DImode and TImode */
838 {4, 4, 4}, /* cost of storing SSE registers
839 in SImode, DImode and TImode */
840 2, /* MMX or SSE register to integer */
841 128, /* size of prefetch block */
842 8, /* number of parallel prefetches */
844 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
845 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
846 COSTS_N_INSNS (32), /* cost of FDIV instruction. */
847 COSTS_N_INSNS (1), /* cost of FABS instruction. */
848 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
849 COSTS_N_INSNS (58), /* cost of FSQRT instruction. */
850 {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
851 {libcall, {{32, loop}, {64, rep_prefix_4_byte},
852 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
853 {{libcall, {{8, loop}, {15, unrolled_loop},
854 {2048, rep_prefix_4_byte}, {-1, libcall}}},
855 {libcall, {{24, loop}, {32, unrolled_loop},
856 {8192, rep_prefix_8_byte}, {-1, libcall}}}}
859 /* Generic64 should produce code tuned for Nocona and K8. */
/* NOTE(review): in the two closing brace pairs the first slot is
   DUMMY_STRINGOP_ALGS (a libcall-only placeholder, see its #define),
   whereas generic32_cost puts the placeholder in the second slot --
   presumably the slots are the 32-bit and 64-bit variants; confirm
   against the declaration of struct processor_costs.  */
861 struct processor_costs generic64_cost = {
862 COSTS_N_INSNS (1), /* cost of an add instruction */
863 /* On all chips taken into consideration lea is 2 cycles and more. With
864 this cost however our current implementation of synth_mult results in
865 use of unnecessary temporary registers causing regression on several
866 SPECfp benchmarks. */
867 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
868 COSTS_N_INSNS (1), /* variable shift costs */
869 COSTS_N_INSNS (1), /* constant shift costs */
870 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
871 COSTS_N_INSNS (4), /* HI */
872 COSTS_N_INSNS (3), /* SI */
873 COSTS_N_INSNS (4), /* DI */
874 COSTS_N_INSNS (2)}, /* other */
875 0, /* cost of multiply per each bit set */
876 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
877 COSTS_N_INSNS (26), /* HI */
878 COSTS_N_INSNS (42), /* SI */
879 COSTS_N_INSNS (74), /* DI */
880 COSTS_N_INSNS (74)}, /* other */
881 COSTS_N_INSNS (1), /* cost of movsx */
882 COSTS_N_INSNS (1), /* cost of movzx */
883 8, /* "large" insn */
885 4, /* cost for loading QImode using movzbl */
886 {4, 4, 4}, /* cost of loading integer registers
887 in QImode, HImode and SImode.
888 Relative to reg-reg move (2). */
889 {4, 4, 4}, /* cost of storing integer registers */
890 4, /* cost of reg,reg fld/fst */
891 {12, 12, 12}, /* cost of loading fp registers
892 in SFmode, DFmode and XFmode */
893 {6, 6, 8}, /* cost of storing fp registers
894 in SFmode, DFmode and XFmode */
895 2, /* cost of moving MMX register */
896 {8, 8}, /* cost of loading MMX registers
897 in SImode and DImode */
898 {8, 8}, /* cost of storing MMX registers
899 in SImode and DImode */
900 2, /* cost of moving SSE register */
901 {8, 8, 8}, /* cost of loading SSE registers
902 in SImode, DImode and TImode */
903 {8, 8, 8}, /* cost of storing SSE registers
904 in SImode, DImode and TImode */
905 5, /* MMX or SSE register to integer */
906 64, /* size of prefetch block */
907 6, /* number of parallel prefetches */
908 /* Benchmarks show large regressions on K8 sixtrack benchmark when this value
909 is increased to perhaps more appropriate value of 5. */
911 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
912 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
913 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
914 COSTS_N_INSNS (8), /* cost of FABS instruction. */
915 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
916 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
917 {DUMMY_STRINGOP_ALGS,
918 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
919 {DUMMY_STRINGOP_ALGS,
920 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}}
923 /* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona and K8. */
/* NOTE(review): in the two closing brace pairs the second slot is
   DUMMY_STRINGOP_ALGS (a libcall-only placeholder, see its #define),
   whereas generic64_cost puts the placeholder in the first slot --
   presumably the slots are the 32-bit and 64-bit variants; confirm
   against the declaration of struct processor_costs.  */
925 struct processor_costs generic32_cost = {
926 COSTS_N_INSNS (1), /* cost of an add instruction */
927 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
928 COSTS_N_INSNS (1), /* variable shift costs */
929 COSTS_N_INSNS (1), /* constant shift costs */
930 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
931 COSTS_N_INSNS (4), /* HI */
932 COSTS_N_INSNS (3), /* SI */
933 COSTS_N_INSNS (4), /* DI */
934 COSTS_N_INSNS (2)}, /* other */
935 0, /* cost of multiply per each bit set */
936 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
937 COSTS_N_INSNS (26), /* HI */
938 COSTS_N_INSNS (42), /* SI */
939 COSTS_N_INSNS (74), /* DI */
940 COSTS_N_INSNS (74)}, /* other */
941 COSTS_N_INSNS (1), /* cost of movsx */
942 COSTS_N_INSNS (1), /* cost of movzx */
943 8, /* "large" insn */
945 4, /* cost for loading QImode using movzbl */
946 {4, 4, 4}, /* cost of loading integer registers
947 in QImode, HImode and SImode.
948 Relative to reg-reg move (2). */
949 {4, 4, 4}, /* cost of storing integer registers */
950 4, /* cost of reg,reg fld/fst */
951 {12, 12, 12}, /* cost of loading fp registers
952 in SFmode, DFmode and XFmode */
953 {6, 6, 8}, /* cost of storing fp registers
954 in SFmode, DFmode and XFmode */
955 2, /* cost of moving MMX register */
956 {8, 8}, /* cost of loading MMX registers
957 in SImode and DImode */
958 {8, 8}, /* cost of storing MMX registers
959 in SImode and DImode */
960 2, /* cost of moving SSE register */
961 {8, 8, 8}, /* cost of loading SSE registers
962 in SImode, DImode and TImode */
963 {8, 8, 8}, /* cost of storing SSE registers
964 in SImode, DImode and TImode */
965 5, /* MMX or SSE register to integer */
966 64, /* size of prefetch block */
967 6, /* number of parallel prefetches */
969 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
970 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
971 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
972 COSTS_N_INSNS (8), /* cost of FABS instruction. */
973 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
974 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
975 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
976 DUMMY_STRINGOP_ALGS},
977 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
978 DUMMY_STRINGOP_ALGS},
/* Cost table currently in effect; it starts out pointing at the
   Pentium table.  NOTE(review): presumably re-pointed during option
   processing (see processor_target_table in override_options) --
   confirm.  */
981 const struct processor_costs *ix86_cost = &pentium_cost;
983 /* Processor feature/optimization bitmasks.  Each single-CPU m_<NAME>
   macro is the one bit 1 << PROCESSOR_<NAME>; the multi-CPU names
   (m_K6_GEODE, m_ATHLON_K8, m_ATHLON_K8_AMDFAM10, m_GENERIC) are the
   bitwise OR of the single-CPU bits they mention.  */
984 #define m_386 (1<<PROCESSOR_I386)
985 #define m_486 (1<<PROCESSOR_I486)
986 #define m_PENT (1<<PROCESSOR_PENTIUM)
987 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
988 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
989 #define m_NOCONA (1<<PROCESSOR_NOCONA)
990 #define m_CORE2 (1<<PROCESSOR_CORE2)
992 #define m_GEODE (1<<PROCESSOR_GEODE)
993 #define m_K6 (1<<PROCESSOR_K6)
994 #define m_K6_GEODE (m_K6 | m_GEODE)
995 #define m_K8 (1<<PROCESSOR_K8)
996 #define m_ATHLON (1<<PROCESSOR_ATHLON)
997 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
998 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
999 #define m_ATHLON_K8_AMDFAM10 (m_K8 | m_ATHLON | m_AMDFAM10)
1001 #define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
1002 #define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
1004 /* Generic instruction choice should be common subset of supported CPUs
1005 (PPro/PENT4/NOCONA/CORE2/Athlon/K8). */
1006 #define m_GENERIC (m_GENERIC32 | m_GENERIC64)
1008 /* Feature tests against the various tunings.  The table has one entry
   per X86_TUNE_* index, in the order of the comments below; each entry
   is a mask built from the m_* processor bits.  */
1009 unsigned int ix86_tune_features[X86_TUNE_LAST] = {
1010 /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
1011 negatively, so enabling for Generic64 seems like good code size
1012 tradeoff. We can't enable it for 32bit generic because it does not
1013 work well with PPro based chips. */
1014 m_386 | m_K6_GEODE | m_ATHLON_K8_AMDFAM10 | m_CORE2 | m_GENERIC64,
1016 /* X86_TUNE_PUSH_MEMORY */
1017 m_386 | m_K6_GEODE | m_ATHLON_K8_AMDFAM10 | m_PENT4
1018 | m_NOCONA | m_CORE2 | m_GENERIC,
1020 /* X86_TUNE_ZERO_EXTEND_WITH_AND */
1023 /* X86_TUNE_USE_BIT_TEST */
1026 /* X86_TUNE_UNROLL_STRLEN */
1027 m_486 | m_PENT | m_PPRO | m_ATHLON_K8_AMDFAM10 | m_K6 | m_CORE2 | m_GENERIC,
1029 /* X86_TUNE_DEEP_BRANCH_PREDICTION */
1030 m_PPRO | m_K6_GEODE | m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_GENERIC,
1032 /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
1033 on simulation result. But after P4 was made, no performance benefit
1034 was observed with branch hints. It also increases the code size.
1035 As a result, icc never generates branch hints. */
1038 /* X86_TUNE_DOUBLE_WITH_ADD */
1041 /* X86_TUNE_USE_SAHF */
1042 m_PPRO | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_PENT4
1043 | m_NOCONA | m_CORE2 | m_GENERIC,
1045 /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
1046 partial dependencies. */
1047 m_ATHLON_K8_AMDFAM10 | m_PPRO | m_PENT4 | m_NOCONA
1048 | m_CORE2 | m_GENERIC | m_GEODE /* m_386 | m_K6 */,
1050 /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
1051 register stalls on Generic32 compilation setting as well. However
1052 in current implementation the partial register stalls are not eliminated
1053 very well - they can be introduced via subregs synthesized by combine
1054 and can happen in caller/callee saving sequences. Because this option
1055 pays back little on PPro based chips and is in conflict with partial reg
1056 dependencies used by Athlon/P4 based chips, it is better to leave it off
1057 for generic32 for now. */
1060 /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
1061 m_CORE2 | m_GENERIC,
1063 /* X86_TUNE_USE_HIMODE_FIOP */
1064 m_386 | m_486 | m_K6_GEODE,
1066 /* X86_TUNE_USE_SIMODE_FIOP */
1067 ~(m_PPRO | m_ATHLON_K8_AMDFAM10 | m_PENT | m_CORE2 | m_GENERIC),
1069 /* X86_TUNE_USE_MOV0 */
1072 /* X86_TUNE_USE_CLTD */
1073 ~(m_PENT | m_K6 | m_CORE2 | m_GENERIC),
1075 /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */
1078 /* X86_TUNE_SPLIT_LONG_MOVES */
1081 /* X86_TUNE_READ_MODIFY_WRITE */
1084 /* X86_TUNE_READ_MODIFY */
1087 /* X86_TUNE_PROMOTE_QIMODE */
1088 m_K6_GEODE | m_PENT | m_386 | m_486 | m_ATHLON_K8_AMDFAM10 | m_CORE2
1089 | m_GENERIC /* | m_PENT4 ? */,
1091 /* X86_TUNE_FAST_PREFIX */
1092 ~(m_PENT | m_486 | m_386),
1094 /* X86_TUNE_SINGLE_STRINGOP */
1095 m_386 | m_PENT4 | m_NOCONA,
1097 /* X86_TUNE_QIMODE_MATH */
1100 /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
1101 register stalls. Just like X86_TUNE_PARTIAL_REG_STALL this option
1102 might be considered for Generic32 if our scheme for avoiding partial
1103 stalls was more effective. */
1106 /* X86_TUNE_PROMOTE_QI_REGS */
1109 /* X86_TUNE_PROMOTE_HI_REGS */
1112 /* X86_TUNE_ADD_ESP_4: Enable if add/sub is preferred over 1/2 push/pop. */
1113 m_ATHLON_K8_AMDFAM10 | m_K6_GEODE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1115 /* X86_TUNE_ADD_ESP_8 */
1116 m_ATHLON_K8_AMDFAM10 | m_PPRO | m_K6_GEODE | m_386
1117 | m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1119 /* X86_TUNE_SUB_ESP_4 */
1120 m_ATHLON_K8_AMDFAM10 | m_PPRO | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1122 /* X86_TUNE_SUB_ESP_8 */
1123 m_ATHLON_K8_AMDFAM10 | m_PPRO | m_386 | m_486
1124 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1126 /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
1127 for DFmode copies */
1128 ~(m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
1129 | m_GENERIC | m_GEODE),
1131 /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
1132 m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1134 /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
1135 conflict here in between PPro/Pentium4 based chips that treat 128bit
1136 SSE registers as single units versus K8 based chips that divide SSE
1137 registers to two 64bit halves. This knob promotes all store destinations
1138 to be 128bit to allow register renaming on 128bit SSE units, but usually
1139 results in one extra microop on 64bit SSE units. Experimental results
1140 show that disabling this option on P4 brings over 20% SPECfp regression,
1141 while enabling it on K8 brings roughly 2.4% regression that can be partly
1142 masked by careful scheduling of moves. */
1143 m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC | m_AMDFAM10,
1145 /* X86_TUNE_SSE_UNALIGNED_MOVE_OPTIMAL */
1148 /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
1149 are resolved on SSE register parts instead of whole registers, so we may
1150 maintain just lower part of scalar values in proper format leaving the
1151 upper part undefined. */
1154 /* X86_TUNE_SSE_TYPELESS_STORES */
1155 m_ATHLON_K8_AMDFAM10,
1157 /* X86_TUNE_SSE_LOAD0_BY_PXOR */
1158 m_PPRO | m_PENT4 | m_NOCONA,
1160 /* X86_TUNE_MEMORY_MISMATCH_STALL */
1161 m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1163 /* X86_TUNE_PROLOGUE_USING_MOVE */
1164 m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC,
1166 /* X86_TUNE_EPILOGUE_USING_MOVE */
1167 m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC,
1169 /* X86_TUNE_SHIFT1 */
1172 /* X86_TUNE_USE_FFREEP */
1173 m_ATHLON_K8_AMDFAM10,
1175 /* X86_TUNE_INTER_UNIT_MOVES */
1176 ~(m_ATHLON_K8_AMDFAM10 | m_GENERIC),
1178 /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
1179 than 4 branch instructions in the 16 byte window. */
1180 m_PPRO | m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1182 /* X86_TUNE_SCHEDULE */
1183 m_PPRO | m_ATHLON_K8_AMDFAM10 | m_K6_GEODE | m_PENT | m_CORE2 | m_GENERIC,
1185 /* X86_TUNE_USE_BT */
1186 m_ATHLON_K8_AMDFAM10,
1188 /* X86_TUNE_USE_INCDEC */
1189 ~(m_PENT4 | m_NOCONA | m_GENERIC),
1191 /* X86_TUNE_PAD_RETURNS */
1192 m_ATHLON_K8_AMDFAM10 | m_CORE2 | m_GENERIC,
1194 /* X86_TUNE_EXT_80387_CONSTANTS */
1195 m_K6_GEODE | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC,
1197 /* X86_TUNE_SHORTEN_X87_SSE */
1200 /* X86_TUNE_AVOID_VECTOR_DECODE */
1203 /* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have same latency for HImode
1204 and SImode multiply, but 386 and 486 do HImode multiply faster. */
1207 /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of 32-bit constant and memory is
1208 vector path on AMD machines. */
1209 m_K8 | m_GENERIC64 | m_AMDFAM10,
1211 /* X86_TUNE_SLOW_IMUL_IMM8: Imul of 8-bit constant is vector path on AMD
1213 m_K8 | m_GENERIC64 | m_AMDFAM10,
1215 /* X86_TUNE_MOVE_M1_VIA_OR: On pentiums, it is faster to load -1 via OR
1219 /* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is,
1220 but one byte longer. */
1223 /* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector decoded with memory
1224 operand that cannot be represented using a modRM byte. The XOR
1225 replacement is long decoded, so this split helps here as well. */
1229 /* Feature tests against the various architecture variations.  The
   table has one entry per X86_ARCH_* index, in the order of the
   comments below; each entry is a mask built from the m_* processor
   bits.  */
1230 unsigned int ix86_arch_features[X86_ARCH_LAST] = {
1231 /* X86_ARCH_CMOVE: Conditional move was added for pentiumpro. */
1232 ~(m_386 | m_486 | m_PENT | m_K6),
1234 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
1237 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
1240 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
1243 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
/* Mask of m_* processor bits.  NOTE(review): by its name, the tunings
   for which outgoing arguments are accumulated by default -- confirm
   at the use site.  */
1247 static const unsigned int x86_accumulate_outgoing_args
1248 = m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC;
/* Mask of m_* processor bits.  NOTE(review): by its name, the
   architectures on which the fancy 387 math instructions are always
   assumed available -- confirm at the use site.  */
1250 static const unsigned int x86_arch_always_fancy_math_387
1251 = m_PENT | m_PPRO | m_ATHLON_K8_AMDFAM10 | m_PENT4
1252 | m_NOCONA | m_CORE2 | m_GENERIC;
1254 static enum stringop_alg stringop_alg = no_stringop;
1256 /* In case the average insn count for single function invocation is
1257 lower than this constant, emit fast (but longer) prologue and epilogue code. */
1259 #define FAST_PROLOGUE_INSN_COUNT 20
1261 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
1262 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
1263 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
1264 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
1266 /* Array of the smallest class containing reg number REGNO, indexed by
1267 REGNO. Used by REGNO_REG_CLASS in i386.h. */
1269 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
1271 /* ax, dx, cx, bx */
1272 AREG, DREG, CREG, BREG,
1273 /* si, di, bp, sp */
1274 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
1276 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
1277 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
1280 /* flags, fpsr, fpcr, frame */
1281 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
1283 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1286 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
1289 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1290 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1291 /* SSE REX registers */
1292 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1296 /* The "default" register map used in 32bit mode.  One entry per GCC
   hard register (FIRST_PSEUDO_REGISTER in all), grouped as labelled on
   each row.  NOTE(review): -1 presumably marks a register that has no
   debugger number in this mode -- confirm at the use site.  */
1298 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
1300 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
1301 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
1302 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1303 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
1304 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
1305 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1306 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
/* GCC hard register numbers of RDI, RSI, RDX, RCX, R8 and R9, in that
   order.  NOTE(review): by its name, the sequence in which integer
   arguments are assigned to registers on x86-64 -- confirm at the use
   site.  */
1309 static int const x86_64_int_parameter_registers[6] =
1311 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
1312 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
/* GCC hard register numbers of RCX, RDX, R8 and R9, in that order.
   NOTE(review): by its name, the integer argument registers of the
   Microsoft x86-64 ABI -- confirm at the use site.  */
1315 static int const x86_64_ms_abi_int_parameter_registers[4] =
1317 2 /*RCX*/, 1 /*RDX*/,
1318 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
/* GCC hard register numbers of RAX, RDX, RDI and RSI, in that order.
   NOTE(review): by its name, the registers used for integer return
   values on x86-64 -- confirm at the use site.  */
1321 static int const x86_64_int_return_registers[4] =
1323 0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
1326 /* The "default" register map used in 64bit mode.  One entry per GCC
   hard register (FIRST_PSEUDO_REGISTER in all), grouped as labelled on
   each row; unlike the 32bit map, the extended integer and SSE
   registers have numbers here.  NOTE(review): -1 presumably marks a
   register without a debugger number -- confirm at the use site.  */
1327 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
1329 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
1330 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
1331 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1332 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
1333 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
1334 8,9,10,11,12,13,14,15, /* extended integer registers */
1335 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
1338 /* Define the register numbers to be used in Dwarf debugging information.
1339 The SVR4 reference port C compiler uses the following register numbers
1340 in its Dwarf output code:
1341 0 for %eax (gcc regno = 0)
1342 1 for %ecx (gcc regno = 2)
1343 2 for %edx (gcc regno = 1)
1344 3 for %ebx (gcc regno = 3)
1345 4 for %esp (gcc regno = 7)
1346 5 for %ebp (gcc regno = 6)
1347 6 for %esi (gcc regno = 4)
1348 7 for %edi (gcc regno = 5)
1349 The following three DWARF register numbers are never generated by
1350 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
1351 believes these numbers have these meanings.
1352 8 for %eip (no gcc equivalent)
1353 9 for %eflags (gcc regno = 17)
1354 10 for %trapno (no gcc equivalent)
1355 It is not at all clear how we should number the FP stack registers
1356 for the x86 architecture. If the version of SDB on x86/svr4 were
1357 a bit less brain dead with respect to floating-point then we would
1358 have a precedent to follow with respect to DWARF register numbers
1359 for x86 FP registers, but the SDB on x86/svr4 is so completely
1360 broken with respect to FP registers that it is hardly worth thinking
1361 of it as something to strive for compatibility with.
1362 The version of x86/svr4 SDB I have at the moment does (partially)
1363 seem to believe that DWARF register number 11 is associated with
1364 the x86 register %st(0), but that's about all. Higher DWARF
1365 register numbers don't seem to be associated with anything in
1366 particular, and even for DWARF regno 11, SDB only seems to under-
1367 stand that it should say that a variable lives in %st(0) (when
1368 asked via an `=' command) if we said it was in DWARF regno 11,
1369 but SDB still prints garbage when asked for the value of the
1370 variable in question (via a `/' command).
1371 (Also note that the labels SDB prints for various FP stack regs
1372 when doing an `x' command are all wrong.)
1373 Note that these problems generally don't affect the native SVR4
1374 C compiler because it doesn't allow the use of -O with -g and
1375 because when it is *not* optimizing, it allocates a memory
1376 location for each floating-point variable, and the memory
1377 location is what gets described in the DWARF AT_location
1378 attribute for the variable in question.
1379 Regardless of the severe mental illness of the x86/svr4 SDB, we
1380 do something sensible here and we use the following DWARF
1381 register numbers. Note that these are all stack-top-relative numbers.
1383 11 for %st(0) (gcc regno = 8)
1384 12 for %st(1) (gcc regno = 9)
1385 13 for %st(2) (gcc regno = 10)
1386 14 for %st(3) (gcc regno = 11)
1387 15 for %st(4) (gcc regno = 12)
1388 16 for %st(5) (gcc regno = 13)
1389 17 for %st(6) (gcc regno = 14)
1390 18 for %st(7) (gcc regno = 15) */
1392 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
1394 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
1395 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
1396 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1397 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
1398 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
1399 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1400 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1403 /* Test and compare insns in i386.md store the information needed to
1404 generate branch and scc insns here. */
1406 rtx ix86_compare_op0 = NULL_RTX;
1407 rtx ix86_compare_op1 = NULL_RTX;
1408 rtx ix86_compare_emitted = NULL_RTX;
1410 /* Size of the register save area. */
1411 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
1413 /* Define the structure for the machine field in struct function. */
1415 struct stack_local_entry GTY(())
1417 unsigned short mode;
1420 struct stack_local_entry *next;
1423 /* Structure describing stack frame layout.
1424 Stack grows downward:
1430 saved frame pointer if frame_pointer_needed
1431 <- HARD_FRAME_POINTER
1436 [va_arg registers] (
1437 > to_allocate <- FRAME_POINTER
1447 HOST_WIDE_INT frame;
1449 int outgoing_arguments_size;
1452 HOST_WIDE_INT to_allocate;
1453 /* The offsets relative to ARG_POINTER. */
1454 HOST_WIDE_INT frame_pointer_offset;
1455 HOST_WIDE_INT hard_frame_pointer_offset;
1456 HOST_WIDE_INT stack_pointer_offset;
1458 /* When save_regs_using_mov is set, emit prologue using
1459 move instead of push instructions. */
1460 bool save_regs_using_mov;
1463 /* Code model option. */
1464 enum cmodel ix86_cmodel;
1466 enum asm_dialect ix86_asm_dialect = ASM_ATT;
1468 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
1470 /* Which unit we are generating floating point math for. */
1471 enum fpmath_unit ix86_fpmath;
1473 /* Which cpu are we scheduling for. */
1474 enum processor_type ix86_tune;
1476 /* Which instruction set architecture to use. */
1477 enum processor_type ix86_arch;
1479 /* true if sse prefetch instruction is not NOOP. */
1480 int x86_prefetch_sse;
1482 /* ix86_regparm_string as a number */
1483 static int ix86_regparm;
1485 /* -mstackrealign option */
1486 extern int ix86_force_align_arg_pointer;
1487 static const char ix86_force_align_arg_pointer_string[] = "force_align_arg_pointer";
1489 /* Preferred alignment for stack boundary in bits. */
1490 unsigned int ix86_preferred_stack_boundary;
1492 /* Values 1-5: see jump.c */
1493 int ix86_branch_cost;
1495 /* Variables which are this size or smaller are put in the data/bss
1496 or ldata/lbss sections. */
1498 int ix86_section_threshold = 65536;
1500 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
1501 char internal_label_prefix[16];
1502 int internal_label_prefix_len;
1504 /* Fence to use after loop using movnt. */
1507 /* Register class used for passing given 64bit part of the argument.
1508 These represent classes as documented by the PS ABI, with the exception
1509 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
1510 use SF or DFmode move instead of DImode to avoid reformatting penalties.
1512 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
1513 whenever possible (upper half does contain padding). */
1514 enum x86_64_reg_class
1517 X86_64_INTEGER_CLASS,
1518 X86_64_INTEGERSI_CLASS,
1525 X86_64_COMPLEX_X87_CLASS,
1528 static const char * const x86_64_reg_class_name[] =
1530 "no", "integer", "integerSI", "sse", "sseSF", "sseDF",
1531 "sseup", "x87", "x87up", "cplx87", "no"
1534 #define MAX_CLASSES 4
1536 /* Table of constants used by fldpi, fldln2, etc.... */
1537 static REAL_VALUE_TYPE ext_80387_constants_table [5];
1538 static bool ext_80387_constants_init = 0;
1541 static struct machine_function * ix86_init_machine_status (void);
1542 static rtx ix86_function_value (tree, tree, bool);
1543 static int ix86_function_regparm (tree, tree);
1544 static void ix86_compute_frame_layout (struct ix86_frame *);
1545 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
1549 /* The svr4 ABI for the i386 says that records and unions are returned in memory. */
1551 #ifndef DEFAULT_PCC_STRUCT_RETURN
1552 #define DEFAULT_PCC_STRUCT_RETURN 1
1555 /* Bit flags that specify the ISA we are compiling for. */
1556 int ix86_isa_flags = TARGET_64BIT_DEFAULT | TARGET_SUBTARGET_ISA_DEFAULT;
1558 /* A mask of ix86_isa_flags that includes bit X if X
1559 was set or cleared on the command line. */
1560 static int ix86_isa_flags_explicit;
1562 /* Define a set of ISAs which aren't available for a given ISA. MMX
1563 and SSE ISAs are handled separately.
   Each <ISA>_UNSET mask is built as the next ISA in the chain OR-ed with
   that ISA's own _UNSET mask, so clearing ~<ISA>_UNSET from
   ix86_isa_flags removes every extension layered above <ISA>.  The
   chains end at 3DNOW_A (MMX chain) and SSE4A (SSE chain).  */
1565 #define OPTION_MASK_ISA_MMX_UNSET \
1566 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_3DNOW_UNSET)
1567 #define OPTION_MASK_ISA_3DNOW_UNSET OPTION_MASK_ISA_3DNOW_A
1569 #define OPTION_MASK_ISA_SSE_UNSET \
1570 (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE2_UNSET)
1571 #define OPTION_MASK_ISA_SSE2_UNSET \
1572 (OPTION_MASK_ISA_SSE3 | OPTION_MASK_ISA_SSE3_UNSET)
1573 #define OPTION_MASK_ISA_SSE3_UNSET \
1574 (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSSE3_UNSET)
1575 #define OPTION_MASK_ISA_SSSE3_UNSET \
1576 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4_1_UNSET)
1577 #define OPTION_MASK_ISA_SSE4_1_UNSET \
1578 (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_SSE4_2_UNSET)
1579 #define OPTION_MASK_ISA_SSE4_2_UNSET OPTION_MASK_ISA_SSE4A
1581 /* SSE4 includes both SSE4.1 and SSE4.2. -msse4 should be the same
1582 as -msse4.1 -msse4.2. -mno-sse4 should be the same as -mno-sse4.1. */
1583 #define OPTION_MASK_ISA_SSE4 \
1584 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4_2)
1585 #define OPTION_MASK_ISA_SSE4_UNSET OPTION_MASK_ISA_SSE4_1_UNSET
1587 #define OPTION_MASK_ISA_SSE4A_UNSET OPTION_MASK_ISA_SSE4
1589 /* Implement TARGET_HANDLE_OPTION. */
/* For every ISA option handled here the affected bits are recorded in
   ix86_isa_flags_explicit; the clearing statements remove the option's
   whole *_UNSET set from ix86_isa_flags, and the same set is recorded
   as explicit.
   NOTE(review): the case labels and the tests on VALUE are not visible
   in this listing -- presumably the clearing statements run for the
   negative (-mno-*) form of each option; confirm against the full
   source.  */
1592 ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
1597 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX;
1600 ix86_isa_flags &= ~OPTION_MASK_ISA_MMX_UNSET;
1601 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_UNSET;
1606 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW;
1609 ix86_isa_flags &= ~OPTION_MASK_ISA_3DNOW_UNSET;
1610 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_UNSET;
1618 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE;
1621 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE_UNSET;
1622 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_UNSET;
1627 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2;
1630 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE2_UNSET;
1631 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_UNSET;
1636 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3;
1639 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE3_UNSET;
1640 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_UNSET;
1645 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3;
1648 ix86_isa_flags &= ~OPTION_MASK_ISA_SSSE3_UNSET;
1649 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_UNSET;
1654 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1;
1657 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_1_UNSET;
1658 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_UNSET;
1663 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2;
1666 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_2_UNSET;
1667 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_UNSET;
1672 ix86_isa_flags |= OPTION_MASK_ISA_SSE4;
1673 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4;
1677 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_UNSET;
1678 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_UNSET;
1682 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A;
1685 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4A_UNSET;
1686 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_UNSET;
1695 /* Sometimes certain combinations of command options do not make
1696 sense on a particular target machine. You can define a macro
1697 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1698 defined, is executed once just after all the command options have been parsed.
1701 Don't use this macro to turn on various extra optimizations for
1702 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1705 override_options (void)
1708 int ix86_tune_defaulted = 0;
1709 int ix86_arch_specified = 0;
1710 unsigned int ix86_arch_mask, ix86_tune_mask;
1712 /* Comes from final.c -- no real reason to change it. */
1713 #define MAX_CODE_ALIGN 16
1717 const struct processor_costs *cost; /* Processor costs */
1718 const int align_loop; /* Default alignments. */
1719 const int align_loop_max_skip;
1720 const int align_jump;
1721 const int align_jump_max_skip;
1722 const int align_func;
1724 const processor_target_table[PROCESSOR_max] =
1726 {&i386_cost, 4, 3, 4, 3, 4},
1727 {&i486_cost, 16, 15, 16, 15, 16},
1728 {&pentium_cost, 16, 7, 16, 7, 16},
1729 {&pentiumpro_cost, 16, 15, 16, 10, 16},
1730 {&geode_cost, 0, 0, 0, 0, 0},
1731 {&k6_cost, 32, 7, 32, 7, 32},
1732 {&athlon_cost, 16, 7, 16, 7, 16},
1733 {&pentium4_cost, 0, 0, 0, 0, 0},
1734 {&k8_cost, 16, 7, 16, 7, 16},
1735 {&nocona_cost, 0, 0, 0, 0, 0},
1736 {&core2_cost, 16, 10, 16, 10, 16},
1737 {&generic32_cost, 16, 7, 16, 7, 16},
1738 {&generic64_cost, 16, 10, 16, 10, 16},
1739 {&amdfam10_cost, 32, 24, 32, 7, 32}
1742 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
1749 PTA_PREFETCH_SSE = 1 << 4,
1751 PTA_3DNOW_A = 1 << 6,
1755 PTA_POPCNT = 1 << 10,
1757 PTA_SSE4A = 1 << 12,
1758 PTA_NO_SAHF = 1 << 13,
1759 PTA_SSE4_1 = 1 << 14,
1760 PTA_SSE4_2 = 1 << 15
1765 const char *const name; /* processor name or nickname. */
1766 const enum processor_type processor;
1767 const unsigned /*enum pta_flags*/ flags;
1769 const processor_alias_table[] =
1771 {"i386", PROCESSOR_I386, 0},
1772 {"i486", PROCESSOR_I486, 0},
1773 {"i586", PROCESSOR_PENTIUM, 0},
1774 {"pentium", PROCESSOR_PENTIUM, 0},
1775 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
1776 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
1777 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1778 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1779 {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE},
1780 {"i686", PROCESSOR_PENTIUMPRO, 0},
1781 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
1782 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
1783 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE},
1784 {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE},
1785 {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_SSE2},
1786 {"pentium4", PROCESSOR_PENTIUM4, PTA_MMX |PTA_SSE | PTA_SSE2},
1787 {"pentium4m", PROCESSOR_PENTIUM4, PTA_MMX | PTA_SSE | PTA_SSE2},
1788 {"prescott", PROCESSOR_NOCONA, PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3},
1789 {"nocona", PROCESSOR_NOCONA, (PTA_64BIT
1790 | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
1791 | PTA_CX16 | PTA_NO_SAHF)},
1792 {"core2", PROCESSOR_CORE2, (PTA_64BIT
1793 | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
1796 {"geode", PROCESSOR_GEODE, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
1797 |PTA_PREFETCH_SSE)},
1798 {"k6", PROCESSOR_K6, PTA_MMX},
1799 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1800 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1801 {"athlon", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
1802 | PTA_PREFETCH_SSE)},
1803 {"athlon-tbird", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
1804 | PTA_PREFETCH_SSE)},
1805 {"athlon-4", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
1807 {"athlon-xp", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
1809 {"athlon-mp", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
1811 {"x86-64", PROCESSOR_K8, (PTA_64BIT
1812 | PTA_MMX | PTA_SSE | PTA_SSE2
1814 {"k8", PROCESSOR_K8, (PTA_64BIT
1815 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
1816 | PTA_SSE | PTA_SSE2
1818 {"k8-sse3", PROCESSOR_K8, (PTA_64BIT
1819 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
1820 | PTA_SSE | PTA_SSE2 | PTA_SSE3
1822 {"opteron", PROCESSOR_K8, (PTA_64BIT
1823 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
1824 | PTA_SSE | PTA_SSE2
1826 {"opteron-sse3", PROCESSOR_K8, (PTA_64BIT
1827 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
1828 | PTA_SSE | PTA_SSE2 | PTA_SSE3
1830 {"athlon64", PROCESSOR_K8, (PTA_64BIT
1831 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
1832 | PTA_SSE | PTA_SSE2
1834 {"athlon64-sse3", PROCESSOR_K8, (PTA_64BIT
1835 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
1836 | PTA_SSE | PTA_SSE2 | PTA_SSE3
1838 {"athlon-fx", PROCESSOR_K8, (PTA_64BIT
1839 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
1840 | PTA_SSE | PTA_SSE2
1842 {"amdfam10", PROCESSOR_AMDFAM10, (PTA_64BIT
1843 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
1844 | PTA_SSE | PTA_SSE2 | PTA_SSE3
1846 | PTA_CX16 | PTA_ABM)},
1847 {"barcelona", PROCESSOR_AMDFAM10, (PTA_64BIT
1848 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
1849 | PTA_SSE | PTA_SSE2 | PTA_SSE3
1851 | PTA_CX16 | PTA_ABM)},
1852 {"generic32", PROCESSOR_GENERIC32, 0 /* flags are only used for -march switch. */ },
1853 {"generic64", PROCESSOR_GENERIC64, PTA_64BIT /* flags are only used for -march switch. */ },
1856 int const pta_size = ARRAY_SIZE (processor_alias_table);
1858 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1859 SUBTARGET_OVERRIDE_OPTIONS;
1862 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
1863 SUBSUBTARGET_OVERRIDE_OPTIONS;
1866 /* On Mach-O, -fPIC is the default for x86_64. */
1867 if (TARGET_MACHO && TARGET_64BIT)
1870 /* Set the default values for switches whose default depends on TARGET_64BIT
1871 in case they weren't overwritten by command line options. */
1874 /* Mach-O doesn't support omitting the frame pointer for now. */
1875 if (flag_omit_frame_pointer == 2)
1876 flag_omit_frame_pointer = (TARGET_MACHO ? 0 : 1);
1877 if (flag_asynchronous_unwind_tables == 2)
1878 flag_asynchronous_unwind_tables = 1;
1879 if (flag_pcc_struct_return == 2)
1880 flag_pcc_struct_return = 0;
1884 if (flag_omit_frame_pointer == 2)
1885 flag_omit_frame_pointer = 0;
1886 if (flag_asynchronous_unwind_tables == 2)
1887 flag_asynchronous_unwind_tables = 0;
1888 if (flag_pcc_struct_return == 2)
1889 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
1892 /* Need to check -mtune=generic first. */
1893 if (ix86_tune_string)
1895 if (!strcmp (ix86_tune_string, "generic")
1896 || !strcmp (ix86_tune_string, "i686")
1897 /* As special support for cross compilers we read -mtune=native
1898 as -mtune=generic. With native compilers we won't see the
1899 -mtune=native, as it was changed by the driver. */
1900 || !strcmp (ix86_tune_string, "native"))
1903 ix86_tune_string = "generic64";
1905 ix86_tune_string = "generic32";
1907 else if (!strncmp (ix86_tune_string, "generic", 7))
1908 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
1912 if (ix86_arch_string)
1913 ix86_tune_string = ix86_arch_string;
1914 if (!ix86_tune_string)
1916 ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT];
1917 ix86_tune_defaulted = 1;
1920 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
1921 need to use a sensible tune option. */
1922 if (!strcmp (ix86_tune_string, "generic")
1923 || !strcmp (ix86_tune_string, "x86-64")
1924 || !strcmp (ix86_tune_string, "i686"))
1927 ix86_tune_string = "generic64";
1929 ix86_tune_string = "generic32";
1932 if (ix86_stringop_string)
1934 if (!strcmp (ix86_stringop_string, "rep_byte"))
1935 stringop_alg = rep_prefix_1_byte;
1936 else if (!strcmp (ix86_stringop_string, "libcall"))
1937 stringop_alg = libcall;
1938 else if (!strcmp (ix86_stringop_string, "rep_4byte"))
1939 stringop_alg = rep_prefix_4_byte;
1940 else if (!strcmp (ix86_stringop_string, "rep_8byte"))
1941 stringop_alg = rep_prefix_8_byte;
1942 else if (!strcmp (ix86_stringop_string, "byte_loop"))
1943 stringop_alg = loop_1_byte;
1944 else if (!strcmp (ix86_stringop_string, "loop"))
1945 stringop_alg = loop;
1946 else if (!strcmp (ix86_stringop_string, "unrolled_loop"))
1947 stringop_alg = unrolled_loop;
1949 error ("bad value (%s) for -mstringop-strategy= switch", ix86_stringop_string);
1951 if (!strcmp (ix86_tune_string, "x86-64"))
1952 warning (OPT_Wdeprecated, "-mtune=x86-64 is deprecated. Use -mtune=k8 or "
1953 "-mtune=generic instead as appropriate.");
1955 if (!ix86_arch_string)
1956 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
1958 ix86_arch_specified = 1;
1960 if (!strcmp (ix86_arch_string, "generic"))
1961 error ("generic CPU can be used only for -mtune= switch");
1962 if (!strncmp (ix86_arch_string, "generic", 7))
1963 error ("bad value (%s) for -march= switch", ix86_arch_string);
1965 if (ix86_cmodel_string != 0)
1967 if (!strcmp (ix86_cmodel_string, "small"))
1968 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1969 else if (!strcmp (ix86_cmodel_string, "medium"))
1970 ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
1971 else if (!strcmp (ix86_cmodel_string, "large"))
1972 ix86_cmodel = flag_pic ? CM_LARGE_PIC : CM_LARGE;
1974 error ("code model %s does not support PIC mode", ix86_cmodel_string);
1975 else if (!strcmp (ix86_cmodel_string, "32"))
1976 ix86_cmodel = CM_32;
1977 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
1978 ix86_cmodel = CM_KERNEL;
1980 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
1984 /* For TARGET_64BIT_MS_ABI, force pic on, in order to enable the
1985 use of rip-relative addressing. This eliminates fixups that
1986 would otherwise be needed if this object is to be placed in a
1987 DLL, and is essentially just as efficient as direct addressing. */
1988 if (TARGET_64BIT_MS_ABI)
1989 ix86_cmodel = CM_SMALL_PIC, flag_pic = 1;
1990 else if (TARGET_64BIT)
1991 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1993 ix86_cmodel = CM_32;
1995 if (ix86_asm_string != 0)
1998 && !strcmp (ix86_asm_string, "intel"))
1999 ix86_asm_dialect = ASM_INTEL;
2000 else if (!strcmp (ix86_asm_string, "att"))
2001 ix86_asm_dialect = ASM_ATT;
2003 error ("bad value (%s) for -masm= switch", ix86_asm_string);
2005 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
2006 error ("code model %qs not supported in the %s bit mode",
2007 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
2008 if ((TARGET_64BIT != 0) != ((ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
2009 sorry ("%i-bit mode not compiled in",
2010 (ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
2012 for (i = 0; i < pta_size; i++)
2013 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
2015 ix86_arch = processor_alias_table[i].processor;
2016 /* Default cpu tuning to the architecture. */
2017 ix86_tune = ix86_arch;
2019 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
2020 error ("CPU you selected does not support x86-64 "
2023 if (processor_alias_table[i].flags & PTA_MMX
2024 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
2025 ix86_isa_flags |= OPTION_MASK_ISA_MMX;
2026 if (processor_alias_table[i].flags & PTA_3DNOW
2027 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
2028 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
2029 if (processor_alias_table[i].flags & PTA_3DNOW_A
2030 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
2031 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
2032 if (processor_alias_table[i].flags & PTA_SSE
2033 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
2034 ix86_isa_flags |= OPTION_MASK_ISA_SSE;
2035 if (processor_alias_table[i].flags & PTA_SSE2
2036 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
2037 ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
2038 if (processor_alias_table[i].flags & PTA_SSE3
2039 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
2040 ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
2041 if (processor_alias_table[i].flags & PTA_SSSE3
2042 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
2043 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
2044 if (processor_alias_table[i].flags & PTA_SSE4_1
2045 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
2046 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
2047 if (processor_alias_table[i].flags & PTA_SSE4_2
2048 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
2049 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
2050 if (processor_alias_table[i].flags & PTA_SSE4A
2051 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
2052 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
2054 if (processor_alias_table[i].flags & PTA_ABM)
2056 if (processor_alias_table[i].flags & PTA_CX16)
2057 x86_cmpxchg16b = true;
2058 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM))
2060 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
2061 x86_prefetch_sse = true;
2062 if (!(TARGET_64BIT && (processor_alias_table[i].flags & PTA_NO_SAHF)))
2069 error ("bad value (%s) for -march= switch", ix86_arch_string);
2071 ix86_arch_mask = 1u << ix86_arch;
2072 for (i = 0; i < X86_ARCH_LAST; ++i)
2073 ix86_arch_features[i] &= ix86_arch_mask;
2075 for (i = 0; i < pta_size; i++)
2076 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
2078 ix86_tune = processor_alias_table[i].processor;
2079 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
2081 if (ix86_tune_defaulted)
2083 ix86_tune_string = "x86-64";
2084 for (i = 0; i < pta_size; i++)
2085 if (! strcmp (ix86_tune_string,
2086 processor_alias_table[i].name))
2088 ix86_tune = processor_alias_table[i].processor;
2091 error ("CPU you selected does not support x86-64 "
2094 /* Intel CPUs have always interpreted SSE prefetch instructions as
2095 NOPs; so, we can enable SSE prefetch instructions even when
2096 -mtune (rather than -march) points us to a processor that has them.
2097 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
2098 higher processors. */
2100 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
2101 x86_prefetch_sse = true;
2105 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
2107 ix86_tune_mask = 1u << ix86_tune;
2108 for (i = 0; i < X86_TUNE_LAST; ++i)
2109 ix86_tune_features[i] &= ix86_tune_mask;
2112 ix86_cost = &size_cost;
2114 ix86_cost = processor_target_table[ix86_tune].cost;
2116 /* Arrange to set up i386_stack_locals for all functions. */
2117 init_machine_status = ix86_init_machine_status;
2119 /* Validate -mregparm= value. */
2120 if (ix86_regparm_string)
2123 warning (0, "-mregparm is ignored in 64-bit mode");
2124 i = atoi (ix86_regparm_string);
2125 if (i < 0 || i > REGPARM_MAX)
2126 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
2131 ix86_regparm = REGPARM_MAX;
2133 /* If the user has provided any of the -malign-* options,
2134 warn and use that value only if -falign-* is not set.
2135 Remove this code in GCC 3.2 or later. */
2136 if (ix86_align_loops_string)
2138 warning (0, "-malign-loops is obsolete, use -falign-loops");
2139 if (align_loops == 0)
2141 i = atoi (ix86_align_loops_string);
2142 if (i < 0 || i > MAX_CODE_ALIGN)
2143 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2145 align_loops = 1 << i;
2149 if (ix86_align_jumps_string)
2151 warning (0, "-malign-jumps is obsolete, use -falign-jumps");
2152 if (align_jumps == 0)
2154 i = atoi (ix86_align_jumps_string);
2155 if (i < 0 || i > MAX_CODE_ALIGN)
2156 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2158 align_jumps = 1 << i;
2162 if (ix86_align_funcs_string)
2164 warning (0, "-malign-functions is obsolete, use -falign-functions");
2165 if (align_functions == 0)
2167 i = atoi (ix86_align_funcs_string);
2168 if (i < 0 || i > MAX_CODE_ALIGN)
2169 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2171 align_functions = 1 << i;
2175 /* Default align_* from the processor table. */
2176 if (align_loops == 0)
2178 align_loops = processor_target_table[ix86_tune].align_loop;
2179 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
2181 if (align_jumps == 0)
2183 align_jumps = processor_target_table[ix86_tune].align_jump;
2184 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
2186 if (align_functions == 0)
2188 align_functions = processor_target_table[ix86_tune].align_func;
2191 /* Validate -mbranch-cost= value, or provide default. */
2192 ix86_branch_cost = ix86_cost->branch_cost;
2193 if (ix86_branch_cost_string)
2195 i = atoi (ix86_branch_cost_string);
2197 error ("-mbranch-cost=%d is not between 0 and 5", i);
2199 ix86_branch_cost = i;
2201 if (ix86_section_threshold_string)
2203 i = atoi (ix86_section_threshold_string);
2205 error ("-mlarge-data-threshold=%d is negative", i);
2207 ix86_section_threshold = i;
2210 if (ix86_tls_dialect_string)
2212 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
2213 ix86_tls_dialect = TLS_DIALECT_GNU;
2214 else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
2215 ix86_tls_dialect = TLS_DIALECT_GNU2;
2216 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
2217 ix86_tls_dialect = TLS_DIALECT_SUN;
2219 error ("bad value (%s) for -mtls-dialect= switch",
2220 ix86_tls_dialect_string);
2223 if (ix87_precision_string)
2225 i = atoi (ix87_precision_string);
2226 if (i != 32 && i != 64 && i != 80)
2227 error ("pc%d is not valid precision setting (32, 64 or 80)", i);
2232 target_flags |= TARGET_SUBTARGET64_DEFAULT & ~target_flags_explicit;
2234 /* Enable by default the SSE and MMX builtins. Do allow the user to
2235 explicitly disable any of these. In particular, disabling SSE and
2236 MMX for kernel code is extremely useful. */
2237 if (!ix86_arch_specified)
2239 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
2240 | TARGET_SUBTARGET64_ISA_DEFAULT) & ~ix86_isa_flags_explicit);
2243 warning (0, "-mrtd is ignored in 64bit mode");
2247 target_flags |= TARGET_SUBTARGET32_DEFAULT & ~target_flags_explicit;
2249 if (!ix86_arch_specified)
2251 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~ix86_isa_flags_explicit;
2253 /* The i386 ABI does not specify a red zone. It still makes sense to use
2254 one when the programmer takes care to keep the stack from being destroyed. */
2255 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
2256 target_flags |= MASK_NO_RED_ZONE;
2259 /* Keep nonleaf frame pointers. */
2260 if (flag_omit_frame_pointer)
2261 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
2262 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
2263 flag_omit_frame_pointer = 1;
2265 /* If we're doing fast math, we don't care about comparison order
2266 wrt NaNs. This lets us use a shorter comparison sequence. */
2267 if (flag_finite_math_only)
2268 target_flags &= ~MASK_IEEE_FP;
2270 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
2271 since the insns won't need emulation. */
2272 if (x86_arch_always_fancy_math_387 & ix86_arch_mask)
2273 target_flags &= ~MASK_NO_FANCY_MATH_387;
2275 /* Likewise, if the target doesn't have a 387, or we've specified
2276 software floating point, don't use 387 inline intrinsics. */
2278 target_flags |= MASK_NO_FANCY_MATH_387;
2280 /* Turn on SSE4.1 builtins for -msse4.2. */
2282 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
2284 /* Turn on SSSE3 builtins for -msse4.1. */
2286 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
2288 /* Turn on SSE3 builtins for -mssse3. */
2290 ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
2292 /* Turn on SSE3 builtins for -msse4a. */
2294 ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
2296 /* Turn on SSE2 builtins for -msse3. */
2298 ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
2300 /* Turn on SSE builtins for -msse2. */
2302 ix86_isa_flags |= OPTION_MASK_ISA_SSE;
2304 /* Turn on MMX builtins for -msse. */
2307 ix86_isa_flags |= OPTION_MASK_ISA_MMX & ~ix86_isa_flags_explicit;
2308 x86_prefetch_sse = true;
2311 /* Turn on MMX builtins for 3Dnow. */
2313 ix86_isa_flags |= OPTION_MASK_ISA_MMX;
2315 /* Turn on popcnt instruction for -msse4.2 or -mabm. */
2316 if (TARGET_SSE4_2 || TARGET_ABM)
2319 /* Validate -mpreferred-stack-boundary= value, or provide default.
2320 The default of 128 bits is for Pentium III's SSE __m128. We can't
2321 change it because of optimize_size. Otherwise, we can't mix object
2322 files compiled with -Os and -On. */
2323 ix86_preferred_stack_boundary = 128;
2324 if (ix86_preferred_stack_boundary_string)
2326 i = atoi (ix86_preferred_stack_boundary_string);
2327 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
2328 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
2329 TARGET_64BIT ? 4 : 2);
2331 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
2334 /* Accept -msseregparm only if at least SSE support is enabled. */
2335 if (TARGET_SSEREGPARM
2337 error ("-msseregparm used without SSE enabled");
2339 ix86_fpmath = TARGET_FPMATH_DEFAULT;
2340 if (ix86_fpmath_string != 0)
2342 if (! strcmp (ix86_fpmath_string, "387"))
2343 ix86_fpmath = FPMATH_387;
2344 else if (! strcmp (ix86_fpmath_string, "sse"))
2348 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2349 ix86_fpmath = FPMATH_387;
2352 ix86_fpmath = FPMATH_SSE;
2354 else if (! strcmp (ix86_fpmath_string, "387,sse")
2355 || ! strcmp (ix86_fpmath_string, "sse,387"))
2359 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2360 ix86_fpmath = FPMATH_387;
2362 else if (!TARGET_80387)
2364 warning (0, "387 instruction set disabled, using SSE arithmetics");
2365 ix86_fpmath = FPMATH_SSE;
2368 ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
2371 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
2374 /* If the i387 is disabled, then do not return values in it. */
2376 target_flags &= ~MASK_FLOAT_RETURNS;
2378 if ((x86_accumulate_outgoing_args & ix86_tune_mask)
2379 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
2381 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
2383 /* ??? Unwind info is not correct around the CFG unless either a frame
2384 pointer is present or M_A_O_A (MASK_ACCUMULATE_OUTGOING_ARGS) is set.
2385 Fixing this requires rewriting unwind info generation to be aware of the
CFG and propagating states around edges. */
2387 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
2388 || flag_exceptions || flag_non_call_exceptions)
2389 && flag_omit_frame_pointer
2390 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
2392 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
2393 warning (0, "unwind tables currently require either a frame pointer "
2394 "or -maccumulate-outgoing-args for correctness");
2395 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
2398 /* For sane SSE instruction set generation we need fcomi instruction.
2399 It is safe to enable all CMOVE instructions. */
2403 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
2406 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
2407 p = strchr (internal_label_prefix, 'X');
2408 internal_label_prefix_len = p - internal_label_prefix;
2412 /* When scheduling description is not available, disable scheduler pass
2413 so it won't slow down the compilation and make x87 code slower. */
2414 if (!TARGET_SCHEDULE)
2415 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
2417 if (!PARAM_SET_P (PARAM_SIMULTANEOUS_PREFETCHES))
2418 set_param_value ("simultaneous-prefetches",
2419 ix86_cost->simultaneous_prefetches);
2420 if (!PARAM_SET_P (PARAM_L1_CACHE_LINE_SIZE))
2421 set_param_value ("l1-cache-line-size", ix86_cost->prefetch_block);
2424 /* Return true if EXP goes in large data/bss. The checks below look at
the code model, at whether EXP is a function, at an explicit section
name on a VAR_DECL, and at the byte size of EXP's type compared with
ix86_section_threshold. */
2427 ix86_in_large_data_p (tree exp)
/* Test for a code model other than CM_MEDIUM / CM_MEDIUM_PIC.
NOTE(review): presumably nothing counts as large data under those
other models; the statement guarded by this test is not visible here. */
2429 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
2432 /* Functions are never large data. */
2433 if (TREE_CODE (exp) == FUNCTION_DECL)
/* A variable carrying an explicit section name is judged by that name;
the names compared against are ".ldata" and ".lbss". */
2436 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
2438 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
2439 if (strcmp (section, ".ldata") == 0
2440 || strcmp (section, ".lbss") == 0)
/* Size-based check, using the size in bytes of EXP's type. */
2446 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
2448 /* If this is an incomplete type with size 0, then we can't put it
2449 in data because it might be too big when completed. */
2450 if (!size || size > ix86_section_threshold)
2457 /* Return the section in which DECL should be output.
2458 DECL is either a `VAR_DECL' node or a constant of some sort.
2459 RELOC indicates whether forming the initial value of DECL requires
2460 link-time relocations. ALIGN is only forwarded to
default_elf_select_section in the code below. */
2462 static section * x86_64_elf_select_section (tree, int, unsigned HOST_WIDE_INT)
2466 x86_64_elf_select_section (tree decl, int reloc,
2467 unsigned HOST_WIDE_INT align)
/* Large data under the medium code models is given an ".l"-prefixed
section name chosen from the category that
categorize_decl_for_section reports for DECL. */
2469 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2470 && ix86_in_large_data_p (decl))
2472 const char *sname = NULL;
/* FLAGS starts as SECTION_WRITE and is handed to get_section below. */
2473 unsigned int flags = SECTION_WRITE;
2474 switch (categorize_decl_for_section (decl, reloc))
2479 case SECCAT_DATA_REL:
2480 sname = ".ldata.rel";
2482 case SECCAT_DATA_REL_LOCAL:
2483 sname = ".ldata.rel.local";
2485 case SECCAT_DATA_REL_RO:
2486 sname = ".ldata.rel.ro";
2488 case SECCAT_DATA_REL_RO_LOCAL:
2489 sname = ".ldata.rel.ro.local";
2493 flags |= SECTION_BSS;
2496 case SECCAT_RODATA_MERGE_STR:
2497 case SECCAT_RODATA_MERGE_STR_INIT:
2498 case SECCAT_RODATA_MERGE_CONST:
2502 case SECCAT_SRODATA:
2509 /* We don't split these for medium model. Place them into
2510 default sections and hope for best. */
2515 /* We might get called with string constants, but get_named_section
2516 doesn't like them as they are not DECLs. Also, we need to set
2517 flags in that case. */
2519 return get_section (sname, flags, NULL);
2520 return get_named_section (decl, sname, reloc);
/* Final return: defer to the generic ELF section selection. */
2523 return default_elf_select_section (decl, reloc, align);
2526 /* Build up a unique section name, expressed as a
2527 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
2528 RELOC indicates whether the initial value of DECL requires
2529 link-time relocations. */
2531 static void ATTRIBUTE_UNUSED
2532 x86_64_elf_unique_section (tree decl, int reloc)
/* For large data under the medium code models the name is built from a
category-dependent prefix followed by DECL's assembler name. */
2534 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2535 && ix86_in_large_data_p (decl))
2537 const char *prefix = NULL;
2538 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
2539 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
2541 switch (categorize_decl_for_section (decl, reloc))
2544 case SECCAT_DATA_REL:
2545 case SECCAT_DATA_REL_LOCAL:
2546 case SECCAT_DATA_REL_RO:
2547 case SECCAT_DATA_REL_RO_LOCAL:
2548 prefix = one_only ? ".gnu.linkonce.ld." : ".ldata.";
2551 prefix = one_only ? ".gnu.linkonce.lb." : ".lbss.";
2554 case SECCAT_RODATA_MERGE_STR:
2555 case SECCAT_RODATA_MERGE_STR_INIT:
2556 case SECCAT_RODATA_MERGE_CONST:
2557 prefix = one_only ? ".gnu.linkonce.lr." : ".lrodata.";
2559 case SECCAT_SRODATA:
2566 /* We don't split these for medium model. Place them into
2567 default sections and hope for best. */
/* Concatenate PREFIX and DECL's assembler name (after
targetm.strip_name_encoding) in an alloca'd buffer. The second memcpy
copies nlen + 1 bytes so that NAME's terminating NUL is included. */
2575 plen = strlen (prefix);
2577 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
2578 name = targetm.strip_name_encoding (name);
2579 nlen = strlen (name);
2581 string = (char *) alloca (nlen + plen + 1);
2582 memcpy (string, prefix, plen);
2583 memcpy (string + plen, name, nlen + 1);
2585 DECL_SECTION_NAME (decl) = build_string (nlen + plen, string);
/* Generic handling. NOTE(review): presumably reached only when no
large-data prefix was chosen above; the control flow leading here is
not fully visible in this excerpt. */
2589 default_unique_section (decl, reloc);
2592 #ifdef COMMON_ASM_OP
2593 /* This says how to output assembler code to declare an
2594 uninitialized external linkage data object.
2596 For medium model x86-64 we need to use the .largecomm directive for
objects larger than ix86_section_threshold. */
/* Write to FILE a common-symbol directive for NAME, followed by
",<SIZE>,<ALIGN / BITS_PER_UNIT>" and a newline. */
2599 x86_elf_aligned_common (FILE *file,
2600 const char *name, unsigned HOST_WIDE_INT size,
/* Under the medium code models, objects larger than
ix86_section_threshold get ".largecomm". The threshold is cast to
unsigned int for the comparison with the unsigned SIZE. */
2603 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2604 && size > (unsigned int)ix86_section_threshold)
2605 fprintf (file, ".largecomm\t");
/* NOTE(review): presumably the alternative to ".largecomm"; the `else'
joining the two fprintf calls is not visible in this excerpt. */
2607 fprintf (file, "%s", COMMON_ASM_OP);
2608 assemble_name (file, name);
/* NOTE(review): ALIGN looks like a bit count, since it is divided by
BITS_PER_UNIT before being printed - confirm against the callers. */
2609 fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
2610 size, align / BITS_PER_UNIT);
2614 /* Utility function for targets to use in implementing
2615 ASM_OUTPUT_ALIGNED_BSS. Switches to bss_section or to the ".lbss" named
section, emits the alignment, declares NAME, and passes SIZE (or 1 when
SIZE is zero) to ASM_OUTPUT_SKIP, all on FILE.
NOTE(review): DECL is marked ATTRIBUTE_UNUSED but is referenced below. */
2618 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
2619 const char *name, unsigned HOST_WIDE_INT size,
/* Under the medium code models, objects larger than
ix86_section_threshold use the ".lbss" named section. */
2622 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2623 && size > (unsigned int)ix86_section_threshold)
2624 switch_to_section (get_named_section (decl, ".lbss", 0));
/* NOTE(review): presumably the `else' arm; the keyword itself is not
visible in this excerpt. */
2626 switch_to_section (bss_section);
/* ASM_OUTPUT_ALIGN is given floor_log2 of ALIGN / BITS_PER_UNIT. */
2627 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
2628 #ifdef ASM_DECLARE_OBJECT_NAME
2629 last_assemble_variable_decl = decl;
2630 ASM_DECLARE_OBJECT_NAME (file, name, decl);
2632 /* Standard thing is just output label for the object. */
2633 ASM_OUTPUT_LABEL (file, name);
2634 #endif /* ASM_DECLARE_OBJECT_NAME */
/* A zero SIZE is replaced by 1, so the skip is never empty. */
2635 ASM_OUTPUT_SKIP (file, size ? size : 1);
/* Set i386 defaults for several flag_* variables. SIZE is unused.
NOTE(review): the use of LEVEL is not visible in this excerpt; the
first comment below suggests it is the -O level. */
2639 optimization_options (int level, int size ATTRIBUTE_UNUSED)
2641 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
2642 make the problem with not enough registers even worse. */
2643 #ifdef INSN_SCHEDULING
2645 flag_schedule_insns = 0;
2649 /* The Darwin libraries never set errno, so we might as well
2650 avoid calling them when that's the only reason we would. */
2651 flag_errno_math = 0;
2653 /* The default values of these switches depend on TARGET_64BIT,
2654 which is not known at this moment. Mark these values with 2 and
2655 let the user override them. In case there is no command line option
2656 specifying them, we will set the defaults in override_options,
which tests each of these flags for the value 2. */
2658 flag_omit_frame_pointer = 2;
2659 flag_pcc_struct_return = 2;
2660 flag_asynchronous_unwind_tables = 2;
/* Optional subtarget hook, expanded only when the macro is defined. */
2661 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
2662 SUBTARGET_OPTIMIZATION_OPTIONS;
2666 /* Decide whether we can make a sibling call to a function. DECL is the
2667 declaration of the function being targeted by the call and EXP is the
2668 CALL_EXPR representing the call. DECL is tested for being null below,
which the comments in this function treat as an indirect call. */
2671 ix86_function_ok_for_sibcall (tree decl, tree exp)
2676 /* If we are generating position-independent code, we cannot sibcall
2677 optimize any indirect call, or a direct call to a global function,
2678 as the PLT requires %ebx be live. */
2679 if (!TARGET_64BIT && flag_pic && (!decl || !targetm.binds_local_p (decl)))
/* FUNC becomes the type of the expression being called, with one
pointer level removed when that type is a pointer type. */
2686 func = TREE_TYPE (CALL_EXPR_FN (exp));
2687 if (POINTER_TYPE_P (func))
2688 func = TREE_TYPE (func);
2691 /* Check that the return value locations are the same. Like
2692 if we are returning floats on the 80387 register stack, we cannot
2693 make a sibcall from a function that doesn't return a float to a
2694 function that does or, conversely, from a function that does return
2695 a float to a function that doesn't; the necessary stack adjustment
2696 would not be executed. This is also the place we notice
2697 differences in the return value ABI. Note that it is ok for one
2698 of the functions to have void return type as long as the return
2699 value of the other is passed in a register. */
/* A is the value location computed for the call's type, B the one
computed for the current function's DECL_RESULT. */
2700 a = ix86_function_value (TREE_TYPE (exp), func, false);
2701 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
/* Compare A and B; the stack-register case and the case of a current
function with a void result are singled out first. */
2703 if (STACK_REG_P (a) || STACK_REG_P (b))
2705 if (!rtx_equal_p (a, b))
2708 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
2710 else if (!rtx_equal_p (a, b))
2713 /* If this call is indirect, we'll need to be able to use a call-clobbered
2714 register for the address of the target function. Make sure that all
2715 such registers are not used for passing parameters. */
2716 if (!decl && !TARGET_64BIT)
2720 /* We're looking at the CALL_EXPR, we need the type of the function. */
2721 type = CALL_EXPR_FN (exp); /* pointer expression */
2722 type = TREE_TYPE (type); /* pointer type */
2723 type = TREE_TYPE (type); /* function type */
2725 if (ix86_function_regparm (type, NULL) >= 3)
2727 /* ??? Need to count the actual number of registers to be used,
2728 not the possible number of registers. Fix later. */
2733 /* Dllimport'd functions are also called indirectly. */
2734 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
2735 && decl && DECL_DLLIMPORT_P (decl)
2736 && ix86_function_regparm (TREE_TYPE (decl), NULL) >= 3)
2739 /* If we force-aligned the stack, then sibcalling would unalign the
2740 stack, which may break the called function. */
2741 if (cfun->machine->force_align_arg_pointer)
2744 /* Otherwise okay. That also includes certain types of indirect calls. */
2748 /* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm"
2749 calling convention attributes;
2750 arguments as in struct attribute_spec.handler.
NODE points at the tree the attribute is being applied to, NAME is the
attribute's identifier, ARGS holds its arguments (read only for
"regparm" in the code visible here), and *NO_ADD_ATTRS is set to true
where the attribute is rejected or ignored. FLAGS is unused. */
2753 ix86_handle_cconv_attribute (tree *node, tree name,
2755 int flags ATTRIBUTE_UNUSED,
2758 if (TREE_CODE (*node) != FUNCTION_TYPE
2759 && TREE_CODE (*node) != METHOD_TYPE
2760 && TREE_CODE (*node) != FIELD_DECL
2761 && TREE_CODE (*node) != TYPE_DECL)
2763 warning (OPT_Wattributes, "%qs attribute only applies to functions",
2764 IDENTIFIER_POINTER (name));
2765 *no_add_attrs = true;
2769 /* Can combine regparm with all attributes but fastcall. */
2770 if (is_attribute_p ("regparm", name))
2774 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2776 error ("fastcall and regparm attributes are not compatible");
/* The attribute's first argument must be an INTEGER_CST that is not
larger than REGPARM_MAX; otherwise warn and drop the attribute. */
2779 cst = TREE_VALUE (args);
2780 if (TREE_CODE (cst) != INTEGER_CST)
2782 warning (OPT_Wattributes,
2783 "%qs attribute requires an integer constant argument",
2784 IDENTIFIER_POINTER (name));
2785 *no_add_attrs = true;
2787 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
2789 warning (OPT_Wattributes, "argument to %qs attribute larger than %d",
2790 IDENTIFIER_POINTER (name), REGPARM_MAX);
2791 *no_add_attrs = true;
2795 && lookup_attribute (ix86_force_align_arg_pointer_string,
2796 TYPE_ATTRIBUTES (*node))
2797 && compare_tree_int (cst, REGPARM_MAX-1))
2799 error ("%s functions limited to %d register parameters",
2800 ix86_force_align_arg_pointer_string, REGPARM_MAX-1);
/* NOTE(review): the compare_tree_int result above is used as a plain
truth value, so any CST different from REGPARM_MAX-1, smaller values
included, satisfies that operand. The message wording and the "> 0"
test used for REGPARM_MAX suggest "> 0" may have been intended here
as well - confirm. */
2808 /* Do not warn when emulating the MS ABI. */
2809 if (!TARGET_64BIT_MS_ABI)
2810 warning (OPT_Wattributes, "%qs attribute ignored",
2811 IDENTIFIER_POINTER (name));
2812 *no_add_attrs = true;
2816 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
2817 if (is_attribute_p ("fastcall", name))
2819 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
2821 error ("fastcall and cdecl attributes are not compatible");
2823 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
2825 error ("fastcall and stdcall attributes are not compatible");
2827 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
2829 error ("fastcall and regparm attributes are not compatible");
2833 /* Can combine stdcall with fastcall (redundant), regparm and
2835 else if (is_attribute_p ("stdcall", name))
2837 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
2839 error ("stdcall and cdecl attributes are not compatible");
2841 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2843 error ("stdcall and fastcall attributes are not compatible");
2847 /* Can combine cdecl with regparm and sseregparm. */
2848 else if (is_attribute_p ("cdecl", name))
2850 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
2852 error ("stdcall and cdecl attributes are not compatible");
2854 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2856 error ("fastcall and cdecl attributes are not compatible");
2860 /* Can combine sseregparm with all attributes. */
2865 /* Return 0 if the attributes for two types are incompatible, 1 if they
2866 are compatible, and 2 if they are nearly compatible (which causes a
2867 warning to be generated). TYPE1 and TYPE2 are the two types whose
attribute lists are compared. */
2870 ix86_comp_type_attributes (tree type1, tree type2)
2872 /* Check for mismatch of non-default calling convention. */
/* RTDSTR names the convention attribute looked up in the last test of
this function: "cdecl" when TARGET_RTD is set, "stdcall" otherwise. */
2873 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
/* Test for TYPE1 not being a FUNCTION_TYPE. NOTE(review): presumably
such types need no further checking; the statement guarded by this
test is not visible in this excerpt. */
2875 if (TREE_CODE (type1) != FUNCTION_TYPE)
/* Each `!lookup_attribute (...) != !lookup_attribute (...)' test below
is true when exactly one of the two types carries the attribute. */
2878 /* Check for mismatched fastcall/regparm types. */
2879 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
2880 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
2881 || (ix86_function_regparm (type1, NULL)
2882 != ix86_function_regparm (type2, NULL)))
2885 /* Check for mismatched sseregparm types. */
2886 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
2887 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
2890 /* Check for mismatched return types (cdecl vs stdcall). */
2891 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
2892 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
2898 /* Return the regparm value for a function with the indicated TYPE and DECL.
2899 DECL may be NULL when calling function indirectly
2900 or considering a libcall.
   The starting point is ix86_regparm. An explicit regparm attribute on
   TYPE supplies the value directly. For a FUNCTION_DECL compiled with
   flag_unit_at_a_time and without profile_flag, a local value may be
   derived from the global register variables in use; it replaces the
   default only when it is larger. */
2903 ix86_function_regparm (tree type, tree decl)
2906 int regparm = ix86_regparm;
2911 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
/* Return the low part of the integer constant stored as the first
   argument of the attribute. */
2913 return TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
2915 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
2918 /* Use register calling convention for local functions when possible. */
2919 if (decl && TREE_CODE (decl) == FUNCTION_DECL
2920 && flag_unit_at_a_time && !profile_flag)
2922 struct cgraph_local_info *i = cgraph_local_info (decl);
2925 int local_regparm, globals = 0, regno;
2928 /* Make sure no regparm register is taken by a
2929 global register variable. The scan covers hard registers 0 through 2. */
2930 for (local_regparm = 0; local_regparm < 3; local_regparm++)
2931 if (global_regs[local_regparm])
2934 /* We can't use regparm(3) for nested functions as these use
2935 static chain pointer in third argument. */
2936 if (local_regparm == 3
2937 && (decl_function_context (decl)
2938 || ix86_force_align_arg_pointer)
2939 && !DECL_NO_STATIC_CHAIN (decl))
2942 /* If the function realigns its stack pointer, the prologue will
2943 clobber %ecx. If we've already generated code for the callee,
2944 the callee DECL_STRUCT_FUNCTION is gone, so we fall back to
2945 scanning the attributes for the self-realigning property. */
2946 f = DECL_STRUCT_FUNCTION (decl);
2947 if (local_regparm == 3
2948 && (f ? !!f->machine->force_align_arg_pointer
2949 : !!lookup_attribute (ix86_force_align_arg_pointer_string,
2950 TYPE_ATTRIBUTES (TREE_TYPE (decl)))))
2953 /* Each global register variable increases register pressure,
2954 so the more global reg vars there are, the smaller regparm
2955 optimization use, unless requested by the user explicitly.
   Hard registers 0 through 5 are examined here. */
2956 for (regno = 0; regno < 6; regno++)
2957 if (global_regs[regno])
/* Subtract GLOBALS from the local value, never going below zero. */
2960 = globals < local_regparm ? local_regparm - globals : 0;
/* The locally derived value is only allowed to raise regparm. */
2962 if (local_regparm > regparm)
2963 regparm = local_regparm;
2970 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
2971 DFmode (2) arguments in SSE registers for a function with the
2972 indicated TYPE and DECL. DECL may be NULL when calling function
2973 indirectly or considering a libcall. Otherwise return 0.
   This query is asserted to run only when TARGET_64BIT is false. */
2976 ix86_function_sseregparm (tree type, tree decl)
2978 gcc_assert (!TARGET_64BIT);
2980 /* Use SSE registers to pass SFmode and DFmode arguments if requested
2981 by the sseregparm attribute or by TARGET_SSEREGPARM. */
2982 if (TARGET_SSEREGPARM
2983 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
/* Two diagnostics follow: one names DECL through %qD, the other names
   TYPE through %qT. NOTE(review): the tests that select between them are
   not visible in this listing. */
2988 error ("Calling %qD with attribute sseregparm without "
2989 "SSE/SSE2 enabled", decl);
2991 error ("Calling %qT with attribute sseregparm without "
2992 "SSE/SSE2 enabled", type);
2999 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
3000 (and DFmode for SSE2) arguments in SSE registers. */
3001 if (decl && TARGET_SSE_MATH && flag_unit_at_a_time && !profile_flag)
3003 struct cgraph_local_info *i = cgraph_local_info (decl);
/* With SSE2 both SFmode and DFmode qualify (2); otherwise SFmode only (1). */
3005 return TARGET_SSE2 ? 2 : 1;
3011 /* Return true if EAX is live at the start of the function. Used by
3012 ix86_expand_prologue to determine if we need special help before
3013 calling allocate_stack_worker. */
3016 ix86_eax_live_at_start_p (void)
3018 /* Cheat. Don't bother working forward from ix86_function_regparm
3019 to the function type to whether an actual argument is located in
3020 eax. Instead just look at cfg info, which is still close enough
3021 to correct at this point. This gives false positives for broken
3022 functions that might use uninitialized data that happens to be
3023 allocated in eax, but who cares? */
/* Test hard register 0 in the live-out set of the entry block. */
3024 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR), 0);
3027 /* Return true if TYPE has a variable argument list. The result is
   whether the TREE_VALUE of the node T reached after the loop differs
   from void_type_node. */
3030 type_has_variadic_args_p (tree type)
3032 tree n, t = TYPE_ARG_TYPES (type);
/* Follow TREE_CHAIN until there is no successor. NOTE(review): the loop
   body is not visible in this listing; presumably it advances T to N so
   that T ends on the last list node -- confirm. */
3037 while ((n = TREE_CHAIN (t)) != NULL)
3040 return TREE_VALUE (t) != void_type_node;
3043 /* Value is the number of bytes of arguments automatically
3044 popped when returning from a subroutine call.
3045 FUNDECL is the declaration node of the function (as a tree),
3046 FUNTYPE is the data type of the function (as a tree),
3047 or for a library call it is an identifier node for the subroutine name.
3048 SIZE is the number of bytes of arguments passed on the stack.
3050 On the 80386, the RTD insn may be used to pop them if the number
3051 of args is fixed, but if the number is variable then the caller
3052 must pop them all. RTD can't be used for library calls now
3053 because the library is compiled with the Unix compiler.
3054 Use of RTD is a selectable option, since it is incompatible with
3055 standard Unix calling sequences. If the option is not selected,
3056 the caller must always pop the args.
3058 The attribute stdcall is equivalent to RTD on a per module basis. */
3061 ix86_return_pops_args (tree fundecl, tree funtype, int size)
3065 /* None of the 64-bit ABIs pop arguments. */
/* RTD applies only when TARGET_RTD is set and FUNDECL is either absent or
   not an IDENTIFIER_NODE. */
3069 rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
3071 /* Cdecl functions override -mrtd, and never pop the stack. */
3072 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype)))
/* NOTE(review): the comment opened on the next line has lost its closing
   line in this listing, so a C lexer would treat everything up to the end
   of the comment about the fake structure return argument as comment
   text. Restore the missing line from the full file. */
3074 /* Stdcall and fastcall functions will pop the stack if not
3076 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
3077 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
3080 if (rtd && ! type_has_variadic_args_p (funtype))
3084 /* Lose any fake structure return argument if it is passed on the stack. */
3085 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
3086 && !KEEP_AGGREGATE_RETURN_POINTER)
3088 int nregs = ix86_function_regparm (funtype, fundecl);
/* The amount popped in this case is the size of a Pmode value.
   NOTE(review): the test on NREGS that guards this return is not visible
   in this listing. */
3090 return GET_MODE_SIZE (Pmode);
3096 /* Argument support functions. */
3098 /* Return true when register may be used to pass function parameters.
   REGNO is a hard register number. NOTE(review): the conditions that
   select between the return statements below are not visible in this
   listing; confirm the target split against the full file. */
3100 ix86_function_arg_regno_p (int regno)
3103 const int *parm_regs;
/* Variant 1: the first REGPARM_MAX registers, or any SSE register that is
   not fixed when SSE is enabled. */
3108 return (regno < REGPARM_MAX
3109 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
/* Variant 2: the first REGPARM_MAX registers, or an MMX or SSE register
   within the first MMX_REGPARM_MAX or SSE_REGPARM_MAX of its bank. */
3111 return (regno < REGPARM_MAX
3112 || (TARGET_MMX && MMX_REGNO_P (regno)
3113 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
3114 || (TARGET_SSE && SSE_REGNO_P (regno)
3115 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
3120 if (SSE_REGNO_P (regno) && TARGET_SSE)
3125 if (TARGET_SSE && SSE_REGNO_P (regno)
3126 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
3130 /* RAX is used as hidden argument to va_arg functions. This case is
   skipped for the MS ABI. */
3131 if (!TARGET_64BIT_MS_ABI && regno == 0)
/* Pick the integer parameter register table for the active ABI and look
   for REGNO among its first REGPARM_MAX entries. */
3134 if (TARGET_64BIT_MS_ABI)
3135 parm_regs = x86_64_ms_abi_int_parameter_registers;
3137 parm_regs = x86_64_int_parameter_registers;
3138 for (i = 0; i < REGPARM_MAX; i++)
3139 if (regno == parm_regs[i])
3144 /* Return nonzero if we do not know how to pass TYPE solely in registers.
   MODE is the machine mode of the argument and TYPE its tree type (TYPE
   may be null; the final test checks for that). */
3147 ix86_must_pass_in_stack (enum machine_mode mode, tree type)
/* Check must_pass_in_stack_var_size_or_pad first. */
3149 if (must_pass_in_stack_var_size_or_pad (mode, type))
3152 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
3153 The layout_type routine is crafty and tries to trick us into passing
3154 currently unsupported vector types on the stack by using TImode. */
3155 return (!TARGET_64BIT && mode == TImode
3156 && type && TREE_CODE (type) != VECTOR_TYPE);
3159 /* Initialize a variable CUM of type CUMULATIVE_ARGS
3160 for a call to a function whose data type is FNTYPE.
3161 For a library call, FNTYPE is 0.
   CUM is cleared first and then seeded with the register budgets
   (ix86_regparm, SSE_REGPARM_MAX, MMX_REGPARM_MAX) and the warning
   flags. */
3164 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
3165 tree fntype, /* tree for the data type of the function, or 0 */
3166 rtx libname, /* SYMBOL_REF of library name or 0 */
3169 memset (cum, 0, sizeof (*cum));
3171 /* Set up the number of registers to use for passing arguments. */
3172 cum->nregs = ix86_regparm;
3174 cum->sse_nregs = SSE_REGPARM_MAX;
3176 cum->mmx_nregs = MMX_REGPARM_MAX;
3177 cum->warn_sse = true;
3178 cum->warn_mmx = true;
/* With a known FNTYPE, the call counts as possibly variadic when FNTYPE
   has no argument type list at all or when type_has_variadic_args_p
   accepts it. NOTE(review): the value used when FNTYPE is 0 is not
   visible in this listing. */
3179 cum->maybe_vaarg = (fntype
3180 ? (!TYPE_ARG_TYPES (fntype)
3181 || type_has_variadic_args_p (fntype))
3186 /* If there are variable arguments, then we won't pass anything
3187 in registers in 32-bit mode. */
3188 if (cum->maybe_vaarg)
3198 /* Use ecx and edx registers if function has fastcall attribute,
3199 else look for regparm information. */
3202 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
3208 cum->nregs = ix86_function_regparm (fntype, fndecl);
3211 /* Set up the number of SSE registers used for passing SFmode
3212 and DFmode arguments. Warn for mismatching ABI. */
3213 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl);
3217 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
3218 But in the case of vector types, it is some vector mode.
3220 When we have only some of our vector isa extensions enabled, then there
3221 are some modes for which vector_mode_supported_p is false. For these
3222 modes, the generic vector support in gcc will choose some non-vector mode
3223 in order to implement the type. By computing the natural mode, we'll
3224 select the proper ABI location for the operand and not depend on whatever
3225 the middle-end decides to do with these vector types. */
3227 static enum machine_mode
3228 type_natural_mode (tree type)
3230 enum machine_mode mode = TYPE_MODE (type);
/* Only vector types whose TYPE_MODE is not already a vector mode need a
   search. */
3232 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
3234 HOST_WIDE_INT size = int_size_in_bytes (type);
/* Restrict the search to 8-byte and 16-byte vectors with more than one
   element. */
3235 if ((size == 8 || size == 16)
3236 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
3237 && TYPE_VECTOR_SUBPARTS (type) > 1)
3239 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
/* Start from the first float or integer vector mode, depending on whether
   the element type is a REAL_TYPE. */
3241 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
3242 mode = MIN_MODE_VECTOR_FLOAT;
3244 mode = MIN_MODE_VECTOR_INT;
3246 /* Get the mode which has this inner mode and number of units. The walk
   moves to successively wider modes and stops at VOIDmode. */
3247 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
3248 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
3249 && GET_MODE_INNER (mode) == innermode)
3259 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
3260 this may not agree with the mode that the type system has chosen for the
3261 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
3262 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
3265 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
/* Plain case: a single register in ORIG_MODE. */
3270 if (orig_mode != BLKmode)
3271 tmp = gen_rtx_REG (orig_mode, regno);
/* BLKmode case: a register in MODE, paired with const0_rtx in an
   EXPR_LIST, becomes the only element of a PARALLEL in ORIG_MODE. */
3274 tmp = gen_rtx_REG (mode, regno);
3275 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
3276 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
3282 /* x86-64 register passing implementation. See x86-64 ABI for details. The goal
3283 of this code is to classify each 8 bytes of an incoming argument by register
3284 class and assign registers accordingly. */
3286 /* Return the union class of CLASS1 and CLASS2.
3287 See the x86-64 PS ABI for details.
   The rules below are tried in order and the first one that matches
   decides the result. */
3289 static enum x86_64_reg_class
3290 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
3292 /* Rule #1: If both classes are equal, this is the resulting class. */
3293 if (class1 == class2)
/* NOTE(review): the comment for rule #2 has lost its closing line in this
   listing, so a C lexer would treat the two NO_CLASS tests as comment
   text up to the end of the rule #3 comment. Restore the missing line
   from the full file. */
3296 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
3298 if (class1 == X86_64_NO_CLASS)
3300 if (class2 == X86_64_NO_CLASS)
3303 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
3304 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
3305 return X86_64_MEMORY_CLASS;
3307 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER.
   INTEGERSI merged with SSESF stays INTEGERSI; every other pair that
   involves INTEGER or INTEGERSI yields INTEGER. */
3308 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
3309 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
3310 return X86_64_INTEGERSI_CLASS;
3311 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
3312 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
3313 return X86_64_INTEGER_CLASS;
/* NOTE(review): the comment for rule #5 has also lost its closing line in
   this listing; as written, the test and its MEMORY result are swallowed
   up to the end of the rule #6 comment. Restore the missing line from
   the full file. */
3315 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
3317 if (class1 == X86_64_X87_CLASS
3318 || class1 == X86_64_X87UP_CLASS
3319 || class1 == X86_64_COMPLEX_X87_CLASS
3320 || class2 == X86_64_X87_CLASS
3321 || class2 == X86_64_X87UP_CLASS
3322 || class2 == X86_64_COMPLEX_X87_CLASS)
3323 return X86_64_MEMORY_CLASS;
3325 /* Rule #6: Otherwise class SSE is used. */
3326 return X86_64_SSE_CLASS;
3329 /* Classify the argument of type TYPE and mode MODE.
3330 CLASSES will be filled by the register class used to pass each word
3331 of the operand. The number of words is returned. In case the parameter
3332 should be passed in memory, 0 is returned. As a special case for zero
3333 sized containers, classes[0] will be NO_CLASS and 1 is returned.
3335 BIT_OFFSET is used internally for handling records and specifies the
3336 offset in bits, modulo 256 to avoid overflow cases.
3338 See the x86-64 PS ABI for details.
   NOTE(review): the line that closes this comment is absent from this
   listing; restore it from the full file.
3342 classify_argument (enum machine_mode mode, tree type,
3343 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
3345 HOST_WIDE_INT bytes =
3346 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3347 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3349 /* Variable sized entities are always passed/returned in memory. */
3353 if (mode != VOIDmode
3354 && targetm.calls.must_pass_in_stack (mode, type))
3357 if (type && AGGREGATE_TYPE_P (type))
3361 enum x86_64_reg_class subclasses[MAX_CLASSES];
3363 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
/* Start with every word unclassified. */
3367 for (i = 0; i < words; i++)
3368 classes[i] = X86_64_NO_CLASS;
3370 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
3371 signal memory class, so handle it as special case. */
3374 classes[0] = X86_64_NO_CLASS;
3378 /* Classify each field of record and merge classes. */
3379 switch (TREE_CODE (type))
3382 /* And now merge the fields of structure. */
3383 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3385 if (TREE_CODE (field) == FIELD_DECL)
3389 if (TREE_TYPE (field) == error_mark_node)
3392 /* Bitfields are always classified as integer. Handle them
3393 early, since later code would consider them to be
3394 misaligned integers. */
3395 if (DECL_BIT_FIELD (field))
3397 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
3398 i < ((int_bit_position (field) + (bit_offset % 64))
3399 + tree_low_cst (DECL_SIZE (field), 0)
3402 merge_classes (X86_64_INTEGER_CLASS,
/* Other fields are classified recursively, with the bit position of the
   field added to BIT_OFFSET and reduced modulo 256. */
3407 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3408 TREE_TYPE (field), subclasses,
3409 (int_bit_position (field)
3410 + bit_offset) % 256);
3413 for (i = 0; i < num; i++)
3416 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
3418 merge_classes (subclasses[i], classes[i + pos]);
3426 /* Arrays are handled as small records. */
3429 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
3430 TREE_TYPE (type), subclasses, bit_offset);
3434 /* The partial classes are now full classes. Unless the array is
   exactly 4 bytes, SSESF is widened to SSE and INTEGERSI to INTEGER. */
3435 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
3436 subclasses[0] = X86_64_SSE_CLASS;
3437 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
3438 subclasses[0] = X86_64_INTEGER_CLASS;
/* Fill every word by cycling through the element classes. */
3440 for (i = 0; i < words; i++)
3441 classes[i] = subclasses[i % num];
3446 case QUAL_UNION_TYPE:
/* NOTE(review): the comment opened on the next line has lost its closing
   line in this listing, so a C lexer would treat the union handling as
   comment text up to the end of the final merger cleanup comment.
   Restore the missing line from the full file. */
3447 /* Unions are similar to RECORD_TYPE but offset is always 0.
3449 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3451 if (TREE_CODE (field) == FIELD_DECL)
3455 if (TREE_TYPE (field) == error_mark_node)
3458 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3459 TREE_TYPE (field), subclasses,
3463 for (i = 0; i < num; i++)
3464 classes[i] = merge_classes (subclasses[i], classes[i]);
3473 /* Final merger cleanup. */
3474 for (i = 0; i < words; i++)
/* NOTE(review): the comment opened on the next line has lost its closing
   line in this listing; as written, the MEMORY test is swallowed up to
   the end of the SSEUP comment. Restore the missing line from the full
   file. */
3476 /* If one class is MEMORY, everything should be passed in
3478 if (classes[i] == X86_64_MEMORY_CLASS)
3481 /* The X86_64_SSEUP_CLASS should be always preceded by
3482 X86_64_SSE_CLASS. */
3483 if (classes[i] == X86_64_SSEUP_CLASS
3484 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
3485 classes[i] = X86_64_SSE_CLASS;
3487 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
3488 if (classes[i] == X86_64_X87UP_CLASS
3489 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
3490 classes[i] = X86_64_SSE_CLASS;
3495 /* Compute alignment needed. We align all types to natural boundaries with
3496 exception of XFmode that is aligned to 64bits. */
3497 if (mode != VOIDmode && mode != BLKmode)
3499 int mode_alignment = GET_MODE_BITSIZE (mode);
3502 mode_alignment = 128;
3503 else if (mode == XCmode)
3504 mode_alignment = 256;
/* Complex modes use half of the alignment computed above. */
3505 if (COMPLEX_MODE_P (mode))
3506 mode_alignment /= 2;
3507 /* Misaligned fields are always returned in memory. */
3508 if (bit_offset % mode_alignment)
3512 /* For V1xx modes, just use the base mode. */
3513 if (VECTOR_MODE_P (mode)
3514 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
3515 mode = GET_MODE_INNER (mode);
3517 /* Classification of atomic types. NOTE(review): the switch statement and
   its case labels are not visible in this listing, so the mode that each
   assignment below belongs to must be read from the full file. */
3522 classes[0] = X86_64_SSE_CLASS;
3525 classes[0] = X86_64_SSE_CLASS;
3526 classes[1] = X86_64_SSEUP_CLASS;
3535 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3536 classes[0] = X86_64_INTEGERSI_CLASS;
3538 classes[0] = X86_64_INTEGER_CLASS;
3542 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
3547 if (!(bit_offset % 64))
3548 classes[0] = X86_64_SSESF_CLASS;
3550 classes[0] = X86_64_SSE_CLASS;
3553 classes[0] = X86_64_SSEDF_CLASS;
3556 classes[0] = X86_64_X87_CLASS;
3557 classes[1] = X86_64_X87UP_CLASS;
3560 classes[0] = X86_64_SSE_CLASS;
3561 classes[1] = X86_64_SSEUP_CLASS;
3564 classes[0] = X86_64_SSE_CLASS;
3567 classes[0] = X86_64_SSEDF_CLASS;
3568 classes[1] = X86_64_SSEDF_CLASS;
3571 classes[0] = X86_64_COMPLEX_X87_CLASS;
3574 /* This mode is larger than 16 bytes. */
3582 classes[0] = X86_64_SSE_CLASS;
3583 classes[1] = X86_64_SSEUP_CLASS;
3589 classes[0] = X86_64_SSE_CLASS;
3595 gcc_assert (VECTOR_MODE_P (mode));
3600 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
3602 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3603 classes[0] = X86_64_INTEGERSI_CLASS;
3605 classes[0] = X86_64_INTEGER_CLASS;
3606 classes[1] = X86_64_INTEGER_CLASS;
/* One class for up to 8 bytes, two beyond that. */
3607 return 1 + (bytes > 8);
3611 /* Examine the argument and report the number of registers required in each
3612 class. Return 0 iff parameter should be passed in memory.
   The argument is classified with classify_argument at bit offset 0 and
   the resulting classes are visited from last to first. NOTE(review):
   the stores through INT_NREGS and SSE_NREGS are not visible in this
   listing; presumably the integer and SSE case groups increment them --
   confirm. */
3614 examine_argument (enum machine_mode mode, tree type, int in_return,
3615 int *int_nregs, int *sse_nregs)
3617 enum x86_64_reg_class regclass[MAX_CLASSES];
3618 int n = classify_argument (mode, type, regclass, 0);
3624 for (n--; n >= 0; n--)
3625 switch (regclass[n])
3627 case X86_64_INTEGER_CLASS:
3628 case X86_64_INTEGERSI_CLASS:
3631 case X86_64_SSE_CLASS:
3632 case X86_64_SSESF_CLASS:
3633 case X86_64_SSEDF_CLASS:
3636 case X86_64_NO_CLASS:
3637 case X86_64_SSEUP_CLASS:
3639 case X86_64_X87_CLASS:
3640 case X86_64_X87UP_CLASS:
3644 case X86_64_COMPLEX_X87_CLASS:
/* COMPLEX_X87 yields 2 for a return value and 0 for an argument. */
3645 return in_return ? 2 : 0;
3646 case X86_64_MEMORY_CLASS:
3652 /* Construct container for the argument used by GCC interface. See
3653 FUNCTION_ARG for the detailed description.
   MODE and ORIG_MODE are the natural and original modes of the value and
   TYPE is its tree type. IN_RETURN is nonzero when a return value is
   being described. NINTREGS and NSSEREGS are the integer and SSE
   registers still available. INTREG points at the integer registers to
   use and SSE_REGNO is the index of the first SSE register to use.
   The value is classified, diagnostics are issued when the required
   register file is disabled, and the result is either a single REG or a
   PARALLEL with one entry per register used. */
3656 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
3657 tree type, int in_return, int nintregs, int nsseregs,
3658 const int *intreg, int sse_regno)
3660 /* The following variables hold the static issued_error state. Each one
   is set after its error has been reported, so every message is emitted
   at most once. */
3661 static bool issued_sse_arg_error;
3662 static bool issued_sse_ret_error;
3663 static bool issued_x87_ret_error;
3665 enum machine_mode tmpmode;
3667 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3668 enum x86_64_reg_class regclass[MAX_CLASSES];
3672 int needed_sseregs, needed_intregs;
3673 rtx exp[MAX_CLASSES];
3676 n = classify_argument (mode, type, regclass, 0);
3679 if (!examine_argument (mode, type, in_return, &needed_intregs,
/* Not enough registers of one kind left for this value. */
3682 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
3685 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
3686 some less clueful developer tries to use floating-point anyway. */
3687 if (needed_sseregs && !TARGET_SSE)
3691 if (!issued_sse_ret_error)
3693 error ("SSE register return with SSE disabled");
3694 issued_sse_ret_error = true;
3697 else if (!issued_sse_arg_error)
3699 error ("SSE register argument with SSE disabled");
3700 issued_sse_arg_error = true;
3705 /* Likewise, error if the ABI requires us to return values in the
3706 x87 registers and the user specified -mno-80387. */
3707 if (!TARGET_80387 && in_return)
3708 for (i = 0; i < n; i++)
3709 if (regclass[i] == X86_64_X87_CLASS
3710 || regclass[i] == X86_64_X87UP_CLASS
3711 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
3713 if (!issued_x87_ret_error)
3715 error ("x87 register return with x87 disabled");
3716 issued_x87_ret_error = true;
3721 /* First construct simple cases. Avoid SCmode, since we want to use
3722 single register to pass this type. */
3723 if (n == 1 && mode != SCmode)
3724 switch (regclass[0])
3726 case X86_64_INTEGER_CLASS:
3727 case X86_64_INTEGERSI_CLASS:
3728 return gen_rtx_REG (mode, intreg[0]);
3729 case X86_64_SSE_CLASS:
3730 case X86_64_SSESF_CLASS:
3731 case X86_64_SSEDF_CLASS:
3732 return gen_reg_or_parallel (mode, orig_mode, SSE_REGNO (sse_regno));
3733 case X86_64_X87_CLASS:
3734 case X86_64_COMPLEX_X87_CLASS:
3735 return gen_rtx_REG (mode, FIRST_STACK_REG);
3736 case X86_64_NO_CLASS:
3737 /* Zero sized array, struct or class. */
/* Two-class values that still fit a single REG: an SSE/SSEUP pair in one
   SSE register, an X87/X87UP pair in the first stack register as XFmode,
   and an INTEGER pair whose two registers are numbered consecutively. */
3742 if (n == 2 && regclass[0] == X86_64_SSE_CLASS
3743 && regclass[1] == X86_64_SSEUP_CLASS && mode != BLKmode)
3744 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
3747 && regclass[0] == X86_64_X87_CLASS && regclass[1] == X86_64_X87UP_CLASS)
3748 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
3749 if (n == 2 && regclass[0] == X86_64_INTEGER_CLASS
3750 && regclass[1] == X86_64_INTEGER_CLASS
3751 && (mode == CDImode || mode == TImode || mode == TFmode)
3752 && intreg[0] + 1 == intreg[1])
3753 return gen_rtx_REG (mode, intreg[0]);
3755 /* Otherwise figure out the entries of the PARALLEL. */
3756 for (i = 0; i < n; i++)
3758 switch (regclass[i])
3760 case X86_64_NO_CLASS:
3762 case X86_64_INTEGER_CLASS:
3763 case X86_64_INTEGERSI_CLASS:
3764 /* Merge TImodes on aligned occasions here too. For a trailing word
   that is only partly used, ask for an integer mode covering just the
   remaining bytes. */
3765 if (i * 8 + 8 > bytes)
3766 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
3767 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
3771 /* We've requested 24 bytes we don't have mode for. Use DImode. */
3772 if (tmpmode == BLKmode)
3774 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3775 gen_rtx_REG (tmpmode, *intreg),
3779 case X86_64_SSESF_CLASS:
3780 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3781 gen_rtx_REG (SFmode,
3782 SSE_REGNO (sse_regno)),
3786 case X86_64_SSEDF_CLASS:
3787 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3788 gen_rtx_REG (DFmode,
3789 SSE_REGNO (sse_regno)),
3793 case X86_64_SSE_CLASS:
/* An SSE word followed by SSEUP is handled together with its upper
   half. */
3794 if (i < n - 1 && regclass[i + 1] == X86_64_SSEUP_CLASS)
3798 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3799 gen_rtx_REG (tmpmode,
3800 SSE_REGNO (sse_regno)),
3802 if (tmpmode == TImode)
3811 /* Empty aligned struct, union or class. */
/* Collect the NEXPS entries gathered above into a PARALLEL of MODE. */
3815 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
3816 for (i = 0; i < nexps; i++)
3817 XVECEXP (ret, 0, i) = exp [i];
3821 /* Update the data in CUM to advance over an argument of mode MODE
3822 and data type TYPE. (TYPE is null for libcalls where that information
3823 may not be available.)
   BYTES and WORDS give the size of the argument. NOTE(review): the
   switch on MODE that selects among the three groups of updates below is
   not visible in this listing. */
3826 function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3827 tree type, HOST_WIDE_INT bytes, HOST_WIDE_INT words)
/* Integer registers: the argument consumes WORDS registers. */
3843 cum->words += words;
3844 cum->nregs -= words;
3845 cum->regno += words;
3847 if (cum->nregs <= 0)
3855 if (cum->float_in_sse < 2)
3858 if (cum->float_in_sse < 1)
/* SSE registers: a non-aggregate argument consumes exactly one register,
   while sse_words still grows by WORDS. */
3869 if (!type || !AGGREGATE_TYPE_P (type))
3871 cum->sse_words += words;
3872 cum->sse_nregs -= 1;
3873 cum->sse_regno += 1;
3874 if (cum->sse_nregs <= 0)
/* MMX registers: same accounting as for SSE. */
3886 if (!type || !AGGREGATE_TYPE_P (type))
3888 cum->mmx_words += words;
3889 cum->mmx_nregs -= 1;
3890 cum->mmx_regno += 1;
3891 if (cum->mmx_nregs <= 0)
/* Advance CUM over one argument of mode MODE and type TYPE that is WORDS
   words long. When examine_argument returns zero, or when the registers
   it asks for exceed what CUM has left, only cum->words grows;
   otherwise the integer and SSE register counters are updated. */
3902 function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3903 tree type, HOST_WIDE_INT words)
3905 int int_nregs, sse_nregs;
3907 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
3908 cum->words += words;
/* The value goes in registers only if both kinds of register it needs
   are still available. */
3909 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
3911 cum->nregs -= int_nregs;
3912 cum->sse_nregs -= sse_nregs;
3913 cum->regno += int_nregs;
3914 cum->sse_regno += sse_nregs;
3917 cum->words += words;
/* Advance CUM over one argument that is BYTES bytes and WORDS words long.
   BYTES is asserted to be 1, 2, 4 or 8. NOTE(review): any register
   bookkeeping done here is not visible in this listing. */
3921 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
3922 HOST_WIDE_INT words)
3924 /* Otherwise, this should be passed indirect. */
3925 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
3927 cum->words += words;
/* Advance CUM over an argument of mode MODE and type TYPE by computing
   its size in bytes and words and dispatching to the MS 64-bit, 64-bit or
   32-bit worker. NAMED is unused. */
3936 function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3937 tree type, int named ATTRIBUTE_UNUSED)
3939 HOST_WIDE_INT bytes, words;
/* BLKmode has no mode size, so the size comes from TYPE instead. */
3941 if (mode == BLKmode)
3942 bytes = int_size_in_bytes (type);
3944 bytes = GET_MODE_SIZE (mode);
/* Round up to whole words. */
3945 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
/* NOTE(review): the condition guarding this assignment is not visible in
   this listing. */
3948 mode = type_natural_mode (type);
3950 if (TARGET_64BIT_MS_ABI)
3951 function_arg_advance_ms_64 (cum, bytes, words);
3952 else if (TARGET_64BIT)
3953 function_arg_advance_64 (cum, mode, type, words);
3955 function_arg_advance_32 (cum, mode, type, bytes, words);
3958 /* Define where to put the arguments to a function.
3959 Value is zero to push the argument on the stack,
3960 or a hard register in which to store the argument.
3962 MODE is the argument's machine mode.
3963 TYPE is the data type of the argument (as a tree).
3964 This is null for libcalls where that information may
   not be available.
3966 CUM is a variable of type CUMULATIVE_ARGS which gives info about
3967 the preceding args and about the function being called.
3968 NAMED is nonzero if this argument is a named parameter
3969 (otherwise it is an extra parameter matching an ellipsis). */
3972 function_arg_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3973 enum machine_mode orig_mode, tree type,
3974 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
/* These flags are tested before each vector ABI warning below.
   NOTE(review): the statements that set them are not visible in this
   listing; presumably each warning is issued only once -- confirm. */
3976 static bool warnedsse, warnedmmx;
3978 /* Avoid the AL settings for the Unix64 ABI. */
3979 if (mode == VOIDmode)
/* Integer registers are used only when the whole argument fits in the
   registers that remain. */
3995 if (words <= cum->nregs)
3997 int regno = cum->regno;
3999 /* Fastcall allocates the first two DWORD (SImode) or
4000 smaller arguments to ECX and EDX. */
4003 if (mode == BLKmode || mode == DImode)
4006 /* ECX not EAX is the first allocated register. */
4010 return gen_rtx_REG (mode, regno);
4015 if (cum->float_in_sse < 2)
4018 if (cum->float_in_sse < 1)
/* Non-aggregate SSE-mode arguments go in the next SSE register. */
4028 if (!type || !AGGREGATE_TYPE_P (type))
4030 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
4033 warning (0, "SSE vector argument without SSE enabled "
4037 return gen_reg_or_parallel (mode, orig_mode,
4038 cum->sse_regno + FIRST_SSE_REG);
/* Non-aggregate MMX-mode arguments go in the next MMX register. */
4046 if (!type || !AGGREGATE_TYPE_P (type))
4048 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
4051 warning (0, "MMX vector argument without MMX enabled "
4055 return gen_reg_or_parallel (mode, orig_mode,
4056 cum->mmx_regno + FIRST_MMX_REG);
/* Return the location of an argument of mode MODE (original mode
   ORIG_MODE) and type TYPE, given the state in CUM. A VOIDmode query
   yields the constant for the hidden AL argument; every other query is
   handed to construct_container with the registers still available in
   CUM. */
4065 function_arg_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4066 enum machine_mode orig_mode, tree type)
4068 /* Handle a hidden AL argument containing number of registers
4069 for varargs x86-64 functions. */
4070 if (mode == VOIDmode)
4071 return GEN_INT (cum->maybe_vaarg
4072 ? (cum->sse_nregs < 0
/* IN_RETURN is passed as 0 because this describes an argument. The integer
   registers start at index cum->regno of the parameter register table. */
4077 return construct_container (mode, orig_mode, type, 0, cum->nregs,
4079 &x86_64_int_parameter_registers [cum->regno],
/* Return the location of an argument of mode MODE (original mode
   ORIG_MODE), given the state in CUM. The integer register comes from
   x86_64_ms_abi_int_parameter_registers at index cum->regno; SFmode and
   DFmode values use the SSE register with the same index when SSE is
   enabled. NAMED tells whether the argument is a named parameter. */
4084 function_arg_ms_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4085 enum machine_mode orig_mode, int named)
4089 /* Avoid the AL settings for the Unix64 ABI. */
4090 if (mode == VOIDmode)
4093 /* If we've run out of registers, it goes on the stack. */
4094 if (cum->nregs == 0)
4097 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
4099 /* Only floating point modes are passed in anything but integer regs. */
4100 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
4103 regno = cum->regno + FIRST_SSE_REG;
4108 /* Unnamed floating parameters are passed in both the
4109 SSE and integer registers. The result is a two-element PARALLEL
   holding the SSE register and the integer register, each paired with
   const0_rtx. */
4110 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
4111 t2 = gen_rtx_REG (mode, regno);
4112 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
4113 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
4114 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
4118 return gen_reg_or_parallel (mode, orig_mode, regno);
/* Return where to pass an argument of mode OMODE and type TYPE, given the
   state in CUM. The size is computed in bytes and words, vector types
   are given their natural mode, and the query is dispatched to the MS
   64-bit, 64-bit or 32-bit worker. NAMED is forwarded to the MS 64-bit
   worker only. */
4122 function_arg (CUMULATIVE_ARGS *cum, enum machine_mode omode,
4123 tree type, int named)
4125 enum machine_mode mode = omode;
4126 HOST_WIDE_INT bytes, words;
/* BLKmode has no mode size, so the size comes from TYPE instead. */
4128 if (mode == BLKmode)
4129 bytes = int_size_in_bytes (type);
4131 bytes = GET_MODE_SIZE (mode);
/* Round up to whole words. */
4132 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4134 /* To simplify the code below, represent vector types with a vector mode
4135 even if MMX/SSE are not active. */
4136 if (type && TREE_CODE (type) == VECTOR_TYPE)
4137 mode = type_natural_mode (type);
/* OMODE is passed along as the original mode in every case. */
4139 if (TARGET_64BIT_MS_ABI)
4140 return function_arg_ms_64 (cum, mode, omode, named);
4141 else if (TARGET_64BIT)
4142 return function_arg_64 (cum, mode, omode, type);
4144 return function_arg_32 (cum, mode, omode, type, bytes, words);
4147 /* A C expression that indicates when an argument must be passed by
4148 reference. If nonzero for an argument, a copy of that argument is
4149 made in memory and a pointer to the argument is passed instead of
4150 the argument itself. The pointer is passed in whatever way is
4151 appropriate for passing a pointer to that type.
   NOTE(review): MODE is marked ATTRIBUTE_UNUSED in the parameter list
   but is read by the GET_MODE_SIZE test below. */
4154 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
4155 enum machine_mode mode ATTRIBUTE_UNUSED,
4156 tree type, bool named ATTRIBUTE_UNUSED)
4158 if (TARGET_64BIT_MS_ABI)
4162 /* Arrays are passed by reference. */
4163 if (TREE_CODE (type) == ARRAY_TYPE)
4166 if (AGGREGATE_TYPE_P (type))
4168 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
4169 are passed by reference. That is, only sizes of 1, 2, 4 or 8 bytes
   (exact_log2 between 0 and 3) are passed by value. */
4170 int el2 = exact_log2 (int_size_in_bytes (type));
4171 return !(el2 >= 0 && el2 <= 3);
4175 /* __m128 is passed by reference. */
4176 /* ??? How to handle complex? For now treat them as structs,
4177 and pass them by reference if they're too large. */
4178 if (GET_MODE_SIZE (mode) > 8)
/* NOTE(review): presumably a size of -1 denotes a type whose size is not
   a compile-time constant -- confirm. */
4181 else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
4187 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
4188 ABI. Only called if TARGET_SSE.
   The function recurses into the fields of aggregates and into the
   element type of arrays. */
4190 contains_128bit_aligned_vector_p (tree type)
4192 enum machine_mode mode = TYPE_MODE (type);
/* A type with an SSE register mode qualifies unless the user set its
   alignment to 128 bits or less. */
4193 if (SSE_REG_MODE_P (mode)
4194 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
/* NOTE(review): the result for types aligned to less than 128 bits is not
   visible in this listing. */
4196 if (TYPE_ALIGN (type) < 128)
4199 if (AGGREGATE_TYPE_P (type))
4201 /* Walk the aggregates recursively. */
4202 switch (TREE_CODE (type))
4206 case QUAL_UNION_TYPE:
4210 /* Walk all the structure fields. */
4211 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
4213 if (TREE_CODE (field) == FIELD_DECL
4214 && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
4221 /* Just for use if some language passes arrays by value. */
4222 if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
4233 /* Gives the alignment boundary, in bits, of an argument with the
4234 specified mode and type.
   The starting value is TYPE_ALIGN (type) or GET_MODE_ALIGNMENT (mode),
   raised to PARM_BOUNDARY when smaller. NOTE(review): the conditions
   that select between the branches below are not visible in this
   listing. */
4237 ix86_function_arg_boundary (enum machine_mode mode, tree type)
4241 align = TYPE_ALIGN (type);
4243 align = GET_MODE_ALIGNMENT (mode);
4244 if (align < PARM_BOUNDARY)
4245 align = PARM_BOUNDARY;
4248 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
4249 make an exception for SSE modes since these require 128bit
   alignment.
4252 The handling here differs from field_alignment. ICC aligns MMX
4253 arguments to 4 byte boundaries, while structure fields are aligned
4254 to 8 byte boundaries. */
4256 align = PARM_BOUNDARY;
/* Modes that are not SSE register modes fall back to PARM_BOUNDARY. */
4259 if (!SSE_REG_MODE_P (mode))
4260 align = PARM_BOUNDARY;
/* Types that contains_128bit_aligned_vector_p rejects fall back to
   PARM_BOUNDARY as well. */
4264 if (!contains_128bit_aligned_vector_p (type))
4265 align = PARM_BOUNDARY;
4273 /* Return true if REGNO is a possible register number for a function
   value.  */
4276 ix86_function_value_regno_p (int regno)
/* For FIRST_FLOAT_REG the visible code first tests TARGET_64BIT_MS_ABI
   and otherwise answers with TARGET_FLOAT_RETURNS_IN_80387.  */
4283 case FIRST_FLOAT_REG:
4284 if (TARGET_64BIT_MS_ABI)
4286 return TARGET_FLOAT_RETURNS_IN_80387;
4292 if (TARGET_MACHO || TARGET_64BIT)
4300 /* Define how to find the value returned by a function.
4301 VALTYPE is the data type of the value (as a tree).
4302 If the precise function being called is known, FUNC is its FUNCTION_DECL;
4303 otherwise, FUNC is 0.
   function_value_32 picks a register number from MODE and returns a REG
   rtx for that register in ORIG_MODE.  In the visible code FNTYPE and FN
   are consulted only for the SFmode/DFmode override near the end.  */
4306 function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
4307 tree fntype, tree fn)
4311 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
4312 we normally prevent this case when mmx is not available. However
4313 some ABIs may require the result to be returned like DImode. */
4314 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
4315 regno = TARGET_MMX ? FIRST_MMX_REG : 0;
4317 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
4318 we prevent this case when sse is not available. However some ABIs
4319 may require the result to be returned like integer TImode. */
4320 else if (mode == TImode
4321 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
4322 regno = TARGET_SSE ? FIRST_SSE_REG : 0;
4324 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
4325 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
4326 regno = FIRST_FLOAT_REG;
4328 /* Most things go in %eax. */
4331 /* Override FP return register with %xmm0 for local functions when
4332 SSE math is enabled or for functions with sseregparm attribute. */
4333 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
/* ix86_function_sseregparm yields a level: 1 or more moves SFmode to
   FIRST_SSE_REG, exactly 2 moves DFmode as well.  */
4335 int sse_level = ix86_function_sseregparm (fntype, fn);
4336 if ((sse_level >= 1 && mode == SFmode)
4337 || (sse_level == 2 && mode == DFmode))
4338 regno = FIRST_SSE_REG;
4341 return gen_rtx_REG (orig_mode, regno);
/* Return-value location for 64-bit targets; ix86_function_value_1 calls
   this when TARGET_64BIT holds and the MS ABI is not in use.  A null
   VALTYPE marks a libcall and is answered with a plain REG in MODE;
   otherwise construct_container builds the location, with a REG in
   ORIG_MODE as the fallback assigned at the end.  */
4345 function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
4350 /* Handle libcalls, which don't provide a type node. */
4351 if (valtype == NULL)
/* NOTE(review): the mode tests that choose among the three returns below
   are not visible here.  */
4363 return gen_rtx_REG (mode, FIRST_SSE_REG);
4366 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
4370 return gen_rtx_REG (mode, 0);
4374 ret = construct_container (mode, orig_mode, valtype, 1,
4375 REGPARM_MAX, SSE_REGPARM_MAX,
4376 x86_64_int_return_registers, 0);
4378 /* For zero sized structures, construct_container returns NULL, but we
4379 need to keep rest of compiler happy by returning meaningful value. */
4381 ret = gen_rtx_REG (orig_mode, 0);
/* Return-value register for the Microsoft 64-bit ABI (reached from
   ix86_function_value_1 under TARGET_64BIT_MS_ABI), as a REG rtx in
   ORIG_MODE.  regno starts at 0; in the visible code SFmode, DFmode,
   vector modes and modes of size 16 select FIRST_SSE_REG.  */
4387 function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode)
4389 unsigned int regno = 0;
4393 if (mode == SFmode || mode == DFmode)
4394 regno = FIRST_SSE_REG;
4395 else if (VECTOR_MODE_P (mode) || GET_MODE_SIZE (mode) == 16)
4396 regno = FIRST_SSE_REG;
4399 return gen_rtx_REG (orig_mode, regno);
/* Common worker for ix86_function_value and ix86_libcall_value.
   FNTYPE_OR_DECL may be a declaration or a type: when it is a DECL it is
   used as FN and its TREE_TYPE becomes the function type; otherwise it is
   used as the function type itself.  The result comes from the MS 64-bit,
   the 64-bit or the 32-bit helper, tested in that order.  */
4403 ix86_function_value_1 (tree valtype, tree fntype_or_decl,
4404 enum machine_mode orig_mode, enum machine_mode mode)
4409 if (fntype_or_decl && DECL_P (fntype_or_decl))
4410 fn = fntype_or_decl;
4411 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
4413 if (TARGET_64BIT_MS_ABI)
4414 return function_value_ms_64 (orig_mode, mode);
4415 else if (TARGET_64BIT)
4416 return function_value_64 (orig_mode, mode, valtype);
4418 return function_value_32 (orig_mode, mode, fntype, fn);
/* Return the location of a function value of type VALTYPE.  ORIG_MODE is
   TYPE_MODE (valtype) and MODE is type_natural_mode (valtype); both are
   forwarded, with FNTYPE_OR_DECL, to ix86_function_value_1.  OUTGOING is
   not used.  */
4422 ix86_function_value (tree valtype, tree fntype_or_decl,
4423 bool outgoing ATTRIBUTE_UNUSED)
4425 enum machine_mode mode, orig_mode;
4427 orig_mode = TYPE_MODE (valtype);
4428 mode = type_natural_mode (valtype);
4429 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
/* Return the location of a library-call result of mode MODE.  No type or
   function information is available, so NULL is passed for both and MODE
   serves as both the original and the natural mode.  */
4433 ix86_libcall_value (enum machine_mode mode)
4435 return ix86_function_value_1 (NULL, NULL, mode, mode);
4438 /* Return true iff type is returned in memory.
   This is the 32-bit variant selected by ix86_return_in_memory.  The
   visible tests cover BLKmode, small aggregates under MS_AGGREGATE_RETURN,
   and vector or TImode values, whose answer depends on TARGET_MMX or
   TARGET_SSE.  */
4441 return_in_memory_32 (tree type, enum machine_mode mode)
4445 if (mode == BLKmode)
4448 size = int_size_in_bytes (type);
4450 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
4453 if (VECTOR_MODE_P (mode) || mode == TImode)
4455 /* User-created vectors small enough to fit in EAX. */
4459 /* MMX/3dNow values are returned in MM0,
4460 except when it doesn't exist. */
4462 return (TARGET_MMX ? 0 : 1);
4464 /* SSE values are returned in XMM0, except when it doesn't exist. */
4466 return (TARGET_SSE ? 0 : 1);
/* 64-bit variant of the return-in-memory test: the result is the logical
   negation of examine_argument called with 1 as its third argument.  The
   register counts it stores through the two pointers are not used here.  */
4481 return_in_memory_64 (tree type, enum machine_mode mode)
4483 int needed_intregs, needed_sseregs;
4484 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
/* Microsoft 64-bit ABI variant of the return-in-memory test, based on the
   size of TYPE in bytes: 16-byte vector modes are handled first, and any
   other size except 1, 2, 4 or 8 makes the final expression true.  */
4488 return_in_memory_ms_64 (tree type, enum machine_mode mode)
4490 HOST_WIDE_INT size = int_size_in_bytes (type);
4492 /* __m128 and friends are returned in xmm0. */
4493 if (size == 16 && VECTOR_MODE_P (mode))
4496 /* Otherwise, the size must be exactly in [1248]. */
4497 return (size != 1 && size != 2 && size != 4 && size != 8);
/* Decide whether a value of TYPE is returned in memory by dispatching on
   the target ABI: MS 64-bit first, then 64-bit, then 32-bit.  The mode
   given to the helpers is type_natural_mode (type).  */
4501 ix86_return_in_memory (tree type)
4503 enum machine_mode mode = type_natural_mode (type);
4505 if (TARGET_64BIT_MS_ABI)
4506 return return_in_memory_ms_64 (type, mode);
4507 else if (TARGET_64BIT)
4508 return return_in_memory_64 (type, mode);
4510 return return_in_memory_32 (type, mode);
4513 /* Return true iff TYPE is returned in memory. This version is used
4514 on Solaris 10. It is similar to the generic ix86_return_in_memory,
4515 but differs notably in that when MMX is available, 8-byte vectors
4516 are returned in memory, rather than in MMX registers.
   NOTE(review): the polarity stated here follows return_in_memory_64,
   whose result is returned unchanged below -- confirm that the 32-bit
   paths use the same convention.  */
4519 ix86_sol10_return_in_memory (tree type)
4522 enum machine_mode mode = type_natural_mode (type);
4525 return return_in_memory_64 (type, mode);
4527 if (mode == BLKmode)
4530 size = int_size_in_bytes (type);
4532 if (VECTOR_MODE_P (mode))
4534 /* Return in memory only if MMX registers *are* available. This
4535 seems backwards, but it is consistent with the existing
4542 else if (mode == TImode)
4544 else if (mode == XFmode)
4550 /* When returning SSE vector types, we have a choice of either
4551 (1) being abi incompatible with a -march switch, or
4552 (2) generating an error.
4553 Given no good solution, I think the safest thing is one warning.
4554 The user won't be able to use -Werror, but....
4556 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
4557 called in response to actually generating a caller or callee that
4558 uses such a type. As opposed to RETURN_IN_MEMORY, which is called
4559 via aggregate_value_p for general type probing from tree-ssa. */
4562 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
/* The two static flags are tested before each warning below.
   NOTE(review): presumably each is set once its warning has been issued,
   giving one warning per kind -- the assignments are not visible here.  */
4564 static bool warnedsse, warnedmmx;
4566 if (!TARGET_64BIT && type)
4568 /* Look at the return type of the function, not the function type. */
4569 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
/* 16-byte vector returns without SSE, then 8-byte vector returns without
   MMX.  */
4571 if (!TARGET_SSE && !warnedsse)
4574 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
4577 warning (0, "SSE vector return without SSE enabled "
4582 if (!TARGET_MMX && !warnedmmx)
4584 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
4587 warning (0, "MMX vector return without MMX enabled "
4597 /* Create the va_list data type.
   Without TARGET_64BIT, or with the MS 64-bit ABI, this is a pointer to
   char.  Otherwise it is a one-element array of a record named
   __va_list_tag with the fields gp_offset, fp_offset, overflow_arg_area
   and reg_save_area, chained in that order.  */
4600 ix86_build_builtin_va_list (void)
4602 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
4604 /* For i386 we use plain pointer to argument area. */
4605 if (!TARGET_64BIT || TARGET_64BIT_MS_ABI)
4606 return build_pointer_type (char_type_node);
4608 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
4609 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
4611 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
4612 unsigned_type_node);
4613 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
4614 unsigned_type_node);
4615 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
4617 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
/* Publish the two counter fields through the va_list_*_counter_field
   variables.  */
4620 va_list_gpr_counter_field = f_gpr;
4621 va_list_fpr_counter_field = f_fpr;
4623 DECL_FIELD_CONTEXT (f_gpr) = record;
4624 DECL_FIELD_CONTEXT (f_fpr) = record;
4625 DECL_FIELD_CONTEXT (f_ovf) = record;
4626 DECL_FIELD_CONTEXT (f_sav) = record;
/* Name the record and chain the fields gpr -> fpr -> ovf -> sav before
   laying the type out.  */
4628 TREE_CHAIN (record) = type_decl;
4629 TYPE_NAME (record) = type_decl;
4630 TYPE_FIELDS (record) = f_gpr;
4631 TREE_CHAIN (f_gpr) = f_fpr;
4632 TREE_CHAIN (f_fpr) = f_ovf;
4633 TREE_CHAIN (f_ovf) = f_sav;
4635 layout_type (record);
4637 /* The correct type is an array type of one element. */
4638 return build_array_type (record, build_index_type (size_zero_node));
4641 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
   Emits the stores that spill the unnamed integer parameter registers,
   and the sse_prologue_save insn for the SSE registers, into a save area
   addressed from frame_pointer_rtx.  CUM describes the registers already
   taken by named arguments.  */
4644 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
/* Both sizes are zero: presumably nothing needs saving (the guarded
   statement is not visible here).  */
4654 if (! cfun->va_list_gpr_size && ! cfun->va_list_fpr_size)
4657 /* Indicate to allocate space on the stack for varargs save area. */
4658 ix86_save_varrargs_registers = 1;
4659 cfun->stack_alignment_needed = 128;
4661 save_area = frame_pointer_rtx;
4662 set = get_varargs_alias_set ();
/* Integer registers: slot I of the save area, at I * UNITS_PER_WORD,
   receives x86_64_int_parameter_registers[I], starting at cum->regno.  */
4664 for (i = cum->regno;
4666 && i < cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
4669 mem = gen_rtx_MEM (Pmode,
4670 plus_constant (save_area, i * UNITS_PER_WORD));
4671 MEM_NOTRAP_P (mem) = 1;
4672 set_mem_alias_set (mem, set);
4673 emit_move_insn (mem, gen_rtx_REG (Pmode,
4674 x86_64_int_parameter_registers[i]));
4677 if (cum->sse_nregs && cfun->va_list_fpr_size)
4679 /* Now emit code to save SSE registers. The AX parameter contains number
4680 of SSE parameter registers used to call this function. We use
4681 sse_prologue_save insn template that produces computed jump across
4682 SSE saves. We need some preparation work to get this working. */
4684 label = gen_label_rtx ();
4685 label_ref = gen_rtx_LABEL_REF (Pmode, label);
4687 /* Compute address to jump to :
4688 label - 5*eax + nnamed_sse_arguments*5
   NOTE(review): the constant below is built from cum->sse_regno * 4, so
   the factor 5 in this formula looks stale -- confirm.  */
4689 tmp_reg = gen_reg_rtx (Pmode);
4690 nsse_reg = gen_reg_rtx (Pmode);
4691 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
4692 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
4693 gen_rtx_MULT (Pmode, nsse_reg,
4698 gen_rtx_CONST (DImode,
4699 gen_rtx_PLUS (DImode,
4701 GEN_INT (cum->sse_regno * 4))));
4703 emit_move_insn (nsse_reg, label_ref);
4704 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
4706 /* Compute address of memory block we save into. We always use pointer
4707 pointing 127 bytes after first byte to store - this is needed to keep
4708 instruction size limited by 4 bytes. */
4709 tmp_reg = gen_reg_rtx (Pmode);
4710 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
4711 plus_constant (save_area,
4712 8 * REGPARM_MAX + 127)));
4713 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
4714 MEM_NOTRAP_P (mem) = 1;
4715 set_mem_alias_set (mem, set);
4716 set_mem_align (mem, BITS_PER_WORD);
4718 /* And finally do the dirty job! */
4719 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
4720 GEN_INT (cum->sse_regno), label));
/* Microsoft 64-bit ABI counterpart of setup_incoming_varargs_64.  For each
   register index from cum->regno up to REGPARM_MAX, store
   x86_64_ms_abi_int_parameter_registers[i] at offset i * UNITS_PER_WORD
   from virtual_incoming_args_rtx, using the varargs alias set.  */
4725 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
4727 int set = get_varargs_alias_set ();
4730 for (i = cum->regno; i < REGPARM_MAX; i++)
4734 mem = gen_rtx_MEM (Pmode,
4735 plus_constant (virtual_incoming_args_rtx,
4736 i * UNITS_PER_WORD));
4737 MEM_NOTRAP_P (mem) = 1;
4738 set_mem_alias_set (mem, set);
4740 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
4741 emit_move_insn (mem, reg);
/* Entry point that prepares the varargs register save.  NEXT_CUM is
   advanced over the argument described by MODE and TYPE with
   function_arg_advance and is then handed to the MS-ABI or the generic
   64-bit worker.  PRETEND_SIZE is not used.
   NOTE(review): how NEXT_CUM is initialised, and the conditions around
   the advance and the dispatch, are not fully visible here.  */
4746 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4747 tree type, int *pretend_size ATTRIBUTE_UNUSED,
4750 CUMULATIVE_ARGS next_cum;
4754 /* This argument doesn't appear to be used anymore. Which is good,
4755 because the old code here didn't suppress rtl generation. */
4756 gcc_assert (!no_rtl);
/* stdarg_p is true when the function type has an argument list whose last
   entry is not void_type_node.  */
4761 fntype = TREE_TYPE (current_function_decl);
4762 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
4763 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
4764 != void_type_node));
4766 /* For varargs, we do not want to skip the dummy va_dcl argument.
4767 For stdargs, we do want to skip the last named argument. */
4770 function_arg_advance (&next_cum, mode, type, 1);
4772 if (TARGET_64BIT_MS_ABI)
4773 setup_incoming_varargs_ms_64 (&next_cum);
4775 setup_incoming_varargs_64 (&next_cum);
4778 /* Implement va_start.
   Targets that are not 64-bit, or that use the MS 64-bit ABI, go through
   std_expand_builtin_va_start.  Otherwise the four fields of the va_list
   record are initialised from current_function_args_info: gp_offset and
   fp_offset from the register counts, overflow_arg_area from the
   incoming-argument pointer plus the words already used, and
   reg_save_area from frame_pointer_rtx.  */
4781 ix86_va_start (tree valist, rtx nextarg)
4783 HOST_WIDE_INT words, n_gpr, n_fpr;
4784 tree f_gpr, f_fpr, f_ovf, f_sav;
4785 tree gpr, fpr, ovf, sav, t;
4788 /* Only 64bit target needs something special. */
4789 if (!TARGET_64BIT || TARGET_64BIT_MS_ABI)
4791 std_expand_builtin_va_start (valist, nextarg);
/* Fetch the FIELD_DECLs by walking the field chain of the record type,
   then build a COMPONENT_REF for each field of *VALIST.  */
4795 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4796 f_fpr = TREE_CHAIN (f_gpr);
4797 f_ovf = TREE_CHAIN (f_fpr);
4798 f_sav = TREE_CHAIN (f_ovf);
4800 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
4801 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
4802 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
4803 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
4804 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
4806 /* Count number of gp and fp argument registers used. */
4807 words = current_function_args_info.words;
4808 n_gpr = current_function_args_info.regno;
4809 n_fpr = current_function_args_info.sse_regno;
/* gp_offset = n_gpr * 8, stored only when va_list_gpr_size is nonzero.  */
4811 if (cfun->va_list_gpr_size)
4813 type = TREE_TYPE (gpr);
4814 t = build2 (GIMPLE_MODIFY_STMT, type, gpr,
4815 build_int_cst (type, n_gpr * 8));
4816 TREE_SIDE_EFFECTS (t) = 1;
4817 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
/* fp_offset = n_fpr * 16 + 8 * REGPARM_MAX, stored only when
   va_list_fpr_size is nonzero.  */
4820 if (cfun->va_list_fpr_size)
4822 type = TREE_TYPE (fpr);
4823 t = build2 (GIMPLE_MODIFY_STMT, type, fpr,
4824 build_int_cst (type, n_fpr * 16 + 8*REGPARM_MAX));
4825 TREE_SIDE_EFFECTS (t) = 1;
4826 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4829 /* Find the overflow area. */
4830 type = TREE_TYPE (ovf);
4831 t = make_tree (type, virtual_incoming_args_rtx);
4833 t = build2 (POINTER_PLUS_EXPR, type, t,
4834 size_int (words * UNITS_PER_WORD));
4835 t = build2 (GIMPLE_MODIFY_STMT, type, ovf, t);
4836 TREE_SIDE_EFFECTS (t) = 1;
4837 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4839 if (cfun->va_list_gpr_size || cfun->va_list_fpr_size)
4841 /* Find the register save area.
4842 Prologue of the function save it right above stack frame. */
4843 type = TREE_TYPE (sav);
4844 t = make_tree (type, frame_pointer_rtx);
4845 t = build2 (GIMPLE_MODIFY_STMT, type, sav, t);
4846 TREE_SIDE_EFFECTS (t) = 1;
4847 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4851 /* Implement va_arg.
   Targets that are not 64-bit, or that use the MS 64-bit ABI, defer to
   std_gimplify_va_arg_expr.  Otherwise the code builds references to the
   four va_list fields, emits statements into PRE_P that try to take the
   argument from the register save area (guarded by range checks on
   gp_offset and fp_offset), and falls back to the overflow area.  The
   result is an indirect reference through the computed address.  */
4854 ix86_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p)
4856 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
4857 tree f_gpr, f_fpr, f_ovf, f_sav;
4858 tree gpr, fpr, ovf, sav, t;
4860 tree lab_false, lab_over = NULL_TREE;
4865 enum machine_mode nat_mode;
4867 /* Only 64bit target needs something special. */
4868 if (!TARGET_64BIT || TARGET_64BIT_MS_ABI)
4869 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
4871 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4872 f_fpr = TREE_CHAIN (f_gpr);
4873 f_ovf = TREE_CHAIN (f_fpr);
4874 f_sav = TREE_CHAIN (f_ovf);
4876 valist = build_va_arg_indirect_ref (valist);
4877 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
4878 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
4879 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
4880 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
/* An argument passed by reference is fetched as a pointer: TYPE is
   replaced by a pointer type below and the result is dereferenced once
   more at the end (presumably both under indirect_p; the guards are not
   visible here).  rsize is the size rounded up to whole words.  */
4882 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
4884 type = build_pointer_type (type);
4885 size = int_size_in_bytes (type);
4886 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4888 nat_mode = type_natural_mode (type);
4889 container = construct_container (nat_mode, TYPE_MODE (type), type, 0,
4890 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
4892 /* Pull the value out of the saved registers. */
4894 addr = create_tmp_var (ptr_type_node, "addr");
4895 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
4899 int needed_intregs, needed_sseregs;
4901 tree int_addr, sse_addr;
4903 lab_false = create_artificial_label ();
4904 lab_over = create_artificial_label ();
4906 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
/* A temporary is needed when the container is not a single register and
   TYPE's alignment exceeds 64 bits with integer registers involved, or
   exceeds 128 bits in any case.  */
4908 need_temp = (!REG_P (container)
4909 && ((needed_intregs && TYPE_ALIGN (type) > 64)
4910 || TYPE_ALIGN (type) > 128));
4912 /* In case we are passing structure, verify that it is consecutive block
4913 on the register save area. If not we need to do moves. */
4914 if (!need_temp && !REG_P (container))
4916 /* Verify that all registers are strictly consecutive */
4917 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
4921 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
4923 rtx slot = XVECEXP (container, 0, i);
4924 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
4925 || INTVAL (XEXP (slot, 1)) != i * 16)
4933 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
4935 rtx slot = XVECEXP (container, 0, i);
4936 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
4937 || INTVAL (XEXP (slot, 1)) != i * 8)
4949 int_addr = create_tmp_var (ptr_type_node, "int_addr");
4950 DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
4951 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
4952 DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();
4955 /* First ensure that we fit completely in registers. */
4958 t = build_int_cst (TREE_TYPE (gpr),
4959 (REGPARM_MAX - needed_intregs + 1) * 8);
4960 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
4961 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
4962 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
4963 gimplify_and_add (t, pre_p);
4967 t = build_int_cst (TREE_TYPE (fpr),
4968 (SSE_REGPARM_MAX - needed_sseregs + 1) * 16
4970 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
4971 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
4972 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
4973 gimplify_and_add (t, pre_p);
4976 /* Compute index to start of area used for integer regs. */
4979 /* int_addr = gpr + sav; */
4980 t = fold_convert (sizetype, gpr);
4981 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
4982 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, int_addr, t);
4983 gimplify_and_add (t, pre_p);
4987 /* sse_addr = fpr + sav; */
4988 t = fold_convert (sizetype, fpr);
4989 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
4990 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, sse_addr, t);
4991 gimplify_and_add (t, pre_p);
/* Copy path: point ADDR at a temporary of TYPE and move each container
   slot from the integer or SSE save area into the temporary at the slot's
   recorded offset.  */
4996 tree temp = create_tmp_var (type, "va_arg_tmp");
4999 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
5000 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, addr, t);
5001 gimplify_and_add (t, pre_p);
5003 for (i = 0; i < XVECLEN (container, 0); i++)
5005 rtx slot = XVECEXP (container, 0, i);
5006 rtx reg = XEXP (slot, 0);
5007 enum machine_mode mode = GET_MODE (reg);
5008 tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
5009 tree addr_type = build_pointer_type (piece_type);
5012 tree dest_addr, dest;
5014 if (SSE_REGNO_P (REGNO (reg)))
5016 src_addr = sse_addr;
5017 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
5021 src_addr = int_addr;
5022 src_offset = REGNO (reg) * 8;
5024 src_addr = fold_convert (addr_type, src_addr);
5025 src_addr = fold_build2 (POINTER_PLUS_EXPR, addr_type, src_addr,
5026 size_int (src_offset));
5027 src = build_va_arg_indirect_ref (src_addr);
5029 dest_addr = fold_convert (addr_type, addr);
5030 dest_addr = fold_build2 (POINTER_PLUS_EXPR, addr_type, dest_addr,
5031 size_int (INTVAL (XEXP (slot, 1))));
5032 dest = build_va_arg_indirect_ref (dest_addr);
5034 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, dest, src);
5035 gimplify_and_add (t, pre_p);
/* Advance gp_offset by 8 and fp_offset by 16 per register consumed.  */
5041 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
5042 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
5043 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (gpr), gpr, t);
5044 gimplify_and_add (t, pre_p);
5048 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
5049 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
5050 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (fpr), fpr, t);
5051 gimplify_and_add (t, pre_p);
/* Register path done: jump over the overflow-area code, which begins at
   lab_false.  */
5054 t = build1 (GOTO_EXPR, void_type_node, lab_over);
5055 gimplify_and_add (t, pre_p);
5057 t = build1 (LABEL_EXPR, void_type_node, lab_false);
5058 append_to_statement_list (t, pre_p);
5061 /* ... otherwise out of the overflow area. */
5063 /* Care for on-stack alignment if needed. */
5064 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64
5065 || integer_zerop (TYPE_SIZE (type)))
5069 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
5070 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (ovf), ovf,
5071 size_int (align - 1));
5072 t = fold_convert (sizetype, t);
5073 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
5075 t = fold_convert (TREE_TYPE (ovf), t);
5077 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
/* ADDR takes the overflow pointer, and overflow_arg_area then moves past
   the argument by rsize words.  */
5079 t2 = build2 (GIMPLE_MODIFY_STMT, void_type_node, addr, t);
5080 gimplify_and_add (t2, pre_p);
5082 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (t), t,
5083 size_int (rsize * UNITS_PER_WORD));
5084 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (ovf), ovf, t);
5085 gimplify_and_add (t, pre_p);
5089 t = build1 (LABEL_EXPR, void_type_node, lab_over);
5090 append_to_statement_list (t, pre_p);
5093 ptrtype = build_pointer_type (type);
5094 addr = fold_convert (ptrtype, addr);
5097 addr = build_va_arg_indirect_ref (addr);
5098 return build_va_arg_indirect_ref (addr);
5101 /* Return nonzero if OPNUM's MEM should be matched
5102 in movabs* patterns.
   INSN's pattern must be a SET, or a PARALLEL whose first element is a
   SET.  Operand OPNUM of that SET, with any SUBREGs stripped, must be a
   MEM.  The result is nonzero when volatile_ok is set or the MEM is not
   volatile.  */
5105 ix86_check_movabs (rtx insn, int opnum)
5109 set = PATTERN (insn);
5110 if (GET_CODE (set) == PARALLEL)
5111 set = XVECEXP (set, 0, 0);
5112 gcc_assert (GET_CODE (set) == SET);
5113 mem = XEXP (set, opnum);
5114 while (GET_CODE (mem) == SUBREG)
5115 mem = SUBREG_REG (mem);
5116 gcc_assert (MEM_P (mem));
5117 return (volatile_ok || !MEM_VOLATILE_P (mem));
5120 /* Initialize the table of extra 80387 mathematical constants.
   Each of the five decimal strings is parsed into
   ext_80387_constants_table and converted to XFmode; the flag
   ext_80387_constants_init is then set so that callers can skip the
   work.  */
5123 init_ext_80387_constants (void)
5125 static const char * cst[5] =
5127 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
5128 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
5129 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
5130 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
5131 "3.1415926535897932385128089594061862044", /* 4: fldpi */
5135 for (i = 0; i < 5; i++)
5137 real_from_string (&ext_80387_constants_table[i], cst[i]);
5138 /* Ensure each constant is rounded to XFmode precision. */
5139 real_convert (&ext_80387_constants_table[i],
5140 XFmode, &ext_80387_constants_table[i]);
5143 ext_80387_constants_init = 1;
5146 /* Return true if the constant is something that can be loaded with
5147 a special instruction.
   X must be a CONST_DOUBLE in an x87 float mode.  The visible tests cover
   zero, one, the five entries of ext_80387_constants_table, negative zero
   and minus one.
   NOTE(review): the value returned for each case is not visible here;
   standard_80387_constant_opcode switches on it, so it is presumably an
   index rather than a plain boolean -- confirm.  */
5150 standard_80387_constant_p (rtx x)
5152 enum machine_mode mode = GET_MODE (x);
5156 if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
5159 if (x == CONST0_RTX (mode))
5161 if (x == CONST1_RTX (mode))
5164 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
5166 /* For XFmode constants, try to find a special 80387 instruction when
5167 optimizing for size or on those CPUs that benefit from them. */
5169 && (optimize_size || TARGET_EXT_80387_CONSTANTS))
/* The table is filled on first use.  */
5173 if (! ext_80387_constants_init)
5174 init_ext_80387_constants ();
5176 for (i = 0; i < 5; i++)
5177 if (real_identical (&r, &ext_80387_constants_table[i]))
5181 /* Load of the constant -0.0 or -1.0 will be split as
5182 fldz;fchs or fld1;fchs sequence. */
5183 if (real_isnegzero (&r))
5185 if (real_identical (&r, &dconstm1))
5191 /* Return the opcode of the special instruction to be used to load
   the constant X; the choice is made by switching on
   standard_80387_constant_p (X).
   NOTE(review): the end of this comment and the switch cases are not
   visible here -- confirm against the full function.
5195 standard_80387_constant_opcode (rtx x)
5197 switch (standard_80387_constant_p (x))
5221 /* Return the CONST_DOUBLE representing the 80387 constant that is
5222 loaded by the specified special instruction. The argument IDX
5223 matches the return value from standard_80387_constant_p. */
5226 standard_80387_constant_rtx (int idx)
/* The table is filled on first use.  NOTE(review): the mapping from IDX
   to the table index I used below is not visible here.  */
5230 if (! ext_80387_constants_init)
5231 init_ext_80387_constants ();
5247 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
5251 /* Return 1 if MODE is a valid mode for SSE.
   NOTE(review): the body, and therefore the set of accepted modes, is not
   visible here.  */
5253 standard_sse_mode_p (enum machine_mode mode)
5270 /* Return 1 if X is FP constant we can load to SSE register w/o using memory.
   The visible code tests X against const0_rtx and the zero constant of
   its own mode, and for an all-ones vector in a standard SSE mode it
   returns 2 with TARGET_SSE2 and -1 without.
   NOTE(review): the end of this comment and the other return statements
   are not visible here -- confirm against the full function.
5273 standard_sse_constant_p (rtx x)
5275 enum machine_mode mode = GET_MODE (x);
5277 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
5279 if (vector_all_ones_operand (x, mode)
5280 && standard_sse_mode_p (mode))
5281 return TARGET_SSE2 ? 2 : -1;
5286 /* Return the opcode of the special instruction to be used to load
   the constant X into an SSE register.  The visible templates are xorps,
   xorpd and pxor, chosen from get_attr_mode (INSN), and pcmpeqd.
   NOTE(review): the end of this comment and the case labels of the switch
   are not visible here -- confirm which value of standard_sse_constant_p
   selects which template.
5290 standard_sse_constant_opcode (rtx insn, rtx x)
5292 switch (standard_sse_constant_p (x))
5295 if (get_attr_mode (insn) == MODE_V4SF)
5296 return "xorps\t%0, %0";
5297 else if (get_attr_mode (insn) == MODE_V2DF)
5298 return "xorpd\t%0, %0";
5300 return "pxor\t%0, %0";
5302 return "pcmpeqd\t%0, %0";
5307 /* Returns 1 if OP contains a symbol reference, i.e. a SYMBOL_REF or LABEL_REF. The operands of OP are walked recursively from the last to the first using its rtx format string: vector operands element by element, and 'e' operands directly. */
5310 symbolic_reference_mentioned_p (rtx op)
5315 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
5318 fmt = GET_RTX_FORMAT (GET_CODE (op));
5319 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
5325 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
5326 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
5330 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
5337 /* Return 1 if it is appropriate to emit `ret' instructions in the
5338 body of a function. Do this only if the epilogue is simple, needing a
5339 couple of insns. Prior to reloading, we can't tell how many registers
5340 must be saved, so return 0 then. Return 0 if there is no frame
5341 marker to de-allocate. */
5344 ix86_can_use_return_insn_p (void)
5346 struct ix86_frame frame;
5348 if (! reload_completed || frame_pointer_needed)
5351 /* Don't allow more than 32k pop (the test below is against 32768),
5352 since that's all we can do with one instruction. */
5353 if (current_function_pops_args
5354 && current_function_args_size >= 32768)
/* A bare `ret' is possible only when the computed frame needs no stack
   allocation and saves no registers.  */
5357 ix86_compute_frame_layout (&frame);
5358 return frame.to_allocate == 0 && frame.nregs == 0;
5361 /* Value should be nonzero if functions must have frame pointers.
5362 Zero means the frame pointer need not be set up (and parms may
5363 be accessed via the stack pointer) in functions that seem suitable.
   The visible conditions are: the function accesses previous frames;
   SUBTARGET_FRAME_POINTER_REQUIRED; TARGET_OMIT_LEAF_FRAME_POINTER in a
   function that is not a leaf or that calls a TLS descriptor; and
   profiling.  */
5366 ix86_frame_pointer_required (void)
5368 /* If we accessed previous frames, then the generated code expects
5369 to be able to access the saved ebp value in our frame. */
5370 if (cfun->machine->accesses_prev_frame)
5373 /* Several x86 os'es need a frame pointer for other reasons,
5374 usually pertaining to setjmp. */
5375 if (SUBTARGET_FRAME_POINTER_REQUIRED)
5378 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
5379 the frame pointer by default. Turn it back on now if we've not
5380 got a leaf function. */
5381 if (TARGET_OMIT_LEAF_FRAME_POINTER
5382 && (!current_function_is_leaf
5383 || ix86_current_function_calls_tls_descriptor))
5386 if (current_function_profile)
5392 /* Record that the current function accesses previous call frames.
   The flag set here is the one tested first by
   ix86_frame_pointer_required.  */
5395 ix86_setup_frame_addresses (void)
5397 cfun->machine->accesses_prev_frame = 1;
/* USE_HIDDEN_LINKONCE is 1 when the assembler supports hidden symbols
   (HAVE_GAS_HIDDEN) together with one-only sections (SUPPORTS_ONE_ONLY),
   or on Mach-O targets; otherwise it is 0.  get_pc_thunk_name and
   ix86_file_end test it.  */
5400 #if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
5401 # define USE_HIDDEN_LINKONCE 1
5403 # define USE_HIDDEN_LINKONCE 0
/* Bit mask indexed by register number: output_set_got sets a bit for each
   register it requests a pc thunk for, and ix86_file_end emits a thunk
   for every set bit.  */
5406 static int pic_labels_used;
5408 /* Fills in the label name that should be used for a pc thunk for
5409 the given register.
   NAME receives either "__i686.get_pc_thunk." followed by the register
   name, when USE_HIDDEN_LINKONCE is set, or an internal "LPR" label
   numbered by REGNO.  Only valid for 32-bit targets.
   NOTE(review): sprintf is unbounded; the 32-byte buffer has to hold the
   20-character prefix, the register name and the terminator.  */
5412 get_pc_thunk_name (char name[32], unsigned int regno)
5414 gcc_assert (!TARGET_64BIT);
5416 if (USE_HIDDEN_LINKONCE)
5417 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
5419 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
5423 /* This function generates code for -fpic that loads %ebx with
5424 the return address of the caller and then returns.
   More precisely, for each of the registers 0..7 whose bit is set in
   pic_labels_used it emits a thunk that loads the word at the stack
   pointer into that register and returns.  The section and symbol
   directives differ between the Darwin, hidden link-once and plain text
   section cases.  */
5427 ix86_file_end (void)
5432 for (regno = 0; regno < 8; ++regno)
/* Skip registers for which no thunk was requested.  */
5436 if (! ((pic_labels_used >> regno) & 1))
5439 get_pc_thunk_name (name, regno);
5444 switch_to_section (darwin_sections[text_coal_section]);
5445 fputs ("\t.weak_definition\t", asm_out_file);
5446 assemble_name (asm_out_file, name);
5447 fputs ("\n\t.private_extern\t", asm_out_file);
5448 assemble_name (asm_out_file, name);
5449 fputs ("\n", asm_out_file);
5450 ASM_OUTPUT_LABEL (asm_out_file, name);
5454 if (USE_HIDDEN_LINKONCE)
5458 decl = build_decl (FUNCTION_DECL, get_identifier (name),
5460 TREE_PUBLIC (decl) = 1;
5461 TREE_STATIC (decl) = 1;
5462 DECL_ONE_ONLY (decl) = 1;
5464 (*targetm.asm_out.unique_section) (decl, 0);
5465 switch_to_section (get_named_section (decl, NULL, 0));
5467 (*targetm.asm_out.globalize_label) (asm_out_file, name);
5468 fputs ("\t.hidden\t", asm_out_file);
5469 assemble_name (asm_out_file, name);
5470 fputc ('\n', asm_out_file);
5471 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
5475 switch_to_section (text_section);
5476 ASM_OUTPUT_LABEL (asm_out_file, name);
/* Thunk body: mov (%esp), reg followed by ret.  */
5479 xops[0] = gen_rtx_REG (SImode, regno);
5480 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
5481 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
5482 output_asm_insn ("ret", xops);
5485 if (NEED_INDICATE_EXEC_STACK)
5486 file_end_indicate_exec_stack ();
5489 /* Emit code for the SET_GOT patterns.
   Three sequences are visible.  VxWorks RTP with PIC loads the GOT base
   through VXWORKS_GOTT_BASE and VXWORKS_GOTT_INDEX.  Without deep branch
   prediction, or without PIC, a label-based mov or call/pop sequence is
   used.  Otherwise a per-register pc thunk is called and recorded in
   pic_labels_used.  The final add applies GOT_SYMBOL_NAME.  */
5492 output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
5498 if (TARGET_VXWORKS_RTP && flag_pic)
5500 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
5501 xops[2] = gen_rtx_MEM (Pmode,
5502 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
5503 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
5505 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
5506 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
5507 an unadorned address. */
5508 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
5509 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
5510 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
5514 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
5516 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
/* Use the caller's LABEL when given, otherwise a fresh one.  */
5518 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
5521 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
5523 output_asm_insn ("call\t%a2", xops);
5526 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
5527 is what will be referenced by the Mach-O PIC subsystem. */
5529 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
5532 (*targetm.asm_out.internal_label) (asm_out_file, "L",
5533 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
5536 output_asm_insn ("pop{l}\t%0", xops);
/* Thunk path: mark DEST's register in pic_labels_used so that
   ix86_file_end emits the matching thunk, then call it.  */
5541 get_pc_thunk_name (name, REGNO (dest));
5542 pic_labels_used |= 1 << REGNO (dest);
5544 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
5545 xops[2] = gen_rtx_MEM (QImode, xops[2]);
5546 output_asm_insn ("call\t%X2", xops);
5547 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
5548 is what will be referenced by the Mach-O PIC subsystem. */
5551 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
5553 targetm.asm_out.internal_label (asm_out_file, "L",
5554 CODE_LABEL_NUMBER (label));
5561 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
5562 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
5564 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
5569 /* Generate a "push" pattern for input ARG. The visible part of the SET built below uses a PRE_DEC of stack_pointer_rtx in Pmode; NOTE(review): the function header and the remaining operands are not visible here. */
5574 return gen_rtx_SET (VOIDmode,
5576 gen_rtx_PRE_DEC (Pmode,
5577 stack_pointer_rtx)),
5581 /* Return >= 0 if there is an unused call-clobbered register available
5582 for the entire function.
   Only leaf functions without profiling and without TLS descriptor calls
   are considered.  Registers 2, 1 and 0 are tried in that order with
   df_regs_ever_live_p.  INVALID_REGNUM is returned when none qualifies.  */
5585 ix86_select_alt_pic_regnum (void)
5587 if (current_function_is_leaf && !current_function_profile
5588 && !ix86_current_function_calls_tls_descriptor)
5591 for (i = 2; i >= 0; --i)
5592 if (!df_regs_ever_live_p (i))
5596 return INVALID_REGNUM;
5599 /* Return 1 if we need to save REGNO.
   MAYBE_EH_RETURN is only consulted together with
   current_function_calls_eh_return, to decide whether REGNO is compared
   against the EH_RETURN_DATA_REGNO registers.  */
5601 ix86_save_reg (unsigned int regno, int maybe_eh_return)
/* REGNO is the real PIC register and the PIC register is in use: it is
   ever live, or the function is profiled, calls eh_return or uses the
   constant pool.  */
5603 if (pic_offset_table_rtx
5604 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
5605 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
5606 || current_function_profile
5607 || current_function_calls_eh_return
5608 || current_function_uses_const_pool))
/* The answer here depends on whether an alternate PIC register exists.  */
5610 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
/* Walk the EH return data registers.  */
5615 if (current_function_calls_eh_return && maybe_eh_return)
5620 unsigned test = EH_RETURN_DATA_REGNO (i);
/* NOTE(review): INVALID_REGNUM presumably marks the end of the list --
   confirm.  */
5621 if (test == INVALID_REGNUM)
/* REGNO is the register recorded in cfun->machine->force_align_arg_pointer.  */
5628 if (cfun->machine->force_align_arg_pointer
5629 && regno == REGNO (cfun->machine->force_align_arg_pointer))
/* General rule: the register is ever live, is neither call-used nor
   fixed, and is not the hard frame pointer while a frame pointer is
   needed.  */
5632 return (df_regs_ever_live_p (regno)
5633 && !call_used_regs[regno]
5634 && !fixed_regs[regno]
5635 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
5638 /* Return number of registers to be saved on the stack.
   Every hard register below FIRST_PSEUDO_REGISTER is tested with
   ix86_save_reg, passing true for MAYBE_EH_RETURN.  */
5641 ix86_nsaved_regs (void)
5646 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
5647 if (ix86_save_reg (regno, true))
5652 /* Return the offset between two registers, one to be eliminated, and the other
5653 its replacement, at the start of a routine.
   FROM is the register being eliminated and TO is its replacement.  The
   offsets are taken from a frame layout that is recomputed on each call.  */
5656 ix86_initial_elimination_offset (int from, int to)
5658 struct ix86_frame frame;
5659 ix86_compute_frame_layout (&frame);
/* Arg pointer replaced by the hard frame pointer.  */
5661 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
5662 return frame.hard_frame_pointer_offset;
/* Frame pointer replaced by the hard frame pointer.  */
5663 else if (from == FRAME_POINTER_REGNUM
5664 && to == HARD_FRAME_POINTER_REGNUM)
5665 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
/* Every remaining elimination must target the stack pointer.  */
5668 gcc_assert (to == STACK_POINTER_REGNUM);
/* Arg pointer replaced by the stack pointer.  */
5670 if (from == ARG_POINTER_REGNUM)
5671 return frame.stack_pointer_offset;
/* The only case left is frame pointer replaced by the stack pointer.  */
5673 gcc_assert (from == FRAME_POINTER_REGNUM);
5674 return frame.stack_pointer_offset - frame.frame_pointer_offset;
5678 /* Fill structure ix86_frame about frame of currently computed function. */
5681 ix86_compute_frame_layout (struct ix86_frame *frame)
5683 HOST_WIDE_INT total_size;
5684 unsigned int stack_alignment_needed;
5685 HOST_WIDE_INT offset;
5686 unsigned int preferred_alignment;
5687 HOST_WIDE_INT size = get_frame_size ();
5689 frame->nregs = ix86_nsaved_regs ();
5692 stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
5693 preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
5695 /* During reload iteration the amount of registers saved can change.
5696 Recompute the value as needed. Do not recompute when amount of registers
5697 didn't change as reload does multiple calls to the function and does not
5698 expect the decision to change within single iteration. */
5700 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
5702 int count = frame->nregs;
5704 cfun->machine->use_fast_prologue_epilogue_nregs = count;
5705 /* The fast prologue uses move instead of push to save registers. This
5706 is significantly longer, but also executes faster as modern hardware
5707 can execute the moves in parallel, but can't do that for push/pop.
5709 Be careful about choosing what prologue to emit: When function takes
5710 many instructions to execute we may use slow version as well as in
5711 case function is known to be outside hot spot (this is known with
5712 feedback only). Weight the size of function by number of registers
5713 to save as it is cheap to use one or two push instructions but very
5714 slow to use many of them. */
5716 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
5717 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
5718 || (flag_branch_probabilities
5719 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
5720 cfun->machine->use_fast_prologue_epilogue = false;
5722 cfun->machine->use_fast_prologue_epilogue
5723 = !expensive_function_p (count);
5725 if (TARGET_PROLOGUE_USING_MOVE
5726 && cfun->machine->use_fast_prologue_epilogue)
5727 frame->save_regs_using_mov = true;
5729 frame->save_regs_using_mov = false;
5732 /* Skip return address and saved base pointer. */
5733 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
5735 frame->hard_frame_pointer_offset = offset;
5737 /* Do some sanity checking of stack_alignment_needed and
5738 preferred_alignment, since the i386 port is the only one using those features
5739 that may break easily. */
5741 gcc_assert (!size || stack_alignment_needed);
5742 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
5743 gcc_assert (preferred_alignment <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
5744 gcc_assert (stack_alignment_needed
5745 <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
5747 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
5748 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
5750 /* Register save area */
5751 offset += frame->nregs * UNITS_PER_WORD;
5754 if (ix86_save_varrargs_registers)
5756 offset += X86_64_VARARGS_SIZE;
5757 frame->va_arg_size = X86_64_VARARGS_SIZE;
5760 frame->va_arg_size = 0;
5762 /* Align start of frame for local function. */
5763 frame->padding1 = ((offset + stack_alignment_needed - 1)
5764 & -stack_alignment_needed) - offset;
5766 offset += frame->padding1;
5768 /* Frame pointer points here. */
5769 frame->frame_pointer_offset = offset;
5773 /* Add outgoing arguments area. Can be skipped if we eliminated
5774 all the function calls as dead code.
5775 Skipping is however impossible when function calls alloca. Alloca
5776 expander assumes that last current_function_outgoing_args_size
5777 of stack frame are unused. */
5778 if (ACCUMULATE_OUTGOING_ARGS
5779 && (!current_function_is_leaf || current_function_calls_alloca
5780 || ix86_current_function_calls_tls_descriptor))
5782 offset += current_function_outgoing_args_size;
5783 frame->outgoing_arguments_size = current_function_outgoing_args_size;
5786 frame->outgoing_arguments_size = 0;
5788 /* Align stack boundary. Only needed if we're calling another function or using alloca. */
5790 if (!current_function_is_leaf || current_function_calls_alloca
5791 || ix86_current_function_calls_tls_descriptor)
5792 frame->padding2 = ((offset + preferred_alignment - 1)
5793 & -preferred_alignment) - offset;
5795 frame->padding2 = 0;
5797 offset += frame->padding2;
5799 /* We've reached end of stack frame. */
5800 frame->stack_pointer_offset = offset;
5802 /* Size prologue needs to allocate. */
5803 frame->to_allocate =
5804 (size + frame->padding1 + frame->padding2
5805 + frame->outgoing_arguments_size + frame->va_arg_size);
5807 if ((!frame->to_allocate && frame->nregs <= 1)
5808 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
5809 frame->save_regs_using_mov = false;
5811 if (TARGET_RED_ZONE && current_function_sp_is_unchanging
5812 && current_function_is_leaf
5813 && !ix86_current_function_calls_tls_descriptor)
5815 frame->red_zone_size = frame->to_allocate;
5816 if (frame->save_regs_using_mov)
5817 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
5818 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
5819 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
5822 frame->red_zone_size = 0;
5823 frame->to_allocate -= frame->red_zone_size;
5824 frame->stack_pointer_offset -= frame->red_zone_size;
5826 fprintf (stderr, "\n");
5827 fprintf (stderr, "nregs: %ld\n", (long)frame->nregs);
5828 fprintf (stderr, "size: %ld\n", (long)size);
5829 fprintf (stderr, "alignment1: %ld\n", (long)stack_alignment_needed);
5830 fprintf (stderr, "padding1: %ld\n", (long)frame->padding1);
5831 fprintf (stderr, "va_arg: %ld\n", (long)frame->va_arg_size);
5832 fprintf (stderr, "padding2: %ld\n", (long)frame->padding2);
5833 fprintf (stderr, "to_allocate: %ld\n", (long)frame->to_allocate);
5834 fprintf (stderr, "red_zone_size: %ld\n", (long)frame->red_zone_size);
5835 fprintf (stderr, "frame_pointer_offset: %ld\n", (long)frame->frame_pointer_offset);
5836 fprintf (stderr, "hard_frame_pointer_offset: %ld\n",
5837 (long)frame->hard_frame_pointer_offset);
5838 fprintf (stderr, "stack_pointer_offset: %ld\n", (long)frame->stack_pointer_offset);
5839 fprintf (stderr, "current_function_is_leaf: %ld\n", (long)current_function_is_leaf);
5840 fprintf (stderr, "current_function_calls_alloca: %ld\n", (long)current_function_calls_alloca);
5841 fprintf (stderr, "x86_current_function_calls_tls_descriptor: %ld\n", (long)ix86_current_function_calls_tls_descriptor);
5845 /* Emit code to save registers in the prologue.
   One push is emitted for every register that ix86_save_reg selects,
   walking from the highest hard register number down to 0, and each
   push is marked frame related.  */
5848 ix86_emit_save_regs (void)
5853 for (regno = FIRST_PSEUDO_REGISTER; regno-- > 0; )
5854 if (ix86_save_reg (regno, true))
5856 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
5857 RTX_FRAME_RELATED_P (insn) = 1;
5861 /* Emit code to save registers using MOV insns. First register
5862 is stored at POINTER + OFFSET; OFFSET advances by UNITS_PER_WORD
   after each register saved.  Registers are visited in increasing hard
   register number and every store is marked frame related.  */
5864 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
5869 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5870 if (ix86_save_reg (regno, true))
5872 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
5874 gen_rtx_REG (Pmode, regno));
5875 RTX_FRAME_RELATED_P (insn) = 1;
5876 offset += UNITS_PER_WORD;
5880 /* Expand prologue or epilogue stack adjustment.
5881 The pattern exists to put a dependency on all ebp-based memory accesses.
5882 STYLE should be negative if instructions should be marked as frame related,
5883 zero if %r11 register is live and cannot be freely used and positive otherwise. */
5887 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
5892 insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
5893 else if (x86_64_immediate_operand (offset, DImode))
5894 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
5898 /* r11 is used by indirect sibcall return as well, set before the
5899 epilogue and used after the epilogue. ATM indirect sibcall
5900 shouldn't be used together with huge frame sizes in one
5901 function because of the frame_size check in sibcall.c. */
5903 r11 = gen_rtx_REG (DImode, R11_REG);
5904 insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
5906 RTX_FRAME_RELATED_P (insn) = 1;
5907 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
5911 RTX_FRAME_RELATED_P (insn) = 1;
5914 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
5917 ix86_internal_arg_pointer (void)
5919 bool has_force_align_arg_pointer =
5920 (0 != lookup_attribute (ix86_force_align_arg_pointer_string,
5921 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))));
5922 if ((FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN
5923 && DECL_NAME (current_function_decl)
5924 && MAIN_NAME_P (DECL_NAME (current_function_decl))
5925 && DECL_FILE_SCOPE_P (current_function_decl))
5926 || ix86_force_align_arg_pointer
5927 || has_force_align_arg_pointer)
5929 /* Nested functions can't realign the stack due to a register conflict. */
5931 if (DECL_CONTEXT (current_function_decl)
5932 && TREE_CODE (DECL_CONTEXT (current_function_decl)) == FUNCTION_DECL)
5934 if (ix86_force_align_arg_pointer)
5935 warning (0, "-mstackrealign ignored for nested functions");
5936 if (has_force_align_arg_pointer)
5937 error ("%s not supported for nested functions",
5938 ix86_force_align_arg_pointer_string);
5939 return virtual_incoming_args_rtx;
5941 cfun->machine->force_align_arg_pointer = gen_rtx_REG (Pmode, 2);
5942 return copy_to_reg (cfun->machine->force_align_arg_pointer);
5945 return virtual_incoming_args_rtx;
5948 /* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook.
5949 This is called from dwarf2out.c to emit call frame instructions
5950 for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs.
   LABEL is passed through to the dwarf2out routines.  PATTERN is a SET
   whose source must be an UNSPEC.
   NOTE(review): INDEX presumably selects between the cases below; the
   dispatch itself is not visible here -- confirm.  */
5952 ix86_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
5954 rtx unspec = SET_SRC (pattern);
5955 gcc_assert (GET_CODE (unspec) == UNSPEC);
/* Register save: the first UNSPEC operand and the SET destination are
   handed to dwarf2out_reg_save_reg.  */
5959 case UNSPEC_REG_SAVE:
5960 dwarf2out_reg_save_reg (label, XVECEXP (unspec, 0, 0),
5961 SET_DEST (pattern));
/* CFA definition: the register number of the SET destination and the
   integer value of the first UNSPEC operand are handed to
   dwarf2out_def_cfa.  */
5963 case UNSPEC_DEF_CFA:
5964 dwarf2out_def_cfa (label, REGNO (SET_DEST (pattern)),
5965 INTVAL (XVECEXP (unspec, 0, 0)));
5972 /* Expand the prologue into a bunch of separate insns.
   The frame layout is computed first and drives the steps below:
   optional stack realignment through the force_align_arg_pointer
   register, frame pointer setup, register saves, stack allocation and
   PIC register setup.  */
5975 ix86_expand_prologue (void)
5979 struct ix86_frame frame;
5980 HOST_WIDE_INT allocate;
5982 ix86_compute_frame_layout (&frame);
5984 if (cfun->machine->force_align_arg_pointer)
5988 /* Grab the argument pointer. */
5989 x = plus_constant (stack_pointer_rtx, 4);
5990 y = cfun->machine->force_align_arg_pointer;
5991 insn = emit_insn (gen_rtx_SET (VOIDmode, y, x));
5992 RTX_FRAME_RELATED_P (insn) = 1;
5994 /* The unwind info consists of two parts: install the fafp as the cfa,
5995 and record the fafp as the "save register" of the stack pointer.
5996 The latter is there in order that the unwinder can see where it
5997 should restore the stack pointer across the and insn. */
5998 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx), UNSPEC_DEF_CFA);
5999 x = gen_rtx_SET (VOIDmode, y, x);
6000 RTX_FRAME_RELATED_P (x) = 1;
6001 y = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, stack_pointer_rtx),
6003 y = gen_rtx_SET (VOIDmode, cfun->machine->force_align_arg_pointer, y);
6004 RTX_FRAME_RELATED_P (y) = 1;
6005 x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y));
6006 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
6007 REG_NOTES (insn) = x;
6009 /* Align the stack. */
6010 emit_insn (gen_andsi3 (stack_pointer_rtx, stack_pointer_rtx,
6013 /* And here we cheat like madmen with the unwind info. We force the
6014 cfa register back to sp+4, which is exactly what it was at the
6015 start of the function. Re-pushing the return address results in
6016 the return at the same spot relative to the cfa, and thus is
6017 correct wrt the unwind info. */
6018 x = cfun->machine->force_align_arg_pointer;
6019 x = gen_frame_mem (Pmode, plus_constant (x, -4));
6020 insn = emit_insn (gen_push (x));
6021 RTX_FRAME_RELATED_P (insn) = 1;
6024 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, x), UNSPEC_DEF_CFA);
6025 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
6026 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
6027 REG_NOTES (insn) = x;
6030 /* Note: AT&T enter does NOT have reversed args. Enter is probably
6031 slower on all targets. Also sdb doesn't like it. */
/* Frame pointer setup: push the old hard frame pointer, then copy the
   stack pointer into it.  Both insns are marked frame related.  */
6033 if (frame_pointer_needed)
6035 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
6036 RTX_FRAME_RELATED_P (insn) = 1;
6038 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
6039 RTX_FRAME_RELATED_P (insn) = 1;
/* ALLOCATE is the amount that is later subtracted from the stack pointer.  */
6042 allocate = frame.to_allocate;
/* Either push the registers now, or make room for them in ALLOCATE so
   that they can be stored with moves.  */
6044 if (!frame.save_regs_using_mov)
6045 ix86_emit_save_regs ();
6047 allocate += frame.nregs * UNITS_PER_WORD;
6049 /* When using red zone we may start register saving before allocating
6050 the stack frame saving one cycle of the prologue. */
6051 if (TARGET_RED_ZONE && frame.save_regs_using_mov)
6052 ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
6053 : stack_pointer_rtx,
6054 -frame.nregs * UNITS_PER_WORD);
/* Without stack probing, or below CHECK_STACK_LIMIT, the stack pointer
   is adjusted directly; the adjustment is marked frame related
   (STYLE is -1).  */
6058 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
6059 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
6060 GEN_INT (-allocate), -1);
6063 /* Only valid for Win32. */
6064 rtx eax = gen_rtx_REG (Pmode, 0);
6068 gcc_assert (!TARGET_64BIT || TARGET_64BIT_MS_ABI);
6070 if (TARGET_64BIT_MS_ABI)
6073 eax_live = ix86_eax_live_at_start_p ();
/* eax is pushed so that it can carry the allocation size; the push
   itself already accounts for one word of the allocation.  */
6077 emit_insn (gen_push (eax));
6078 allocate -= UNITS_PER_WORD;
6081 emit_move_insn (eax, GEN_INT (allocate));
6084 insn = gen_allocate_stack_worker_64 (eax);
6086 insn = gen_allocate_stack_worker_32 (eax);
6087 insn = emit_insn (insn);
6088 RTX_FRAME_RELATED_P (insn) = 1;
/* Attach a note describing the worker insn as sp = sp - ALLOCATE.  */
6089 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
6090 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
6091 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
6092 t, REG_NOTES (insn));
/* Reload eax from the stack slot it was pushed to.  */
6096 if (frame_pointer_needed)
6097 t = plus_constant (hard_frame_pointer_rtx,
6100 - frame.nregs * UNITS_PER_WORD);
6102 t = plus_constant (stack_pointer_rtx, allocate);
6103 emit_move_insn (eax, gen_rtx_MEM (Pmode, t));
/* Without a red zone the mov-based register saves are emitted only now,
   after the allocation.  */
6107 if (frame.save_regs_using_mov && !TARGET_RED_ZONE)
6109 if (!frame_pointer_needed || !frame.to_allocate)
6110 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
6112 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
6113 -frame.nregs * UNITS_PER_WORD);
/* PIC register setup: needed when the real PIC register is ever live or
   the function is profiled.  An alternate register is used when one can
   be selected.  */
6116 pic_reg_used = false;
6117 if (pic_offset_table_rtx
6118 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
6119 || current_function_profile))
6121 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
6123 if (alt_pic_reg_used != INVALID_REGNUM)
6124 SET_REGNO (pic_offset_table_rtx, alt_pic_reg_used);
6126 pic_reg_used = true;
/* Large PIC model: combine a rip-relative base and a GOT offset, using
   a temporary register that must differ from the PIC register.  */
6133 if (ix86_cmodel == CM_LARGE_PIC)
6135 rtx tmp_reg = gen_rtx_REG (DImode,
6136 FIRST_REX_INT_REG + 3 /* R11 */);
6137 rtx label = gen_label_rtx ();
6139 LABEL_PRESERVE_P (label) = 1;
6140 gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
6141 insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx, label));
6142 insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
6143 insn = emit_insn (gen_adddi3 (pic_offset_table_rtx,
6144 pic_offset_table_rtx, tmp_reg));
6147 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
6150 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
6153 /* Prevent function calls from being scheduled before the call to mcount.
6154 In the pic_reg_used case, make sure that the got load isn't deleted. */
6155 if (current_function_profile)
6158 emit_insn (gen_prologue_use (pic_offset_table_rtx));
6159 emit_insn (gen_blockage ());
6163 /* Emit code to restore saved registers using MOV insns. First register
6164 is restored from POINTER + OFFSET.
   MAYBE_EH_RETURN is forwarded to ix86_save_reg to select the registers.
   OFFSET advances by UNITS_PER_WORD after each register restored.  */
6166 ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
6167 int maybe_eh_return)
6170 rtx base_address = gen_rtx_MEM (Pmode, pointer);
/* Registers are visited in increasing hard register number.  */
6172 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
6173 if (ix86_save_reg (regno, maybe_eh_return))
6175 /* Ensure that adjust_address won't be forced to produce pointer
6176 out of range allowed by x86-64 instruction set. */
6177 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
/* Form POINTER + OFFSET in r11 and address memory through r11 instead.  */
6181 r11 = gen_rtx_REG (DImode, R11_REG);
6182 emit_move_insn (r11, GEN_INT (offset));
6183 emit_insn (gen_adddi3 (r11, r11, pointer));
6184 base_address = gen_rtx_MEM (Pmode, r11);
6187 emit_move_insn (gen_rtx_REG (Pmode, regno),
6188 adjust_address (base_address, Pmode, offset));
6189 offset += UNITS_PER_WORD;
6193 /* Restore function stack, frame, and registers. */
6196 ix86_expand_epilogue (int style)
6199 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
6200 struct ix86_frame frame;
6201 HOST_WIDE_INT offset;
6203 ix86_compute_frame_layout (&frame);
6205 /* Calculate start of saved registers relative to ebp. Special care
6206 must be taken for the normal return case of a function using
6207 eh_return: the eax and edx registers are marked as saved, but not
6208 restored along this path. */
6209 offset = frame.nregs;
6210 if (current_function_calls_eh_return && style != 2)
6212 offset *= -UNITS_PER_WORD;
6214 /* If we're only restoring one register and sp is not valid then
6215 use a move instruction to restore the register since it's
6216 less work than reloading sp and popping the register.
6218 The default code results in stack adjustment using add/lea instruction,
6219 while this code results in LEAVE instruction (or discrete equivalent),
6220 so it is profitable in some other cases as well. Especially when there
6221 are no registers to restore. We also use this code when TARGET_USE_LEAVE
6222 and there is exactly one register to pop. This heuristic may need some
6223 tuning in future. */
6224 if ((!sp_valid && frame.nregs <= 1)
6225 || (TARGET_EPILOGUE_USING_MOVE
6226 && cfun->machine->use_fast_prologue_epilogue
6227 && (frame.nregs > 1 || frame.to_allocate))
6228 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
6229 || (frame_pointer_needed && TARGET_USE_LEAVE
6230 && cfun->machine->use_fast_prologue_epilogue
6231 && frame.nregs == 1)
6232 || current_function_calls_eh_return)
6234 /* Restore registers. We can use ebp or esp to address the memory
6235 locations. If both are available, default to ebp, since offsets
6236 are known to be small. Only exception is esp pointing directly to the
6237 end of block of saved registers, where we may simplify addressing mode. */
6240 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
6241 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
6242 frame.to_allocate, style == 2);
6244 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
6245 offset, style == 2);
6247 /* eh_return epilogues need %ecx added to the stack pointer. */
6250 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
6252 if (frame_pointer_needed)
6254 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
6255 tmp = plus_constant (tmp, UNITS_PER_WORD);
6256 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
6258 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
6259 emit_move_insn (hard_frame_pointer_rtx, tmp);
6261 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
6266 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
6267 tmp = plus_constant (tmp, (frame.to_allocate
6268 + frame.nregs * UNITS_PER_WORD));
6269 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
6272 else if (!frame_pointer_needed)
6273 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
6274 GEN_INT (frame.to_allocate
6275 + frame.nregs * UNITS_PER_WORD),
6277 /* If not an i386, mov & pop is faster than "leave". */
6278 else if (TARGET_USE_LEAVE || optimize_size
6279 || !cfun->machine->use_fast_prologue_epilogue)
6280 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
6283 pro_epilogue_adjust_stack (stack_pointer_rtx,
6284 hard_frame_pointer_rtx,
6287 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
6289 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
6294 /* First step is to deallocate the stack frame so that we can
6295 pop the registers. */
6298 gcc_assert (frame_pointer_needed);
6299 pro_epilogue_adjust_stack (stack_pointer_rtx,
6300 hard_frame_pointer_rtx,
6301 GEN_INT (offset), style);
6303 else if (frame.to_allocate)
6304 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
6305 GEN_INT (frame.to_allocate), style);
6307 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
6308 if (ix86_save_reg (regno, false))
6311 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
6313 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
6315 if (frame_pointer_needed)
6317 /* Leave results in shorter dependency chains on CPUs that are
6318 able to grok it fast. */
6319 if (TARGET_USE_LEAVE)
6320 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
6321 else if (TARGET_64BIT)
6322 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
6324 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
6328 if (cfun->machine->force_align_arg_pointer)
6330 emit_insn (gen_addsi3 (stack_pointer_rtx,
6331 cfun->machine->force_align_arg_pointer,
6335 /* Sibcall epilogues don't want a return instruction. */
6339 if (current_function_pops_args && current_function_args_size)
6341 rtx popc = GEN_INT (current_function_pops_args);
6343 /* i386 can only pop 64K bytes. If asked to pop more, pop
6344 return address, do explicit add, and jump indirectly to the caller. */
6347 if (current_function_pops_args >= 65536)
6349 rtx ecx = gen_rtx_REG (SImode, 2);
6351 /* There is no "pascal" calling convention in any 64bit ABI. */
6352 gcc_assert (!TARGET_64BIT);
6354 emit_insn (gen_popsi1 (ecx));
6355 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
6356 emit_jump_insn (gen_return_indirect_internal (ecx));
6359 emit_jump_insn (gen_return_pop_internal (popc));
6362 emit_jump_insn (gen_return_internal ());
6365 /* Reset from the function's potential modifications.
   The register number of pic_offset_table_rtx is put back to
   REAL_PIC_OFFSET_TABLE_REGNUM.  */
6368 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
6369 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
6371 if (pic_offset_table_rtx)
6372 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
6374 /* Mach-O doesn't support labels at the end of objects, so if
6375 it looks like we might want one, insert a NOP. */
6377 rtx insn = get_last_insn ();
6380 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
6381 insn = PREV_INSN (insn); /* Step back one insn from the end.  */
6385 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
6386 fputs ("\tnop\n", file); /* The NOP is written straight to FILE.  */
6392 /* Extract the parts of an RTL expression that is a valid memory address
6393 for an instruction. Return 0 if the structure of the address is
6394 grossly off. Return -1 if the address contains ASHIFT, so it is not
6395 strictly valid, but still used for computing length of lea instruction.
   ADDR is the address to take apart.  The parts tracked locally are
   base, index, disp, scale and seg.
   NOTE(review): OUT presumably receives those parts; the stores are not
   visible here -- confirm.  */
6398 ix86_decompose_address (rtx addr, struct ix86_address *out)
6400 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
6401 rtx base_reg, index_reg;
6402 HOST_WIDE_INT scale = 1;
6403 rtx scale_rtx = NULL_RTX;
6405 enum ix86_address_seg seg = SEG_DEFAULT;
/* Dispatch on the top-level shape of ADDR.  */
6407 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
6409 else if (GET_CODE (addr) == PLUS)
/* Peel the second operand off each nested PLUS into ADDENDS until OP is
   no longer a PLUS.  */
6419 addends[n++] = XEXP (op, 1);
6422 while (GET_CODE (op) == PLUS);
/* Classify the collected addends, walking the indices from N down to 0.  */
6427 for (i = n; i >= 0; --i)
6430 switch (GET_CODE (op))
6435 index = XEXP (op, 0);
6436 scale_rtx = XEXP (op, 1);
/* A thread-pointer UNSPEC selects a segment (SEG_FS in 64-bit mode,
   SEG_GS otherwise) when direct TLS segment references are enabled and
   no segment has been chosen yet.  */
6440 if (XINT (op, 1) == UNSPEC_TP
6441 && TARGET_TLS_DIRECT_SEG_REFS
6442 && seg == SEG_DEFAULT)
6443 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
6472 else if (GET_CODE (addr) == MULT)
6474 index = XEXP (addr, 0); /* index*scale */
6475 scale_rtx = XEXP (addr, 1);
6477 else if (GET_CODE (addr) == ASHIFT)
6481 /* We're called for lea too, which implements ashift on occasion. */
6482 index = XEXP (addr, 0);
6483 tmp = XEXP (addr, 1);
6484 if (!CONST_INT_P (tmp))
6486 scale = INTVAL (tmp);
/* Only shift counts 0..3 pass this check; the unsigned cast makes
   negative counts fail it as well.  */
6487 if ((unsigned HOST_WIDE_INT) scale > 3)
6493 disp = addr; /* displacement */
6495 /* Extract the integral value of scale. */
6498 if (!CONST_INT_P (scale_rtx))
6500 scale = INTVAL (scale_rtx);
/* Look through SUBREGs to the underlying registers for the checks below.  */
6503 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
6504 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
6506 /* Allow arg pointer and stack pointer as index if there is not scaling. */
6507 if (base_reg && index_reg && scale == 1
6508 && (index_reg == arg_pointer_rtx
6509 || index_reg == frame_pointer_rtx
6510 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
/* Exchange the roles of base and index.  */
6513 tmp = base, base = index, index = tmp;
6514 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
6517 /* Special case: %ebp cannot be encoded as a base without a displacement. */
6518 if ((base_reg == hard_frame_pointer_rtx
6519 || base_reg == frame_pointer_rtx
6520 || base_reg == arg_pointer_rtx) && !disp)
6523 /* Special case: on K6, [%esi] makes the instruction vector decoded.
6524 Avoid this by transforming to [%esi+0]. */
6525 if (ix86_tune == PROCESSOR_K6 && !optimize_size
6526 && base_reg && !index_reg && !disp
6528 && REGNO_REG_CLASS (REGNO (base_reg)) == SIREG)
6531 /* Special case: encode reg+reg instead of reg*2. */
6532 if (!base && index && scale && scale == 2)
6533 base = index, base_reg = index_reg, scale = 1;
6535 /* Special case: scaling cannot be encoded without base or displacement. */
6536 if (!base && !disp && index && scale != 1)
6548 /* Return cost of the memory address x.
6549 For i386, it is better to use a complex address than let gcc copy
6550 the address into a reg and make a new pseudo. But not if the address
6551 requires two regs - that would mean more pseudos with longer lifetimes. */
6554 ix86_address_cost (rtx x)
6556 struct ix86_address parts;
6558 int ok = ix86_decompose_address (x, &parts);
6562 if (parts.base && GET_CODE (parts.base) == SUBREG)
6563 parts.base = SUBREG_REG (parts.base);
6564 if (parts.index && GET_CODE (parts.index) == SUBREG)
6565 parts.index = SUBREG_REG (parts.index);
6567 /* Attempt to minimize number of registers in the address. */
6569 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
6571 && (!REG_P (parts.index)
6572 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
6576 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
6578 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
6579 && parts.base != parts.index)
6582 /* AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
6583 since its predecode logic can't detect the length of instructions
6584 and it degenerates to vector decoded. Increase cost of such
6585 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
6586 to split such addresses or even refuse such addresses at all.
6588 Following addressing modes are affected:
[base+scale*index]
[scale*index+disp]
[base+index]
6593 The first and last case may be avoidable by explicitly coding the zero in
6594 memory address, but I don't have AMD-K6 machine handy to check this theory. */
6598 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
6599 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
6600 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
6606 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
6607 this is used to form addresses to local data when -fPIC is in use. */
6611 darwin_local_data_pic (rtx disp)
6613 if (GET_CODE (disp) == MINUS)
6615 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
6616 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
6617 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
6619 const char *sym_name = XSTR (XEXP (disp, 1), 0);
6620 if (! strcmp (sym_name, "<pic base>"))
6628 /* Determine if a given RTX is a valid constant. We already know this
6629 satisfies CONSTANT_P.
   The decision is made by dispatching on the rtx code of X.  */
6632 legitimate_constant_p (rtx x)
6634 switch (GET_CODE (x))
/* A PLUS is examined through its operands; its second operand is
   required to be a CONST_INT.  */
6639 if (GET_CODE (x) == PLUS)
6641 if (!CONST_INT_P (XEXP (x, 1)))
/* Mach-O local data references of the form checked by
   darwin_local_data_pic get their own test.  */
6646 if (TARGET_MACHO && darwin_local_data_pic (x))
6649 /* Only some unspecs are valid as "constants". */
6650 if (GET_CODE (x) == UNSPEC)
6651 switch (XINT (x, 1))
6656 return TARGET_64BIT;
/* The wrapped operand must be a SYMBOL_REF with the local-exec TLS model.  */
6659 x = XVECEXP (x, 0, 0);
6660 return (GET_CODE (x) == SYMBOL_REF
6661 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
/* The wrapped operand must be a SYMBOL_REF with the local-dynamic TLS
   model.  */
6663 x = XVECEXP (x, 0, 0);
6664 return (GET_CODE (x) == SYMBOL_REF
6665 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
6670 /* We must have drilled down to a symbol. */
6671 if (GET_CODE (x) == LABEL_REF)
6673 if (GET_CODE (x) != SYMBOL_REF)
6678 /* TLS symbols are never valid. */
6679 if (SYMBOL_REF_TLS_MODEL (x))
6682 /* DLLIMPORT symbols are never valid. */
6683 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
6684 && SYMBOL_REF_DLLIMPORT_P (x))
/* TImode values other than zero are tested separately from the generic
   all-zero test that follows.  */
6689 if (GET_MODE (x) == TImode
6690 && x != CONST0_RTX (TImode)
6696 if (x == CONST0_RTX (GET_MODE (x)))
6704 /* Otherwise we handle everything else in the move patterns. */
6708 /* Determine if it's legal to put X into the constant pool. This
6709 is not possible for the address of thread-local symbols, which
6710 is checked above.
   Note the inverted sense: the function's final return is nonzero when
   X is not a legitimate constant.  */
6713 ix86_cannot_force_const_mem (rtx x)
6715 /* We can always put integral constants and vectors in memory. */
6716 switch (GET_CODE (x))
/* Everything that reaches this point is decided by
   legitimate_constant_p.  */
6726 return !legitimate_constant_p (x);
6729 /* Determine if a given RTX is a valid constant address.
   X qualifies when it satisfies CONSTANT_P and legitimate_address_p
   accepts it as a Pmode address.
   NOTE(review): the trailing 1 is presumably the strict-checking flag --
   confirm against legitimate_address_p.  */
6732 constant_address_p (rtx x)
6734 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
6737 /* Nonzero if the constant value X is a legitimate general operand
6738 when generating PIC code. It is given that flag_pic is on and
6739 that X satisfies CONSTANT_P or is a CONST_DOUBLE.
   The decision is made by dispatching on the rtx code of X.  */
6742 legitimate_pic_operand_p (rtx x)
6746 switch (GET_CODE (x))
/* Look at the wrapped expression, skipping over an outer PLUS whose
   second operand is a CONST_INT.  */
6749 inner = XEXP (x, 0);
6750 if (GET_CODE (inner) == PLUS
6751 && CONST_INT_P (XEXP (inner, 1)))
6752 inner = XEXP (inner, 0);
6754 /* Only some unspecs are valid as "constants". */
6755 if (GET_CODE (inner) == UNSPEC)
6756 switch (XINT (inner, 1))
6761 return TARGET_64BIT;
/* The wrapped operand must be a SYMBOL_REF with the local-exec TLS model.  */
6763 x = XVECEXP (inner, 0, 0);
6764 return (GET_CODE (x) == SYMBOL_REF
6765 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
/* These operands are valid exactly when they are valid PIC address
   displacements.  */
6773 return legitimate_pic_address_disp_p (x);
6780 /* Determine if a given CONST RTX is a valid memory displacement in PIC mode. */
6784 legitimate_pic_address_disp_p (rtx disp)
6788 /* In 64bit mode we can allow direct addresses of symbols and labels
6789 when they are not dynamic symbols. */
6792 rtx op0 = disp, op1;
6794 switch (GET_CODE (disp))
6800 if (GET_CODE (XEXP (disp, 0)) != PLUS)
6802 op0 = XEXP (XEXP (disp, 0), 0);
6803 op1 = XEXP (XEXP (disp, 0), 1);
6804 if (!CONST_INT_P (op1)
6805 || INTVAL (op1) >= 16*1024*1024
6806 || INTVAL (op1) < -16*1024*1024)
6808 if (GET_CODE (op0) == LABEL_REF)
6810 if (GET_CODE (op0) != SYMBOL_REF)
6815 /* TLS references should always be enclosed in UNSPEC. */
6816 if (SYMBOL_REF_TLS_MODEL (op0))
6818 if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0)
6819 && ix86_cmodel != CM_LARGE_PIC)
6827 if (GET_CODE (disp) != CONST)
6829 disp = XEXP (disp, 0);
6833 /* We are unsafe to allow PLUS expressions. This limit allowed distance
6834 of GOT tables. We should not need these anyway. */
6835 if (GET_CODE (disp) != UNSPEC
6836 || (XINT (disp, 1) != UNSPEC_GOTPCREL
6837 && XINT (disp, 1) != UNSPEC_GOTOFF
6838 && XINT (disp, 1) != UNSPEC_PLTOFF))
6841 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
6842 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
6848 if (GET_CODE (disp) == PLUS)
6850 if (!CONST_INT_P (XEXP (disp, 1)))
6852 disp = XEXP (disp, 0);
6856 if (TARGET_MACHO && darwin_local_data_pic (disp))
6859 if (GET_CODE (disp) != UNSPEC)
6862 switch (XINT (disp, 1))
6867 /* We need to check for both symbols and labels because VxWorks loads
6868 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
6870 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
6871 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
6873 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
6874 While ABI specify also 32bit relocation but we don't produce it in
6875 small PIC model at all. */
6876 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
6877 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
6879 return gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
6881 case UNSPEC_GOTTPOFF:
6882 case UNSPEC_GOTNTPOFF:
6883 case UNSPEC_INDNTPOFF:
6886 disp = XVECEXP (disp, 0, 0);
6887 return (GET_CODE (disp) == SYMBOL_REF
6888 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
6890 disp = XVECEXP (disp, 0, 0);
6891 return (GET_CODE (disp) == SYMBOL_REF
6892 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
6894 disp = XVECEXP (disp, 0, 0);
6895 return (GET_CODE (disp) == SYMBOL_REF
6896 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
/* legitimate_address_p: MODE is unused (ATTRIBUTE_UNUSED); ADDR is the
   address to validate; STRICT selects the REG_OK_FOR_*_STRICT_P register
   predicates when nonzero and the *_NONSTRICT_P ones otherwise.

   ADDR is split by ix86_decompose_address, and the base, index, scale and
   displacement parts are then validated in that order.  Each failed check
   stores a short explanation in REASON (and, for the scale, the offending
   value in REASON_RTX).  The scale test visible here objects to anything
   other than 2, 4 and 8.  NOTE(review): presumably a scale of 1 is exempted
   by a guard not shown here -- confirm.  */
6902 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
6903 memory address for an instruction. The MODE argument is the machine mode
6904 for the MEM expression that wants to use this address.
6906 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
6907 convert common non-canonical forms to canonical form so that they will
6911 legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
6912 rtx addr, int strict)
6914 struct ix86_address parts;
6915 rtx base, index, disp;
6916 HOST_WIDE_INT scale;
6917 const char *reason = NULL;
6918 rtx reason_rtx = NULL_RTX;
6920 if (ix86_decompose_address (addr, &parts) <= 0)
6922 reason = "decomposition failed";
6927 index = parts.index;
6929 scale = parts.scale;
6931 /* Validate base register.
6933 Don't allow SUBREG's that span more than a word here. It can lead to spill
6934 failures when the base is one word out of a two word structure, which is
6935 represented internally as a DImode int. */
6944 else if (GET_CODE (base) == SUBREG
6945 && REG_P (SUBREG_REG (base))
6946 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
6948 reg = SUBREG_REG (base);
6951 reason = "base is not a register";
6955 if (GET_MODE (base) != Pmode)
6957 reason = "base is not in Pmode";
6961 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
6962 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
6964 reason = "base is not valid";
6969 /* Validate index register.
6971 Don't allow SUBREG's that span more than a word here -- same as above. */
6980 else if (GET_CODE (index) == SUBREG
6981 && REG_P (SUBREG_REG (index))
6982 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
6984 reg = SUBREG_REG (index);
6987 reason = "index is not a register";
6991 if (GET_MODE (index) != Pmode)
6993 reason = "index is not in Pmode";
6997 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
6998 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
7000 reason = "index is not valid";
7005 /* Validate scale factor. */
7008 reason_rtx = GEN_INT (scale);
7011 reason = "scale without index";
7015 if (scale != 2 && scale != 4 && scale != 8)
7017 reason = "scale is not a valid multiplier";
7022 /* Validate displacement. */
7027 if (GET_CODE (disp) == CONST
7028 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
7029 switch (XINT (XEXP (disp, 0), 1))
7031 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
7032 used. While ABI specify also 32bit relocations, we don't produce
7033 them at all and use IP relative instead. */
7036 gcc_assert (flag_pic);
7038 goto is_legitimate_pic;
7039 reason = "64bit address unspec";
7042 case UNSPEC_GOTPCREL:
7043 gcc_assert (flag_pic);
7044 goto is_legitimate_pic;
7046 case UNSPEC_GOTTPOFF:
7047 case UNSPEC_GOTNTPOFF:
7048 case UNSPEC_INDNTPOFF:
7054 reason = "invalid address unspec";
7058 else if (SYMBOLIC_CONST (disp)
7062 && MACHOPIC_INDIRECT
7063 && !machopic_operand_p (disp)
7069 if (TARGET_64BIT && (index || base))
7071 /* foo@dtpoff(%rX) is ok. */
7072 if (GET_CODE (disp) != CONST
7073 || GET_CODE (XEXP (disp, 0)) != PLUS
7074 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
7075 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
7076 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
7077 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
7079 reason = "non-constant pic memory reference";
7083 else if (! legitimate_pic_address_disp_p (disp))
7085 reason = "displacement is an invalid pic construct";
7089 /* This code used to verify that a symbolic pic displacement
7090 includes the pic_offset_table_rtx register.
7092 While this is good idea, unfortunately these constructs may
7093 be created by "adds using lea" optimization for incorrect
7102 This code is nonsensical, but results in addressing
7103 GOT table with pic_offset_table_rtx base. We can't
7104 just refuse it easily, since it gets matched by
7105 "addsi3" pattern, that later gets split to lea in the
7106 case output register differs from input. While this
7107 can be handled by separate addsi pattern for this case
7108 that never results in lea, this seems to be easier and
7109 correct fix for crash to disable this test. */
7111 else if (GET_CODE (disp) != LABEL_REF
7112 && !CONST_INT_P (disp)
7113 && (GET_CODE (disp) != CONST
7114 || !legitimate_constant_p (disp))
7115 && (GET_CODE (disp) != SYMBOL_REF
7116 || !legitimate_constant_p (disp)))
7118 reason = "displacement is not constant";
7121 else if (TARGET_64BIT
7122 && !x86_64_immediate_operand (disp, VOIDmode))
7124 reason = "displacement is out of range";
7129 /* Everything looks valid. */
7136 /* Return a unique alias set for the GOT.

   The set lives in a function-local static that starts out as -1 and is
   obtained from new_alias_set.  NOTE(review): presumably the allocation
   happens only while the static still holds -1 -- confirm. */
7138 static HOST_WIDE_INT
7139 ix86_GOT_alias_set (void)
7141 static HOST_WIDE_INT set = -1;
7143 set = new_alias_set ();
7147 /* Return a legitimate reference for ORIG (an address) using the
7148 register REG. If REG is 0, a new pseudo is generated.
7150 There are two types of references that must be handled:
7152 1. Global data references must load the address from the GOT, via
7153 the PIC reg. An insn is emitted to do this load, and the reg is
7156 2. Static data references, constant pool addresses, and code labels
7157 compute the address as an offset from the GOT, whose base is in
7158 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
7159 differentiate them from global data objects. The returned
7160 address is the PIC reg + an unspec constant.
7162 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
7163 reg also appears in the address.

   The cases are tried in this order: 32-bit Mach-O is handed to
   machopic_legitimize_pic_address; on 64-bit targets an address that
   already satisfies legitimate_pic_address_disp_p is handled first; a
   gotoff_operand becomes the PIC reg plus an @GOTOFF constant; other
   non-TLS symbols are loaded from the GOT (@GOTPCREL on 64-bit targets
   outside the large PIC model, @GOT otherwise); and a CONST wrapping a
   PLUS is legitimized operand by operand. */
7166 legitimize_pic_address (rtx orig, rtx reg)
7173 if (TARGET_MACHO && !TARGET_64BIT)
7176 reg = gen_reg_rtx (Pmode);
7177 /* Use the generic Mach-O PIC machinery. */
7178 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
7182 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
7184 else if (TARGET_64BIT
7185 && ix86_cmodel != CM_SMALL_PIC
7186 && gotoff_operand (addr, Pmode))
7189 /* This symbol may be referenced via a displacement from the PIC
7190 base address (@GOTOFF). */
7192 if (reload_in_progress)
7193 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
7194 if (GET_CODE (addr) == CONST)
7195 addr = XEXP (addr, 0);
7196 if (GET_CODE (addr) == PLUS)
7198 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
7200 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
7203 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
7204 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
/* In this 64-bit variant the @GOTOFF constant is first moved into a
   temporary register and only then added to the PIC register. */
7206 tmpreg = gen_reg_rtx (Pmode);
7209 emit_move_insn (tmpreg, new_rtx);
7213 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
7214 tmpreg, 1, OPTAB_DIRECT);
7217 else new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
7219 else if (!TARGET_64BIT && gotoff_operand (addr, Pmode))
7221 /* This symbol may be referenced via a displacement from the PIC
7222 base address (@GOTOFF). */
7224 if (reload_in_progress)
7225 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
7226 if (GET_CODE (addr) == CONST)
7227 addr = XEXP (addr, 0);
7228 if (GET_CODE (addr) == PLUS)
7230 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
7232 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
7235 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
7236 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
7237 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
7241 emit_move_insn (reg, new_rtx);
7245 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
7246 /* We can't use @GOTOFF for text labels on VxWorks;
7247 see gotoff_operand. */
7248 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
7250 /* Given that we've already handled dllimport variables separately
7251 in legitimize_address, and all other variables should satisfy
7252 legitimate_pic_address_disp_p, we should never arrive here. */
7253 gcc_assert (!TARGET_64BIT_MS_ABI);
7255 if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
7257 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
7258 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
7259 new_rtx = gen_const_mem (Pmode, new_rtx);
7260 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
7263 reg = gen_reg_rtx (Pmode);
7264 /* Use gen_movsi directly, otherwise the address is loaded
7265 into a register for CSE. We don't want to CSE these addresses;
7266 instead we CSE addresses from the GOT table, so skip this.
   NOTE(review): this is the TARGET_64BIT branch, yet the SImode move
   generator is applied to Pmode operands -- confirm this is intended. */
7267 emit_insn (gen_movsi (reg, new_rtx));
7272 /* This symbol must be referenced via a load from the
7273 Global Offset Table (@GOT). */
7275 if (reload_in_progress)
7276 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
7277 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
7278 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
7280 new_rtx = force_reg (Pmode, new_rtx);
7281 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
7282 new_rtx = gen_const_mem (Pmode, new_rtx);
7283 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
7286 reg = gen_reg_rtx (Pmode);
7287 emit_move_insn (reg, new_rtx);
7293 if (CONST_INT_P (addr)
7294 && !x86_64_immediate_operand (addr, VOIDmode))
7298 emit_move_insn (reg, addr);
7302 new_rtx = force_reg (Pmode, addr);
7304 else if (GET_CODE (addr) == CONST)
7306 addr = XEXP (addr, 0);
7308 /* We must match stuff we generate before. Assume the only
7309 unspecs that can get here are ours. Not that we could do
7310 anything with them anyway.... */
7311 if (GET_CODE (addr) == UNSPEC
7312 || (GET_CODE (addr) == PLUS
7313 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
7315 gcc_assert (GET_CODE (addr) == PLUS);
7317 if (GET_CODE (addr) == PLUS)
7319 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
7321 /* Check first to see if this is a constant offset from a @GOTOFF
7322 symbol reference. */
7323 if (gotoff_operand (op0, Pmode)
7324 && CONST_INT_P (op1))
7328 if (reload_in_progress)
7329 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
7330 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
7332 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
7333 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
7334 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
7338 emit_move_insn (reg, new_rtx);
/* Offsets outside [-16*1024*1024, 16*1024*1024) get the symbol forced
   into a register, and the offset too unless it is a valid immediate. */
7344 if (INTVAL (op1) < -16*1024*1024
7345 || INTVAL (op1) >= 16*1024*1024)
7347 if (!x86_64_immediate_operand (op1, Pmode))
7348 op1 = force_reg (Pmode, op1);
7349 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
/* General PLUS: legitimize both operands recursively.  REG is offered
   to the second call only when the first did not return it. */
7355 base = legitimize_pic_address (XEXP (addr, 0), reg);
7356 new_rtx = legitimize_pic_address (XEXP (addr, 1),
7357 base == reg ? NULL_RTX : reg);
7359 if (CONST_INT_P (new_rtx))
7360 new_rtx = plus_constant (base, INTVAL (new_rtx));
7363 if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1)))
7365 base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
7366 new_rtx = XEXP (new_rtx, 1);
7368 new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
7376 /* Load the thread pointer. If TO_REG is true, force it into a register.

   The thread pointer is expressed as a Pmode UNSPEC_TP unspec over
   const0_rtx.  The register form is produced by emitting a SET of a
   fresh Pmode pseudo to that unspec. */
7379 get_thread_pointer (int to_reg)
7383 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
7387 reg = gen_reg_rtx (Pmode);
7388 insn = gen_rtx_SET (VOIDmode, reg, tp);
7389 insn = emit_insn (insn);
7394 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
7395 false if we expect this to be used for a memory address and true if
7396 we expect to load the address into a register.

   X is the TLS symbol and MODEL its access model.  Global-dynamic and
   local-dynamic emit the tls_global_dynamic_* and
   tls_local_dynamic_base_* patterns (wrapped in a libcall block on 64-bit
   targets without TARGET_GNU2_TLS) and add the thread pointer when
   TARGET_GNU2_TLS is set; local-dynamic then adds an @DTPOFF constant to
   the module base.  Initial-exec loads the offset from memory through a
   @GOTTPOFF, @GOTNTPOFF or @INDNTPOFF reference, and local-exec uses an
   @NTPOFF or @TPOFF constant; both either add the offset to the thread
   pointer or, on the gen_subsi3 path, subtract it from the thread
   pointer. */
7399 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
7401 rtx dest, base, off, pic, tp;
7406 case TLS_MODEL_GLOBAL_DYNAMIC:
7407 dest = gen_reg_rtx (Pmode);
7408 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
7410 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
7412 rtx rax = gen_rtx_REG (Pmode, 0), insns;
7415 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
7416 insns = get_insns ();
7419 CONST_OR_PURE_CALL_P (insns) = 1;
7420 emit_libcall_block (insns, dest, rax, x);
7422 else if (TARGET_64BIT && TARGET_GNU2_TLS)
7423 emit_insn (gen_tls_global_dynamic_64 (dest, x));
7425 emit_insn (gen_tls_global_dynamic_32 (dest, x));
7427 if (TARGET_GNU2_TLS)
7429 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
7431 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
7435 case TLS_MODEL_LOCAL_DYNAMIC:
7436 base = gen_reg_rtx (Pmode);
7437 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
7439 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
7441 rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
7444 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
7445 insns = get_insns ();
7448 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
7449 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
7450 CONST_OR_PURE_CALL_P (insns) = 1;
7451 emit_libcall_block (insns, base, rax, note);
7453 else if (TARGET_64BIT && TARGET_GNU2_TLS)
7454 emit_insn (gen_tls_local_dynamic_base_64 (base));
7456 emit_insn (gen_tls_local_dynamic_base_32 (base));
7458 if (TARGET_GNU2_TLS)
7460 rtx x = ix86_tls_module_base ();
7462 set_unique_reg_note (get_last_insn (), REG_EQUIV,
7463 gen_rtx_MINUS (Pmode, x, tp));
7466 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
7467 off = gen_rtx_CONST (Pmode, off);
7469 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
7471 if (TARGET_GNU2_TLS)
7473 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
7475 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
7480 case TLS_MODEL_INITIAL_EXEC:
7484 type = UNSPEC_GOTNTPOFF;
7488 if (reload_in_progress)
7489 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
7490 pic = pic_offset_table_rtx;
7491 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
7493 else if (!TARGET_ANY_GNU_TLS)
7495 pic = gen_reg_rtx (Pmode);
7496 emit_insn (gen_set_got (pic));
7497 type = UNSPEC_GOTTPOFF;
7502 type = UNSPEC_INDNTPOFF;
7505 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
7506 off = gen_rtx_CONST (Pmode, off);
7508 off = gen_rtx_PLUS (Pmode, pic, off);
7509 off = gen_const_mem (Pmode, off);
7510 set_mem_alias_set (off, ix86_GOT_alias_set ());
7512 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7514 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
7515 off = force_reg (Pmode, off);
7516 return gen_rtx_PLUS (Pmode, base, off);
7520 base = get_thread_pointer (true);
7521 dest = gen_reg_rtx (Pmode);
7522 emit_insn (gen_subsi3 (dest, base, off));
7526 case TLS_MODEL_LOCAL_EXEC:
7527 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
7528 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7529 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
7530 off = gen_rtx_CONST (Pmode, off);
7532 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7534 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
7535 return gen_rtx_PLUS (Pmode, base, off);
7539 base = get_thread_pointer (true);
7540 dest = gen_reg_rtx (Pmode);
7541 emit_insn (gen_subsi3 (dest, base, off));
/* get_dllimport_decl: DECL is the tree whose import-table entry is wanted.
   Entries are kept in dllimport_map, a GC-managed hash table looked up by
   the pointer hash of DECL.  For a new entry an artificial, ignored,
   external, read-only VAR_DECL of pointer type is built; its DECL_RTL is
   a constant MEM (in the GOT alias set) addressed by a SYMBOL_REF flagged
   SYMBOL_FLAG_LOCAL whose name is an import prefix followed by DECL's
   stripped assembler name.  */
7552 /* Create or return the unique __imp_DECL dllimport symbol corresponding
7555 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
7556 htab_t dllimport_map;
7559 get_dllimport_decl (tree decl)
7561 struct tree_map *h, in;
7565 size_t namelen, prefixlen;
7571 dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);
7573 in.hash = htab_hash_pointer (decl);
7574 in.base.from = decl;
7575 loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
7576 h = (struct tree_map *) *loc;
7580 *loc = h = GGC_NEW (struct tree_map);
7582 h->base.from = decl;
7583 h->to = to = build_decl (VAR_DECL, NULL, ptr_type_node);
7584 DECL_ARTIFICIAL (to) = 1;
7585 DECL_IGNORED_P (to) = 1;
7586 DECL_EXTERNAL (to) = 1;
7587 TREE_READONLY (to) = 1;
7589 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
7590 name = targetm.strip_name_encoding (name);
7591 if (name[0] == FASTCALL_PREFIX)
7597 prefix = "*__imp__";
7599 namelen = strlen (name);
7600 prefixlen = strlen (prefix);
7601 imp_name = (char *) alloca (namelen + prefixlen + 1);
7602 memcpy (imp_name, prefix, prefixlen);
7603 memcpy (imp_name + prefixlen, name, namelen + 1);
7605 name = ggc_alloc_string (imp_name, namelen + prefixlen);
7606 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
7607 SET_SYMBOL_REF_DECL (rtl, to);
7608 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL;
7610 rtl = gen_const_mem (Pmode, rtl);
7611 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
7613 SET_DECL_RTL (to, rtl);
7618 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
7619 true if we require the result be a register.  SYMBOL must carry a
   SYMBOL_REF_DECL (this is asserted).  The starting point is the DECL_RTL
   of the decl that get_dllimport_decl returns for it, and force_reg is
   what places that value in a Pmode register. */
7622 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
7627 gcc_assert (SYMBOL_REF_DECL (symbol));
7628 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol));
7630 x = DECL_RTL (imp_decl);
7632 x = force_reg (Pmode, x);
7636 /* Try machine-dependent ways of modifying an illegitimate address
7637 to be legitimate. If we find one, return the new, valid address.
7638 This macro is used in only one place: `memory_address' in explow.c.
7640 OLDX is the address as it was before break_out_memory_refs was called.
7641 In some cases it is useful to look at this to decide what needs to be done.
7643 MODE and WIN are passed so that this macro can use
7644 GO_IF_LEGITIMATE_ADDRESS.
7646 It is always safe for this macro to do nothing. It exists to recognize
7647 opportunities to optimize the output.
7649 For the 80386, we handle X+REG by loading X into a register R and
7650 using R+REG. R will go in a general reg and indexing will be used.
7651 However, if REG is a broken-out memory address or multiplication,
7652 nothing needs to be done because REG can certainly go in a general reg.
7654 When -fpic is used, special handling is needed for symbolic references.
7655 See comments by legitimize_pic_address in i386.c for details.

   In this implementation X is the address to fix up, OLDX is unused, and
   MODE is passed on to legitimate_address_p.  TLS symbols (bare, or plus
   an offset inside a CONST) go to legitimize_tls_address, PIC symbolic
   constants to legitimize_pic_address, and dllimport symbols to
   legitimize_dllimport_symbol.  After that, shifts by less than 4 are
   rewritten as multiplies, the operands of a PLUS are reordered and
   reassociated, and remaining MULT or non-register operands are forced
   into registers. */
7658 legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
7663 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
7665 return legitimize_tls_address (x, (enum tls_model) log, false);
7666 if (GET_CODE (x) == CONST
7667 && GET_CODE (XEXP (x, 0)) == PLUS
7668 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
7669 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
7671 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
7672 (enum tls_model) log, false);
7673 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
7676 if (flag_pic && SYMBOLIC_CONST (x))
7677 return legitimize_pic_address (x, 0);
7679 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
7681 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (x))
7682 return legitimize_dllimport_symbol (x, true);
7683 if (GET_CODE (x) == CONST
7684 && GET_CODE (XEXP (x, 0)) == PLUS
7685 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
7686 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (x, 0), 0)))
7688 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (x, 0), 0), true);
7689 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
7693 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
7694 if (GET_CODE (x) == ASHIFT
7695 && CONST_INT_P (XEXP (x, 1))
7696 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
7699 log = INTVAL (XEXP (x, 1));
7700 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
7701 GEN_INT (1 << log));
7704 if (GET_CODE (x) == PLUS)
7706 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
7708 if (GET_CODE (XEXP (x, 0)) == ASHIFT
7709 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
7710 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
7713 log = INTVAL (XEXP (XEXP (x, 0), 1));
7714 XEXP (x, 0) = gen_rtx_MULT (Pmode,
7715 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
7716 GEN_INT (1 << log));
7719 if (GET_CODE (XEXP (x, 1)) == ASHIFT
7720 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
7721 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
7724 log = INTVAL (XEXP (XEXP (x, 1), 1));
7725 XEXP (x, 1) = gen_rtx_MULT (Pmode,
7726 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
7727 GEN_INT (1 << log));
7730 /* Put multiply first if it isn't already. */
7731 if (GET_CODE (XEXP (x, 1)) == MULT)
7733 rtx tmp = XEXP (x, 0);
7734 XEXP (x, 0) = XEXP (x, 1);
7739 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
7740 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
7741 created by virtual register instantiation, register elimination, and
7742 similar optimizations. */
7743 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
7746 x = gen_rtx_PLUS (Pmode,
7747 gen_rtx_PLUS (Pmode, XEXP (x, 0),
7748 XEXP (XEXP (x, 1), 0)),
7749 XEXP (XEXP (x, 1), 1));
7753 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
7754 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
7755 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
7756 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
7757 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
7758 && CONSTANT_P (XEXP (x, 1)))
7761 rtx other = NULL_RTX;
7763 if (CONST_INT_P (XEXP (x, 1)))
7765 constant = XEXP (x, 1);
7766 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
7768 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
7770 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
7771 other = XEXP (x, 1);
7779 x = gen_rtx_PLUS (Pmode,
7780 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
7781 XEXP (XEXP (XEXP (x, 0), 1), 0)),
7782 plus_constant (other, INTVAL (constant)));
7786 if (changed && legitimate_address_p (mode, x, FALSE))
7789 if (GET_CODE (XEXP (x, 0)) == MULT)
7792 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
7795 if (GET_CODE (XEXP (x, 1)) == MULT)
7798 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
7802 && REG_P (XEXP (x, 1))
7803 && REG_P (XEXP (x, 0)))
7806 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
7809 x = legitimize_pic_address (x, 0);
7812 if (changed && legitimate_address_p (mode, x, FALSE))
7815 if (REG_P (XEXP (x, 0)))
7817 rtx temp = gen_reg_rtx (Pmode);
7818 rtx val = force_operand (XEXP (x, 1), temp);
7820 emit_move_insn (temp, val);
7826 else if (REG_P (XEXP (x, 1)))
7828 rtx temp = gen_reg_rtx (Pmode);
7829 rtx val = force_operand (XEXP (x, 0), temp);
7831 emit_move_insn (temp, val);
7841 /* Print an integer constant expression in assembler syntax. Addition
7842 and subtraction are the only arithmetic that may appear in these
7843 expressions. FILE is the stdio stream to write to, X is the rtx, and
7844 CODE is the operand print code from the output string.

   The function recurses on the operands of compound expressions, passing
   CODE along unchanged.  An UNSPEC must have exactly one operand; that
   operand is printed first, followed by the relocation suffix selected by
   the unspec number (@GOT, @GOTOFF, and so on).  Floating-point
   CONST_DOUBLEs, unknown unspecs and unrecognized expressions are
   reported through output_operand_lossage. */
7847 output_pic_addr_const (FILE *file, rtx x, int code)
7851 switch (GET_CODE (x))
7854 gcc_assert (flag_pic);
7859 if (! TARGET_MACHO || TARGET_64BIT)
7860 output_addr_const (file, x);
7863 const char *name = XSTR (x, 0);
7865 /* Mark the decl as referenced so that cgraph will
7866 output the function. */
7867 if (SYMBOL_REF_DECL (x))
7868 mark_decl_referenced (SYMBOL_REF_DECL (x));
7871 if (MACHOPIC_INDIRECT
7872 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
7873 name = machopic_indirection_name (x, /*stub_p=*/true);
7875 assemble_name (file, name);
/* With operand code 'P', a symbol that is not local gets an @PLT suffix,
   except on Mach-O and the 64-bit MS ABI. */
7877 if (!TARGET_MACHO && !TARGET_64BIT_MS_ABI
7878 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
7879 fputs ("@PLT", file);
/* NOTE(review): the label name below is written to asm_out_file rather
   than FILE, unlike the other output calls in this function -- confirm
   this is intended. */
7886 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
7887 assemble_name (asm_out_file, buf);
7891 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
7895 /* This used to output parentheses around the expression,
7896 but that does not work on the 386 (either ATT or BSD assembler). */
7897 output_pic_addr_const (file, XEXP (x, 0), code);
7901 if (GET_MODE (x) == VOIDmode)
7903 /* We can use %d if the number is <32 bits and positive. */
7904 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
7905 fprintf (file, "0x%lx%08lx",
7906 (unsigned long) CONST_DOUBLE_HIGH (x),
7907 (unsigned long) CONST_DOUBLE_LOW (x));
7909 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
7912 /* We can't handle floating point constants;
7913 PRINT_OPERAND must handle them. */
7914 output_operand_lossage ("floating constant misused");
7918 /* Some assemblers need integer constants to appear first. */
7919 if (CONST_INT_P (XEXP (x, 0)))
7921 output_pic_addr_const (file, XEXP (x, 0), code);
7923 output_pic_addr_const (file, XEXP (x, 1), code);
7927 gcc_assert (CONST_INT_P (XEXP (x, 1)));
7928 output_pic_addr_const (file, XEXP (x, 1), code);
7930 output_pic_addr_const (file, XEXP (x, 0), code);
7936 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
7937 output_pic_addr_const (file, XEXP (x, 0), code);
7939 output_pic_addr_const (file, XEXP (x, 1), code);
7941 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
7945 gcc_assert (XVECLEN (x, 0) == 1);
7946 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
7947 switch (XINT (x, 1))
7950 fputs ("@GOT", file);
7953 fputs ("@GOTOFF", file);
7956 fputs ("@PLTOFF", file);
7958 case UNSPEC_GOTPCREL:
7959 fputs ("@GOTPCREL(%rip)", file);
7961 case UNSPEC_GOTTPOFF:
7962 /* FIXME: This might be @TPOFF in Sun ld too. */
7963 fputs ("@GOTTPOFF", file);
7966 fputs ("@TPOFF", file);
7970 fputs ("@TPOFF", file);
7972 fputs ("@NTPOFF", file);
7975 fputs ("@DTPOFF", file);
7977 case UNSPEC_GOTNTPOFF:
7979 fputs ("@GOTTPOFF(%rip)", file);
7981 fputs ("@GOTNTPOFF", file);
7983 case UNSPEC_INDNTPOFF:
7984 fputs ("@INDNTPOFF", file);
7987 output_operand_lossage ("invalid UNSPEC as operand");
7993 output_operand_lossage ("invalid expression as operand");
7997 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
7998 We need to emit DTP-relative relocations.

   Writes ASM_LONG, then X via output_addr_const, then the @DTPOFF suffix
   to FILE.  A further ", 0" may be appended afterwards.
   NOTE(review): presumably that padding depends on SIZE -- confirm. */
8000 static void ATTRIBUTE_UNUSED
8001 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
8003 fputs (ASM_LONG, file);
8004 output_addr_const (file, x);
8005 fputs ("@DTPOFF", file);
8011 fputs (", 0", file);
8018 /* In the name of slightly smaller debug output, and to cater to
8019 general assembler lossage, recognize PIC+GOTOFF and turn it back
8020 into a direct symbol reference.
8022 On Darwin, this is necessary to avoid a crash, because Darwin
8023 has a different PIC label for each routine but the DWARF debugging
8024 information is not associated with any particular routine, so it's
8025 necessary to remove references to the PIC label from RTL stored by
8026 the DWARF output code.

   ORIG_X is the address to simplify.  The shapes recognized are a CONST
   around an UNSPEC_GOTPCREL, and a PLUS whose first operand is the PIC
   register (optionally combined with a register addend) and whose second
   operand is a CONST around an UNSPEC_GOT or UNSPEC_GOTOFF, optionally
   with a CONST_INT addend.  UNSPEC_GOT is only undone when ORIG_X is a
   MEM, and UNSPEC_GOTOFF only when it is not.  Addends that were split
   off are added back onto the recovered symbol. */
8029 ix86_delegitimize_address (rtx orig_x)
8032 /* reg_addend is NULL or a multiple of some register. */
8033 rtx reg_addend = NULL_RTX;
8034 /* const_addend is NULL or a const_int. */
8035 rtx const_addend = NULL_RTX;
8036 /* This is the result, or NULL. */
8037 rtx result = NULL_RTX;
8044 if (GET_CODE (x) != CONST
8045 || GET_CODE (XEXP (x, 0)) != UNSPEC
8046 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
8049 return XVECEXP (XEXP (x, 0), 0, 0);
8052 if (GET_CODE (x) != PLUS
8053 || GET_CODE (XEXP (x, 1)) != CONST)
8056 if (REG_P (XEXP (x, 0))
8057 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
8058 /* %ebx + GOT/GOTOFF */
8060 else if (GET_CODE (XEXP (x, 0)) == PLUS)
8062 /* %ebx + %reg * scale + GOT/GOTOFF */
8063 reg_addend = XEXP (x, 0);
8064 if (REG_P (XEXP (reg_addend, 0))
8065 && REGNO (XEXP (reg_addend, 0)) == PIC_OFFSET_TABLE_REGNUM)
8066 reg_addend = XEXP (reg_addend, 1);
8067 else if (REG_P (XEXP (reg_addend, 1))
8068 && REGNO (XEXP (reg_addend, 1)) == PIC_OFFSET_TABLE_REGNUM)
8069 reg_addend = XEXP (reg_addend, 0);
8072 if (!REG_P (reg_addend)
8073 && GET_CODE (reg_addend) != MULT
8074 && GET_CODE (reg_addend) != ASHIFT)
/* Look inside the CONST and split off a CONST_INT addend if present. */
8080 x = XEXP (XEXP (x, 1), 0);
8081 if (GET_CODE (x) == PLUS
8082 && CONST_INT_P (XEXP (x, 1)))
8084 const_addend = XEXP (x, 1);
8088 if (GET_CODE (x) == UNSPEC
8089 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x))
8090 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))))
8091 result = XVECEXP (x, 0, 0);
8093 if (TARGET_MACHO && darwin_local_data_pic (x)
8095 result = XEXP (x, 0);
8101 result = gen_rtx_PLUS (Pmode, result, const_addend);
8103 result = gen_rtx_PLUS (Pmode, reg_addend, result);
8107 /* If X is a machine specific address (i.e. a symbol or label being
8108 referenced as a displacement from the GOT implemented using an
8109 UNSPEC), then return the base term. Otherwise return X.

   Two routes are visible below.  One looks through a CONST, skips a
   CONST_INT or CONST_DOUBLE addend, and requires an UNSPEC_GOTPCREL whose
   operand becomes the candidate term.  The other takes the candidate from
   ix86_delegitimize_address.  On both routes the candidate is tested for
   being a SYMBOL_REF or LABEL_REF. */
8112 ix86_find_base_term (rtx x)
8118 if (GET_CODE (x) != CONST)
8121 if (GET_CODE (term) == PLUS
8122 && (CONST_INT_P (XEXP (term, 1))
8123 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
8124 term = XEXP (term, 0);
8125 if (GET_CODE (term) != UNSPEC
8126 || XINT (term, 1) != UNSPEC_GOTPCREL)
8129 term = XVECEXP (term, 0, 0);
8131 if (GET_CODE (term) != SYMBOL_REF
8132 && GET_CODE (term) != LABEL_REF)
8138 term = ix86_delegitimize_address (x);
8140 if (GET_CODE (term) != SYMBOL_REF
8141 && GET_CODE (term) != LABEL_REF)
/* put_condition_code writes to FILE the condition suffix that corresponds
   to comparison CODE evaluated in flags mode MODE.  For CCFPmode and
   CCFPUmode the comparison is first mapped by ix86_fp_comparison_codes,
   which must ask for neither a bypass nor a second test (asserted), and is
   then converted with ix86_fp_compare_code_to_integer.  The individual
   cases assert which flags modes they accept, and several suffixes differ
   depending on FP (for example "nbe" instead of "a").
   NOTE(review): reverse_condition is presumably applied only when REVERSE
   is nonzero -- confirm.  */
8148 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
8153 if (mode == CCFPmode || mode == CCFPUmode)
8155 enum rtx_code second_code, bypass_code;
8156 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
8157 gcc_assert (bypass_code == UNKNOWN && second_code == UNKNOWN);
8158 code = ix86_fp_compare_code_to_integer (code);
8162 code = reverse_condition (code);
8213 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
8217 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
8218 Those same assemblers have the same but opposite lossage on cmov. */
8219 gcc_assert (mode == CCmode);
8220 suffix = fp ? "nbe" : "a";
8240 gcc_assert (mode == CCmode);
8262 gcc_assert (mode == CCmode);
8263 suffix = fp ? "nb" : "ae";
8266 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
8270 gcc_assert (mode == CCmode);
8274 suffix = fp ? "u" : "p";
8277 suffix = fp ? "nu" : "np";
8282 fputs (suffix, file);
8285 /* Print the name of register X to FILE based on its machine mode and number.
8286 If CODE is 'w', pretend the mode is HImode.
8287 If CODE is 'b', pretend the mode is QImode.
8288 If CODE is 'k', pretend the mode is SImode.
8289 If CODE is 'q', pretend the mode is DImode.
8290 If CODE is 'h', pretend the reg is the 'high' byte register.
8291 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op. */
8294 print_reg (rtx x, int code, FILE *file)
8296 gcc_assert (REGNO (x) != ARG_POINTER_REGNUM
8297 && REGNO (x) != FRAME_POINTER_REGNUM
8298 && REGNO (x) != FLAGS_REG
8299 && REGNO (x) != FPSR_REG
8300 && REGNO (x) != FPCR_REG);
8302 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
8305 if (code == 'w' || MMX_REG_P (x))
8307 else if (code == 'b')
8309 else if (code == 'k')
8311 else if (code == 'q')
8313 else if (code == 'y')
8315 else if (code == 'h')
8318 code = GET_MODE_SIZE (GET_MODE (x));
8320 /* Irritatingly, AMD extended registers use a different naming convention
8321 from the normal registers. */
8322 if (REX_INT_REG_P (x))
8324 gcc_assert (TARGET_64BIT);
8328 error ("extended registers have no high halves");
8331 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
8334 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
8337 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
8340 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
8343 error ("unsupported operand size for extended register");
8351 if (STACK_TOP_P (x))
8353 fputs ("st(0)", file);
8360 if (! ANY_FP_REG_P (x))
8361 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
8366 fputs (hi_reg_name[REGNO (x)], file);
8369 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
8371 fputs (qi_reg_name[REGNO (x)], file);
8374 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
8376 fputs (qi_high_reg_name[REGNO (x)], file);
8383 /* Locate some local-dynamic symbol still in use by this function
8384 so that we can print its name in some tls_local_dynamic_base
8388 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
8392 if (GET_CODE (x) == SYMBOL_REF
8393 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
8395 cfun->machine->some_ld_name = XSTR (x, 0);
8403 get_some_local_dynamic_name (void)
8407 if (cfun->machine->some_ld_name)
8408 return cfun->machine->some_ld_name;
8410 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
8412 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
8413 return cfun->machine->some_ld_name;
8419 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
8420 C -- print opcode suffix for set/cmov insn.
8421 c -- like C, but print reversed condition
8422 F,f -- likewise, but for floating-point.
8423 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
8425 R -- print the prefix for register names.
8426 z -- print the opcode suffix for the size of the current operand.
8427 * -- print a star (in certain assembler syntax)
8428 A -- print an absolute memory reference.
8429 w -- print the operand as if it's a "word" (HImode) even if it isn't.
8430 s -- print a shift double count, followed by the assembler's argument
8432 b -- print the QImode name of the register for the indicated operand.
8433 %b0 would print %al if operands[0] is reg 0.
8434 w -- likewise, print the HImode name of the register.
8435 k -- likewise, print the SImode name of the register.
8436 q -- likewise, print the DImode name of the register.
8437 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
8438 y -- print "st(0)" instead of "st" as a register.
8439 D -- print condition for SSE cmp instruction.
8440 P -- if PIC, print an @PLT suffix.
8441 X -- don't print any sort of PIC '@' suffix for a symbol.
8442 & -- print some in-use local-dynamic symbol name.
8443 H -- print a memory address offset by 8; used for sse high-parts
8447 print_operand (FILE *file, rtx x, int code)
8454 if (ASSEMBLER_DIALECT == ASM_ATT)
8459 assemble_name (file, get_some_local_dynamic_name ());
8463 switch (ASSEMBLER_DIALECT)
8470 /* Intel syntax. For absolute addresses, registers should not
8471 be surrounded by braces. */
8475 PRINT_OPERAND (file, x, 0);
8485 PRINT_OPERAND (file, x, 0);
8490 if (ASSEMBLER_DIALECT == ASM_ATT)
8495 if (ASSEMBLER_DIALECT == ASM_ATT)
8500 if (ASSEMBLER_DIALECT == ASM_ATT)
8505 if (ASSEMBLER_DIALECT == ASM_ATT)
8510 if (ASSEMBLER_DIALECT == ASM_ATT)
8515 if (ASSEMBLER_DIALECT == ASM_ATT)
8520 /* 387 opcodes don't get size suffixes if the operands are
8522 if (STACK_REG_P (x))
8525 /* Likewise if using Intel opcodes. */
8526 if (ASSEMBLER_DIALECT == ASM_INTEL)
8529 /* This is the size of op from size of operand. */
8530 switch (GET_MODE_SIZE (GET_MODE (x)))
8539 #ifdef HAVE_GAS_FILDS_FISTS
8549 if (GET_MODE (x) == SFmode)
8564 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
8566 #ifdef GAS_MNEMONICS
8592 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
8594 PRINT_OPERAND (file, x, 0);
8600 /* Little bit of braindamage here. The SSE compare instructions
8601 use completely different names for the comparisons than the
8602 fp conditional moves do. */
8603 switch (GET_CODE (x))
8618 fputs ("unord", file);
8622 fputs ("neq", file);
8626 fputs ("nlt", file);
8630 fputs ("nle", file);
8633 fputs ("ord", file);
8640 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8641 if (ASSEMBLER_DIALECT == ASM_ATT)
8643 switch (GET_MODE (x))
8645 case HImode: putc ('w', file); break;
8647 case SFmode: putc ('l', file); break;
8649 case DFmode: putc ('q', file); break;
8650 default: gcc_unreachable ();
8657 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
8660 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8661 if (ASSEMBLER_DIALECT == ASM_ATT)
8664 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
8667 /* Like above, but reverse condition */
8669 /* Check to see if argument to %c is really a constant
8670 and not a condition code which needs to be reversed. */
8671 if (!COMPARISON_P (x))
8673 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
8676 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
8679 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8680 if (ASSEMBLER_DIALECT == ASM_ATT)
8683 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
8687 /* It doesn't actually matter what mode we use here, as we're
8688 only going to use this for printing. */
8689 x = adjust_address_nv (x, DImode, 8);
8696 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
8699 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
8702 int pred_val = INTVAL (XEXP (x, 0));
8704 if (pred_val < REG_BR_PROB_BASE * 45 / 100
8705 || pred_val > REG_BR_PROB_BASE * 55 / 100)
8707 int taken = pred_val > REG_BR_PROB_BASE / 2;
8708 int cputaken = final_forward_branch_p (current_output_insn) == 0;
8710 /* Emit hints only in the case default branch prediction
8711 heuristics would fail. */
8712 if (taken != cputaken)
8714 /* We use 3e (DS) prefix for taken branches and
8715 2e (CS) prefix for not taken branches. */
8717 fputs ("ds ; ", file);
8719 fputs ("cs ; ", file);
8726 output_operand_lossage ("invalid operand code '%c'", code);
8731 print_reg (x, code, file);
8735 /* No `byte ptr' prefix for call instructions. */
8736 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
8739 switch (GET_MODE_SIZE (GET_MODE (x)))
8741 case 1: size = "BYTE"; break;
8742 case 2: size = "WORD"; break;
8743 case 4: size = "DWORD"; break;
8744 case 8: size = "QWORD"; break;
8745 case 12: size = "XWORD"; break;
8746 case 16: size = "XMMWORD"; break;
8751 /* Check for explicit size override (codes 'b', 'w' and 'k') */
8754 else if (code == 'w')
8756 else if (code == 'k')
8760 fputs (" PTR ", file);
8764 /* Avoid (%rip) for call operands. */
8765 if (CONSTANT_ADDRESS_P (x) && code == 'P'
8766 && !CONST_INT_P (x))
8767 output_addr_const (file, x);
8768 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
8769 output_operand_lossage ("invalid constraints for operand");
8774 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
8779 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8780 REAL_VALUE_TO_TARGET_SINGLE (r, l);
8782 if (ASSEMBLER_DIALECT == ASM_ATT)
8784 fprintf (file, "0x%08lx", l);
8787 /* These float cases don't actually occur as immediate operands. */
8788 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
8792 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
8793 fprintf (file, "%s", dstr);
8796 else if (GET_CODE (x) == CONST_DOUBLE
8797 && GET_MODE (x) == XFmode)
8801 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
8802 fprintf (file, "%s", dstr);
8807 /* We have patterns that allow zero sets of memory, for instance.
8808 In 64-bit mode, we should probably support all 8-byte vectors,
8809 since we can in fact encode that into an immediate. */
8810 if (GET_CODE (x) == CONST_VECTOR)
8812 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
8818 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
8820 if (ASSEMBLER_DIALECT == ASM_ATT)
8823 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
8824 || GET_CODE (x) == LABEL_REF)
8826 if (ASSEMBLER_DIALECT == ASM_ATT)
8829 fputs ("OFFSET FLAT:", file);
8832 if (CONST_INT_P (x))
8833 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
8835 output_pic_addr_const (file, x, code);
8837 output_addr_const (file, x);
8841 /* Print a memory operand whose address is ADDR. */
8844 print_operand_address (FILE *file, rtx addr)
8846 struct ix86_address parts;
8847 rtx base, index, disp;
8849 int ok = ix86_decompose_address (addr, &parts);
8854 index = parts.index;
8856 scale = parts.scale;
8864 if (USER_LABEL_PREFIX[0] == 0)
8866 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
8872 if (!base && !index)
8874 /* Displacement only requires special attention. */
8876 if (CONST_INT_P (disp))
8878 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
8880 if (USER_LABEL_PREFIX[0] == 0)
8882 fputs ("ds:", file);
8884 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
8887 output_pic_addr_const (file, disp, 0);
8889 output_addr_const (file, disp);
8891 /* Use one byte shorter RIP relative addressing for 64bit mode. */
8894 if (GET_CODE (disp) == CONST
8895 && GET_CODE (XEXP (disp, 0)) == PLUS
8896 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
8897 disp = XEXP (XEXP (disp, 0), 0);
8898 if (GET_CODE (disp) == LABEL_REF
8899 || (GET_CODE (disp) == SYMBOL_REF
8900 && SYMBOL_REF_TLS_MODEL (disp) == 0))
8901 fputs ("(%rip)", file);
8906 if (ASSEMBLER_DIALECT == ASM_ATT)
8911 output_pic_addr_const (file, disp, 0);
8912 else if (GET_CODE (disp) == LABEL_REF)
8913 output_asm_label (disp);
8915 output_addr_const (file, disp);
8920 print_reg (base, 0, file);
8924 print_reg (index, 0, file);
8926 fprintf (file, ",%d", scale);
8932 rtx offset = NULL_RTX;
8936 /* Pull out the offset of a symbol; print any symbol itself. */
8937 if (GET_CODE (disp) == CONST
8938 && GET_CODE (XEXP (disp, 0)) == PLUS
8939 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
8941 offset = XEXP (XEXP (disp, 0), 1);
8942 disp = gen_rtx_CONST (VOIDmode,
8943 XEXP (XEXP (disp, 0), 0));
8947 output_pic_addr_const (file, disp, 0);
8948 else if (GET_CODE (disp) == LABEL_REF)
8949 output_asm_label (disp);
8950 else if (CONST_INT_P (disp))
8953 output_addr_const (file, disp);
8959 print_reg (base, 0, file);
8962 if (INTVAL (offset) >= 0)
8964 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
8968 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
8975 print_reg (index, 0, file);
8977 fprintf (file, "*%d", scale);
8985 output_addr_const_extra (FILE *file, rtx x)
8989 if (GET_CODE (x) != UNSPEC)
8992 op = XVECEXP (x, 0, 0);
8993 switch (XINT (x, 1))
8995 case UNSPEC_GOTTPOFF:
8996 output_addr_const (file, op);
8997 /* FIXME: This might be @TPOFF in Sun ld. */
8998 fputs ("@GOTTPOFF", file);
9001 output_addr_const (file, op);
9002 fputs ("@TPOFF", file);
9005 output_addr_const (file, op);
9007 fputs ("@TPOFF", file);
9009 fputs ("@NTPOFF", file);
9012 output_addr_const (file, op);
9013 fputs ("@DTPOFF", file);
9015 case UNSPEC_GOTNTPOFF:
9016 output_addr_const (file, op);
9018 fputs ("@GOTTPOFF(%rip)", file);
9020 fputs ("@GOTNTPOFF", file);
9022 case UNSPEC_INDNTPOFF:
9023 output_addr_const (file, op);
9024 fputs ("@INDNTPOFF", file);
9034 /* Split one or more DImode RTL references into pairs of SImode
9035 references. The RTL can be REG, offsettable MEM, integer constant, or
9036 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
9037 split and "num" is its length. lo_half and hi_half are output arrays
9038 that parallel "operands". */
9041 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
9045 rtx op = operands[num];
9047 /* simplify_subreg refuses to split volatile memory addresses,
9048 but we still have to handle them. */
9051 lo_half[num] = adjust_address (op, SImode, 0);
9052 hi_half[num] = adjust_address (op, SImode, 4);
9056 lo_half[num] = simplify_gen_subreg (SImode, op,
9057 GET_MODE (op) == VOIDmode
9058 ? DImode : GET_MODE (op), 0);
9059 hi_half[num] = simplify_gen_subreg (SImode, op,
9060 GET_MODE (op) == VOIDmode
9061 ? DImode : GET_MODE (op), 4);
9065 /* Split one or more TImode RTL references into pairs of DImode
9066 references. The RTL can be REG, offsettable MEM, integer constant, or
9067 CONST_DOUBLE. "operands" is a pointer to an array of TImode RTL to
9068 split and "num" is its length. lo_half and hi_half are output arrays
9069 that parallel "operands". */
9072 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
9076 rtx op = operands[num];
9078 /* simplify_subreg refuses to split volatile memory addresses, but we
9079 still have to handle them. */
9082 lo_half[num] = adjust_address (op, DImode, 0);
9083 hi_half[num] = adjust_address (op, DImode, 8);
9087 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
9088 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
9093 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
9094 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
9095 is the expression of the binary operation. The output may either be
9096 emitted here, or returned to the caller, like all output_* functions.
9098 There is no guarantee that the operands are the same mode, as they
9099 might be within FLOAT or FLOAT_EXTEND expressions. */
9101 #ifndef SYSV386_COMPAT
9102 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
9103 wants to fix the assemblers because that causes incompatibility
9104 with gcc. No-one wants to fix gcc because that causes
9105 incompatibility with assemblers... You can use the option of
9106 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
9107 #define SYSV386_COMPAT 1
9111 output_387_binary_op (rtx insn, rtx *operands)
9113 static char buf[30];
9116 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
9118 #ifdef ENABLE_CHECKING
9119 /* Even if we do not want to check the inputs, this documents the input
9120 constraints, which helps in understanding the following code. */
9121 if (STACK_REG_P (operands[0])
9122 && ((REG_P (operands[1])
9123 && REGNO (operands[0]) == REGNO (operands[1])
9124 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
9125 || (REG_P (operands[2])
9126 && REGNO (operands[0]) == REGNO (operands[2])
9127 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
9128 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
9131 gcc_assert (is_sse);
9134 switch (GET_CODE (operands[3]))
9137 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
9138 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
9146 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
9147 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
9155 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
9156 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
9164 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
9165 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
9179 if (GET_MODE (operands[0]) == SFmode)
9180 strcat (buf, "ss\t{%2, %0|%0, %2}");
9182 strcat (buf, "sd\t{%2, %0|%0, %2}");
9187 switch (GET_CODE (operands[3]))
9191 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
9193 rtx temp = operands[2];
9194 operands[2] = operands[1];
9198 /* know operands[0] == operands[1]. */
9200 if (MEM_P (operands[2]))
9206 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
9208 if (STACK_TOP_P (operands[0]))
9209 /* How is it that we are storing to a dead operand[2]?
9210 Well, presumably operands[1] is dead too. We can't
9211 store the result to st(0) as st(0) gets popped on this
9212 instruction. Instead store to operands[2] (which I
9213 think has to be st(1)). st(1) will be popped later.
9214 gcc <= 2.8.1 didn't have this check and generated
9215 assembly code that the Unixware assembler rejected. */
9216 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
9218 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
9222 if (STACK_TOP_P (operands[0]))
9223 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
9225 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
9230 if (MEM_P (operands[1]))
9236 if (MEM_P (operands[2]))
9242 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
9245 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
9246 derived assemblers, confusingly reverse the direction of
9247 the operation for fsub{r} and fdiv{r} when the
9248 destination register is not st(0). The Intel assembler
9249 doesn't have this brain damage. Read !SYSV386_COMPAT to
9250 figure out what the hardware really does. */
9251 if (STACK_TOP_P (operands[0]))
9252 p = "{p\t%0, %2|rp\t%2, %0}";
9254 p = "{rp\t%2, %0|p\t%0, %2}";
9256 if (STACK_TOP_P (operands[0]))
9257 /* As above for fmul/fadd, we can't store to st(0). */
9258 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
9260 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
9265 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
9268 if (STACK_TOP_P (operands[0]))
9269 p = "{rp\t%0, %1|p\t%1, %0}";
9271 p = "{p\t%1, %0|rp\t%0, %1}";
9273 if (STACK_TOP_P (operands[0]))
9274 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
9276 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
9281 if (STACK_TOP_P (operands[0]))
9283 if (STACK_TOP_P (operands[1]))
9284 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
9286 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
9289 else if (STACK_TOP_P (operands[1]))
9292 p = "{\t%1, %0|r\t%0, %1}";
9294 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
9300 p = "{r\t%2, %0|\t%0, %2}";
9302 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
9315 /* Return needed mode for entity in optimize_mode_switching pass. */
9318 ix86_mode_needed (int entity, rtx insn)
9320 enum attr_i387_cw mode;
9322 /* The mode UNINITIALIZED is used to store the control word after a
9323 function call or ASM pattern. The mode ANY specifies that the function
9324 has no requirements on the control word and makes no changes in the
9325 bits we are interested in. */
9328 || (NONJUMP_INSN_P (insn)
9329 && (asm_noperands (PATTERN (insn)) >= 0
9330 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
9331 return I387_CW_UNINITIALIZED;
9333 if (recog_memoized (insn) < 0)
9336 mode = get_attr_i387_cw (insn);
9341 if (mode == I387_CW_TRUNC)
9346 if (mode == I387_CW_FLOOR)
9351 if (mode == I387_CW_CEIL)
9356 if (mode == I387_CW_MASK_PM)
9367 /* Output code to initialize control word copies used by trunc?f?i and
9368 rounding patterns. The current control word is stored in SLOT_CW_STORED,
9369 and the control word adjusted for MODE in the stack slot for MODE. */
9372 emit_i387_cw_initialization (int mode)
9374 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
9377 enum ix86_stack_slot slot;
9379 rtx reg = gen_reg_rtx (HImode);
9381 emit_insn (gen_x86_fnstcw_1 (stored_mode));
9382 emit_move_insn (reg, copy_rtx (stored_mode));
9384 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL || optimize_size)
9389 /* round toward zero (truncate) */
9390 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
9391 slot = SLOT_CW_TRUNC;
9395 /* round down toward -oo */
9396 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
9397 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
9398 slot = SLOT_CW_FLOOR;
9402 /* round up toward +oo */
9403 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
9404 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
9405 slot = SLOT_CW_CEIL;
9408 case I387_CW_MASK_PM:
9409 /* mask precision exception for nearbyint() */
9410 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
9411 slot = SLOT_CW_MASK_PM;
9423 /* round toward zero (truncate) */
9424 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
9425 slot = SLOT_CW_TRUNC;
9429 /* round down toward -oo */
9430 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
9431 slot = SLOT_CW_FLOOR;
9435 /* round up toward +oo */
9436 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
9437 slot = SLOT_CW_CEIL;
9440 case I387_CW_MASK_PM:
9441 /* mask precision exception for nearbyint() */
9442 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
9443 slot = SLOT_CW_MASK_PM;
9451 gcc_assert (slot < MAX_386_STACK_LOCALS);
9453 new_mode = assign_386_stack_local (HImode, slot);
9454 emit_move_insn (new_mode, reg);
9457 /* Output code for INSN to convert a float to a signed int. OPERANDS
9458 are the insn operands. The output may be [HSD]Imode and the input
9459 operand may be [SDX]Fmode. */
9462 output_fix_trunc (rtx insn, rtx *operands, int fisttp)
9464 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
9465 int dimode_p = GET_MODE (operands[0]) == DImode;
9466 int round_mode = get_attr_i387_cw (insn);
9468 /* Jump through a hoop or two for DImode, since the hardware has no
9469 non-popping instruction. We used to do this a different way, but
9470 that was somewhat fragile and broke with post-reload splitters. */
9471 if ((dimode_p || fisttp) && !stack_top_dies)
9472 output_asm_insn ("fld\t%y1", operands);
9474 gcc_assert (STACK_TOP_P (operands[1]));
9475 gcc_assert (MEM_P (operands[0]));
9476 gcc_assert (GET_MODE (operands[1]) != TFmode);
9479 output_asm_insn ("fisttp%z0\t%0", operands);
9482 if (round_mode != I387_CW_ANY)
9483 output_asm_insn ("fldcw\t%3", operands);
9484 if (stack_top_dies || dimode_p)
9485 output_asm_insn ("fistp%z0\t%0", operands);
9487 output_asm_insn ("fist%z0\t%0", operands);
9488 if (round_mode != I387_CW_ANY)
9489 output_asm_insn ("fldcw\t%2", operands);
9495 /* Output code for x87 ffreep insn. The OPNO argument, which may only
9496 have the values zero or one, indicates the ffreep insn's operand
9497 from the OPERANDS array. */
9500 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
9502 if (TARGET_USE_FFREEP)
9503 #if HAVE_AS_IX86_FFREEP
9504 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
9507 static char retval[] = ".word\t0xc_df";
9508 int regno = REGNO (operands[opno]);
9510 gcc_assert (FP_REGNO_P (regno));
9512 retval[9] = '0' + (regno - FIRST_STACK_REG);
9517 return opno ? "fstp\t%y1" : "fstp\t%y0";
9521 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
9522 should be used. UNORDERED_P is true when fucom should be used. */
9525 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
9528 rtx cmp_op0, cmp_op1;
9529 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
9533 cmp_op0 = operands[0];
9534 cmp_op1 = operands[1];
9538 cmp_op0 = operands[1];
9539 cmp_op1 = operands[2];
9544 if (GET_MODE (operands[0]) == SFmode)
9546 return "ucomiss\t{%1, %0|%0, %1}";
9548 return "comiss\t{%1, %0|%0, %1}";
9551 return "ucomisd\t{%1, %0|%0, %1}";
9553 return "comisd\t{%1, %0|%0, %1}";
9556 gcc_assert (STACK_TOP_P (cmp_op0));
9558 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
9560 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
9564 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
9565 return output_387_ffreep (operands, 1);
9568 return "ftst\n\tfnstsw\t%0";
9571 if (STACK_REG_P (cmp_op1)
9573 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
9574 && REGNO (cmp_op1) != FIRST_STACK_REG)
9576 /* If both the top of the 387 stack dies, and the other operand
9577 is also a stack register that dies, then this must be a
9578 `fcompp' float compare */
9582 /* There is no double popping fcomi variant. Fortunately,
9583 eflags is immune from the fstp's cc clobbering. */
9585 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
9587 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
9588 return output_387_ffreep (operands, 0);
9593 return "fucompp\n\tfnstsw\t%0";
9595 return "fcompp\n\tfnstsw\t%0";
9600 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
9602 static const char * const alt[16] =
9604 "fcom%z2\t%y2\n\tfnstsw\t%0",
9605 "fcomp%z2\t%y2\n\tfnstsw\t%0",
9606 "fucom%z2\t%y2\n\tfnstsw\t%0",
9607 "fucomp%z2\t%y2\n\tfnstsw\t%0",
9609 "ficom%z2\t%y2\n\tfnstsw\t%0",
9610 "ficomp%z2\t%y2\n\tfnstsw\t%0",
9614 "fcomi\t{%y1, %0|%0, %y1}",
9615 "fcomip\t{%y1, %0|%0, %y1}",
9616 "fucomi\t{%y1, %0|%0, %y1}",
9617 "fucomip\t{%y1, %0|%0, %y1}",
9628 mask = eflags_p << 3;
9629 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
9630 mask |= unordered_p << 1;
9631 mask |= stack_top_dies;
9633 gcc_assert (mask < 16);
9642 ix86_output_addr_vec_elt (FILE *file, int value)
9644 const char *directive = ASM_LONG;
9648 directive = ASM_QUAD;
9650 gcc_assert (!TARGET_64BIT);
9653 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
9657 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
9659 const char *directive = ASM_LONG;
9662 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
9663 directive = ASM_QUAD;
9665 gcc_assert (!TARGET_64BIT);
9667 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
9668 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
9669 fprintf (file, "%s%s%d-%s%d\n",
9670 directive, LPREFIX, value, LPREFIX, rel);
9671 else if (HAVE_AS_GOTOFF_IN_DATA)
9672 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
9674 else if (TARGET_MACHO)
9676 fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
9677 machopic_output_function_base_name (file);
9678 fprintf(file, "\n");
9682 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
9683 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
9686 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
9690 ix86_expand_clear (rtx dest)
9694 /* We play register width games, which are only valid after reload. */
9695 gcc_assert (reload_completed);
9697 /* Avoid HImode and its attendant prefix byte. */
9698 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
9699 dest = gen_rtx_REG (SImode, REGNO (dest));
9700 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
9702 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
9703 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
9705 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
9706 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
9712 /* X is an unchanging MEM. If it is a constant pool reference, return
9713 the constant pool rtx, else NULL. */
9716 maybe_get_pool_constant (rtx x)
9718 x = ix86_delegitimize_address (XEXP (x, 0));
9720 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
9721 return get_pool_constant (x);
9727 ix86_expand_move (enum machine_mode mode, rtx operands[])
9729 int strict = (reload_in_progress || reload_completed);
9731 enum tls_model model;
9736 if (GET_CODE (op1) == SYMBOL_REF)
9738 model = SYMBOL_REF_TLS_MODEL (op1);
9741 op1 = legitimize_tls_address (op1, model, true);
9742 op1 = force_operand (op1, op0);
9746 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
9747 && SYMBOL_REF_DLLIMPORT_P (op1))
9748 op1 = legitimize_dllimport_symbol (op1, false);
9750 else if (GET_CODE (op1) == CONST
9751 && GET_CODE (XEXP (op1, 0)) == PLUS
9752 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
9754 rtx addend = XEXP (XEXP (op1, 0), 1);
9755 rtx symbol = XEXP (XEXP (op1, 0), 0);
9758 model = SYMBOL_REF_TLS_MODEL (symbol);
9760 tmp = legitimize_tls_address (symbol, model, true);
9761 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
9762 && SYMBOL_REF_DLLIMPORT_P (symbol))
9763 tmp = legitimize_dllimport_symbol (symbol, true);
9767 tmp = force_operand (tmp, NULL);
9768 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
9769 op0, 1, OPTAB_DIRECT);
9775 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
9777 if (TARGET_MACHO && !TARGET_64BIT)
9782 rtx temp = ((reload_in_progress
9783 || ((op0 && REG_P (op0))
9785 ? op0 : gen_reg_rtx (Pmode));
9786 op1 = machopic_indirect_data_reference (op1, temp);
9787 op1 = machopic_legitimize_pic_address (op1, mode,
9788 temp == op1 ? 0 : temp);
9790 else if (MACHOPIC_INDIRECT)
9791 op1 = machopic_indirect_data_reference (op1, 0);
9799 op1 = force_reg (Pmode, op1);
9800 else if (!TARGET_64BIT || !x86_64_movabs_operand (op1, Pmode))
9802 rtx reg = no_new_pseudos ? op0 : NULL_RTX;
9803 op1 = legitimize_pic_address (op1, reg);
9812 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
9813 || !push_operand (op0, mode))
9815 op1 = force_reg (mode, op1);
9817 if (push_operand (op0, mode)
9818 && ! general_no_elim_operand (op1, mode))
9819 op1 = copy_to_mode_reg (mode, op1);
9821 /* Force large constants in 64bit compilation into register
9822 to get them CSEed. */
9823 if (TARGET_64BIT && mode == DImode
9824 && immediate_operand (op1, mode)
9825 && !x86_64_zext_immediate_operand (op1, VOIDmode)
9826 && !register_operand (op0, mode)
9827 && optimize && !reload_completed && !reload_in_progress)
9828 op1 = copy_to_mode_reg (mode, op1);
9830 if (FLOAT_MODE_P (mode))
9832 /* If we are loading a floating point constant to a register,
9833 force the value to memory now, since we'll get better code
9834 out the back end. */
9838 else if (GET_CODE (op1) == CONST_DOUBLE)
9840 op1 = validize_mem (force_const_mem (mode, op1));
9841 if (!register_operand (op0, mode))
9843 rtx temp = gen_reg_rtx (mode);
9844 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
9845 emit_move_insn (op0, temp);
9852 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
9856 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
9858 rtx op0 = operands[0], op1 = operands[1];
9859 unsigned int align = GET_MODE_ALIGNMENT (mode);
9861 /* Force constants other than zero into memory. We do not know how
9862 the instructions used to build constants modify the upper 64 bits
9863 of the register; once we have that information we may be able
9864 to handle some of them more efficiently. */
9865 if ((reload_in_progress | reload_completed) == 0
9866 && register_operand (op0, mode)
9867 && (CONSTANT_P (op1)
9868 || (GET_CODE (op1) == SUBREG
9869 && CONSTANT_P (SUBREG_REG (op1))))
9870 && standard_sse_constant_p (op1) <= 0)
9871 op1 = validize_mem (force_const_mem (mode, op1));
9873 /* TDmode values are passed as TImode on the stack. TImode values
9874 are moved via xmm registers, and moving them to stack can result in
9875 unaligned memory access. Use ix86_expand_vector_move_misalign()
9876 if memory operand is not aligned correctly. */
9878 && (mode == TImode) && !TARGET_64BIT
9879 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
9880 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
9884 /* ix86_expand_vector_move_misalign() does not like constants ... */
9885 if (CONSTANT_P (op1)
9886 || (GET_CODE (op1) == SUBREG
9887 && CONSTANT_P (SUBREG_REG (op1))))
9888 op1 = validize_mem (force_const_mem (mode, op1));
9890 /* ... nor both arguments in memory. */
9891 if (!register_operand (op0, mode)
9892 && !register_operand (op1, mode))
9893 op1 = force_reg (mode, op1);
9895 tmp[0] = op0; tmp[1] = op1;
9896 ix86_expand_vector_move_misalign (mode, tmp);
9900 /* Make operand1 a register if it isn't already. */
9902 && !register_operand (op0, mode)
9903 && !register_operand (op1, mode))
9905 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
9909 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
9912 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
9913 straight to ix86_expand_vector_move. */
9914 /* Code generation for scalar reg-reg moves of single and double precision data:
9915 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
9919 if (x86_sse_partial_reg_dependency == true)
9924 Code generation for scalar loads of double precision data:
9925 if (x86_sse_split_regs == true)
9926 movlpd mem, reg (gas syntax)
9930 Code generation for unaligned packed loads of single precision data
9931 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
9932 if (x86_sse_unaligned_move_optimal)
9935 if (x86_sse_partial_reg_dependency == true)
9947 Code generation for unaligned packed loads of double precision data
9948 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
9949 if (x86_sse_unaligned_move_optimal)
9952 if (x86_sse_split_regs == true)
9965 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
9974 /* If we're optimizing for size, movups is the smallest. */
9977 op0 = gen_lowpart (V4SFmode, op0);
9978 op1 = gen_lowpart (V4SFmode, op1);
9979 emit_insn (gen_sse_movups (op0, op1));
9983 /* ??? If we have typed data, then it would appear that using
9984 movdqu is the only way to get unaligned data loaded with
9986 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
9988 op0 = gen_lowpart (V16QImode, op0);
9989 op1 = gen_lowpart (V16QImode, op1);
9990 emit_insn (gen_sse2_movdqu (op0, op1));
9994 if (TARGET_SSE2 && mode == V2DFmode)
9998 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
10000 op0 = gen_lowpart (V2DFmode, op0);
10001 op1 = gen_lowpart (V2DFmode, op1);
10002 emit_insn (gen_sse2_movupd (op0, op1));
10006 /* When SSE registers are split into halves, we can avoid
10007 writing to the top half twice. */
10008 if (TARGET_SSE_SPLIT_REGS)
10010 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
10015 /* ??? Not sure about the best option for the Intel chips.
10016 The following would seem to satisfy; the register is
10017 entirely cleared, breaking the dependency chain. We
10018 then store to the upper half, with a dependency depth
10019 of one. A rumor has it that Intel recommends two movsd
10020 followed by an unpacklpd, but this is unconfirmed. And
10021 given that the dependency depth of the unpacklpd would
10022 still be one, I'm not sure why this would be better. */
10023 zero = CONST0_RTX (V2DFmode);
10026 m = adjust_address (op1, DFmode, 0);
10027 emit_insn (gen_sse2_loadlpd (op0, zero, m));
10028 m = adjust_address (op1, DFmode, 8);
10029 emit_insn (gen_sse2_loadhpd (op0, op0, m));
10033 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
10035 op0 = gen_lowpart (V4SFmode, op0);
10036 op1 = gen_lowpart (V4SFmode, op1);
10037 emit_insn (gen_sse_movups (op0, op1));
10041 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
10042 emit_move_insn (op0, CONST0_RTX (mode));
10044 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
10046 if (mode != V4SFmode)
10047 op0 = gen_lowpart (V4SFmode, op0);
10048 m = adjust_address (op1, V2SFmode, 0);
10049 emit_insn (gen_sse_loadlps (op0, op0, m));
10050 m = adjust_address (op1, V2SFmode, 8);
10051 emit_insn (gen_sse_loadhps (op0, op0, m));
10054 else if (MEM_P (op0))
10056 /* If we're optimizing for size, movups is the smallest. */
10059 op0 = gen_lowpart (V4SFmode, op0);
10060 op1 = gen_lowpart (V4SFmode, op1);
10061 emit_insn (gen_sse_movups (op0, op1));
10065 /* ??? Similar to above, only less clear because of quote
10066 typeless stores unquote. */
10067 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
10068 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
10070 op0 = gen_lowpart (V16QImode, op0);
10071 op1 = gen_lowpart (V16QImode, op1);
10072 emit_insn (gen_sse2_movdqu (op0, op1));
10076 if (TARGET_SSE2 && mode == V2DFmode)
10078 m = adjust_address (op0, DFmode, 0);
10079 emit_insn (gen_sse2_storelpd (m, op1));
10080 m = adjust_address (op0, DFmode, 8);
10081 emit_insn (gen_sse2_storehpd (m, op1));
10085 if (mode != V4SFmode)
10086 op1 = gen_lowpart (V4SFmode, op1);
10087 m = adjust_address (op0, V2SFmode, 0);
10088 emit_insn (gen_sse_storelps (m, op1));
10089 m = adjust_address (op0, V2SFmode, 8);
10090 emit_insn (gen_sse_storehps (m, op1));
10094 gcc_unreachable ();
10097 /* Expand a push in MODE. This is some mode for which we do not support
10098 proper push instructions, at least from the registers that we expect
10099 the value to live in. */
10102 ix86_expand_push (enum machine_mode mode, rtx x)
10106 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
10107 GEN_INT (-GET_MODE_SIZE (mode)),
10108 stack_pointer_rtx, 1, OPTAB_DIRECT);
10109 if (tmp != stack_pointer_rtx)
10110 emit_move_insn (stack_pointer_rtx, tmp);
10112 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
10113 emit_move_insn (tmp, x);
10116 /* Helper function of ix86_fixup_binary_operands to canonicalize
10117 operand order. Returns true if the operands should be swapped. */
10120 ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
10123 rtx dst = operands[0];
10124 rtx src1 = operands[1];
10125 rtx src2 = operands[2];
10127 /* If the operation is not commutative, we can't do anything. */
10128 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
10131 /* Highest priority is that src1 should match dst. */
10132 if (rtx_equal_p (dst, src1))
10134 if (rtx_equal_p (dst, src2))
10137 /* Next highest priority is that immediate constants come second. */
10138 if (immediate_operand (src2, mode))
10140 if (immediate_operand (src1, mode))
10143 /* Lowest priority is that memory references should come second. */
10153 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
10154 destination to use for the operation. If different from the true
10155 destination in operands[0], a copy operation will be required. */
10158 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
10161 rtx dst = operands[0];
10162 rtx src1 = operands[1];
10163 rtx src2 = operands[2];
10165 /* Canonicalize operand order. */
10166 if (ix86_swap_binary_operands_p (code, mode, operands))
10173 /* Both source operands cannot be in memory. */
10174 if (MEM_P (src1) && MEM_P (src2))
10176 /* Optimization: Only read from memory once. */
10177 if (rtx_equal_p (src1, src2))
10179 src2 = force_reg (mode, src2);
10183 src2 = force_reg (mode, src2);
10186 /* If the destination is memory, and we do not have matching source
10187 operands, do things in registers. */
10188 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
10189 dst = gen_reg_rtx (mode);
10191 /* Source 1 cannot be a constant. */
10192 if (CONSTANT_P (src1))
10193 src1 = force_reg (mode, src1);
10195 /* Source 1 cannot be a non-matching memory. */
10196 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
10197 src1 = force_reg (mode, src1);
10199 operands[1] = src1;
10200 operands[2] = src2;
10204 /* Similarly, but assume that the destination has already been
10205 set up properly. */
10208 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
10209 enum machine_mode mode, rtx operands[])
10211 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
10212 gcc_assert (dst == operands[0]);
10215 /* Attempt to expand a binary operator. Make the expansion closer to the
10216 actual machine, than just general_operand, which will allow 3 separate
10217 memory references (one output, two input) in a single insn. */
10220 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
10223 rtx src1, src2, dst, op, clob;
10225 dst = ix86_fixup_binary_operands (code, mode, operands);
10226 src1 = operands[1];
10227 src2 = operands[2];
10229 /* Emit the instruction. */
10231 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
10232 if (reload_in_progress)
10234 /* Reload doesn't know about the flags register, and doesn't know that
10235 it doesn't want to clobber it. We can only do this with PLUS. */
10236 gcc_assert (code == PLUS);
10241 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
10242 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
10245 /* Fix up the destination if needed. */
10246 if (dst != operands[0])
10247 emit_move_insn (operands[0], dst);
10250 /* Return TRUE or FALSE depending on whether the binary operator meets the
10251 appropriate constraints. */
10254 ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
10257 rtx dst = operands[0];
10258 rtx src1 = operands[1];
10259 rtx src2 = operands[2];
10261 /* Both source operands cannot be in memory. */
10262 if (MEM_P (src1) && MEM_P (src2))
10265 /* Canonicalize operand order for commutative operators. */
10266 if (ix86_swap_binary_operands_p (code, mode, operands))
10273 /* If the destination is memory, we must have a matching source operand. */
10274 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
10277 /* Source 1 cannot be a constant. */
10278 if (CONSTANT_P (src1))
10281 /* Source 1 cannot be a non-matching memory. */
10282 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
10288 /* Attempt to expand a unary operator. Make the expansion closer to the
10289 actual machine, than just general_operand, which will allow 2 separate
10290 memory references (one output, one input) in a single insn. */
10293 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
10296 int matching_memory;
10297 rtx src, dst, op, clob;
10302 /* If the destination is memory, and we do not have matching source
10303 operands, do things in registers. */
10304 matching_memory = 0;
10307 if (rtx_equal_p (dst, src))
10308 matching_memory = 1;
10310 dst = gen_reg_rtx (mode);
10313 /* When source operand is memory, destination must match. */
10314 if (MEM_P (src) && !matching_memory)
10315 src = force_reg (mode, src);
10317 /* Emit the instruction. */
10319 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
10320 if (reload_in_progress || code == NOT)
10322 /* Reload doesn't know about the flags register, and doesn't know that
10323 it doesn't want to clobber it. */
10324 gcc_assert (code == NOT);
10329 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
10330 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
10333 /* Fix up the destination if needed. */
10334 if (dst != operands[0])
10335 emit_move_insn (operands[0], dst);
10338 /* Return TRUE or FALSE depending on whether the unary operator meets the
10339 appropriate constraints. */
10342 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
10343 enum machine_mode mode ATTRIBUTE_UNUSED,
10344 rtx operands[2] ATTRIBUTE_UNUSED)
10346 /* If one of operands is memory, source and destination must match. */
10347 if ((MEM_P (operands[0])
10348 || MEM_P (operands[1]))
10349 && ! rtx_equal_p (operands[0], operands[1]))
10354 /* Post-reload splitter for converting an SF or DFmode value in an
10355 SSE register into an unsigned SImode. */
10358 ix86_split_convert_uns_si_sse (rtx operands[])
10360 enum machine_mode vecmode;
10361 rtx value, large, zero_or_two31, input, two31, x;
10363 large = operands[1];
10364 zero_or_two31 = operands[2];
10365 input = operands[3];
10366 two31 = operands[4];
10367 vecmode = GET_MODE (large);
10368 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
10370 /* Load up the value into the low element. We must ensure that the other
10371 elements are valid floats -- zero is the easiest such value. */
10374 if (vecmode == V4SFmode)
10375 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
10377 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
10381 input = gen_rtx_REG (vecmode, REGNO (input));
10382 emit_move_insn (value, CONST0_RTX (vecmode));
10383 if (vecmode == V4SFmode)
10384 emit_insn (gen_sse_movss (value, value, input));
10386 emit_insn (gen_sse2_movsd (value, value, input));
10389 emit_move_insn (large, two31);
10390 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
10392 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
10393 emit_insn (gen_rtx_SET (VOIDmode, large, x));
10395 x = gen_rtx_AND (vecmode, zero_or_two31, large);
10396 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
10398 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
10399 emit_insn (gen_rtx_SET (VOIDmode, value, x));
10401 large = gen_rtx_REG (V4SImode, REGNO (large));
10402 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
10404 x = gen_rtx_REG (V4SImode, REGNO (value));
10405 if (vecmode == V4SFmode)
10406 emit_insn (gen_sse2_cvttps2dq (x, value));
10408 emit_insn (gen_sse2_cvttpd2dq (x, value));
10411 emit_insn (gen_xorv4si3 (value, value, large));
10414 /* Convert an unsigned DImode value into a DFmode, using only SSE.
10415 Expects the 64-bit DImode to be supplied in a pair of integral
10416 registers. Requires SSE2; will use SSE3 if available. For x86_32,
10417 -mfpmath=sse, !optimize_size only. */
10420 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
10422 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
10423 rtx int_xmm, fp_xmm;
10424 rtx biases, exponents;
10427 int_xmm = gen_reg_rtx (V4SImode);
10428 if (TARGET_INTER_UNIT_MOVES)
10429 emit_insn (gen_movdi_to_sse (int_xmm, input));
10430 else if (TARGET_SSE_SPLIT_REGS)
10432 emit_insn (gen_rtx_CLOBBER (VOIDmode, int_xmm));
10433 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
10437 x = gen_reg_rtx (V2DImode);
10438 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
10439 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
10442 x = gen_rtx_CONST_VECTOR (V4SImode,
10443 gen_rtvec (4, GEN_INT (0x43300000UL),
10444 GEN_INT (0x45300000UL),
10445 const0_rtx, const0_rtx));
10446 exponents = validize_mem (force_const_mem (V4SImode, x));
10448 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
10449 emit_insn (gen_sse2_punpckldq (int_xmm, int_xmm, exponents));
10451 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
10452 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
10453 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
10454 (0x1.0p84 + double(fp_value_hi_xmm)).
10455 Note these exponents differ by 32. */
10457 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
10459 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
10460 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
10461 real_ldexp (&bias_lo_rvt, &dconst1, 52);
10462 real_ldexp (&bias_hi_rvt, &dconst1, 84);
10463 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
10464 x = const_double_from_real_value (bias_hi_rvt, DFmode);
10465 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
10466 biases = validize_mem (force_const_mem (V2DFmode, biases));
10467 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
10469 /* Add the upper and lower DFmode values together. */
10471 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
10474 x = copy_to_mode_reg (V2DFmode, fp_xmm);
10475 emit_insn (gen_sse2_unpckhpd (fp_xmm, fp_xmm, fp_xmm));
10476 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
10479 ix86_expand_vector_extract (false, target, fp_xmm, 0);
10482 /* Convert an unsigned SImode value into a DFmode. Only currently used
10483 for SSE, but applicable anywhere. */
10486 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
10488 REAL_VALUE_TYPE TWO31r;
10491 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
10492 NULL, 1, OPTAB_DIRECT);
10494 fp = gen_reg_rtx (DFmode);
10495 emit_insn (gen_floatsidf2 (fp, x));
10497 real_ldexp (&TWO31r, &dconst1, 31);
10498 x = const_double_from_real_value (TWO31r, DFmode);
10500 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
10502 emit_move_insn (target, x);
10505 /* Convert a signed DImode value into a DFmode. Only used for SSE in
10506 32-bit mode; otherwise we have a direct convert instruction. */
10509 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
10511 REAL_VALUE_TYPE TWO32r;
10512 rtx fp_lo, fp_hi, x;
10514 fp_lo = gen_reg_rtx (DFmode);
10515 fp_hi = gen_reg_rtx (DFmode);
10517 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
10519 real_ldexp (&TWO32r, &dconst1, 32);
10520 x = const_double_from_real_value (TWO32r, DFmode);
10521 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
10523 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
10525 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
10528 emit_move_insn (target, x);
10531 /* Convert an unsigned SImode value into a SFmode, using only SSE.
10532 For x86_32, -mfpmath=sse, !optimize_size only. */
10534 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
10536 REAL_VALUE_TYPE ONE16r;
10537 rtx fp_hi, fp_lo, int_hi, int_lo, x;
10539 real_ldexp (&ONE16r, &dconst1, 16);
10540 x = const_double_from_real_value (ONE16r, SFmode);
10541 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
10542 NULL, 0, OPTAB_DIRECT);
10543 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
10544 NULL, 0, OPTAB_DIRECT);
10545 fp_hi = gen_reg_rtx (SFmode);
10546 fp_lo = gen_reg_rtx (SFmode);
10547 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
10548 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
10549 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
10551 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
10553 if (!rtx_equal_p (target, fp_hi))
10554 emit_move_insn (target, fp_hi);
10557 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
10558 then replicate the value for all elements of the vector register. */
10562 ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
10569 v = gen_rtvec (4, value, value, value, value);
10570 return gen_rtx_CONST_VECTOR (V4SImode, v);
10574 v = gen_rtvec (2, value, value);
10575 return gen_rtx_CONST_VECTOR (V2DImode, v);
10579 v = gen_rtvec (4, value, value, value, value);
10581 v = gen_rtvec (4, value, CONST0_RTX (SFmode),
10582 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
10583 return gen_rtx_CONST_VECTOR (V4SFmode, v);
10587 v = gen_rtvec (2, value, value);
10589 v = gen_rtvec (2, value, CONST0_RTX (DFmode));
10590 return gen_rtx_CONST_VECTOR (V2DFmode, v);
10593 gcc_unreachable ();
10597 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
10598 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
10599 for an SSE register. If VECT is true, then replicate the mask for
10600 all elements of the vector register. If INVERT is true, then create
10601 a mask excluding the sign bit. */
10604 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
10606 enum machine_mode vec_mode, imode;
10607 HOST_WIDE_INT hi, lo;
10612 /* Find the sign bit, sign extended to 2*HWI. */
10618 vec_mode = (mode == SImode) ? V4SImode : V4SFmode;
10619 lo = 0x80000000, hi = lo < 0;
10625 vec_mode = (mode == DImode) ? V2DImode : V2DFmode;
10626 if (HOST_BITS_PER_WIDE_INT >= 64)
10627 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
10629 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
10635 vec_mode = VOIDmode;
10636 gcc_assert (HOST_BITS_PER_WIDE_INT >= 64);
10637 lo = 0, hi = (HOST_WIDE_INT)1 << shift;
10641 gcc_unreachable ();
10645 lo = ~lo, hi = ~hi;
10647 /* Force this value into the low part of a fp vector constant. */
10648 mask = immed_double_const (lo, hi, imode);
10649 mask = gen_lowpart (mode, mask);
10651 if (vec_mode == VOIDmode)
10652 return force_reg (mode, mask);
10654 v = ix86_build_const_vector (mode, vect, mask);
10655 return force_reg (vec_mode, v);
10658 /* Generate code for floating point ABS or NEG. */
10661 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
10664 rtx mask, set, use, clob, dst, src;
10665 bool matching_memory;
10666 bool use_sse = false;
10667 bool vector_mode = VECTOR_MODE_P (mode);
10668 enum machine_mode elt_mode = mode;
10672 elt_mode = GET_MODE_INNER (mode);
10675 else if (mode == TFmode)
10677 else if (TARGET_SSE_MATH)
10678 use_sse = SSE_FLOAT_MODE_P (mode);
10680 /* NEG and ABS performed with SSE use bitwise mask operations.
10681 Create the appropriate mask now. */
10683 mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS);
10690 /* If the destination is memory, and we don't have matching source
10691 operands or we're using the x87, do things in registers. */
10692 matching_memory = false;
10695 if (use_sse && rtx_equal_p (dst, src))
10696 matching_memory = true;
10698 dst = gen_reg_rtx (mode);
10700 if (MEM_P (src) && !matching_memory)
10701 src = force_reg (mode, src);
10705 set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
10706 set = gen_rtx_SET (VOIDmode, dst, set);
10711 set = gen_rtx_fmt_e (code, mode, src);
10712 set = gen_rtx_SET (VOIDmode, dst, set);
10715 use = gen_rtx_USE (VOIDmode, mask);
10716 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
10717 emit_insn (gen_rtx_PARALLEL (VOIDmode,
10718 gen_rtvec (3, set, use, clob)));
10724 if (dst != operands[0])
10725 emit_move_insn (operands[0], dst);
10728 /* Expand a copysign operation. Special case operand 0 being a constant. */
10731 ix86_expand_copysign (rtx operands[])
10733 enum machine_mode mode, vmode;
10734 rtx dest, op0, op1, mask, nmask;
10736 dest = operands[0];
10740 mode = GET_MODE (dest);
10741 vmode = mode == SFmode ? V4SFmode : V2DFmode;
10743 if (GET_CODE (op0) == CONST_DOUBLE)
10745 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
10747 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
10748 op0 = simplify_unary_operation (ABS, mode, op0, mode);
10750 if (mode == SFmode || mode == DFmode)
10752 if (op0 == CONST0_RTX (mode))
10753 op0 = CONST0_RTX (vmode);
10758 if (mode == SFmode)
10759 v = gen_rtvec (4, op0, CONST0_RTX (SFmode),
10760 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
10762 v = gen_rtvec (2, op0, CONST0_RTX (DFmode));
10763 op0 = force_reg (vmode, gen_rtx_CONST_VECTOR (vmode, v));
10767 mask = ix86_build_signbit_mask (mode, 0, 0);
10769 if (mode == SFmode)
10770 copysign_insn = gen_copysignsf3_const;
10771 else if (mode == DFmode)
10772 copysign_insn = gen_copysigndf3_const;
10774 copysign_insn = gen_copysigntf3_const;
10776 emit_insn (copysign_insn (dest, op0, op1, mask));
10780 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
10782 nmask = ix86_build_signbit_mask (mode, 0, 1);
10783 mask = ix86_build_signbit_mask (mode, 0, 0);
10785 if (mode == SFmode)
10786 copysign_insn = gen_copysignsf3_var;
10787 else if (mode == DFmode)
10788 copysign_insn = gen_copysigndf3_var;
10790 copysign_insn = gen_copysigntf3_var;
10792 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
10796 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
10797 be a constant, and so has already been expanded into a vector constant. */
10800 ix86_split_copysign_const (rtx operands[])
10802 enum machine_mode mode, vmode;
10803 rtx dest, op0, op1, mask, x;
10805 dest = operands[0];
10808 mask = operands[3];
10810 mode = GET_MODE (dest);
10811 vmode = GET_MODE (mask);
10813 dest = simplify_gen_subreg (vmode, dest, mode, 0);
10814 x = gen_rtx_AND (vmode, dest, mask);
10815 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10817 if (op0 != CONST0_RTX (vmode))
10819 x = gen_rtx_IOR (vmode, dest, op0);
10820 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10824 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
10825 so we have to do two masks. */
10828 ix86_split_copysign_var (rtx operands[])
10830 enum machine_mode mode, vmode;
10831 rtx dest, scratch, op0, op1, mask, nmask, x;
10833 dest = operands[0];
10834 scratch = operands[1];
10837 nmask = operands[4];
10838 mask = operands[5];
10840 mode = GET_MODE (dest);
10841 vmode = GET_MODE (mask);
10843 if (rtx_equal_p (op0, op1))
10845 /* Shouldn't happen often (it's useless, obviously), but when it does
10846 we'd generate incorrect code if we continue below. */
10847 emit_move_insn (dest, op0);
10851 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
10853 gcc_assert (REGNO (op1) == REGNO (scratch));
10855 x = gen_rtx_AND (vmode, scratch, mask);
10856 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
10859 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
10860 x = gen_rtx_NOT (vmode, dest);
10861 x = gen_rtx_AND (vmode, x, op0);
10862 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10866 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
10868 x = gen_rtx_AND (vmode, scratch, mask);
10870 else /* alternative 2,4 */
10872 gcc_assert (REGNO (mask) == REGNO (scratch));
10873 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
10874 x = gen_rtx_AND (vmode, scratch, op1);
10876 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
10878 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
10880 dest = simplify_gen_subreg (vmode, op0, mode, 0);
10881 x = gen_rtx_AND (vmode, dest, nmask);
10883 else /* alternative 3,4 */
10885 gcc_assert (REGNO (nmask) == REGNO (dest));
10887 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
10888 x = gen_rtx_AND (vmode, dest, op0);
10890 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10893 x = gen_rtx_IOR (vmode, dest, scratch);
10894 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10897 /* Return TRUE or FALSE depending on whether the first SET in INSN
10898 has source and destination with matching CC modes, and that the
10899 CC mode is at least as constrained as REQ_MODE. */
10902 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
10905 enum machine_mode set_mode;
10907 set = PATTERN (insn);
10908 if (GET_CODE (set) == PARALLEL)
10909 set = XVECEXP (set, 0, 0);
10910 gcc_assert (GET_CODE (set) == SET);
10911 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
10913 set_mode = GET_MODE (SET_DEST (set));
10917 if (req_mode != CCNOmode
10918 && (req_mode != CCmode
10919 || XEXP (SET_SRC (set), 1) != const0_rtx))
10923 if (req_mode == CCGCmode)
10927 if (req_mode == CCGOCmode || req_mode == CCNOmode)
10931 if (req_mode == CCZmode)
10938 gcc_unreachable ();
10941 return (GET_MODE (SET_SRC (set)) == set_mode);
10944 /* Generate insn patterns to do an integer compare of OP0 and OP1. */
10947 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
10949 enum machine_mode cmpmode;
10952 cmpmode = SELECT_CC_MODE (code, op0, op1);
10953 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
10955 /* This is very simple, but making the interface the same as in the
10956 FP case makes the rest of the code easier. */
10957 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
10958 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
10960 /* Return the test that should be put into the flags user, i.e.
10961 the bcc, scc, or cmov instruction. */
10962 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
10965 /* Figure out whether to use ordered or unordered fp comparisons.
10966 Return the appropriate mode to use. */
10969 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
10971 /* ??? In order to make all comparisons reversible, we do all comparisons
10972 non-trapping when compiling for IEEE. Once gcc is able to distinguish
10973 all forms trapping and nontrapping comparisons, we can make inequality
10974 comparisons trapping again, since it results in better code when using
10975 FCOM based compares. */
10976 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
10980 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
10982 enum machine_mode mode = GET_MODE (op0);
10984 if (SCALAR_FLOAT_MODE_P (mode))
10986 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
10987 return ix86_fp_compare_mode (code);
10992 /* Only zero flag is needed. */
10993 case EQ: /* ZF=0 */
10994 case NE: /* ZF!=0 */
10996 /* Codes needing carry flag. */
10997 case GEU: /* CF=0 */
10998 case GTU: /* CF=0 & ZF=0 */
10999 case LTU: /* CF=1 */
11000 case LEU: /* CF=1 | ZF=1 */
11002 /* Codes possibly doable only with sign flag when
11003 comparing against zero. */
11004 case GE: /* SF=OF or SF=0 */
11005 case LT: /* SF<>OF or SF=1 */
11006 if (op1 == const0_rtx)
11009 /* For other cases Carry flag is not required. */
11011 /* Codes doable only with sign flag when comparing
11012 against zero, but we miss jump instruction for it
11013 so we need to use relational tests against overflow
11014 that thus needs to be zero. */
11015 case GT: /* ZF=0 & SF=OF */
11016 case LE: /* ZF=1 | SF<>OF */
11017 if (op1 == const0_rtx)
11021 /* The strcmp patterns do (use flags), and combine may ask us for the proper mode. */
11026 gcc_unreachable ();
11030 /* Return the fixed registers used for condition codes. */
11033 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
11040 /* If two condition code modes are compatible, return a condition code
11041 mode which is compatible with both. Otherwise, return VOIDmode. */
11044 static enum machine_mode
11045 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
11050 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
11053 if ((m1 == CCGCmode && m2 == CCGOCmode)
11054 || (m1 == CCGOCmode && m2 == CCGCmode))
11060 gcc_unreachable ();
11090 /* These are only compatible with themselves, which we already
11096 /* Split comparison code CODE into comparisons we can do using branch
11097 instructions. BYPASS_CODE is the comparison code for a branch that will
11098 branch around FIRST_CODE and SECOND_CODE. If any of these branches
11099 is not required, its code is set to UNKNOWN.
11100 We never require more than two branches. */
11103 ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
11104 enum rtx_code *first_code,
11105 enum rtx_code *second_code)
11107 *first_code = code;
11108 *bypass_code = UNKNOWN;
11109 *second_code = UNKNOWN;
11111 /* The fcomi comparison sets flags as follows:
11121 case GT: /* GTU - CF=0 & ZF=0 */
11122 case GE: /* GEU - CF=0 */
11123 case ORDERED: /* PF=0 */
11124 case UNORDERED: /* PF=1 */
11125 case UNEQ: /* EQ - ZF=1 */
11126 case UNLT: /* LTU - CF=1 */
11127 case UNLE: /* LEU - CF=1 | ZF=1 */
11128 case LTGT: /* EQ - ZF=0 */
11130 case LT: /* LTU - CF=1 - fails on unordered */
11131 *first_code = UNLT;
11132 *bypass_code = UNORDERED;
11134 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
11135 *first_code = UNLE;
11136 *bypass_code = UNORDERED;
11138 case EQ: /* EQ - ZF=1 - fails on unordered */
11139 *first_code = UNEQ;
11140 *bypass_code = UNORDERED;
11142 case NE: /* NE - ZF=0 - fails on unordered */
11143 *first_code = LTGT;
11144 *second_code = UNORDERED;
11146 case UNGE: /* GEU - CF=0 - fails on unordered */
11148 *second_code = UNORDERED;
11150 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
11152 *second_code = UNORDERED;
11155 gcc_unreachable ();
11157 if (!TARGET_IEEE_FP)
11159 *second_code = UNKNOWN;
11160 *bypass_code = UNKNOWN;
11164 /* Return the cost of a comparison done using fcom + arithmetic operations on AX.
11165 All following functions use the number of instructions as the cost metric.
11166 In future this should be tweaked to compute bytes for optimize_size and
11167 take into account performance of various instructions on various CPUs. */
11169 ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
11171 if (!TARGET_IEEE_FP)
11173 /* The cost of code output by ix86_expand_fp_compare. */
11197 gcc_unreachable ();
11201 /* Return cost of comparison done using fcomi operation.
11202 See ix86_fp_comparison_arithmetics_cost for the metrics. */
11204 ix86_fp_comparison_fcomi_cost (enum rtx_code code)
11206 enum rtx_code bypass_code, first_code, second_code;
11207 /* Return arbitrarily high cost when instruction is not supported - this
11208 prevents gcc from using it. */
11211 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11212 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 2;
11215 /* Return cost of comparison done using sahf operation.
11216 See ix86_fp_comparison_arithmetics_cost for the metrics. */
11218 ix86_fp_comparison_sahf_cost (enum rtx_code code)
11220 enum rtx_code bypass_code, first_code, second_code;
11221 /* Return arbitrarily high cost when instruction is not preferred - this
11222 prevents gcc from using it. */
11223 if (!(TARGET_SAHF && (TARGET_USE_SAHF || optimize_size)))
11225 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11226 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 3;
11229 /* Compute the cost of the comparison CODE done using any method.
11230 See ix86_fp_comparison_arithmetics_cost for the metrics. */
11232 ix86_fp_comparison_cost (enum rtx_code code)
11234 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
11237 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
11238 sahf_cost = ix86_fp_comparison_sahf_cost (code);
/* Start from the arithmetic cost, then compare it against the sahf and fcomi
   costs.  NOTE(review): the statements that lower MIN and the final return
   are not visible in this listing; presumably the smallest cost is returned. */
11240 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
11241 if (min > sahf_cost)
11243 if (min > fcomi_cost)
11248 /* Return true if we should use an FCOMI instruction for this comparison. */
11252 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
/* NOTE(review): CODE is marked ATTRIBUTE_UNUSED but is used on the lines
   below; the marker looks stale. */
11254 enum rtx_code swapped_code = swap_condition (code);
/* True when the fcomi cost equals the overall comparison cost, either for
   CODE itself or for CODE with its operands swapped. */
11256 return ((ix86_fp_comparison_cost (code)
11257 == ix86_fp_comparison_fcomi_cost (code))
11258 || (ix86_fp_comparison_cost (swapped_code)
11259 == ix86_fp_comparison_fcomi_cost (swapped_code)));
11262 /* Swap, force into registers, or otherwise massage the two operands
11263 to a fp comparison. The operands are updated in place; the new
11264 comparison code is returned.
CODE is the comparison; POP0 and POP1 point to its two operands. */
11266 static enum rtx_code
11267 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
11269 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
11270 rtx op0 = *pop0, op1 = *pop1;
11271 enum machine_mode op_mode = GET_MODE (op0);
11272 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
/* NOTE(review): the comment that follows has lost its final line in this
   listing, so it is unterminated and runs on to the next comment close. */
11274 /* All of the unordered compare instructions only work on registers.
11275 The same is true of the fcomi compare instructions. The XFmode
11276 compare instructions require registers except when comparing
11277 against zero or when converting operand 1 from fixed point to
11281 && (fpcmp_mode == CCFPUmode
11282 || (op_mode == XFmode
11283 && ! (standard_80387_constant_p (op0) == 1
11284 || standard_80387_constant_p (op1) == 1)
11285 && GET_CODE (op1) != FLOAT)
11286 || ix86_use_fcomi_compare (code)))
11288 op0 = force_reg (op_mode, op0);
11289 op1 = force_reg (op_mode, op1);
11293 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
11294 things around if they appear profitable, otherwise force op0
11295 into a register. */
11297 if (standard_80387_constant_p (op0) == 0
11299 && ! (standard_80387_constant_p (op1) == 0
/* Exchanging the operands requires swapping the condition as well. */
11303 tmp = op0, op0 = op1, op1 = tmp;
11304 code = swap_condition (code);
11308 op0 = force_reg (op_mode, op0);
/* A constant op1 ends up either in the constant pool or in a register;
   the tests on TMP that select between the arms are not visible here. */
11310 if (CONSTANT_P (op1))
11312 int tmp = standard_80387_constant_p (op1);
11314 op1 = validize_mem (force_const_mem (op_mode, op1));
11318 op1 = force_reg (op_mode, op1);
11321 op1 = force_reg (op_mode, op1);
11325 /* Try to rearrange the comparison to make it cheaper. */
11326 if (ix86_fp_comparison_cost (code)
11327 > ix86_fp_comparison_cost (swap_condition (code))
11328 && (REG_P (op1) || !no_new_pseudos))
11331 tmp = op0, op0 = op1, op1 = tmp;
11332 code = swap_condition (code);
11334 op0 = force_reg (op_mode, op0);
11342 /* Convert the comparison codes we use to represent FP comparisons to the integer
11343 code that will result in a proper branch. Return UNKNOWN if no such code is available. */
11347 ix86_fp_compare_code_to_integer (enum rtx_code code)
11376 /* Generate insn patterns to do a floating point compare of OPERANDS.
CODE is the comparison and OP0/OP1 are its operands.  SCRATCH receives the
fnstsw result; a fresh HImode pseudo is created for it (the guarding
condition is not visible in this listing).  *SECOND_TEST and *BYPASS_TEST are
reset to NULL_RTX and, on the fcomi/sahf path, set to extra flag tests when
the comparison needs them.  Returns the test to put into the flags user. */
11379 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
11380 rtx *second_test, rtx *bypass_test)
11382 enum machine_mode fpcmp_mode, intcmp_mode;
/* COST is computed for the incoming CODE, before the call below may swap
   the operands and change CODE. */
11384 int cost = ix86_fp_comparison_cost (code);
11385 enum rtx_code bypass_code, first_code, second_code;
11387 fpcmp_mode = ix86_fp_compare_mode (code);
11388 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
11391 *second_test = NULL_RTX;
11393 *bypass_test = NULL_RTX;
11395 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11397 /* Do fcomi/sahf based test when profitable. */
11398 if ((TARGET_CMOVE || TARGET_SAHF)
11399 && (bypass_code == UNKNOWN || bypass_test)
11400 && (second_code == UNKNOWN || second_test)
11401 && ix86_fp_comparison_arithmetics_cost (code) > cost)
11405 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
11406 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
11412 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
11413 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
11415 scratch = gen_reg_rtx (HImode);
11416 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
11417 emit_insn (gen_x86_sahf_1 (scratch));
11420 /* The FP codes work out to act like unsigned. */
11421 intcmp_mode = fpcmp_mode;
11423 if (bypass_code != UNKNOWN)
11424 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
11425 gen_rtx_REG (intcmp_mode, FLAGS_REG),
11427 if (second_code != UNKNOWN)
11428 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
11429 gen_rtx_REG (intcmp_mode, FLAGS_REG),
11434 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
11435 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
11436 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
11438 scratch = gen_reg_rtx (HImode);
11439 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
11441 /* In the unordered case, we have to check C2 for NaN's, which
11442 doesn't happen to work out to anything nice combination-wise.
11443 So do some bit twiddling on the value we've got in AH to come
11444 up with an appropriate set of condition codes. */
/* NOTE(review): the masks used below appear to select the x87 condition
   bits as they sit in AH (C0 = 0x01, C2 = 0x04, C3 = 0x40, all three =
   0x45) -- confirm against the x87 status-word layout.  The case labels
   that separate the groups below are not visible in this listing. */
11446 intcmp_mode = CCNOmode;
11451 if (code == GT || !TARGET_IEEE_FP)
11453 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
11458 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11459 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
11460 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
11461 intcmp_mode = CCmode;
11467 if (code == LT && TARGET_IEEE_FP)
11469 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11470 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
11471 intcmp_mode = CCmode;
11476 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
11482 if (code == GE || !TARGET_IEEE_FP)
11484 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
11489 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11490 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
11497 if (code == LE && TARGET_IEEE_FP)
11499 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11500 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
11501 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
11502 intcmp_mode = CCmode;
11507 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
11513 if (code == EQ && TARGET_IEEE_FP)
11515 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11516 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
11517 intcmp_mode = CCmode;
11522 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
11529 if (code == NE && TARGET_IEEE_FP)
11531 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11532 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
11538 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
11544 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
11548 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
11553 gcc_unreachable ();
11557 /* Return the test that should be put into the flags user, i.e.
11558 the bcc, scc, or cmov instruction. */
11559 return gen_rtx_fmt_ee (code, VOIDmode,
11560 gen_rtx_REG (intcmp_mode, FLAGS_REG),
/* Build the comparison RTX for CODE applied to the file-level operands
   ix86_compare_op0 and ix86_compare_op1.  *SECOND_TEST and *BYPASS_TEST are
   reset to NULL_RTX; callers in this file also pass NULL for both, so the
   stores are presumably guarded by null checks that are not visible in this
   listing.  */
11565 ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
11568 op0 = ix86_compare_op0;
11569 op1 = ix86_compare_op1;
11572 *second_test = NULL_RTX;
11574 *bypass_test = NULL_RTX;
/* An already-emitted compare is consumed once: it is tested against zero
   and then cleared. */
11576 if (ix86_compare_emitted)
11578 ret = gen_rtx_fmt_ee (code, VOIDmode, ix86_compare_emitted, const0_rtx);
11579 ix86_compare_emitted = NULL_RTX;
11581 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
11583 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
11584 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
11585 second_test, bypass_test);
11588 ret = ix86_expand_int_compare (code, op0, op1);
11593 /* Return true if CODE will result in a nontrivial jump sequence, i.e.
one that needs a bypass or a second test in addition to the main one. */
11595 ix86_fp_jump_nontrivial_p (enum rtx_code code)
11597 enum rtx_code bypass_code, first_code, second_code;
11600 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11601 return bypass_code != UNKNOWN || second_code != UNKNOWN;
/* Emit a conditional branch to LABEL for comparison CODE applied to the
   file-level operands ix86_compare_op0 and ix86_compare_op1.  The work is
   dispatched on the mode of ix86_compare_op0; the case labels of that
   switch are not visible in this listing.  */
11605 ix86_expand_branch (enum rtx_code code, rtx label)
/* NOTE(review): the comment that follows has lost its final line in this
   listing, so it is unterminated and runs on to the next comment close. */
11609 /* If we have emitted a compare insn, go straight to simple.
11610 ix86_expand_compare won't emit anything if ix86_compare_emitted
11612 if (ix86_compare_emitted)
11615 switch (GET_MODE (ix86_compare_op0))
11621 tmp = ix86_expand_compare (code, NULL, NULL);
11622 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
11623 gen_rtx_LABEL_REF (VOIDmode, label),
11625 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
11634 enum rtx_code bypass_code, first_code, second_code;
11636 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
11637 &ix86_compare_op1);
11639 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11641 /* Check whether we will use the natural sequence with one jump. If
11642 so, we can expand jump early. Otherwise delay expansion by
11643 creating compound insn to not confuse optimizers. */
11644 if (bypass_code == UNKNOWN && second_code == UNKNOWN
11647 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
11648 gen_rtx_LABEL_REF (VOIDmode, label),
11649 pc_rtx, NULL_RTX, NULL_RTX);
11653 tmp = gen_rtx_fmt_ee (code, VOIDmode,
11654 ix86_compare_op0, ix86_compare_op1);
11655 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
11656 gen_rtx_LABEL_REF (VOIDmode, label),
11658 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
/* The delayed form is a PARALLEL of the jump plus clobbers of hard
   registers 18 and 17, with one more element (an HImode scratch) when
   fcomi is not used. */
11660 use_fcomi = ix86_use_fcomi_compare (code);
11661 vec = rtvec_alloc (3 + !use_fcomi);
11662 RTVEC_ELT (vec, 0) = tmp;
11664 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
11666 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
11669 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
11671 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
11680 /* Expand DImode branch into multiple compare+branch. */
11682 rtx lo[2], hi[2], label2;
11683 enum rtx_code code1, code2, code3;
11684 enum machine_mode submode;
/* Keep a constant, if there is exactly one, as the second operand. */
11686 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
11688 tmp = ix86_compare_op0;
11689 ix86_compare_op0 = ix86_compare_op1;
11690 ix86_compare_op1 = tmp;
11691 code = swap_condition (code);
/* Split both operands into low and high halves: split_di for DImode,
   split_ti otherwise. */
11693 if (GET_MODE (ix86_compare_op0) == DImode)
11695 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
11696 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
11701 split_ti (&ix86_compare_op0, 1, lo+0, hi+0);
11702 split_ti (&ix86_compare_op1, 1, lo+1, hi+1);
11706 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
11707 avoid two branches. This costs one extra insn, so disable when
11708 optimizing for size. */
11710 if ((code == EQ || code == NE)
11712 || hi[1] == const0_rtx || lo[1] == const0_rtx))
/* An XOR with a zero half is skipped. */
11717 if (hi[1] != const0_rtx)
11718 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
11719 NULL_RTX, 0, OPTAB_WIDEN);
11722 if (lo[1] != const0_rtx)
11723 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
11724 NULL_RTX, 0, OPTAB_WIDEN);
11726 tmp = expand_binop (submode, ior_optab, xor1, xor0,
11727 NULL_RTX, 0, OPTAB_WIDEN);
/* Recurse to branch on the combined value compared against zero. */
11729 ix86_compare_op0 = tmp;
11730 ix86_compare_op1 = const0_rtx;
11731 ix86_expand_branch (code, label);
11735 /* Otherwise, if we are doing less-than or greater-or-equal-than,
11736 op1 is a constant and the low word is zero, then we can just
11737 examine the high word. */
11739 if (CONST_INT_P (hi[1]) && lo[1] == const0_rtx)
11742 case LT: case LTU: case GE: case GEU:
11743 ix86_compare_op0 = hi[0];
11744 ix86_compare_op1 = hi[1];
11745 ix86_expand_branch (code, label);
11751 /* Otherwise, we need two or three jumps. */
11753 label2 = gen_label_rtx ();
/* code1 branches to LABEL and code2 to LABEL2 on the high halves; code3
   is the unsigned form of CODE, applied to the low halves. */
11756 code2 = swap_condition (code);
11757 code3 = unsigned_condition (code);
11761 case LT: case GT: case LTU: case GTU:
11764 case LE: code1 = LT; code2 = GT; break;
11765 case GE: code1 = GT; code2 = LT; break;
11766 case LEU: code1 = LTU; code2 = GTU; break;
11767 case GEU: code1 = GTU; code2 = LTU; break;
11769 case EQ: code1 = UNKNOWN; code2 = NE; break;
11770 case NE: code2 = UNKNOWN; break;
11773 gcc_unreachable ();
11778 * if (hi(a) < hi(b)) goto true;
11779 * if (hi(a) > hi(b)) goto false;
11780 * if (lo(a) < lo(b)) goto true;
11784 ix86_compare_op0 = hi[0];
11785 ix86_compare_op1 = hi[1];
11787 if (code1 != UNKNOWN)
11788 ix86_expand_branch (code1, label);
11789 if (code2 != UNKNOWN)
11790 ix86_expand_branch (code2, label2);
11792 ix86_compare_op0 = lo[0];
11793 ix86_compare_op1 = lo[1];
11794 ix86_expand_branch (code3, label);
/* LABEL2 is emitted only if a branch to it was generated above. */
11796 if (code2 != UNKNOWN)
11797 emit_label (label2);
11802 gcc_unreachable ();
11806 /* Split branch based on floating point condition.
CODE compares OP1 with OP2; TARGET1 and TARGET2 are the two arms of the
IF_THEN_ELSE that is emitted.  TMP is passed to ix86_expand_fp_compare as
its scratch.  PUSHED is an operand whose mode is handed to
ix86_free_from_memory (the guarding test is not visible in this listing). */
11808 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
11809 rtx target1, rtx target2, rtx tmp, rtx pushed)
11811 rtx second, bypass;
11812 rtx label = NULL_RTX;
/* A probability of -1 means that no REG_BR_PROB note is attached. */
11814 int bypass_probability = -1, second_probability = -1, probability = -1;
11817 if (target2 != pc_rtx)
11820 code = reverse_condition_maybe_unordered (code);
11825 condition = ix86_expand_fp_compare (code, op1, op2,
11826 tmp, &second, &bypass);
11828 /* Remove pushed operand from stack. */
11830 ix86_free_from_memory (GET_MODE (pushed));
11832 if (split_branch_probability >= 0)
/* NOTE(review): the comment that follows has lost its final line in this
   listing, so it is unterminated and runs on to the next comment close. */
11834 /* Distribute the probabilities across the jumps.
11835 Assume the BYPASS and SECOND to be always test
11837 probability = split_branch_probability;
11839 /* Value of 1 is low enough to make no need for probability
11840 to be updated. Later we may run some experiments and see
11841 if unordered values are more frequent in practice. */
11843 bypass_probability = 1;
11845 second_probability = 1;
/* The bypass jump is emitted first and targets a fresh label that is
   placed after the remaining jumps. */
11847 if (bypass != NULL_RTX)
11849 label = gen_label_rtx ();
11850 i = emit_jump_insn (gen_rtx_SET
11852 gen_rtx_IF_THEN_ELSE (VOIDmode,
11854 gen_rtx_LABEL_REF (VOIDmode,
11857 if (bypass_probability >= 0)
11859 = gen_rtx_EXPR_LIST (REG_BR_PROB,
11860 GEN_INT (bypass_probability),
/* The main conditional jump. */
11863 i = emit_jump_insn (gen_rtx_SET
11865 gen_rtx_IF_THEN_ELSE (VOIDmode,
11866 condition, target1, target2)));
11867 if (probability >= 0)
11869 = gen_rtx_EXPR_LIST (REG_BR_PROB,
11870 GEN_INT (probability),
/* The optional second jump, also taken to TARGET1. */
11872 if (second != NULL_RTX)
11874 i = emit_jump_insn (gen_rtx_SET
11876 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
11878 if (second_probability >= 0)
11880 = gen_rtx_EXPR_LIST (REG_BR_PROB,
11881 GEN_INT (second_probability),
11884 if (label != NULL_RTX)
11885 emit_label (label);
/* Expand a store of the result of comparison CODE (on the file-level
   operands ix86_compare_op0 and ix86_compare_op1) into DEST, which must be
   QImode.  Returns 1 when done, or 0 to fail for a double-word compare
   (TImode on 64-bit targets, DImode otherwise).  */
11889 ix86_expand_setcc (enum rtx_code code, rtx dest)
11891 rtx ret, tmp, tmpreg, equiv;
11892 rtx second_test, bypass_test;
11894 if (GET_MODE (ix86_compare_op0) == (TARGET_64BIT ? TImode : DImode))
11895 return 0; /* FAIL */
11897 gcc_assert (GET_MODE (dest) == QImode);
11899 ret = ix86_expand_compare (code, &second_test, &bypass_test);
11900 PUT_MODE (ret, QImode);
11905 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
/* When an extra test is needed, its result is stored into a second QImode
   register and combined with the first using AND or IOR.  A bypass test is
   used with its condition reversed, and excludes a second test.  */
11906 if (bypass_test || second_test)
11908 rtx test = second_test;
11910 rtx tmp2 = gen_reg_rtx (QImode);
11913 gcc_assert (!second_test);
11914 test = bypass_test;
11916 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
11918 PUT_MODE (test, QImode);
11919 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
11922 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
11924 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
11927 /* Attach a REG_EQUAL note describing the comparison result. */
11928 if (ix86_compare_op0 && ix86_compare_op1)
11930 equiv = simplify_gen_relational (code, QImode,
11931 GET_MODE (ix86_compare_op0),
11932 ix86_compare_op0, ix86_compare_op1);
11933 set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
11936 return 1; /* DONE */
11939 /* Expand a comparison that sets or clears the carry flag. Return true when
11940 successful and store the resulting test in *POP.
CODE compares OP0 with OP1; on success *POP has code LTU or GEU. */
11942 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
11944 enum machine_mode mode =
11945 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
11947 /* Do not handle DImode compares that go through special path.
11948 Also we can't deal with FP compares yet. This is possible to add. */
11949 if (mode == (TARGET_64BIT ? TImode : DImode))
11952 if (SCALAR_FLOAT_MODE_P (mode))
11954 rtx second_test = NULL, bypass_test = NULL;
11955 rtx compare_op, compare_seq;
11957 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
11959 /* Shortcut: following common codes never translate
11960 into carry flag compares. */
11961 if (code == EQ || code == NE || code == UNEQ || code == LTGT
11962 || code == ORDERED || code == UNORDERED)
11965 /* These comparisons require zero flag; swap operands so they won't. */
11966 if ((code == GT || code == UNLE || code == LE || code == UNGT)
11967 && !TARGET_IEEE_FP)
11972 code = swap_condition (code);
11975 /* Try to expand the comparison and verify that we end up with a carry flag
11976 based comparison. This fails to be true only when we decide to expand the
11977 comparison using arithmetic, which is not a very common scenario. */
11979 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
11980 &second_test, &bypass_test);
11981 compare_seq = get_insns ();
11984 if (second_test || bypass_test)
11986 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
11987 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
11988 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
11990 code = GET_CODE (compare_op);
11991 if (code != LTU && code != GEU)
/* The insns collected above are emitted only once the compare is known to
   be carry-flag based. */
11993 emit_insn (compare_seq);
11997 if (!INTEGRAL_MODE_P (mode))
12005 /* Convert a==0 into (unsigned)a<1. */
12008 if (op1 != const0_rtx)
12011 code = (code == EQ ? LTU : GEU);
12014 /* Convert a>b into b<a or a>=b-1. */
12017 if (CONST_INT_P (op1))
/* NOTE(review): INTVAL (op1) + 1 is evaluated in HOST_WIDE_INT before
   gen_int_mode truncates it -- confirm that op1 cannot hold the largest
   HOST_WIDE_INT value here. */
12019 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
12020 /* Bail out on overflow. We still can swap operands but that
12021 would force loading of the constant into register. */
12022 if (op1 == const0_rtx
12023 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
12025 code = (code == GTU ? GEU : LTU);
12032 code = (code == GTU ? LTU : GEU);
12036 /* Convert a>=0 into (unsigned)a<0x80000000. */
12039 if (mode == DImode || op1 != const0_rtx)
/* NOTE(review): the literal 1 is an int, so for a 32-bit MODE this shift
   moves a bit into the sign position of int -- confirm this is intended,
   or whether a wider type should be shifted.  The same expression is used
   again a few lines below. */
12041 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
12042 code = (code == LT ? GEU : LTU);
12046 if (mode == DImode || op1 != constm1_rtx)
12048 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
12049 code = (code == LE ? GEU : LTU);
12055 /* Swapping operands may cause constant to appear as first operand. */
12056 if (!nonimmediate_operand (op0, VOIDmode))
12058 if (no_new_pseudos)
12060 op0 = force_reg (mode, op0);
/* Hand the massaged operands to ix86_expand_compare through the file-level
   operand variables. */
12062 ix86_compare_op0 = op0;
12063 ix86_compare_op1 = op1;
12064 *pop = ix86_expand_compare (code, NULL, NULL);
12065 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
/* Expand an integer conditional move.  OPERANDS[0] is the destination (its
   mode is the mode of the move), OPERANDS[1] supplies the comparison code,
   and OPERANDS[2] and OPERANDS[3] are the two candidate values; when both
   are constant integers they are read as CT and CF.  The compared values
   come from the file-level ix86_compare_op0 and ix86_compare_op1.
   Returns 1 when insns were emitted (DONE) and 0 on failure (FAIL).  */
12070 ix86_expand_int_movcc (rtx operands[])
12072 enum rtx_code code = GET_CODE (operands[1]), compare_code;
12073 rtx compare_seq, compare_op;
12074 rtx second_test, bypass_test;
12075 enum machine_mode mode = GET_MODE (operands[0]);
12076 bool sign_bit_compare_p = false;;
/* The compare insns are collected in COMPARE_SEQ and are emitted only by
   the final conditional-move path at the end of the function. */
12079 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
12080 compare_seq = get_insns ();
12083 compare_code = GET_CODE (compare_op);
/* x >= 0, x < 0, x > -1 and x <= -1 depend only on the sign bit. */
12085 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
12086 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
12087 sign_bit_compare_p = true;
12089 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
12090 HImode insns, we'd be swallowed in word prefix ops. */
12092 if ((mode != HImode || TARGET_FAST_PREFIX)
12093 && (mode != (TARGET_64BIT ? TImode : DImode))
12094 && CONST_INT_P (operands[2])
12095 && CONST_INT_P (operands[3]))
12097 rtx out = operands[0];
12098 HOST_WIDE_INT ct = INTVAL (operands[2]);
12099 HOST_WIDE_INT cf = INTVAL (operands[3]);
12100 HOST_WIDE_INT diff;
/* NOTE(review): the comment that follows has lost its final line in this
   listing, so it is unterminated and runs on to the next comment close. */
12103 /* Sign bit compares are better done using shifts than we do by using
12105 if (sign_bit_compare_p
12106 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
12107 ix86_compare_op1, &compare_op))
12109 /* Detect overlap between destination and compare sources. */
12112 if (!sign_bit_compare_p)
12114 bool fpcmp = false;
12116 compare_code = GET_CODE (compare_op);
12118 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
12119 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
12122 compare_code = ix86_fp_compare_code_to_integer (compare_code);
12125 /* To simplify rest of code, restrict to the GEU case. */
12126 if (compare_code == LTU)
12128 HOST_WIDE_INT tmp = ct;
12131 compare_code = reverse_condition (compare_code);
12132 code = reverse_condition (code);
12137 PUT_CODE (compare_op,
12138 reverse_condition_maybe_unordered
12139 (GET_CODE (compare_op)));
12141 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
/* Work in a fresh pseudo when the destination overlaps a compare source. */
12145 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
12146 || reg_overlap_mentioned_p (out, ix86_compare_op1))
12147 tmp = gen_reg_rtx (mode);
12149 if (mode == DImode)
12150 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
12152 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
12156 if (code == GT || code == GE)
12157 code = reverse_condition (code);
12160 HOST_WIDE_INT tmp = ct;
12165 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
12166 ix86_compare_op1, VOIDmode, 0, -1);
12179 tmp = expand_simple_binop (mode, PLUS,
12181 copy_rtx (tmp), 1, OPTAB_DIRECT);
12192 tmp = expand_simple_binop (mode, IOR,
12194 copy_rtx (tmp), 1, OPTAB_DIRECT);
12196 else if (diff == -1 && ct)
12206 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
12208 tmp = expand_simple_binop (mode, PLUS,
12209 copy_rtx (tmp), GEN_INT (cf),
12210 copy_rtx (tmp), 1, OPTAB_DIRECT);
12218 * andl cf - ct, dest
12228 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
12231 tmp = expand_simple_binop (mode, AND,
12233 gen_int_mode (cf - ct, mode),
12234 copy_rtx (tmp), 1, OPTAB_DIRECT);
12236 tmp = expand_simple_binop (mode, PLUS,
12237 copy_rtx (tmp), GEN_INT (ct),
12238 copy_rtx (tmp), 1, OPTAB_DIRECT);
12241 if (!rtx_equal_p (tmp, out))
12242 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
12244 return 1; /* DONE */
12249 enum machine_mode cmp_mode = GET_MODE (ix86_compare_op0);
12252 tmp = ct, ct = cf, cf = tmp;
12255 if (SCALAR_FLOAT_MODE_P (cmp_mode))
12257 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
12259 /* We may be reversing unordered compare to normal compare, that
12260 is not valid in general (we may convert non-trapping condition
12261 to trapping one), however on i386 we currently emit all
12262 comparisons unordered. */
12263 compare_code = reverse_condition_maybe_unordered (compare_code);
12264 code = reverse_condition_maybe_unordered (code);
12268 compare_code = reverse_condition (compare_code);
12269 code = reverse_condition (code);
/* From here on COMPARE_CODE is UNKNOWN unless the comparison is an integer
   compare against a constant that one of the branches below recognizes. */
12273 compare_code = UNKNOWN;
12274 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
12275 && CONST_INT_P (ix86_compare_op1))
12277 if (ix86_compare_op1 == const0_rtx
12278 && (code == LT || code == GE))
12279 compare_code = code;
12280 else if (ix86_compare_op1 == constm1_rtx)
12284 else if (code == GT)
12289 /* Optimize dest = (op0 < 0) ? -1 : cf. */
12290 if (compare_code != UNKNOWN
12291 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
12292 && (cf == -1 || ct == -1))
12294 /* If lea code below could be used, only optimize
12295 if it results in a 2 insn sequence. */
12297 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
12298 || diff == 3 || diff == 5 || diff == 9)
12299 || (compare_code == LT && ct == -1)
12300 || (compare_code == GE && cf == -1))
12303 * notl op1 (if necessary)
12311 code = reverse_condition (code);
12314 out = emit_store_flag (out, code, ix86_compare_op0,
12315 ix86_compare_op1, VOIDmode, 0, -1);
12317 out = expand_simple_binop (mode, IOR,
12319 out, 1, OPTAB_DIRECT);
12320 if (out != operands[0])
12321 emit_move_insn (operands[0], out);
12323 return 1; /* DONE */
12328 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
12329 || diff == 3 || diff == 5 || diff == 9)
12330 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
12332 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
12338 * lea cf(dest*(ct-cf)),dest
12342 * This also catches the degenerate setcc-only case.
12348 out = emit_store_flag (out, code, ix86_compare_op0,
12349 ix86_compare_op1, VOIDmode, 0, 1);
12352 /* On x86_64 the lea instruction operates on Pmode, so we need
12353 to get arithmetics done in proper mode to match. */
12355 tmp = copy_rtx (out);
12359 out1 = copy_rtx (out);
12360 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
12364 tmp = gen_rtx_PLUS (mode, tmp, out1);
12370 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
12373 if (!rtx_equal_p (tmp, out))
12376 out = force_operand (tmp, copy_rtx (out));
12378 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
12380 if (!rtx_equal_p (out, operands[0]))
12381 emit_move_insn (operands[0], copy_rtx (out));
12383 return 1; /* DONE */
12387 * General case: Jumpful:
12388 * xorl dest,dest cmpl op1, op2
12389 * cmpl op1, op2 movl ct, dest
12390 * setcc dest jcc 1f
12391 * decl dest movl cf, dest
12392 * andl (cf-ct),dest 1:
12395 * Size 20. Size 14.
12397 * This is reasonably steep, but branch mispredict costs are
12398 * high on modern cpus, so consider failing only if optimizing
12402 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
12403 && BRANCH_COST >= 2)
12407 enum machine_mode cmp_mode = GET_MODE (ix86_compare_op0);
12412 if (SCALAR_FLOAT_MODE_P (cmp_mode))
12414 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
12416 /* We may be reversing unordered compare to normal compare,
12417 that is not valid in general (we may convert non-trapping
12418 condition to trapping one), however on i386 we currently
12419 emit all comparisons unordered. */
12420 code = reverse_condition_maybe_unordered (code);
12424 code = reverse_condition (code);
12425 if (compare_code != UNKNOWN)
12426 compare_code = reverse_condition (compare_code);
12430 if (compare_code != UNKNOWN)
12432 /* notl op1 (if needed)
12437 For x < 0 (resp. x <= -1) there will be no notl,
12438 so if possible swap the constants to get rid of the
12440 True/false will be -1/0 while code below (store flag
12441 followed by decrement) is 0/-1, so the constants need
12442 to be exchanged once more. */
12444 if (compare_code == GE || !cf)
12446 code = reverse_condition (code);
12451 HOST_WIDE_INT tmp = cf;
12456 out = emit_store_flag (out, code, ix86_compare_op0,
12457 ix86_compare_op1, VOIDmode, 0, -1);
12461 out = emit_store_flag (out, code, ix86_compare_op0,
12462 ix86_compare_op1, VOIDmode, 0, 1);
12464 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
12465 copy_rtx (out), 1, OPTAB_DIRECT);
/* Mask with (cf - ct), then add ct, to turn the flag value into one of
   the two constants. */
12468 out = expand_simple_binop (mode, AND, copy_rtx (out),
12469 gen_int_mode (cf - ct, mode),
12470 copy_rtx (out), 1, OPTAB_DIRECT);
12472 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
12473 copy_rtx (out), 1, OPTAB_DIRECT);
12474 if (!rtx_equal_p (out, operands[0]))
12475 emit_move_insn (operands[0], copy_rtx (out));
12477 return 1; /* DONE */
12481 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
12483 /* Try a few things more with specific constants and a variable. */
12486 rtx var, orig_out, out, tmp;
12488 if (BRANCH_COST <= 2)
12489 return 0; /* FAIL */
12491 /* If one of the two operands is an interesting constant, load a
12492 constant with the above and mask it in with a logical operation. */
12494 if (CONST_INT_P (operands[2]))
12497 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
12498 operands[3] = constm1_rtx, op = and_optab;
12499 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
12500 operands[3] = const0_rtx, op = ior_optab;
12502 return 0; /* FAIL */
12504 else if (CONST_INT_P (operands[3]))
12507 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
12508 operands[2] = constm1_rtx, op = and_optab;
/* NOTE(review): the second half of the next condition tests operands[3]
   against const0_rtx right after requiring its value to be -1, so it is
   always true.  The three sibling conditions in this if/else chain each
   test the OTHER operand, which suggests operands[2] was intended here --
   confirm. */
12509 else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx)
12510 operands[2] = const0_rtx, op = ior_optab;
12512 return 0; /* FAIL */
12515 return 0; /* FAIL */
12517 orig_out = operands[0];
12518 tmp = gen_reg_rtx (mode);
12521 /* Recurse to get the constant loaded. */
12522 if (ix86_expand_int_movcc (operands) == 0)
12523 return 0; /* FAIL */
12525 /* Mask in the interesting variable. */
12526 out = expand_binop (mode, op, var, tmp, orig_out, 0,
12528 if (!rtx_equal_p (out, orig_out))
12529 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
12531 return 1; /* DONE */
12535 * For comparison with above,
12545 if (! nonimmediate_operand (operands[2], mode))
12546 operands[2] = force_reg (mode, operands[2]);
12547 if (! nonimmediate_operand (operands[3], mode))
12548 operands[3] = force_reg (mode, operands[3]);
/* When an extra conditional move will follow, a source operand that
   overlaps the destination is first copied into a fresh pseudo. */
12550 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
12552 rtx tmp = gen_reg_rtx (mode);
12553 emit_move_insn (tmp, operands[3]);
12556 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
12558 rtx tmp = gen_reg_rtx (mode);
12559 emit_move_insn (tmp, operands[2]);
12563 if (! register_operand (operands[2], VOIDmode)
12565 || ! register_operand (operands[3], VOIDmode)))
12566 operands[2] = force_reg (mode, operands[2]);
12569 && ! register_operand (operands[3], VOIDmode))
12570 operands[3] = force_reg (mode, operands[3]);
/* Emit the deferred compare, then the conditional move itself, followed by
   up to two further conditional moves whose guards are not visible in this
   listing (presumably the bypass and second tests). */
12572 emit_insn (compare_seq);
12573 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
12574 gen_rtx_IF_THEN_ELSE (mode,
12575 compare_op, operands[2],
12578 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
12579 gen_rtx_IF_THEN_ELSE (mode,
12581 copy_rtx (operands[3]),
12582 copy_rtx (operands[0]))));
12584 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
12585 gen_rtx_IF_THEN_ELSE (mode,
12587 copy_rtx (operands[2]),
12588 copy_rtx (operands[0]))));
12590 return 1; /* DONE */
12593 /* Swap, force into registers, or otherwise massage the two operands
12594 to an sse comparison with a mask result. Thus we differ a bit from
12595 ix86_prepare_fp_compare_args which expects to produce a flags result.
12597 The DEST operand exists to help determine whether to commute commutative
12598 operators. The POP0/POP1 operands are updated in place. The new
12599 comparison code is returned, or UNKNOWN if not implementable.
The case labels that select among the groups below are not visible in
this listing. */
12601 static enum rtx_code
12602 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
12603 rtx *pop0, rtx *pop1)
12611 /* We have no LTGT as an operator. We could implement it with
12612 NE & ORDERED, but this requires an extra temporary. It's
12613 not clear that it's worth it. */
12620 /* These are supported directly. */
12627 /* For commutative operators, try to canonicalize the destination
12628 operand to be first in the comparison - this helps reload to
12629 avoid extra moves. */
/* Nothing to commute unless DEST is given and equals the second operand. */
12630 if (!dest || !rtx_equal_p (dest, *pop1))
12638 /* These are not supported directly. Swap the comparison operands
12639 to transform into something that is supported. */
12643 code = swap_condition (code);
12647 gcc_unreachable ();
12653 /* Detect conditional moves that exactly match min/max operational
12654 semantics. Note that this is IEEE safe, as long as we don't
12655 interchange the operands.
12657 Returns FALSE if this conditional move doesn't match a MIN/MAX,
12658 and TRUE if the operation is successful and instructions are emitted.
DEST receives the result; CODE compares CMP_OP0 with CMP_OP1, and
IF_TRUE / IF_FALSE are the two values being selected. */
12661 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
12662 rtx cmp_op1, rtx if_true, rtx if_false)
12664 enum machine_mode mode;
12670 else if (code == UNGE)
12673 if_true = if_false;
/* The selected values must be exactly the compared values, in either
   order. */
12679 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
12681 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
12686 mode = GET_MODE (dest);
12688 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
12689 but MODE may be a vector mode and thus not appropriate. */
/* Unless both flags are set, use the UNSPEC_IEEE_MIN / UNSPEC_IEEE_MAX
   form; otherwise plain SMIN / SMAX is generated. */
12690 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
12692 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
12695 if_true = force_reg (mode, if_true);
12696 v = gen_rtvec (2, if_true, if_false);
12697 tmp = gen_rtx_UNSPEC (mode, v, u);
12701 code = is_min ? SMIN : SMAX;
12702 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
12705 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
12709 /* Expand an sse vector comparison. Return the register with the result. */
/* The comparison (CODE CMP_OP0 CMP_OP1) is built in DEST's mode.  In the
   code below OP_TRUE and OP_FALSE are only tested for overlap with DEST.  */
12712 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
12713 rtx op_true, rtx op_false)
12715 enum machine_mode mode = GET_MODE (dest);
/* The first operand always goes in a register; the second is forced into
   one only when it is not a nonimmediate operand.  */
12718 cmp_op0 = force_reg (mode, cmp_op0);
12719 if (!nonimmediate_operand (cmp_op1, mode))
12720 cmp_op1 = force_reg (mode, cmp_op1);
12723 || reg_overlap_mentioned_p (dest, op_true)
12724 || reg_overlap_mentioned_p (dest, op_false))
/* A fresh pseudo replaces DEST when the test above holds, e.g. when DEST
   overlaps one of the move operands.  */
12725 dest = gen_reg_rtx (mode);
12727 x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
12728 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
12733 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
12734 operations. This is used for both scalar and vector conditional moves. */
/* CMP is used as a bit mask: the result is assembled from
   (CMP & OP_TRUE) and (~CMP & OP_FALSE), all in DEST's mode.  */
12737 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
12739 enum machine_mode mode = GET_MODE (dest);
/* Zero false arm: DEST = CMP & OP_TRUE is sufficient.  */
12742 if (op_false == CONST0_RTX (mode))
12744 op_true = force_reg (mode, op_true);
12745 x = gen_rtx_AND (mode, cmp, op_true);
12746 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
/* Zero true arm: DEST = ~CMP & OP_FALSE is sufficient.  */
12748 else if (op_true == CONST0_RTX (mode))
12750 op_false = force_reg (mode, op_false);
12751 x = gen_rtx_NOT (mode, cmp);
12752 x = gen_rtx_AND (mode, x, op_false);
12753 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
/* General case: both arms go into registers, each is masked into its own
   temporary, and the two temporaries are ORed into DEST.  */
12757 op_true = force_reg (mode, op_true);
12758 op_false = force_reg (mode, op_false);
12760 t2 = gen_reg_rtx (mode);
12762 t3 = gen_reg_rtx (mode);
/* T2 = OP_TRUE & CMP.  */
12766 x = gen_rtx_AND (mode, op_true, cmp);
12767 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
/* T3 = ~CMP & OP_FALSE.  */
12769 x = gen_rtx_NOT (mode, cmp);
12770 x = gen_rtx_AND (mode, x, op_false);
12771 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
/* DEST = T3 | T2.  */
12773 x = gen_rtx_IOR (mode, t3, t2);
12774 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
12778 /* Expand a floating-point conditional move. Return true if successful. */
/* operands[0] is the destination, operands[1] carries the comparison code,
   and operands[2] / operands[3] are the two arms.  The values being
   compared are taken from ix86_compare_op0 and ix86_compare_op1.  */
12781 ix86_expand_fp_movcc (rtx operands[])
12783 enum machine_mode mode = GET_MODE (operands[0]);
12784 enum rtx_code code = GET_CODE (operands[1]);
12785 rtx tmp, compare_op, second_test, bypass_test;
12787 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
12789 enum machine_mode cmode;
12791 /* Since we've no cmove for sse registers, don't force bad register
12792 allocation just to gain access to it. Deny movcc when the
12793 comparison mode doesn't match the move mode. */
/* The comparison mode comes from the first compare operand, falling back
   to the second when the first has VOIDmode.  */
12794 cmode = GET_MODE (ix86_compare_op0);
12795 if (cmode == VOIDmode)
12796 cmode = GET_MODE (ix86_compare_op1);
12800 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
12802 &ix86_compare_op1);
12803 if (code == UNKNOWN)
/* Try the min/max form first.  */
12806 if (ix86_expand_sse_fp_minmax (operands[0], code, ix86_compare_op0,
12807 ix86_compare_op1, operands[2],
/* Otherwise emit the comparison and select between the arms with
   logical operations on its result.  */
12811 tmp = ix86_expand_sse_cmp (operands[0], code, ix86_compare_op0,
12812 ix86_compare_op1, operands[2], operands[3]);
12813 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
12817 /* The floating point conditional move instructions don't directly
12818 support conditions resulting from a signed integer comparison. */
12820 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
12822 /* The floating point conditional move instructions don't directly
12823 support signed integer comparisons. */
12825 if (!fcmov_comparison_operator (compare_op, VOIDmode))
/* Compute the condition with setcc into a QImode temporary, then redo the
   comparison with that temporary and zero as the compare operands.  */
12827 gcc_assert (!second_test && !bypass_test);
12828 tmp = gen_reg_rtx (QImode);
12829 ix86_expand_setcc (code, tmp);
12831 ix86_compare_op0 = tmp;
12832 ix86_compare_op1 = const0_rtx;
12833 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
/* When a bypass test exists and operands[3] overlaps the destination,
   operands[3] is copied into a fresh pseudo.  */
12835 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
12837 tmp = gen_reg_rtx (mode);
12838 emit_move_insn (tmp, operands[3]);
/* Likewise for operands[2] when a second test exists.  */
12841 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
12843 tmp = gen_reg_rtx (mode);
12844 emit_move_insn (tmp, operands[2]);
/* The main conditional move on COMPARE_OP.  */
12848 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
12849 gen_rtx_IF_THEN_ELSE (mode, compare_op,
12850 operands[2], operands[3])));
/* Conditional move on BYPASS_TEST; operands[0] is kept otherwise.  */
12852 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
12853 gen_rtx_IF_THEN_ELSE (mode, bypass_test,
12854 operands[3], operands[0])));
/* Conditional move on SECOND_TEST; operands[0] is kept otherwise.  */
12856 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
12857 gen_rtx_IF_THEN_ELSE (mode, second_test,
12858 operands[2], operands[0])));
12863 /* Expand a floating-point vector conditional move; a vcond operation
12864 rather than a movcc operation. */
/* operands[0] is the destination, operands[1] and operands[2] are the two
   arms, operands[3] carries the comparison code, and operands[4] and
   operands[5] are the values compared.  */
12867 ix86_expand_fp_vcond (rtx operands[])
12869 enum rtx_code code = GET_CODE (operands[3]);
/* The comparison operands may be rewritten in place by this call.  */
12872 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
12873 &operands[4], &operands[5]);
12874 if (code == UNKNOWN)
/* Try the min/max form first.  */
12877 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
12878 operands[5], operands[1], operands[2]))
/* Otherwise emit the comparison and select between the arms with
   logical operations on its result.  */
12881 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
12882 operands[1], operands[2]);
12883 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
12887 /* Expand a signed/unsigned integral vector conditional move. */
/* operands[0] is the destination, operands[1] and operands[2] are the two
   arms, operands[3] carries the comparison code, and operands[4] and
   operands[5] are the values compared.  NEGATE indexes the arms at the
   end, so a nonzero value exchanges which arm is the "true" one.  */
12890 ix86_expand_int_vcond (rtx operands[])
12892 enum machine_mode mode = GET_MODE (operands[0]);
12893 enum rtx_code code = GET_CODE (operands[3]);
12894 bool negate = false;
12897 cop0 = operands[4];
12898 cop1 = operands[5];
12900 /* Canonicalize the comparison to EQ, GT, GTU. */
12911 code = reverse_condition (code);
12917 code = reverse_condition (code);
/* Swapping the condition goes together with exchanging the operands.  */
12923 code = swap_condition (code);
12924 x = cop0, cop0 = cop1, cop1 = x;
12928 gcc_unreachable ();
12931 /* Only SSE4.1/SSE4.2 supports V2DImode. */
12932 if (mode == V2DImode)
12937 /* SSE4.1 supports EQ. */
12938 if (!TARGET_SSE4_1)
12944 /* SSE4.2 supports GT/GTU. */
12945 if (!TARGET_SSE4_2)
12950 gcc_unreachable ();
12954 /* Unsigned parallel compare is not supported by the hardware. Play some
12955 tricks to turn this into a signed comparison against 0. */
12958 cop0 = force_reg (mode, cop0);
12967 /* Perform a parallel modulo subtraction. */
12968 t1 = gen_reg_rtx (mode);
12969 emit_insn ((mode == V4SImode
12971 : gen_subv2di3) (t1, cop0, cop1));
12973 /* Extract the original sign bit of op0. */
12974 mask = ix86_build_signbit_mask (GET_MODE_INNER (mode),
12976 t2 = gen_reg_rtx (mode);
12977 emit_insn ((mode == V4SImode
12979 : gen_andv2di3) (t2, cop0, mask));
12981 /* XOR it back into the result of the subtraction. This results
12982 in the sign bit set iff we saw unsigned underflow. */
12983 x = gen_reg_rtx (mode);
12984 emit_insn ((mode == V4SImode
12986 : gen_xorv2di3) (x, t1, t2));
12994 /* Perform a parallel unsigned saturating subtraction. */
12995 x = gen_reg_rtx (mode);
12996 emit_insn (gen_rtx_SET (VOIDmode, x,
12997 gen_rtx_US_MINUS (mode, cop0, cop1)));
13004 gcc_unreachable ();
/* The second comparison operand becomes the zero vector.  */
13008 cop1 = CONST0_RTX (mode);
/* Emit the comparison, then the select; operands[1+negate] is passed as
   the true arm and operands[2-negate] as the false arm.  */
13011 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
13012 operands[1+negate], operands[2-negate]);
13014 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
13015 operands[2-negate]);
13019 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
13020 true if we should do zero extension, else sign extension. HIGH_P is
13021 true if we want the N/2 high elements, else the low elements. */
/* The widening is done by interleaving OP[1] with a second vector SE that
   supplies the extension bits.  The result goes to operands[0], accessed
   in the source vector's mode.  */
13024 ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
13026 enum machine_mode imode = GET_MODE (operands[1]);
13027 rtx (*unpack)(rtx, rtx, rtx);
/* Pick the high or low interleave generator for the source mode
   (V16QI, V8HI and V4SI generators are listed).  */
13034 unpack = gen_vec_interleave_highv16qi;
13036 unpack = gen_vec_interleave_lowv16qi;
13040 unpack = gen_vec_interleave_highv8hi;
13042 unpack = gen_vec_interleave_lowv8hi;
13046 unpack = gen_vec_interleave_highv4si;
13048 unpack = gen_vec_interleave_lowv4si;
13051 gcc_unreachable ();
13054 dest = gen_lowpart (imode, operands[0]);
/* SE is either a zero vector in a register ...  */
13057 se = force_reg (imode, CONST0_RTX (imode));
/* ... or the result of the comparison 0 > OP[1] (presumably the
   sign-extension case; the selecting test is on UNSIGNED_P - TODO confirm).
   pc_rtx is passed for the unused move arms.  */
13059 se = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
13060 operands[1], pc_rtx, pc_rtx);
13062 emit_insn (unpack (dest, operands[1], se));
13065 /* This function performs the same task as ix86_expand_sse_unpack,
13066 but with SSE4.1 instructions. */
/* A single two-operand extend pattern converts the source into
   operands[0].  Zero-extending and sign-extending generators are listed
   for the V8QI->V8HI, V4HI->V4SI and V2SI->V2DI widenings.  */
13069 ix86_expand_sse4_unpack (rtx operands[2], bool unsigned_p, bool high_p)
13071 enum machine_mode imode = GET_MODE (operands[1]);
13072 rtx (*unpack)(rtx, rtx);
13079 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
13081 unpack = gen_sse4_1_extendv8qiv8hi2;
13085 unpack = gen_sse4_1_zero_extendv4hiv4si2;
13087 unpack = gen_sse4_1_extendv4hiv4si2;
13091 unpack = gen_sse4_1_zero_extendv2siv2di2;
13093 unpack = gen_sse4_1_extendv2siv2di2;
13096 gcc_unreachable ();
13099 dest = operands[0];
13102 /* Shift higher 8 bytes to lower 8 bytes. */
/* The shift is a TImode logical right shift of OP[1] into a fresh
   register of the source mode.  */
13103 src = gen_reg_rtx (imode);
13104 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, src),
13105 gen_lowpart (TImode, operands[1]),
13111 emit_insn (unpack (dest, src));
13114 /* Expand conditional increment or decrement using adc/sbb instructions.
13115 The default case using setcc followed by the conditional move can be
13116 done by generic code. */
/* operands[0] is the destination, operands[1] carries the comparison code,
   operands[2] is the value being adjusted and operands[3] is the amount,
   which is tested against 1 and -1 below.  */
13118 ix86_expand_int_addcc (rtx operands[])
13120 enum rtx_code code = GET_CODE (operands[1]);
13122 rtx val = const0_rtx;
13123 bool fpcmp = false;
13124 enum machine_mode mode = GET_MODE (operands[0]);
/* This test singles out amounts other than 1 and -1.  */
13126 if (operands[3] != const1_rtx
13127 && operands[3] != constm1_rtx)
/* The comparison has to be expressible through the carry flag.  */
13129 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
13130 ix86_compare_op1, &compare_op))
13132 code = GET_CODE (compare_op);
/* For comparisons in the floating-point flag modes CCFPmode and
   CCFPUmode, CODE is mapped to its integer counterpart.  */
13134 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
13135 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
13138 code = ix86_fp_compare_code_to_integer (code);
/* Two ways of reversing the condition stored in COMPARE_OP: one that
   accounts for unordered results, and the plain one.  */
13145 PUT_CODE (compare_op,
13146 reverse_condition_maybe_unordered
13147 (GET_CODE (compare_op)));
13149 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
13151 PUT_MODE (compare_op, mode);
13153 /* Construct either adc or sbb insn. */
13154 if ((code == LTU) == (operands[3] == constm1_rtx))
/* Subtract-with-carry patterns, chosen by the destination's mode.  */
13156 switch (GET_MODE (operands[0]))
13159 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
13162 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
13165 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
13168 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
13171 gcc_unreachable ();
/* Add-with-carry patterns, chosen by the destination's mode.  */
13176 switch (GET_MODE (operands[0]))
13179 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
13182 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
13185 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
13188 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
13191 gcc_unreachable ();
13194 return 1; /* DONE */
13198 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
13199 works for floating point parameters and nonoffsettable memories.
13200 For pushes, it returns just stack offsets; the values will be saved
13201 in the right order. Maximally three parts are generated. */
/* OPERAND, of mode MODE, is split and the pieces are stored in PARTS.
   ix86_split_long_move uses the return value as the number of parts.  */
13204 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
/* Two ways of counting parts: 4-byte pieces (always 3 for XFmode), or
   8-byte pieces.  */
13209 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
13211 size = (GET_MODE_SIZE (mode) + 4) / 8;
/* MMX registers are not accepted, and only 2 or 3 parts are supported.  */
13213 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
13214 gcc_assert (size >= 2 && size <= 3);
13216 /* Optimize constant pool reference to immediates. This is used by fp
13217 moves, that force all constants to memory to allow combining. */
13218 if (MEM_P (operand) && MEM_READONLY_P (operand))
13220 rtx tmp = maybe_get_pool_constant (operand);
13225 if (MEM_P (operand) && !offsettable_memref_p (operand))
13227 /* The only non-offsetable memories we handle are pushes. */
13228 int ok = push_operand (operand, VOIDmode);
/* For a push, every part is the same operand, copied and retyped
   to Pmode.  */
13232 operand = copy_rtx (operand);
13233 PUT_MODE (operand, Pmode);
13234 parts[0] = parts[1] = parts[2] = operand;
13238 if (GET_CODE (operand) == CONST_VECTOR)
13240 enum machine_mode imode = int_mode_for_mode (mode);
13241 /* Caution: if we looked through a constant pool memory above,
13242 the operand may actually have a different mode now. That's
13243 ok, since we want to pun this all the way back to an integer. */
13244 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
13245 gcc_assert (operand != NULL);
/* Splitting into SImode parts.  */
13251 if (mode == DImode)
13252 split_di (&operand, 1, &parts[0], &parts[1]);
13255 if (REG_P (operand))
/* A register operand is split into consecutive hard register numbers,
   which is why reload must already have completed.  */
13257 gcc_assert (reload_completed);
13258 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
13259 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
13261 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
13263 else if (offsettable_memref_p (operand))
/* Memory parts sit at byte offsets 0, 4 and 8.  */
13265 operand = adjust_address (operand, SImode, 0);
13266 parts[0] = operand;
13267 parts[1] = adjust_address (operand, SImode, 4);
13269 parts[2] = adjust_address (operand, SImode, 8);
13271 else if (GET_CODE (operand) == CONST_DOUBLE)
/* A floating constant is converted to target words in L, which become
   SImode integer constants.  */
13276 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
13280 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
13281 parts[2] = gen_int_mode (l[2], SImode);
13284 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
13287 gcc_unreachable ();
13289 parts[1] = gen_int_mode (l[1], SImode);
13290 parts[0] = gen_int_mode (l[0], SImode);
13293 gcc_unreachable ();
/* Splitting into a DImode low part plus an upper part.  */
13298 if (mode == TImode)
13299 split_ti (&operand, 1, &parts[0], &parts[1]);
13300 if (mode == XFmode || mode == TFmode)
/* The upper part is SImode for XFmode and DImode for TFmode.  */
13302 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
13303 if (REG_P (operand))
13305 gcc_assert (reload_completed);
13306 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
13307 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
13309 else if (offsettable_memref_p (operand))
13311 operand = adjust_address (operand, DImode, 0);
13312 parts[0] = operand;
13313 parts[1] = adjust_address (operand, upper_mode, 8);
13315 else if (GET_CODE (operand) == CONST_DOUBLE)
13320 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
13321 real_to_target (l, &r, mode);
13323 /* Do not use shift by 32 to avoid warning on 32bit systems. */
/* With a 64-bit or wider HOST_WIDE_INT, l[0] (masked to its low 32 bits)
   and l[1] (shifted up by 32 in two steps) are combined into one value;
   otherwise the two words go to immed_double_const separately.  */
13324 if (HOST_BITS_PER_WIDE_INT >= 64)
13327 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
13328 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
13331 parts[0] = immed_double_const (l[0], l[1], DImode);
/* The upper part is built the same way from l[2] and l[3].  */
13333 if (upper_mode == SImode)
13334 parts[1] = gen_int_mode (l[2], SImode);
13335 else if (HOST_BITS_PER_WIDE_INT >= 64)
13338 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
13339 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
13342 parts[1] = immed_double_const (l[2], l[3], DImode);
13345 gcc_unreachable ();
13352 /* Emit insns to perform a move or push of DI, DF, and XF values.
13353 Return false when normal moves are needed; true when all required
13354 insns have been emitted. Operands 2-4 receive the destination parts
13355 in the correct order; operands 5-7 receive the source parts. */
/* operands[0] is the destination and operands[1] the source of the
   move being split.  */
13358 ix86_split_long_move (rtx operands[])
13363 int collisions = 0;
13364 enum machine_mode mode = GET_MODE (operands[0]);
13366 /* The DFmode expanders may ask us to move double.
13367 For 64bit target this is single move. By hiding the fact
13368 here we simplify i386.md splitters. */
13369 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
13371 /* Optimize constant pool reference to immediates. This is used by
13372 fp moves, that force all constants to memory to allow combining. */
13374 if (MEM_P (operands[1])
13375 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
13376 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
13377 operands[1] = get_pool_constant (XEXP (operands[1], 0));
/* A push destination is copied and retyped to Pmode; the other form
   views the destination as DImode.  The source is viewed as DImode and
   a single move is emitted.  */
13378 if (push_operand (operands[0], VOIDmode))
13380 operands[0] = copy_rtx (operands[0]);
13381 PUT_MODE (operands[0], Pmode);
13384 operands[0] = gen_lowpart (DImode, operands[0]);
13385 operands[1] = gen_lowpart (DImode, operands[1]);
13386 emit_move_insn (operands[0], operands[1]);
13390 /* The only non-offsettable memory we handle is push. */
13391 if (push_operand (operands[0], VOIDmode))
13394 gcc_assert (!MEM_P (operands[0])
13395 || offsettable_memref_p (operands[0]));
/* part[1] holds the source pieces and part[0] the destination pieces.
   The piece count is taken from the source split.  */
13397 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
13398 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
13400 /* When emitting push, take care for source operands on the stack. */
13401 if (push && MEM_P (operands[1])
13402 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
/* Each lower source part is given the address of the part above it.  */
13405 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
13406 XEXP (part[1][2], 0));
13407 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
13408 XEXP (part[1][1], 0));
13411 /* We need to do copy in the right order in case an address register
13412 of the source overlaps the destination. */
13413 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
/* Check each destination part against the source address.  */
13415 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
13417 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
13420 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
13423 /* Collision in the middle part can be handled by reordering. */
13424 if (collisions == 1 && nparts == 3
13425 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
/* Exchange parts 1 and 2 of both the destination and the source.  */
13428 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
13429 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
13432 /* If there are more collisions, we can't handle it by reordering.
13433 Do an lea to the last part and use only one colliding move. */
13434 else if (collisions > 1)
13440 base = part[0][nparts - 1];
13442 /* Handle the case when the last part isn't valid for lea.
13443 Happens in 64-bit mode storing the 12-byte XFmode. */
13444 if (GET_MODE (base) != Pmode)
13445 base = gen_rtx_REG (Pmode, REGNO (base));
/* Load the source address into BASE and re-address the source parts
   relative to it.  */
13447 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
13448 part[1][0] = replace_equiv_address (part[1][0], base);
13449 part[1][1] = replace_equiv_address (part[1][1],
13450 plus_constant (base, UNITS_PER_WORD));
13452 part[1][2] = replace_equiv_address (part[1][2],
13453 plus_constant (base, 8));
/* For XFmode with TARGET_128BIT_LONG_DOUBLE the stack pointer is lowered
   by 4 before the third part is moved.  */
13463 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
13464 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
13465 emit_move_insn (part[0][2], part[1][2]);
13470 /* In 64bit mode we don't have 32bit push available. In case this is
13471 register, it is OK - we will just use larger counterpart. We also
13472 retype memory - this comes from an attempt to avoid REX prefix on
13473 moving of second half of TFmode value. */
13474 if (GET_MODE (part[1][1]) == SImode)
13476 switch (GET_CODE (part[1][1]))
13479 part[1][1] = adjust_address (part[1][1], DImode, 0);
13483 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
13487 gcc_unreachable ();
13490 if (GET_MODE (part[1][0]) == SImode)
13491 part[1][0] = part[1][1];
/* Part 1 is moved before part 0.  */
13494 emit_move_insn (part[0][1], part[1][1]);
13495 emit_move_insn (part[0][0], part[1][0]);
13499 /* Choose correct order to not overwrite the source before it is copied. */
13500 if ((REG_P (part[0][0])
13501 && REG_P (part[1][1])
13502 && (REGNO (part[0][0]) == REGNO (part[1][1])
13504 && REGNO (part[0][0]) == REGNO (part[1][2]))))
13506 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
/* Reversed order: the highest part comes first.  */
13510 operands[2] = part[0][2];
13511 operands[3] = part[0][1];
13512 operands[4] = part[0][0];
13513 operands[5] = part[1][2];
13514 operands[6] = part[1][1];
13515 operands[7] = part[1][0];
13519 operands[2] = part[0][1];
13520 operands[3] = part[0][0];
13521 operands[5] = part[1][1];
13522 operands[6] = part[1][0];
/* Natural order: the lowest part comes first.  */
13529 operands[2] = part[0][0];
13530 operands[3] = part[0][1];
13531 operands[4] = part[0][2];
13532 operands[5] = part[1][0];
13533 operands[6] = part[1][1];
13534 operands[7] = part[1][2];
13538 operands[2] = part[0][0];
13539 operands[3] = part[0][1];
13540 operands[5] = part[1][0];
13541 operands[6] = part[1][1];
13545 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
/* A later source part that repeats an earlier nonzero constant is replaced
   by the register that constant is moved into first.  */
13548 if (CONST_INT_P (operands[5])
13549 && operands[5] != const0_rtx
13550 && REG_P (operands[2]))
13552 if (CONST_INT_P (operands[6])
13553 && INTVAL (operands[6]) == INTVAL (operands[5]))
13554 operands[6] = operands[2];
13557 && CONST_INT_P (operands[7])
13558 && INTVAL (operands[7]) == INTVAL (operands[5]))
13559 operands[7] = operands[2];
13563 && CONST_INT_P (operands[6])
13564 && operands[6] != const0_rtx
13565 && REG_P (operands[3])
13566 && CONST_INT_P (operands[7])
13567 && INTVAL (operands[7]) == INTVAL (operands[6]))
13568 operands[7] = operands[3];
/* Emit the part moves in the order chosen above.  */
13571 emit_move_insn (operands[2], operands[5]);
13572 emit_move_insn (operands[3], operands[6]);
13574 emit_move_insn (operands[4], operands[7]);
13579 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
13580 left shift by a constant, either using a single shift or
13581 a sequence of add instructions. */
/* OPERAND is shifted in place by COUNT.  MODE is the mode of the
   double-word shift being split and selects which add/shift pattern
   is used.  */
13584 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
/* One add of OPERAND to itself.  */
13588 emit_insn ((mode == DImode
13590 : gen_adddi3) (operand, operand, operand));
/* When not optimizing for size and COUNT adds cost no more than one
   constant shift, emit COUNT adds.  */
13592 else if (!optimize_size
13593 && count * ix86_cost->add <= ix86_cost->shift_const)
13596 for (i=0; i<count; i++)
13598 emit_insn ((mode == DImode
13600 : gen_adddi3) (operand, operand, operand));
/* A single shift by the constant COUNT.  */
13604 emit_insn ((mode == DImode
13606 : gen_ashldi3) (operand, operand, GEN_INT (count)));
/* Split a double-word left shift of operands[1] by operands[2] into
   operands[0], working on the low and high halves.  The halves are 32 bits
   wide for DImode (split_di) and 64 bits otherwise (split_ti).  SCRATCH,
   when non-null and TARGET_CMOVE holds, is cleared and passed to the
   shift adjustment pattern.  */
13610 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
13612 rtx low[2], high[2];
13614 const int single_width = mode == DImode ? 32 : 64;
13616 if (CONST_INT_P (operands[2]))
/* Constant count, reduced modulo the double-word width.  */
13618 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
13619 count = INTVAL (operands[2]) & (single_width * 2 - 1);
13621 if (count >= single_width)
/* The source's low half becomes the result's high half and the low half
   is zeroed; any remaining count is applied to the high half.  */
13623 emit_move_insn (high[0], low[1]);
13624 emit_move_insn (low[0], const0_rtx);
13626 if (count > single_width)
13627 ix86_expand_ashl_const (high[0], count - single_width, mode);
/* Smaller counts: shld moves bits from the low half into the high half,
   then the low half is shifted.  */
13631 if (!rtx_equal_p (operands[0], operands[1]))
13632 emit_move_insn (operands[0], operands[1]);
13633 emit_insn ((mode == DImode
13635 : gen_x86_64_shld) (high[0], low[0], GEN_INT (count)));
13636 ix86_expand_ashl_const (low[0], count, mode);
13641 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
13643 if (operands[1] == const1_rtx)
13645 /* Assuming we've chosen QImode capable registers, then 1 << N
13646 can be done with two 32/64-bit shifts, no branches, no cmoves. */
13647 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
13649 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
/* Clear both halves, test the single_width bit of the count, then set the
   low byte of LOW on "equal to zero" and the low byte of HIGH on
   "not equal to zero".  */
13651 ix86_expand_clear (low[0]);
13652 ix86_expand_clear (high[0]);
13653 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (single_width)));
13655 d = gen_lowpart (QImode, low[0]);
13656 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
13657 s = gen_rtx_EQ (QImode, flags, const0_rtx);
13658 emit_insn (gen_rtx_SET (VOIDmode, d, s));
13660 d = gen_lowpart (QImode, high[0]);
13661 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
13662 s = gen_rtx_NE (QImode, flags, const0_rtx);
13663 emit_insn (gen_rtx_SET (VOIDmode, d, s));
13666 /* Otherwise, we can get the same results by manually performing
13667 a bit extract operation on bit 5/6, and then performing the two
13668 shifts. The two methods of getting 0/1 into low/high are exactly
13669 the same size. Avoiding the shift in the bit extract case helps
13670 pentium4 a bit; no one else seems to care much either way. */
/* Copy the count into HIGH, either zero-extended or as a lowpart.  */
13675 if (TARGET_PARTIAL_REG_STALL && !optimize_size)
13676 x = gen_rtx_ZERO_EXTEND (mode == DImode ? SImode : DImode, operands[2]);
13678 x = gen_lowpart (mode == DImode ? SImode : DImode, operands[2]);
13679 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
/* HIGH = (count >> 5 or 6) & 1; LOW = HIGH ^ 1.  */
13681 emit_insn ((mode == DImode
13683 : gen_lshrdi3) (high[0], high[0], GEN_INT (mode == DImode ? 5 : 6)));
13684 emit_insn ((mode == DImode
13686 : gen_anddi3) (high[0], high[0], GEN_INT (1)));
13687 emit_move_insn (low[0], high[0]);
13688 emit_insn ((mode == DImode
13690 : gen_xordi3) (low[0], low[0], GEN_INT (1)));
/* Shift both halves by the variable count.  */
13693 emit_insn ((mode == DImode
13695 : gen_ashldi3) (low[0], low[0], operands[2]));
13696 emit_insn ((mode == DImode
13698 : gen_ashldi3) (high[0], high[0], operands[2]));
13702 if (operands[1] == constm1_rtx)
13704 /* For -1 << N, we can avoid the shld instruction, because we
13705 know that we're shifting 0...31/63 ones into a -1. */
13706 emit_move_insn (low[0], constm1_rtx);
13708 emit_move_insn (high[0], low[0]);
13710 emit_move_insn (high[0], constm1_rtx);
/* General variable-count case: copy the source if needed, then shld.  */
13714 if (!rtx_equal_p (operands[0], operands[1]))
13715 emit_move_insn (operands[0], operands[1]);
13717 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
13718 emit_insn ((mode == DImode
13720 : gen_x86_64_shld) (high[0], low[0], operands[2]));
13723 emit_insn ((mode == DImode ? gen_ashlsi3 : gen_ashldi3) (low[0], low[0], operands[2]));
/* Final adjustment pattern: the shift_adj_1 form takes a cleared scratch
   register and is used with TARGET_CMOVE; shift_adj_2 takes none.  */
13725 if (TARGET_CMOVE && scratch)
13727 ix86_expand_clear (scratch);
13728 emit_insn ((mode == DImode
13729 ? gen_x86_shift_adj_1
13730 : gen_x86_64_shift_adj) (high[0], low[0], operands[2], scratch));
13733 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
/* Split a double-word arithmetic right shift of operands[1] by operands[2]
   into operands[0], working on the low and high halves.  The halves are
   32 bits wide for DImode (split_di) and 64 bits otherwise (split_ti).
   SCRATCH, when non-null and TARGET_CMOVE holds, is loaded with HIGH shifted
   right by single_width - 1 and passed to the shift adjustment pattern.  */
13737 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
13739 rtx low[2], high[2];
13741 const int single_width = mode == DImode ? 32 : 64;
13743 if (CONST_INT_P (operands[2]))
/* Constant count, reduced modulo the double-word width.  */
13745 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
13746 count = INTVAL (operands[2]) & (single_width * 2 - 1);
13748 if (count == single_width * 2 - 1)
/* Maximum count: the high half shifted right by single_width - 1 is
   copied into both halves of the result.  */
13750 emit_move_insn (high[0], high[1]);
13751 emit_insn ((mode == DImode
13753 : gen_ashrdi3) (high[0], high[0],
13754 GEN_INT (single_width - 1)));
13755 emit_move_insn (low[0], high[0]);
13758 else if (count >= single_width)
/* The source's high half becomes the low half; the high half is that
   value shifted right by single_width - 1.  Any remaining count is
   applied to the low half.  */
13760 emit_move_insn (low[0], high[1]);
13761 emit_move_insn (high[0], low[0]);
13762 emit_insn ((mode == DImode
13764 : gen_ashrdi3) (high[0], high[0],
13765 GEN_INT (single_width - 1)));
13766 if (count > single_width)
13767 emit_insn ((mode == DImode
13769 : gen_ashrdi3) (low[0], low[0],
13770 GEN_INT (count - single_width)));
/* Smaller counts: shrd moves bits from the high half into the low half,
   then the high half is shifted arithmetically.  */
13774 if (!rtx_equal_p (operands[0], operands[1]))
13775 emit_move_insn (operands[0], operands[1]);
13776 emit_insn ((mode == DImode
13778 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
13779 emit_insn ((mode == DImode
13781 : gen_ashrdi3) (high[0], high[0], GEN_INT (count)));
/* Variable count: the same shrd plus shift pair, then an adjustment.  */
13786 if (!rtx_equal_p (operands[0], operands[1]))
13787 emit_move_insn (operands[0], operands[1]);
13789 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
13791 emit_insn ((mode == DImode
13793 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
13794 emit_insn ((mode == DImode
13796 : gen_ashrdi3) (high[0], high[0], operands[2]));
13798 if (TARGET_CMOVE && scratch)
/* SCRATCH = HIGH >> (single_width - 1), handed to the adjustment.  */
13800 emit_move_insn (scratch, high[0]);
13801 emit_insn ((mode == DImode
13803 : gen_ashrdi3) (scratch, scratch,
13804 GEN_INT (single_width - 1)));
13805 emit_insn ((mode == DImode
13806 ? gen_x86_shift_adj_1
13807 : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
/* Adjustment pattern that takes no scratch register.  */
13811 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
/* Split a double-word logical right shift of operands[1] by operands[2]
   into operands[0], working on the low and high halves.  The halves are
   32 bits wide for DImode (split_di) and 64 bits otherwise (split_ti).
   SCRATCH, when non-null and TARGET_CMOVE holds, is cleared and passed to
   the shift adjustment pattern.  */
13816 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
13818 rtx low[2], high[2];
13820 const int single_width = mode == DImode ? 32 : 64;
13822 if (CONST_INT_P (operands[2]))
/* Constant count, reduced modulo the double-word width.  */
13824 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
13825 count = INTVAL (operands[2]) & (single_width * 2 - 1);
13827 if (count >= single_width)
/* The source's high half becomes the low half and the high half is
   cleared; any remaining count is applied to the low half.  */
13829 emit_move_insn (low[0], high[1]);
13830 ix86_expand_clear (high[0]);
13832 if (count > single_width)
13833 emit_insn ((mode == DImode
13835 : gen_lshrdi3) (low[0], low[0],
13836 GEN_INT (count - single_width)));
/* Smaller counts: shrd moves bits from the high half into the low half,
   then the high half is shifted logically.  */
13840 if (!rtx_equal_p (operands[0], operands[1]))
13841 emit_move_insn (operands[0], operands[1]);
13842 emit_insn ((mode == DImode
13844 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
13845 emit_insn ((mode == DImode
13847 : gen_lshrdi3) (high[0], high[0], GEN_INT (count)));
/* Variable count: the same shrd plus shift pair, then an adjustment.  */
13852 if (!rtx_equal_p (operands[0], operands[1]))
13853 emit_move_insn (operands[0], operands[1]);
13855 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
13857 emit_insn ((mode == DImode
13859 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
13860 emit_insn ((mode == DImode
13862 : gen_lshrdi3) (high[0], high[0], operands[2]));
13864 /* Heh. By reversing the arguments, we can reuse this pattern. */
13865 if (TARGET_CMOVE && scratch)
13867 ix86_expand_clear (scratch);
13868 emit_insn ((mode == DImode
13869 ? gen_x86_shift_adj_1
13870 : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
13874 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
13878 /* Predict just emitted jump instruction to be taken with probability PROB. */
/* The most recently emitted insn must be a jump (asserted below).  A
   REG_BR_PROB expression-list note is built for it.  */
13880 predict_jump (int prob)
13882 rtx insn = get_last_insn ();
13883 gcc_assert (JUMP_P (insn));
13885 = gen_rtx_EXPR_LIST (REG_BR_PROB,
13890 /* Helper function for the string operations below. Test VARIABLE whether
13891 it is aligned to VALUE bytes. If true, jump to the label. */
/* The test ANDs VARIABLE with VALUE into a temporary and jumps when the
   result is zero.  The label is created here.  EPILOGUE is presumably what
   selects between the two branch-probability predictions below
   - TODO confirm.  */
13893 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
13895 rtx label = gen_label_rtx ();
13896 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
/* Use the AND pattern that matches VARIABLE's mode.  */
13897 if (GET_MODE (variable) == DImode)
13898 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
13900 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
13901 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
/* The jump is predicted taken with 50% or 90% probability.  */
13904 predict_jump (REG_BR_PROB_BASE * 50 / 100);
13906 predict_jump (REG_BR_PROB_BASE * 90 / 100);
13910 /* Adjust COUNTER by the VALUE. */
/* COUNTREG is decreased in place: -VALUE is added to it, using the add
   pattern that matches its mode.  */
13912 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
13914 if (GET_MODE (countreg) == DImode)
13915 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
13917 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
13920 /* Zero extend possibly SImode EXP to Pmode register. */
/* A VOIDmode EXP is forced into a Pmode register, and an EXP already in
   Pmode is copied into a new Pmode register.  Anything else is
   zero-extended from SImode to DImode into a fresh Pmode pseudo.  */
13922 ix86_zero_extend_to_Pmode (rtx exp)
13925 if (GET_MODE (exp) == VOIDmode)
13926 return force_reg (Pmode, exp);
13927 if (GET_MODE (exp) == Pmode)
13928 return copy_to_mode_reg (Pmode, exp);
13929 r = gen_reg_rtx (Pmode);
13930 emit_insn (gen_zero_extendsidi2 (r, exp));
13934 /* Divide COUNTREG by SCALE. */
/* A constant COUNTREG is divided at compile time.  Otherwise COUNTREG must
   be a register and the division is a logical right shift by
   exact_log2 (SCALE), so SCALE is expected to be a power of two.  */
13936 scale_counter (rtx countreg, int scale)
13939 rtx piece_size_mask;
13943 if (CONST_INT_P (countreg))
13944 return GEN_INT (INTVAL (countreg) / scale);
13945 gcc_assert (REG_P (countreg));
/* NOTE(review): piece_size_mask is assigned here but is not used by any
   of the code that follows in this function.  */
13947 piece_size_mask = GEN_INT (scale - 1);
13948 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
13949 GEN_INT (exact_log2 (scale)),
13950 NULL, 1, OPTAB_DIRECT);
13954 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
13955 DImode for constant loop counts. */
13957 static enum machine_mode
13958 counter_mode (rtx count_exp)
/* A COUNT_EXP that already has a mode keeps that mode.  */
13960 if (GET_MODE (count_exp) != VOIDmode)
13961 return GET_MODE (count_exp);
13962 if (GET_CODE (count_exp) != CONST_INT)
/* On a 64-bit target, this is meant to detect constants with bits set
   above the low 32.
   NOTE(review): where int is 32 bits, 0xffffffff has type unsigned int, so
   ~0xffffffff evaluates to 0 and this mask test can never be true.
   Confirm, and consider ~(HOST_WIDE_INT) 0xffffffff.  */
13964 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
13969 /* When SRCPTR is non-NULL, output a simple loop that moves the memory
13970 pointed to by SRCPTR to DESTPTR in chunks of MODE, unrolled UNROLL times;
13971 the overall size is COUNT, specified in bytes. When SRCPTR is NULL, output the
13972 equivalent loop to set memory to VALUE (supposed to be in MODE).
13974 The size is rounded down to a whole number of the chunks moved at once.
13975 SRCMEM and DESTMEM provide MEM rtxes to feed proper aliasing info. */
13979 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
13980 rtx destptr, rtx srcptr, rtx value,
13981 rtx count, enum machine_mode mode, int unroll,
13984 rtx out_label, top_label, iter, tmp;
13985 enum machine_mode iter_mode = counter_mode (count);
13986 rtx piece_size = GEN_INT (GET_MODE_SIZE (mode) * unroll);
13987 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
13993 top_label = gen_label_rtx ();
13994 out_label = gen_label_rtx ();
13995 iter = gen_reg_rtx (iter_mode);
13997 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
13998 NULL, 1, OPTAB_DIRECT);
13999 /* Those two should combine. */
14000 if (piece_size == const1_rtx)
14002 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
14004 predict_jump (REG_BR_PROB_BASE * 10 / 100);
14006 emit_move_insn (iter, const0_rtx);
14008 emit_label (top_label);
14010 tmp = convert_modes (Pmode, iter_mode, iter, true);
14011 x_addr = gen_rtx_PLUS (Pmode, destptr, tmp);
14012 destmem = change_address (destmem, mode, x_addr);
14016 y_addr = gen_rtx_PLUS (Pmode, srcptr, copy_rtx (tmp));
14017 srcmem = change_address (srcmem, mode, y_addr);
14019 /* When unrolling for chips that reorder memory reads and writes,
14020 we can save registers by using single temporary.
14021 Also using 4 temporaries is overkill in 32bit mode. */
14022 if (!TARGET_64BIT && 0)
14024 for (i = 0; i < unroll; i++)
14029 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
14031 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
14033 emit_move_insn (destmem, srcmem);
14039 gcc_assert (unroll <= 4);
14040 for (i = 0; i < unroll; i++)
14042 tmpreg[i] = gen_reg_rtx (mode);
14046 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
14048 emit_move_insn (tmpreg[i], srcmem);
14050 for (i = 0; i < unroll; i++)
14055 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
14057 emit_move_insn (destmem, tmpreg[i]);
14062 for (i = 0; i < unroll; i++)
14066 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
14067 emit_move_insn (destmem, value);
14070 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
14071 true, OPTAB_LIB_WIDEN);
14073 emit_move_insn (iter, tmp);
14075 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
14077 if (expected_size != -1)
14079 expected_size /= GET_MODE_SIZE (mode) * unroll;
14080 if (expected_size == 0)
14082 else if (expected_size > REG_BR_PROB_BASE)
14083 predict_jump (REG_BR_PROB_BASE - 1);
14085 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
14088 predict_jump (REG_BR_PROB_BASE * 80 / 100);
14089 iter = ix86_zero_extend_to_Pmode (iter);
14090 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
14091 true, OPTAB_LIB_WIDEN);
14092 if (tmp != destptr)
14093 emit_move_insn (destptr, tmp);
14096 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
14097 true, OPTAB_LIB_WIDEN);
14099 emit_move_insn (srcptr, tmp);
14101 emit_label (out_label);
14104 /* Output "rep; mov" instruction.
14105 Arguments have same meaning as for previous function */
/* COUNT is scaled down by the size of MODE (scale_counter) and
   zero-extended to Pmode to form COUNTREG.  DESTEXP and SRCEXP are the
   pointer-plus-byte-length expressions built below for gen_rep_mov.  */
14107 expand_movmem_via_rep_mov (rtx destmem, rtx srcmem,
14108 rtx destptr, rtx srcptr,
14110 enum machine_mode mode)
14116 /* If the size is known, it is shorter to use rep movs. */
14117 if (mode == QImode && CONST_INT_P (count)
14118 && !(INTVAL (count) & 3))
/* Make both MEMs BLKmode references addressed through the pointer
   registers.  */
14121 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
14122 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
14123 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
14124 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
14125 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
/* For modes wider than a byte the byte length is COUNTREG shifted left
   by log2 of the mode size; for QImode it is COUNTREG itself.  */
14126 if (mode != QImode)
14128 destexp = gen_rtx_ASHIFT (Pmode, countreg,
14129 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
14130 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
14131 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
14132 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
14133 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
14137 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
14138 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
14140 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
14144 /* Output "rep; stos" instruction.
14145 Arguments have same meaning as for previous function */
/* VALUE is narrowed to MODE with gen_lowpart and forced into a register.
   COUNT is scaled down by the size of MODE and zero-extended to Pmode.
   DESTEXP is the pointer-plus-byte-length expression handed to
   gen_rep_stos.  */
14147 expand_setmem_via_rep_stos (rtx destmem, rtx destptr, rtx value,
14149 enum machine_mode mode)
14154 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
14155 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
14156 value = force_reg (mode, gen_lowpart (mode, value));
14157 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
/* Byte length is COUNTREG << log2 (mode size), or COUNTREG itself for
   QImode.  */
14158 if (mode != QImode)
14160 destexp = gen_rtx_ASHIFT (Pmode, countreg,
14161 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
14162 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
14165 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
14166 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
/* Emit one gen_strmov in MODE.  SRCMEM and DESTMEM are first rewritten
   with adjust_automodify_address_nv to MODE, the corresponding pointer
   (SRCPTR / DESTPTR) and OFFSET.  */
14170 emit_strmov (rtx destmem, rtx srcmem,
14171 rtx destptr, rtx srcptr, enum machine_mode mode, int offset)
14173 rtx src = adjust_automodify_address_nv (srcmem, mode, srcptr, offset);
14174 rtx dest = adjust_automodify_address_nv (destmem, mode, destptr, offset);
14175 emit_insn (gen_strmov (destptr, dest, srcptr, src));
14178 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
/* Three strategies are visible below: a fixed sequence of string moves
   when COUNT is a CONST_INT, a masked byte loop, and chains of
   ix86_expand_aligntest-guarded moves of 4, 2 and 1 bytes (either via
   gen_strmov or via plain moves indexed by an OFFSET register).  */
14180 expand_movmem_epilogue (rtx destmem, rtx srcmem,
14181 rtx destptr, rtx srcptr, rtx count, int max_size)
/* Constant COUNT: every set bit of the count that lies below MAX_SIZE
   gets its own move(s) of the matching width.  */
14184 if (CONST_INT_P (count))
14186 HOST_WIDE_INT countval = INTVAL (count);
/* 16 bytes: two DImode moves.  */
14189 if ((countval & 0x10) && max_size > 16)
14193 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
14194 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset + 8);
14197 gcc_unreachable ();
/* 8 bytes: one DImode move or two SImode moves.  */
14200 if ((countval & 0x08) && max_size > 8)
14203 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
14206 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
14207 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset + 4);
14211 if ((countval & 0x04) && max_size > 4)
14213 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
14216 if ((countval & 0x02) && max_size > 2)
14218 emit_strmov (destmem, srcmem, destptr, srcptr, HImode, offset);
14221 if ((countval & 0x01) && max_size > 1)
14223 emit_strmov (destmem, srcmem, destptr, srcptr, QImode, offset);
/* Byte-loop variant: reduce COUNT to COUNT & (MAX_SIZE - 1) and copy
   that many bytes one at a time.  */
14230 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
14231 count, 1, OPTAB_DIRECT);
14232 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
14233 count, QImode, 1, 4);
14237 /* When there are stringops, we can cheaply increase dest and src pointers.
14238 Otherwise we save code size by maintaining offset (zero is readily
14239 available from preceding rep operation) and using x86 addressing modes.
14241 if (TARGET_SINGLE_STRINGOP)
14245 rtx label = ix86_expand_aligntest (count, 4, true);
14246 src = change_address (srcmem, SImode, srcptr);
14247 dest = change_address (destmem, SImode, destptr);
14248 emit_insn (gen_strmov (destptr, dest, srcptr, src));
14249 emit_label (label);
14250 LABEL_NUSES (label) = 1;
14254 rtx label = ix86_expand_aligntest (count, 2, true);
14255 src = change_address (srcmem, HImode, srcptr);
14256 dest = change_address (destmem, HImode, destptr);
14257 emit_insn (gen_strmov (destptr, dest, srcptr, src));
14258 emit_label (label);
14259 LABEL_NUSES (label) = 1;
14263 rtx label = ix86_expand_aligntest (count, 1, true);
14264 src = change_address (srcmem, QImode, srcptr);
14265 dest = change_address (destmem, QImode, destptr);
14266 emit_insn (gen_strmov (destptr, dest, srcptr, src));
14267 emit_label (label);
14268 LABEL_NUSES (label) = 1;
14273 rtx offset = force_reg (Pmode, const0_rtx);
14278 rtx label = ix86_expand_aligntest (count, 4, true);
14279 src = change_address (srcmem, SImode, srcptr);
14280 dest = change_address (destmem, SImode, destptr);
14281 emit_move_insn (dest, src);
14282 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
14283 true, OPTAB_LIB_WIDEN);
14285 emit_move_insn (offset, tmp);
14286 emit_label (label);
14287 LABEL_NUSES (label) = 1;
14291 rtx label = ix86_expand_aligntest (count, 2, true);
14292 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
14293 src = change_address (srcmem, HImode, tmp);
14294 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
14295 dest = change_address (destmem, HImode, tmp);
14296 emit_move_insn (dest, src);
14297 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
14298 true, OPTAB_LIB_WIDEN);
14300 emit_move_insn (offset, tmp);
14301 emit_label (label);
14302 LABEL_NUSES (label) = 1;
14306 rtx label = ix86_expand_aligntest (count, 1, true);
14307 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
14308 src = change_address (srcmem, QImode, tmp);
14309 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
14310 dest = change_address (destmem, QImode, tmp);
14311 emit_move_insn (dest, src);
14312 emit_label (label);
14313 LABEL_NUSES (label) = 1;
14318 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
/* Byte-loop form of the memset epilogue: COUNT is masked with
   MAX_SIZE - 1 in its counter_mode and the low QImode part of VALUE is
   stored through expand_set_or_movmem_via_loop in QImode.  */
14320 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
14321 rtx count, int max_size)
14324 expand_simple_binop (counter_mode (count), AND, count,
14325 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
14326 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
14327 gen_lowpart (QImode, value), count, QImode,
14331 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
/* VALUE is stored with gen_strset.  For the 4, 2 and 1 byte pieces it is
   narrowed with gen_lowpart; the wider pieces pass VALUE as is.  */
14333 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx count, int max_size)
/* Constant COUNT: every set bit of the count that lies below MAX_SIZE
   gets its own store(s) of the matching width at a known OFFSET.  */
14337 if (CONST_INT_P (count))
14339 HOST_WIDE_INT countval = INTVAL (count);
/* 16 bytes: two DImode stores.  */
14342 if ((countval & 0x10) && max_size > 16)
14346 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
14347 emit_insn (gen_strset (destptr, dest, value));
14348 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset + 8);
14349 emit_insn (gen_strset (destptr, dest, value));
14352 gcc_unreachable ();
/* 8 bytes: one DImode store or two SImode stores.  */
14355 if ((countval & 0x08) && max_size > 8)
14359 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
14360 emit_insn (gen_strset (destptr, dest, value));
14364 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
14365 emit_insn (gen_strset (destptr, dest, value));
14366 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset + 4);
14367 emit_insn (gen_strset (destptr, dest, value));
14371 if ((countval & 0x04) && max_size > 4)
14373 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
14374 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
14377 if ((countval & 0x02) && max_size > 2)
14379 dest = adjust_automodify_address_nv (destmem, HImode, destptr, offset);
14380 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
14383 if ((countval & 0x01) && max_size > 1)
14385 dest = adjust_automodify_address_nv (destmem, QImode, destptr, offset);
14386 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
/* Fallback to the byte loop.  */
14393 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
/* Variable COUNT: a chain of ix86_expand_aligntest tests on COUNT for
   16, 8, 4, 2 and 1; each guards the store(s) of that many bytes and the
   label emitted after them is the skip target.  */
14398 rtx label = ix86_expand_aligntest (count, 16, true);
14401 dest = change_address (destmem, DImode, destptr);
14402 emit_insn (gen_strset (destptr, dest, value));
14403 emit_insn (gen_strset (destptr, dest, value));
14407 dest = change_address (destmem, SImode, destptr);
14408 emit_insn (gen_strset (destptr, dest, value));
14409 emit_insn (gen_strset (destptr, dest, value));
14410 emit_insn (gen_strset (destptr, dest, value));
14411 emit_insn (gen_strset (destptr, dest, value));
14413 emit_label (label);
14414 LABEL_NUSES (label) = 1;
14418 rtx label = ix86_expand_aligntest (count, 8, true);
14421 dest = change_address (destmem, DImode, destptr);
14422 emit_insn (gen_strset (destptr, dest, value));
14426 dest = change_address (destmem, SImode, destptr);
14427 emit_insn (gen_strset (destptr, dest, value));
14428 emit_insn (gen_strset (destptr, dest, value));
14430 emit_label (label);
14431 LABEL_NUSES (label) = 1;
14435 rtx label = ix86_expand_aligntest (count, 4, true);
14436 dest = change_address (destmem, SImode, destptr);
14437 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
14438 emit_label (label);
14439 LABEL_NUSES (label) = 1;
14443 rtx label = ix86_expand_aligntest (count, 2, true);
14444 dest = change_address (destmem, HImode, destptr);
14445 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
14446 emit_label (label);
14447 LABEL_NUSES (label) = 1;
14451 rtx label = ix86_expand_aligntest (count, 1, true);
14452 dest = change_address (destmem, QImode, destptr);
14453 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
14454 emit_label (label);
14455 LABEL_NUSES (label) = 1;
14459 /* Copy enough from SRC to DEST to align DEST, known to be aligned by ALIGN, to
14460 DESIRED_ALIGNMENT. */
/* For each of the steps 1, 2 and 4 that lies between ALIGN and
   DESIRED_ALIGNMENT, an ix86_expand_aligntest on DESTPTR guards one
   string move of that width, after which COUNT is adjusted by the same
   amount with ix86_adjust_counter.  Alignments above 8 are rejected by
   the final assertion.  */
14462 expand_movmem_prologue (rtx destmem, rtx srcmem,
14463 rtx destptr, rtx srcptr, rtx count,
14464 int align, int desired_alignment)
14466 if (align <= 1 && desired_alignment > 1)
14468 rtx label = ix86_expand_aligntest (destptr, 1, false);
14469 srcmem = change_address (srcmem, QImode, srcptr);
14470 destmem = change_address (destmem, QImode, destptr);
14471 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
14472 ix86_adjust_counter (count, 1);
14473 emit_label (label);
14474 LABEL_NUSES (label) = 1;
14476 if (align <= 2 && desired_alignment > 2)
14478 rtx label = ix86_expand_aligntest (destptr, 2, false);
14479 srcmem = change_address (srcmem, HImode, srcptr);
14480 destmem = change_address (destmem, HImode, destptr);
14481 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
14482 ix86_adjust_counter (count, 2);
14483 emit_label (label);
14484 LABEL_NUSES (label) = 1;
14486 if (align <= 4 && desired_alignment > 4)
14488 rtx label = ix86_expand_aligntest (destptr, 4, false);
14489 srcmem = change_address (srcmem, SImode, srcptr);
14490 destmem = change_address (destmem, SImode, destptr);
14491 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
14492 ix86_adjust_counter (count, 4);
14493 emit_label (label);
14494 LABEL_NUSES (label) = 1;
14496 gcc_assert (desired_alignment <= 8);
14499 /* Set enough from DEST to align DEST, known to be aligned by ALIGN, to
14500 DESIRED_ALIGNMENT. */
/* For each of the steps 1, 2 and 4 that lies between ALIGN and
   DESIRED_ALIGNMENT, an ix86_expand_aligntest on DESTPTR guards one
   gen_strset of the matching low part of VALUE, after which COUNT is
   adjusted by the same amount with ix86_adjust_counter.  Alignments
   above 8 are rejected by the final assertion.  */
14502 expand_setmem_prologue (rtx destmem, rtx destptr, rtx value, rtx count,
14503 int align, int desired_alignment)
14505 if (align <= 1 && desired_alignment > 1)
14507 rtx label = ix86_expand_aligntest (destptr, 1, false);
14508 destmem = change_address (destmem, QImode, destptr);
14509 emit_insn (gen_strset (destptr, destmem, gen_lowpart (QImode, value)));
14510 ix86_adjust_counter (count, 1);
14511 emit_label (label);
14512 LABEL_NUSES (label) = 1;
14514 if (align <= 2 && desired_alignment > 2)
14516 rtx label = ix86_expand_aligntest (destptr, 2, false);
14517 destmem = change_address (destmem, HImode, destptr);
14518 emit_insn (gen_strset (destptr, destmem, gen_lowpart (HImode, value)));
14519 ix86_adjust_counter (count, 2);
14520 emit_label (label);
14521 LABEL_NUSES (label) = 1;
14523 if (align <= 4 && desired_alignment > 4)
14525 rtx label = ix86_expand_aligntest (destptr, 4, false);
14526 destmem = change_address (destmem, SImode, destptr);
14527 emit_insn (gen_strset (destptr, destmem, gen_lowpart (SImode, value)));
14528 ix86_adjust_counter (count, 4);
14529 emit_label (label);
14530 LABEL_NUSES (label) = 1;
14532 gcc_assert (desired_alignment <= 8);
14535 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
/* The cost table is taken from ix86_cost->memset or ix86_cost->memcpy,
   indexed by TARGET_64BIT != 0.  *DYNAMIC_CHECK is set to -1 on entry and
   is only changed, to the size bound MAX found by the inlining heuristic,
   when TARGET_INLINE_STRINGOPS_DYNAMICALLY holds.  An EXPECTED_SIZE of -1
   is treated as "no size estimate".  */
14536 static enum stringop_alg
14537 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
14538 int *dynamic_check)
14540 const struct stringop_algs * algs;
14542 *dynamic_check = -1;
14544 algs = &ix86_cost->memset[TARGET_64BIT != 0];
14546 algs = &ix86_cost->memcpy[TARGET_64BIT != 0];
/* An explicitly requested algorithm overrides every heuristic.  */
14547 if (stringop_alg != no_stringop)
14548 return stringop_alg;
14549 /* rep; movq or rep; movl is the smallest variant. */
14550 else if (optimize_size)
14552 if (!count || (count & 3))
14553 return rep_prefix_1_byte;
14555 return rep_prefix_4_byte;
14557 /* Very tiny blocks are best handled via the loop, REP is expensive to setup.
14559 else if (expected_size != -1 && expected_size < 4)
14560 return loop_1_byte;
14561 else if (expected_size != -1)
14564 enum stringop_alg alg = libcall;
14565 for (i = 0; i < NAX_STRINGOP_ALGS; i++)
14567 gcc_assert (algs->size[i].max);
14568 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
14570 if (algs->size[i].alg != libcall)
14571 alg = algs->size[i].alg;
14572 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
14573 last non-libcall inline algorithm. */
14574 if (TARGET_INLINE_ALL_STRINGOPS)
14576 /* When the current size is best to be copied by a libcall,
14577 but we are still forced to inline, run the heuristic below
14578 that will pick code for medium sized blocks. */
14579 if (alg != libcall)
14584 return algs->size[i].alg;
14587 gcc_assert (TARGET_INLINE_ALL_STRINGOPS);
14589 /* When asked to inline the call anyway, try to pick meaningful choice.
14590 We look for maximal size of block that is faster to copy by hand and
14591 take blocks of at most that size guessing that average size will
14592 be roughly half of the block.
14594 If this turns out to be bad, we might simply specify the preferred
14595 choice in ix86_costs. */
14596 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
14597 && algs->unknown_size == libcall)
14600 enum stringop_alg alg;
/* MAX ends up as the size bound of the last table entry whose algorithm
   is set and is not a libcall.  */
14603 for (i = 0; i < NAX_STRINGOP_ALGS; i++)
14604 if (algs->size[i].alg != libcall && algs->size[i].alg)
14605 max = algs->size[i].max;
/* Re-run the decision with MAX / 2 as the expected size.  */
14608 alg = decide_alg (count, max / 2, memset, dynamic_check);
14609 gcc_assert (*dynamic_check == -1);
14610 gcc_assert (alg != libcall);
14611 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
14612 *dynamic_check = max;
14615 return algs->unknown_size;
14618 /* Decide on alignment. We know that the operand is already aligned to ALIGN
14619 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
/* The desired alignment is chosen per algorithm ALG in the switch below
   and then clamped by the two checks at the end of the function.  */
14621 decide_alignment (int align,
14622 enum stringop_alg alg,
14625 int desired_align = 0;
14629 gcc_unreachable ();
14631 case unrolled_loop:
14632 desired_align = GET_MODE_SIZE (Pmode);
14634 case rep_prefix_8_byte:
14637 case rep_prefix_4_byte:
14638 /* PentiumPro has special logic triggering for 8 byte aligned blocks.
14639 copying whole cacheline at once. */
14640 if (TARGET_PENTIUMPRO)
14645 case rep_prefix_1_byte:
14646 /* PentiumPro has special logic triggering for 8 byte aligned blocks.
14647 copying whole cacheline at once. */
14648 if (TARGET_PENTIUMPRO)
/* Never ask for less than the alignment already known, and do not ask
   for more than ALIGN when the expected size is below 4 bytes.  */
14662 if (desired_align < align)
14663 desired_align = align;
14664 if (expected_size != -1 && expected_size < 4)
14665 desired_align = align;
14666 return desired_align;
14669 /* Return the smallest power of 2 greater than VAL. */
/* NOTE(review): the result is strictly greater than VAL according to the
   comment above, so a VAL that is itself a power of 2 presumably yields
   the next one -- confirm against the body.  */
14671 smallest_pow2_greater_than (int val)
14679 /* Expand string move (memcpy) operation. Use i386 string operations when
14680 profitable. ix86_expand_setmem contains similar code. The code depends upon
14681 architecture, block size and alignment, but always has the same
14684 1) Prologue guard: Conditional that jumps up to epilogues for small
14685 blocks that can be handled by epilogue alone. This is faster but
14686 also needed for correctness, since prologue assume the block is larger
14687 than the desired alignment.
14689 Optional dynamic check for size and libcall for large
14690 blocks is emitted here too, with -minline-stringops-dynamically.
14692 2) Prologue: copy first few bytes in order to get destination aligned
14693 to DESIRED_ALIGN. It is emitted only when ALIGN is less than
14694 DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be copied.
14695 We emit either a jump tree on power of two sized blocks, or a byte loop.
14697 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
14698 with specified algorithm.
14700 4) Epilogue: code copying tail of the block that is too small to be
14701 handled by main body (or up to size guarded by prologue guard). */
14704 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
14705 rtx expected_align_exp, rtx expected_size_exp)
14711 rtx jump_around_label = NULL;
14712 HOST_WIDE_INT align = 1;
14713 unsigned HOST_WIDE_INT count = 0;
14714 HOST_WIDE_INT expected_size = -1;
14715 int size_needed = 0, epilogue_size_needed;
14716 int desired_align = 0;
14717 enum stringop_alg alg;
14720 if (CONST_INT_P (align_exp))
14721 align = INTVAL (align_exp);
14722 /* i386 can do misaligned access on reasonably increased cost. */
14723 if (CONST_INT_P (expected_align_exp)
14724 && INTVAL (expected_align_exp) > align)
14725 align = INTVAL (expected_align_exp);
14726 if (CONST_INT_P (count_exp))
14727 count = expected_size = INTVAL (count_exp);
14728 if (CONST_INT_P (expected_size_exp) && count == 0)
14729 expected_size = INTVAL (expected_size_exp);
14731 /* Step 0: Decide on preferred algorithm, desired alignment and
14732 size of chunks to be copied by main loop. */
14734 alg = decide_alg (count, expected_size, false, &dynamic_check);
14735 desired_align = decide_alignment (align, alg, expected_size);
14737 if (!TARGET_ALIGN_STRINGOPS)
14738 align = desired_align;
14740 if (alg == libcall)
14742 gcc_assert (alg != no_stringop);
14744 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
14745 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
14746 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
14751 gcc_unreachable ();
14753 size_needed = GET_MODE_SIZE (Pmode);
14755 case unrolled_loop:
14756 size_needed = GET_MODE_SIZE (Pmode) * (TARGET_64BIT ? 4 : 2);
14758 case rep_prefix_8_byte:
14761 case rep_prefix_4_byte:
14764 case rep_prefix_1_byte:
14770 epilogue_size_needed = size_needed;
14772 /* Step 1: Prologue guard. */
14774 /* Alignment code needs count to be in register. */
14775 if (CONST_INT_P (count_exp) && desired_align > align)
14777 enum machine_mode mode = SImode;
/* The mask must be built in the type of COUNT: where int is 32 bits the
   literal 0xffffffff is an unsigned int, so a plain ~0xffffffff is 0 and
   the test for a count that needs more than 32 bits could never
   succeed.  */
14778 if (TARGET_64BIT && (count & ~(unsigned HOST_WIDE_INT) 0xffffffff))
14780 count_exp = force_reg (mode, count_exp);
14782 gcc_assert (desired_align >= 1 && align >= 1);
14784 /* Ensure that alignment prologue won't copy past end of block. */
14785 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
14787 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
14788 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
14789 Make sure it is power of 2. */
14790 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
14792 label = gen_label_rtx ();
14793 emit_cmp_and_jump_insns (count_exp,
14794 GEN_INT (epilogue_size_needed),
14795 LTU, 0, counter_mode (count_exp), 1, label);
14796 if (GET_CODE (count_exp) == CONST_INT)
14798 else if (expected_size == -1 || expected_size < epilogue_size_needed)
14799 predict_jump (REG_BR_PROB_BASE * 60 / 100);
14801 predict_jump (REG_BR_PROB_BASE * 20 / 100);
14803 /* Emit code to decide on runtime whether library call or inline should be
14805 if (dynamic_check != -1)
14807 rtx hot_label = gen_label_rtx ();
14808 jump_around_label = gen_label_rtx ();
14809 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
14810 LEU, 0, GET_MODE (count_exp), 1, hot_label);
14811 predict_jump (REG_BR_PROB_BASE * 90 / 100);
14812 emit_block_move_via_libcall (dst, src, count_exp, false);
14813 emit_jump (jump_around_label);
14814 emit_label (hot_label);
14817 /* Step 2: Alignment prologue. */
14819 if (desired_align > align)
14821 /* Except for the first move in epilogue, we no longer know
14822 constant offset in aliasing info. It doesn't seem worth
14823 the pain to maintain it for the first move, so throw away
14825 src = change_address (src, BLKmode, srcreg);
14826 dst = change_address (dst, BLKmode, destreg);
14827 expand_movmem_prologue (dst, src, destreg, srcreg, count_exp, align,
14830 if (label && size_needed == 1)
14832 emit_label (label);
14833 LABEL_NUSES (label) = 1;
14837 /* Step 3: Main loop. */
14843 gcc_unreachable ();
14845 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
14846 count_exp, QImode, 1, expected_size);
14849 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
14850 count_exp, Pmode, 1, expected_size);
14852 case unrolled_loop:
14853 /* Unroll only by factor of 2 in 32bit mode, since we don't have enough
14854 registers for 4 temporaries anyway. */
14855 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
14856 count_exp, Pmode, TARGET_64BIT ? 4 : 2,
14859 case rep_prefix_8_byte:
14860 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
14863 case rep_prefix_4_byte:
14864 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
14867 case rep_prefix_1_byte:
14868 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
14872 /* Adjust properly the offset of src and dest memory for aliasing. */
14873 if (CONST_INT_P (count_exp))
14875 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
14876 (count / size_needed) * size_needed);
14877 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
14878 (count / size_needed) * size_needed);
14882 src = change_address (src, BLKmode, srcreg);
14883 dst = change_address (dst, BLKmode, destreg);
14886 /* Step 4: Epilogue to copy the remaining bytes. */
14890 /* When the main loop is done, COUNT_EXP might hold original count,
14891 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
14892 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
14893 bytes. Compensate if needed. */
14895 if (size_needed < epilogue_size_needed)
14898 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
14899 GEN_INT (size_needed - 1), count_exp, 1,
14901 if (tmp != count_exp)
14902 emit_move_insn (count_exp, tmp);
14904 emit_label (label);
14905 LABEL_NUSES (label) = 1;
14908 if (count_exp != const0_rtx && epilogue_size_needed > 1)
14909 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
14910 epilogue_size_needed);
14911 if (jump_around_label)
14912 emit_label (jump_around_label);
/* promote_duplicated_reg (MODE, VAL): MODE must be SImode or DImode
   (asserted below).  const0_rtx yields a fresh zero register of MODE.
   Any other CONST_INT is reduced to its low 8 bits and the replicated
   constant is loaded directly.  For a non-constant VAL the code either
   multiplies by the promoted constant 1 or builds the value with
   shift-and-IOR steps of 8, 16 and 32 bits, depending on the cost
   comparison against ix86_cost.  */
14916 /* Helper function for memset. For QImode value 0xXY produce
14917 0xXYXYXYXY of width specified by MODE. This is essentially
14918 a * 0x01010101, but we can do slightly better than
14919 synth_mult by unwinding the sequence by hand on CPUs with
14922 promote_duplicated_reg (enum machine_mode mode, rtx val)
14924 enum machine_mode valmode = GET_MODE (val);
14926 int nops = mode == DImode ? 3 : 2;
14928 gcc_assert (mode == SImode || mode == DImode);
14929 if (val == const0_rtx)
14930 return copy_to_mode_reg (mode, const0_rtx);
14931 if (CONST_INT_P (val))
14933 HOST_WIDE_INT v = INTVAL (val) & 255;
14937 if (mode == DImode)
14938 v |= (v << 16) << 16;
14939 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
14942 if (valmode == VOIDmode)
14944 if (valmode != QImode)
14945 val = gen_lowpart (QImode, val);
14946 if (mode == QImode)
14948 if (!TARGET_PARTIAL_REG_STALL)
14950 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
14951 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
14952 <= (ix86_cost->shift_const + ix86_cost->add) * nops
14953 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
14955 rtx reg = convert_modes (mode, QImode, val, true);
14956 tmp = promote_duplicated_reg (mode, const1_rtx);
14957 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
14962 rtx reg = convert_modes (mode, QImode, val, true);
14964 if (!TARGET_PARTIAL_REG_STALL)
14965 if (mode == SImode)
14966 emit_insn (gen_movsi_insv_1 (reg, reg));
14968 emit_insn (gen_movdi_insv_1_rex64 (reg, reg));
14971 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
14972 NULL, 1, OPTAB_DIRECT);
14974 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
14976 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
14977 NULL, 1, OPTAB_DIRECT);
14978 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
14979 if (mode == SImode)
14981 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
14982 NULL, 1, OPTAB_DIRECT);
14983 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
14988 /* Duplicate value VAL using promote_duplicated_reg into maximal size that will
14989 be needed by main loop copying SIZE_NEEDED chunks and prologue getting
14990 alignment from ALIGN to DESIRED_ALIGN. */
/* The widest mode whose condition matches wins: DImode for more than 4
   bytes, SImode for more than 2, HImode for more than 1; otherwise VAL is
   returned unchanged.
   NOTE(review): promote_duplicated_reg asserts that its mode is SImode or
   DImode, so the HImode request below would trip that assertion if this
   branch were ever reached -- confirm whether it is reachable.  */
14992 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align, int align)
14997 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
14998 promoted_val = promote_duplicated_reg (DImode, val);
14999 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
15000 promoted_val = promote_duplicated_reg (SImode, val);
15001 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
15002 promoted_val = promote_duplicated_reg (HImode, val);
15004 promoted_val = val;
15006 return promoted_val;
15009 /* Expand string clear operation (bzero). Use i386 string operations when
15010 profitable. See expand_movmem comment for explanation of individual
15011 steps performed. */
15013 ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
15014 rtx expected_align_exp, rtx expected_size_exp)
15019 rtx jump_around_label = NULL;
15020 HOST_WIDE_INT align = 1;
15021 unsigned HOST_WIDE_INT count = 0;
15022 HOST_WIDE_INT expected_size = -1;
15023 int size_needed = 0, epilogue_size_needed;
15024 int desired_align = 0;
15025 enum stringop_alg alg;
15026 rtx promoted_val = NULL;
15027 bool force_loopy_epilogue = false;
15030 if (CONST_INT_P (align_exp))
15031 align = INTVAL (align_exp);
15032 /* i386 can do misaligned access on reasonably increased cost. */
15033 if (CONST_INT_P (expected_align_exp)
15034 && INTVAL (expected_align_exp) > align)
15035 align = INTVAL (expected_align_exp);
15036 if (CONST_INT_P (count_exp))
15037 count = expected_size = INTVAL (count_exp);
15038 if (CONST_INT_P (expected_size_exp) && count == 0)
15039 expected_size = INTVAL (expected_size_exp);
15041 /* Step 0: Decide on preferred algorithm, desired alignment and
15042 size of chunks to be copied by main loop. */
15044 alg = decide_alg (count, expected_size, true, &dynamic_check);
15045 desired_align = decide_alignment (align, alg, expected_size);
15047 if (!TARGET_ALIGN_STRINGOPS)
15048 align = desired_align;
15050 if (alg == libcall)
15052 gcc_assert (alg != no_stringop);
15054 count_exp = copy_to_mode_reg (counter_mode (count_exp), count_exp);
15055 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
15060 gcc_unreachable ();
15062 size_needed = GET_MODE_SIZE (Pmode);
15064 case unrolled_loop:
15065 size_needed = GET_MODE_SIZE (Pmode) * 4;
15067 case rep_prefix_8_byte:
15070 case rep_prefix_4_byte:
15073 case rep_prefix_1_byte:
15078 epilogue_size_needed = size_needed;
15080 /* Step 1: Prologue guard. */
15082 /* Alignment code needs count to be in register. */
15083 if (CONST_INT_P (count_exp) && desired_align > align)
15085 enum machine_mode mode = SImode;
15086 if (TARGET_64BIT && (count & ~0xffffffff))
15088 count_exp = force_reg (mode, count_exp);
15090 /* Do the cheap promotion to allow better CSE across the
15091 main loop and epilogue (i.e. one load of the big constant in
15092 front of all code). */
15093 if (CONST_INT_P (val_exp))
15094 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
15095 desired_align, align);
15096 /* Ensure that alignment prologue won't copy past end of block. */
15097 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
15099 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
15100 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
15101 Make sure it is power of 2. */
15102 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
15104 /* To improve performance of small blocks, we jump around the VAL
15105 promoting mode. This mean that if the promoted VAL is not constant,
15106 we might not use it in the epilogue and have to use byte
15108 if (epilogue_size_needed > 2 && !promoted_val)
15109 force_loopy_epilogue = true;
15110 label = gen_label_rtx ();
15111 emit_cmp_and_jump_insns (count_exp,
15112 GEN_INT (epilogue_size_needed),
15113 LTU, 0, counter_mode (count_exp), 1, label);
15114 if (GET_CODE (count_exp) == CONST_INT)
15116 else if (expected_size == -1 || expected_size <= epilogue_size_needed)
15117 predict_jump (REG_BR_PROB_BASE * 60 / 100);
15119 predict_jump (REG_BR_PROB_BASE * 20 / 100);
15121 if (dynamic_check != -1)
15123 rtx hot_label = gen_label_rtx ();
15124 jump_around_label = gen_label_rtx ();
15125 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
15126 LEU, 0, counter_mode (count_exp), 1, hot_label);
15127 predict_jump (REG_BR_PROB_BASE * 90 / 100);
15128 set_storage_via_libcall (dst, count_exp, val_exp, false);
15129 emit_jump (jump_around_label);
15130 emit_label (hot_label);
15133 /* Step 2: Alignment prologue. */
15135 /* Do the expensive promotion once we branched off the small blocks. */
15137 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
15138 desired_align, align);
15139 gcc_assert (desired_align >= 1 && align >= 1);
15141 if (desired_align > align)
15143 /* Except for the first move in epilogue, we no longer know
15144 constant offset in aliasing info. It doesn't seem worth
15145 the pain to maintain it for the first move, so throw away
15147 dst = change_address (dst, BLKmode, destreg);
15148 expand_setmem_prologue (dst, destreg, promoted_val, count_exp, align,
15151 if (label && size_needed == 1)
15153 emit_label (label);
15154 LABEL_NUSES (label) = 1;
15158 /* Step 3: Main loop. */
15164 gcc_unreachable ();
15166 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
15167 count_exp, QImode, 1, expected_size);
15170 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
15171 count_exp, Pmode, 1, expected_size);
15173 case unrolled_loop:
15174 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
15175 count_exp, Pmode, 4, expected_size);
15177 case rep_prefix_8_byte:
15178 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
15181 case rep_prefix_4_byte:
15182 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
15185 case rep_prefix_1_byte:
15186 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
15190 /* Adjust properly the offset of src and dest memory for aliasing. */
15191 if (CONST_INT_P (count_exp))
15192 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
15193 (count / size_needed) * size_needed);
15195 dst = change_address (dst, BLKmode, destreg);
15197 /* Step 4: Epilogue to copy the remaining bytes. */
15201 /* When the main loop is done, COUNT_EXP might hold original count,
15202 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
15203 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
15204 bytes. Compensate if needed. */
15206 if (size_needed < desired_align - align)
15209 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
15210 GEN_INT (size_needed - 1), count_exp, 1,
15212 size_needed = desired_align - align + 1;
15213 if (tmp != count_exp)
15214 emit_move_insn (count_exp, tmp);
15216 emit_label (label);
15217 LABEL_NUSES (label) = 1;
15219 if (count_exp != const0_rtx && epilogue_size_needed > 1)
15221 if (force_loopy_epilogue)
15222 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
15225 expand_setmem_epilogue (dst, destreg, promoted_val, count_exp,
15228 if (jump_around_label)
15229 emit_label (jump_around_label);
/* ix86_expand_strlensi_unroll_1 -- emit the inline, unrolled strlen body.

   OUT        pointer register that is advanced through the string; every
              address increment emitted below targets OUT.
   SRC        MEM that is re-addressed through OUT (change_address) for the
              QImode and SImode loads.
   ALIGN_RTX  known alignment; its value is only read when it is a CONST_INT.

   The emitted code has three phases: a byte-by-byte prologue that jumps to
   end_0_label as soon as a zero byte is seen, the word-at-a-time code that
   starts at align_4_label, and a tail that narrows down the position of the
   zero byte inside the last word loaded.  */
15233 /* Expand the appropriate insns for doing strlen if not just doing
15236 out = result, initialized with the start address
15237 align_rtx = alignment of the address.
15238 scratch = scratch register, initialized with the start address when
15239 not aligned, otherwise undefined
15241 This is just the body. It needs the initializations mentioned above and
15242 some address computing at the end. These things are done in i386.md. */
15245 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
15249 rtx align_2_label = NULL_RTX;
15250 rtx align_3_label = NULL_RTX;
15251 rtx align_4_label = gen_label_rtx ();
15252 rtx end_0_label = gen_label_rtx ();
15254 rtx tmpreg = gen_reg_rtx (SImode);
15255 rtx scratch = gen_reg_rtx (SImode);
15259 if (CONST_INT_P (align_rtx))
15260 align = INTVAL (align_rtx);
15262 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
15264 /* Is there a known alignment and is it less than 4? */
15267 rtx scratch1 = gen_reg_rtx (Pmode);
15268 emit_move_insn (scratch1, out);
15269 /* Is there a known alignment and is it not 2? */
15272 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
15273 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
15275 /* Leave just the 2 lower bits (the mask below is 3). */
15276 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
15277 NULL_RTX, 0, OPTAB_WIDEN);
15279 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
15280 Pmode, 1, align_4_label);
15281 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
15282 Pmode, 1, align_2_label);
15283 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
15284 Pmode, 1, align_3_label);
15288 /* Since the alignment is 2, we have to check 2 or 0 bytes;
15289 check if it is aligned to 4 bytes. */
15291 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
15292 NULL_RTX, 0, OPTAB_WIDEN);
15294 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
15295 Pmode, 1, align_4_label);
15298 mem = change_address (src, QImode, out);
15300 /* Now compare the bytes. */
15302 /* Compare the first n unaligned bytes on a byte per byte basis. */
15303 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
15304 QImode, 1, end_0_label);
15306 /* Increment the address. */
15308 emit_insn (gen_adddi3 (out, out, const1_rtx));
15310 emit_insn (gen_addsi3 (out, out, const1_rtx));
15312 /* Not needed with an alignment of 2 */
15315 emit_label (align_2_label);
15317 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
15321 emit_insn (gen_adddi3 (out, out, const1_rtx));
15323 emit_insn (gen_addsi3 (out, out, const1_rtx));
15325 emit_label (align_3_label);
15328 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
15332 emit_insn (gen_adddi3 (out, out, const1_rtx));
15334 emit_insn (gen_addsi3 (out, out, const1_rtx));
15337 /* Generate loop to check 4 bytes at a time. It is not a good idea to
15338 align this loop. It gives only huge programs, but does not help to
15340 emit_label (align_4_label);
15342 mem = change_address (src, SImode, out);
15343 emit_move_insn (scratch, mem);
15345 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
15347 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
15349 /* This formula yields a nonzero result iff one of the bytes is zero.
15350 This saves three branches inside loop and many cycles. */
15352 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
15353 emit_insn (gen_one_cmplsi2 (scratch, scratch));
15354 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
15355 emit_insn (gen_andsi3 (tmpreg, tmpreg,
15356 gen_int_mode (0x80808080, SImode)));
15357 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
15362 rtx reg = gen_reg_rtx (SImode);
15363 rtx reg2 = gen_reg_rtx (Pmode);
15364 emit_move_insn (reg, tmpreg);
15365 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
15367 /* If zero is not in the first two bytes, move two bytes forward. */
15368 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
15369 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
15370 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
15371 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
15372 gen_rtx_IF_THEN_ELSE (SImode, tmp,
/* NOTE(review): the SET below is built with SImode while every other
   gen_rtx_SET in this function passes VOIDmode -- confirm this is intended. */
15375 /* Emit lea manually to avoid clobbering of flags. */
15376 emit_insn (gen_rtx_SET (SImode, reg2,
15377 gen_rtx_PLUS (Pmode, out, const2_rtx)));
15379 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
15380 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
15381 emit_insn (gen_rtx_SET (VOIDmode, out,
15382 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
15389 rtx end_2_label = gen_label_rtx ();
15390 /* Is zero in the first two bytes? */
15392 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
15393 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
15394 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
15395 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
15396 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
15398 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
15399 JUMP_LABEL (tmp) = end_2_label;
15401 /* Not in the first two. Move two bytes forward. */
15402 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
15404 emit_insn (gen_adddi3 (out, out, const2_rtx));
15406 emit_insn (gen_addsi3 (out, out, const2_rtx));
15408 emit_label (end_2_label);
15412 /* Avoid branch in fixing the byte. */
15413 tmpreg = gen_lowpart (QImode, tmpreg);
15414 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
/* NOTE(review): the literal 17 is presumably the flags register that the
   code above names FLAGS_REG -- confirm and prefer the symbolic name. */
15415 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, 17), const0_rtx);
15417 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
15419 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));
15421 emit_label (end_0_label);
/* ix86_expand_strlen -- expand a strlen of SRC into OUT.

   EOSCHAR is the terminator being searched for and ALIGN the known
   alignment of SRC.  Two strategies are visible here.  When
   TARGET_UNROLL_STRLEN is set, EOSCHAR is const0_rtx and optimize > 1,
   ix86_expand_strlensi_unroll_1 is used and the start address is
   subtracted from the end address it leaves in OUT.  Otherwise a scan
   built from UNSPEC_SCAS and gen_strlenqi_1 is emitted and OUT is
   computed as the one's complement of its result plus -1.  */
15424 /* Expand strlen. */
15427 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
15429 rtx addr, scratch1, scratch2, scratch3, scratch4;
15431 /* The generic case of the strlen expander is long. Avoid
15432 expanding it unless TARGET_INLINE_ALL_STRINGOPS. */
15434 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
15435 && !TARGET_INLINE_ALL_STRINGOPS
15437 && (!CONST_INT_P (align) || INTVAL (align) < 4))
15440 addr = force_reg (Pmode, XEXP (src, 0));
15441 scratch1 = gen_reg_rtx (Pmode);
15443 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
15446 /* Well it seems that some optimizer does not combine a call like
15447 foo(strlen(bar), strlen(bar));
15448 when the move and the subtraction is done here. It does calculate
15449 the length just once when these instructions are done inside of
15450 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
15451 often used and I use one fewer register for the lifetime of
15452 output_strlen_unroll() this is better. */
15454 emit_move_insn (out, addr);
15456 ix86_expand_strlensi_unroll_1 (out, src, align);
15458 /* strlensi_unroll_1 returns the address of the zero at the end of
15459 the string, like memchr(), so compute the length by subtracting
15460 the start address. */
15462 emit_insn (gen_subdi3 (out, out, addr));
15464 emit_insn (gen_subsi3 (out, out, addr));
15469 scratch2 = gen_reg_rtx (Pmode);
15470 scratch3 = gen_reg_rtx (Pmode);
/* scratch4 holds the constant -1 and is passed as the last element of
   the UNSPEC_SCAS vector below. */
15471 scratch4 = force_reg (Pmode, constm1_rtx);
15473 emit_move_insn (scratch3, addr);
15474 eoschar = force_reg (QImode, eoschar);
15476 src = replace_equiv_address_nv (src, scratch3);
15478 /* If .md starts supporting :P, this can be done in .md. */
15479 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
15480 scratch4), UNSPEC_SCAS);
15481 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
15484 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
15485 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
15489 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
15490 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
/* construct_plt_address (SYMBOL) -- SYMBOL must be a SYMBOL_REF and the
   code model must be CM_LARGE_PIC (both are asserted).  A fresh Pmode
   pseudo is loaded with a CONST wrapping an UNSPEC_PLTOFF of SYMBOL and
   pic_offset_table_rtx is then added to it.  */
15496 /* For given symbol (function) construct code to compute address of its PLT
15497 entry in large x86-64 PIC model. */
15499 construct_plt_address (rtx symbol)
15501 rtx tmp = gen_reg_rtx (Pmode);
15502 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
15504 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
15505 gcc_assert (ix86_cmodel == CM_LARGE_PIC);
15507 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
15508 emit_insn (gen_adddi3 (tmp, tmp, pic_offset_table_rtx));
/* ix86_expand_call -- emit the RTL for a call.

   RETVAL    destination of the SET that wraps the CALL rtx.
   FNADDR    MEM holding the callee address; it may be rebuilt below
             (PLT address, copy to a register, or R11 for sibcalls).
   CALLARG1  second operand of the CALL rtx.
   CALLARG2  on TARGET_64BIT, when its INTVAL is non-negative it is moved
             into QImode hard register 0 (`al' below) and recorded as used.
   POP       stack adjustment; a SET of stack_pointer_rtx built from it is
             combined with the call in a PARALLEL.  The assert requires it
             to be null on TARGET_64BIT.
   SIBCALL   nonzero for a sibling call.

   Registers collected in `use' are stored into CALL_INSN_FUNCTION_USAGE
   of the emitted call insn.  */
15513 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
15514 rtx callarg2 ATTRIBUTE_UNUSED,
15515 rtx pop, int sibcall)
15517 rtx use = NULL, call;
15519 if (pop == const0_rtx)
15521 gcc_assert (!TARGET_64BIT || !pop);
15523 if (TARGET_MACHO && !TARGET_64BIT)
15526 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
15527 fnaddr = machopic_indirect_call_target (fnaddr);
15532 /* Static functions and indirect calls don't need the pic register. */
15533 if (flag_pic && (!TARGET_64BIT || ix86_cmodel == CM_LARGE_PIC)
15534 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
15535 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
15536 use_reg (&use, pic_offset_table_rtx);
15539 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
15541 rtx al = gen_rtx_REG (QImode, 0);
15542 emit_move_insn (al, callarg2);
15543 use_reg (&use, al);
/* In the large PIC model a non-local symbolic callee is reached through
   the address computed by construct_plt_address. */
15546 if (ix86_cmodel == CM_LARGE_PIC
15547 && GET_CODE (fnaddr) == MEM
15548 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
15549 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
15550 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
15551 else if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
15553 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
15554 fnaddr = gen_rtx_MEM (QImode, fnaddr);
/* A 64-bit sibcall through a non-constant address goes via R11. */
15556 if (sibcall && TARGET_64BIT
15557 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
15560 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
15561 fnaddr = gen_rtx_REG (Pmode, R11_REG);
15562 emit_move_insn (fnaddr, addr);
15563 fnaddr = gen_rtx_MEM (QImode, fnaddr);
15566 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
15568 call = gen_rtx_SET (VOIDmode, retval, call);
15571 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
15572 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
15573 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
15576 call = emit_call_insn (call);
15578 CALL_INSN_FUNCTION_USAGE (call) = use;
/* ix86_init_machine_status -- allocate the per-function machine_function
   record with GGC_CNEW, then set use_fast_prologue_epilogue_nregs to -1
   and tls_descriptor_call_expanded_p to 0.  */
15582 /* Clear stack slot assignments remembered from previous functions.
15583 This is called from INIT_EXPANDERS once before RTL is emitted for each
15586 static struct machine_function *
15587 ix86_init_machine_status (void)
15589 struct machine_function *f;
15591 f = GGC_CNEW (struct machine_function);
15592 f->use_fast_prologue_epilogue_nregs = -1;
15593 f->tls_descriptor_call_expanded_p = 0;
/* assign_386_stack_local (MODE, N) -- look up or create stack slot N for MODE.

   The list headed by ix86_stack_locals is searched for an entry whose mode
   and slot number both match; a hit returns copy_rtx of the cached rtl.
   Otherwise a new entry is allocated with ggc_alloc, given a slot from
   assign_stack_local sized by GET_MODE_SIZE (MODE), and pushed on the
   front of the list.  N must be below MAX_386_STACK_LOCALS.  */
15598 /* Return a MEM corresponding to a stack slot with mode MODE.
15599 Allocate a new slot if necessary.
15601 The RTL for a function can have several slots available: N is
15602 which slot to use. */
15605 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
15607 struct stack_local_entry *s;
15609 gcc_assert (n < MAX_386_STACK_LOCALS);
15611 /* Virtual slot is valid only before vregs are instantiated. */
15612 gcc_assert ((n == SLOT_VIRTUAL) == !virtuals_instantiated);
/* Reuse an existing slot with the same mode and number if there is one. */
15614 for (s = ix86_stack_locals; s; s = s->next)
15615 if (s->mode == mode && s->n == n)
15616 return copy_rtx (s->rtl);
15618 s = (struct stack_local_entry *)
15619 ggc_alloc (sizeof (struct stack_local_entry));
15622 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
15624 s->next = ix86_stack_locals;
15625 ix86_stack_locals = s;
/* ix86_tls_get_addr -- return the SYMBOL_REF cached in ix86_tls_symbol,
   creating it on first use.  The name is either "___tls_get_addr" or
   "__tls_get_addr", selected by a test that involves TARGET_ANY_GNU_TLS.  */
15629 /* Construct the SYMBOL_REF for the tls_get_addr function. */
15631 static GTY(()) rtx ix86_tls_symbol;
15633 ix86_tls_get_addr (void)
15636 if (!ix86_tls_symbol)
15638 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
15639 (TARGET_ANY_GNU_TLS
15641 ? "___tls_get_addr"
15642 : "__tls_get_addr");
15645 return ix86_tls_symbol;
/* ix86_tls_module_base -- return the SYMBOL_REF cached in
   ix86_tls_module_base_symbol, creating it on first use.  On creation the
   TLS_MODEL_GLOBAL_DYNAMIC model, shifted by SYMBOL_FLAG_TLS_SHIFT, is
   or-ed into the symbol's flags.  */
15648 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
15650 static GTY(()) rtx ix86_tls_module_base_symbol;
15652 ix86_tls_module_base (void)
15655 if (!ix86_tls_module_base_symbol)
15657 ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode,
15658 "_TLS_MODULE_BASE_");
15659 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
15660 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
15663 return ix86_tls_module_base_symbol;
/* memory_address_length (ADDR) -- classify ADDR and return the length of
   its encoding.

   Auto-modify addresses (PRE_DEC, POST_INC, PRE_MODIFY, POST_MODIFY) are
   tested first.  Everything else is split by ix86_decompose_address into
   base, index and displacement (SUBREGs around base and index are
   stripped), and the result depends on which of the three are present
   and on whether the base is one of the stack/frame/arg pointer rtxes.  */
15666 /* Calculate the length of the memory address in the instruction
15667 encoding. Does not include the one-byte modrm, opcode, or prefix. */
15670 memory_address_length (rtx addr)
15672 struct ix86_address parts;
15673 rtx base, index, disp;
15677 if (GET_CODE (addr) == PRE_DEC
15678 || GET_CODE (addr) == POST_INC
15679 || GET_CODE (addr) == PRE_MODIFY
15680 || GET_CODE (addr) == POST_MODIFY)
15683 ok = ix86_decompose_address (addr, &parts);
15686 if (parts.base && GET_CODE (parts.base) == SUBREG)
15687 parts.base = SUBREG_REG (parts.base);
15688 if (parts.index && GET_CODE (parts.index) == SUBREG)
15689 parts.index = SUBREG_REG (parts.index);
15692 index = parts.index;
15697 - esp as the base always wants an index,
15698 - ebp as the base always wants a displacement. */
15700 /* Register Indirect. */
15701 if (base && !index && !disp)
15703 /* esp (for its index) and ebp (for its displacement) need
15704 the two-byte modrm form. */
15705 if (addr == stack_pointer_rtx
15706 || addr == arg_pointer_rtx
15707 || addr == frame_pointer_rtx
15708 || addr == hard_frame_pointer_rtx)
15712 /* Direct Addressing. */
15713 else if (disp && !base && !index)
15718 /* Find the length of the displacement constant. */
/* Constraint K is tested to tell a short displacement from a long one;
   presumably it is the signed 8-bit immediate constraint -- confirm
   against the constraint definitions. */
15721 if (base && satisfies_constraint_K (disp))
15726 /* ebp always wants a displacement. */
15727 else if (base == hard_frame_pointer_rtx)
15730 /* An index requires the two-byte modrm form.... */
15732 /* ...like esp, which always wants an index. */
15733 || base == stack_pointer_rtx
15734 || base == arg_pointer_rtx
15735 || base == frame_pointer_rtx)
/* ix86_attr_length_immediate_default (INSN, SHORTFORM) -- after
   extract_insn_cached, the operands are scanned from last to first for a
   CONSTANT_P one.  With SHORTFORM set, an operand satisfying constraint K
   is handled specially; otherwise the result is chosen by a switch on
   get_attr_mode (INSN), and an unrecognised mode is a fatal_insn error.  */
15742 /* Compute default value for "length_immediate" attribute. When SHORTFORM
15743 is set, expect that the insn has an 8bit immediate alternative. */
15745 ix86_attr_length_immediate_default (rtx insn, int shortform)
15749 extract_insn_cached (insn);
15750 for (i = recog_data.n_operands - 1; i >= 0; --i)
15751 if (CONSTANT_P (recog_data.operand[i]))
15754 if (shortform && satisfies_constraint_K (recog_data.operand[i]))
15758 switch (get_attr_mode (insn))
15769 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
15774 fatal_insn ("unknown insn mode", insn);
/* ix86_attr_length_address_default (INSN) -- for a TYPE_LEA insn the
   address is the SET_SRC of the pattern (first element when the pattern
   is a PARALLEL).  For any other insn the operands are scanned from last
   to first and the first MEM found supplies the address.  In both cases
   the value comes from memory_address_length.  */
15780 /* Compute default value for "length_address" attribute. */
15782 ix86_attr_length_address_default (rtx insn)
15786 if (get_attr_type (insn) == TYPE_LEA)
15788 rtx set = PATTERN (insn);
15790 if (GET_CODE (set) == PARALLEL)
15791 set = XVECEXP (set, 0, 0);
15793 gcc_assert (GET_CODE (set) == SET);
15795 return memory_address_length (SET_SRC (set));
15798 extract_insn_cached (insn);
15799 for (i = recog_data.n_operands - 1; i >= 0; --i)
15800 if (MEM_P (recog_data.operand[i]))
15802 return memory_address_length (XEXP (recog_data.operand[i], 0));
/* ix86_issue_rate -- a switch with cases grouped by processor.  The
   returned values and the switch selector are not visible in this excerpt;
   only the grouping of processors into cases is.  */
15808 /* Return the maximum number of instructions a cpu can issue. */
15811 ix86_issue_rate (void)
15815 case PROCESSOR_PENTIUM:
15819 case PROCESSOR_PENTIUMPRO:
15820 case PROCESSOR_PENTIUM4:
15821 case PROCESSOR_ATHLON:
15823 case PROCESSOR_AMDFAM10:
15824 case PROCESSOR_NOCONA:
15825 case PROCESSOR_GENERIC32:
15826 case PROCESSOR_GENERIC64:
15829 case PROCESSOR_CORE2:
/* ix86_flags_dependent (INSN, DEP_INSN, INSN_TYPE)

   Only INSN_TYPEs that consume flags (TYPE_SETCC, TYPE_ICMOV, TYPE_FCMOV,
   TYPE_IBR) are considered.  DEP_INSN must either be a single set, or a
   PARALLEL of exactly two SETs; in the latter case SET is the destination
   of the first SET and SET2 the destination of the second one.  SET has to
   be the flags register and must be mentioned by INSN, while SET2, when
   present, must not be mentioned by INSN.  */
15837 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
15838 by DEP_INSN and nothing else set by DEP_INSN. */
15841 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
15845 /* Simplify the test for uninteresting insns. */
15846 if (insn_type != TYPE_SETCC
15847 && insn_type != TYPE_ICMOV
15848 && insn_type != TYPE_FCMOV
15849 && insn_type != TYPE_IBR)
15852 if ((set = single_set (dep_insn)) != 0)
15854 set = SET_DEST (set);
15857 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
15858 && XVECLEN (PATTERN (dep_insn), 0) == 2
15859 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
15860 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
15862 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
/* Take the second destination from element 1.  Reading element 0 again
   made SET2 identical to SET, so the SET2 overlap test further down
   always fired whenever the SET test passed and this branch could never
   report a dependence. */
15863 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
15868 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
15871 /* This test is true if the dependent insn reads the flags but
15872 not any other potentially set register. */
15873 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
15876 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
/* ix86_agi_dependent (INSN, DEP_INSN, INSN_TYPE) -- pick the address used
   by INSN and report whether DEP_INSN modifies it (modified_in_p).  For
   the TYPE_LEA branch the address is the SET_SRC of the pattern (first
   element of a PARALLEL).  Otherwise the operands are scanned from last
   to first and the first MEM found supplies the address.  */
15882 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
15883 address with operands set by DEP_INSN. */
15886 ix86_agi_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
15890 if (insn_type == TYPE_LEA
15893 addr = PATTERN (insn);
15895 if (GET_CODE (addr) == PARALLEL)
15896 addr = XVECEXP (addr, 0, 0);
15898 gcc_assert (GET_CODE (addr) == SET);
15900 addr = SET_SRC (addr);
15905 extract_insn_cached (insn);
15906 for (i = recog_data.n_operands - 1; i >= 0; --i)
15907 if (MEM_P (recog_data.operand[i]))
15909 addr = XEXP (recog_data.operand[i], 0);
15916 return modified_in_p (addr, dep_insn);
/* ix86_adjust_cost (INSN, LINK, DEP_INSN, COST) -- adjust COST, the cost
   of the dependence LINK of INSN on DEP_INSN.

   Dependencies whose REG_NOTE_KIND is nonzero and insns that
   recog_memoized cannot recognise are handled before anything else.
   The rest is a switch with per-processor cases that look at address
   generation (ix86_agi_dependent), flag consumption
   (ix86_flags_dependent), the memory attribute of INSN and the types of
   both insns.  The individual cost adjustments are not visible in this
   excerpt.  */
15920 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
15922 enum attr_type insn_type, dep_insn_type;
15923 enum attr_memory memory;
15925 int dep_insn_code_number;
15927 /* Anti and output dependencies have zero cost on all CPUs. */
15928 if (REG_NOTE_KIND (link) != 0)
15931 dep_insn_code_number = recog_memoized (dep_insn);
15933 /* If we can't recognize the insns, we can't really do anything. */
15934 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
15937 insn_type = get_attr_type (insn);
15938 dep_insn_type = get_attr_type (dep_insn);
15942 case PROCESSOR_PENTIUM:
15943 /* Address Generation Interlock adds a cycle of latency. */
15944 if (ix86_agi_dependent (insn, dep_insn, insn_type))
15947 /* ??? Compares pair with jump/setcc. */
15948 if (ix86_flags_dependent (insn, dep_insn, insn_type))
15951 /* Floating point stores require value to be ready one cycle earlier. */
15952 if (insn_type == TYPE_FMOV
15953 && get_attr_memory (insn) == MEMORY_STORE
15954 && !ix86_agi_dependent (insn, dep_insn, insn_type))
15958 case PROCESSOR_PENTIUMPRO:
15959 memory = get_attr_memory (insn);
15961 /* INT->FP conversion is expensive. */
15962 if (get_attr_fp_int_src (dep_insn))
15965 /* There is one cycle extra latency between an FP op and a store. */
15966 if (insn_type == TYPE_FMOV
15967 && (set = single_set (dep_insn)) != NULL_RTX
15968 && (set2 = single_set (insn)) != NULL_RTX
15969 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
15970 && MEM_P (SET_DEST (set2)))
15973 /* Show ability of reorder buffer to hide latency of load by executing
15974 in parallel with previous instruction in case
15975 previous instruction is not needed to compute the address. */
15976 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
15977 && !ix86_agi_dependent (insn, dep_insn, insn_type))
15979 /* Claim moves to take one cycle, as core can issue one load
15980 at time and the next load can start cycle later. */
15981 if (dep_insn_type == TYPE_IMOV
15982 || dep_insn_type == TYPE_FMOV)
15990 memory = get_attr_memory (insn);
15992 /* The esp dependency is resolved before the instruction is really
15994 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
15995 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
15998 /* INT->FP conversion is expensive. */
15999 if (get_attr_fp_int_src (dep_insn))
16002 /* Show ability of reorder buffer to hide latency of load by executing
16003 in parallel with previous instruction in case
16004 previous instruction is not needed to compute the address. */
16005 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
16006 && !ix86_agi_dependent (insn, dep_insn, insn_type))
16008 /* Claim moves to take one cycle, as core can issue one load
16009 at time and the next load can start cycle later. */
16010 if (dep_insn_type == TYPE_IMOV
16011 || dep_insn_type == TYPE_FMOV)
16020 case PROCESSOR_ATHLON:
16022 case PROCESSOR_AMDFAM10:
16023 case PROCESSOR_GENERIC32:
16024 case PROCESSOR_GENERIC64:
16025 memory = get_attr_memory (insn);
16027 /* Show ability of reorder buffer to hide latency of load by executing
16028 in parallel with previous instruction in case
16029 previous instruction is not needed to compute the address. */
16030 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
16031 && !ix86_agi_dependent (insn, dep_insn, insn_type))
16033 enum attr_unit unit = get_attr_unit (insn);
16036 /* Because of the difference between the length of integer and
16037 floating unit pipeline preparation stages, the memory operands
16038 for floating point are cheaper.
16040 ??? For Athlon the difference is most probably 2. */
16041 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
16044 loadcost = TARGET_ATHLON ? 2 : 0;
16046 if (cost >= loadcost)
/* ia32_multipass_dfa_lookahead -- the answer depends on ix86_tune:
   PROCESSOR_PENTIUM is tested first, then PROCESSOR_PENTIUMPRO and
   PROCESSOR_K6 together.  The values returned are not visible in this
   excerpt.  */
16059 /* How many alternative schedules to try. This should be as wide as the
16060 scheduling freedom in the DFA, but no wider. Making this value too
16061 large results in extra work for the scheduler. */
16064 ia32_multipass_dfa_lookahead (void)
16066 if (ix86_tune == PROCESSOR_PENTIUM)
16069 if (ix86_tune == PROCESSOR_PENTIUMPRO
16070 || ix86_tune == PROCESSOR_K6)
/* ix86_constant_alignment (EXP, ALIGN) -- REAL_CST constants are examined
   by mode: DFmode with ALIGN below 64, and ALIGN_MODE_128 modes with ALIGN
   below 128.  When not optimizing for size, a STRING_CST whose
   TREE_STRING_LENGTH is at least 31 and whose ALIGN is below BITS_PER_WORD
   gets BITS_PER_WORD.  */
16078 /* Compute the alignment given to a constant that is being placed in memory.
16079 EXP is the constant and ALIGN is the alignment that the object would
16081 The value of this function is used instead of that alignment to align
16085 ix86_constant_alignment (tree exp, int align)
16087 if (TREE_CODE (exp) == REAL_CST)
16089 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
16091 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
16094 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
16095 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
16096 return BITS_PER_WORD;
/* ix86_data_alignment (TYPE, ALIGN)

   max_align is BITS_PER_WORD when optimizing for size and otherwise the
   smaller of 256 and MAX_OFILE_ALIGNMENT.  Aggregates with a constant
   TYPE_SIZE of at least max_align are examined first, then aggregates of
   at least 128 (TYPE_SIZE units), and finally the type is examined by
   kind: array element mode, complex mode, mode of the first field of a
   record or union, or the mode of a real, vector or integer type.  In
   each case DFmode/DCmode is checked against 64 and ALIGN_MODE_128 modes
   or XCmode against 128.  */
16101 /* Compute the alignment for a static variable.
16102 TYPE is the data type, and ALIGN is the alignment that
16103 the object would ordinarily have. The value of this function is used
16104 instead of that alignment to align the object. */
16107 ix86_data_alignment (tree type, int align)
16109 int max_align = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
16111 if (AGGREGATE_TYPE_P (type)
16112 && TYPE_SIZE (type)
16113 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
16114 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
16115 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
16116 && align < max_align)
16119 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
16120 to 16byte boundary. */
16123 if (AGGREGATE_TYPE_P (type)
16124 && TYPE_SIZE (type)
16125 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
16126 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
16127 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
16131 if (TREE_CODE (type) == ARRAY_TYPE)
16133 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
16135 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
16138 else if (TREE_CODE (type) == COMPLEX_TYPE)
16141 if (TYPE_MODE (type) == DCmode && align < 64)
16143 if (TYPE_MODE (type) == XCmode && align < 128)
16146 else if ((TREE_CODE (type) == RECORD_TYPE
16147 || TREE_CODE (type) == UNION_TYPE
16148 || TREE_CODE (type) == QUAL_UNION_TYPE)
16149 && TYPE_FIELDS (type))
/* Only the first field of the record/union is inspected. */
16151 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
16153 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
16156 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
16157 || TREE_CODE (type) == INTEGER_TYPE)
16159 if (TYPE_MODE (type) == DFmode && align < 64)
16161 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
/* ix86_local_alignment (TYPE, ALIGN)

   Mirrors ix86_data_alignment for automatic variables: aggregates with a
   constant TYPE_SIZE of at least 16 bytes are examined first, then the
   type is examined by kind (array element mode, complex mode, mode of the
   first field of a record or union, or the mode of a real, vector or
   integer type), checking DFmode/DCmode against 64 and ALIGN_MODE_128
   modes or XCmode against 128.  */
16168 /* Compute the alignment for a local variable.
16169 TYPE is the data type, and ALIGN is the alignment that
16170 the object would ordinarily have. The value of this function is used
16171 instead of that alignment to align the object. */
16174 ix86_local_alignment (tree type, int align)
16176 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
16177 to 16byte boundary. */
16180 if (AGGREGATE_TYPE_P (type)
16181 && TYPE_SIZE (type)
16182 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
/* TYPE_SIZE is measured in bits, so the 16-byte threshold is 128, the
   same constant ix86_data_alignment uses for this rule.  The previous
   value of 16 matched every aggregate of two bytes or more. */
16183 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
16184 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
16187 if (TREE_CODE (type) == ARRAY_TYPE)
16189 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
16191 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
16194 else if (TREE_CODE (type) == COMPLEX_TYPE)
16196 if (TYPE_MODE (type) == DCmode && align < 64)
16198 if (TYPE_MODE (type) == XCmode && align < 128)
16201 else if ((TREE_CODE (type) == RECORD_TYPE
16202 || TREE_CODE (type) == UNION_TYPE
16203 || TREE_CODE (type) == QUAL_UNION_TYPE)
16204 && TYPE_FIELDS (type))
16206 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
16208 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
16211 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
16212 || TREE_CODE (type) == INTEGER_TYPE)
16215 if (TYPE_MODE (type) == DFmode && align < 64)
16217 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
/* x86_initialize_trampoline (TRAMP, FNADDR, CXT) -- store the trampoline
   bytes at TRAMP.

   One variant writes byte 0xb9 at offset 0, CXT at offset 1, byte 0xe9 at
   offset 5 and, at offset 6, the SImode difference FNADDR - (TRAMP + 10).
   The other variant is built incrementally at a running `offset': a
   two-byte opcode followed by FNADDR (0xbb41 with a 32-bit value when
   FNADDR is an x86_64_zext_immediate_operand, 0xbb49 with a 64-bit value
   otherwise), then 0xba49 followed by a DImode value, then 0xff49 and
   0xe3.  The final offset is asserted not to exceed TRAMPOLINE_SIZE.
   With ENABLE_EXECUTE_STACK defined, __enable_execute_stack is called on
   TRAMP afterwards.  */
16223 /* Emit RTL insns to initialize the variable parts of a trampoline.
16224 FNADDR is an RTX for the address of the function's pure code.
16225 CXT is an RTX for the static chain value for the function. */
16227 x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
16231 /* Compute offset from the end of the jmp to the target function. */
16232 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
16233 plus_constant (tramp, 10),
16234 NULL_RTX, 1, OPTAB_DIRECT);
16235 emit_move_insn (gen_rtx_MEM (QImode, tramp),
16236 gen_int_mode (0xb9, QImode));
16237 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
16238 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
16239 gen_int_mode (0xe9, QImode));
16240 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
16245 /* Try to load address using shorter movl instead of movabs.
16246 We may want to support movq for kernel mode, but kernel does not use
16247 trampolines at the moment. */
16248 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
16250 fnaddr = copy_to_mode_reg (DImode, fnaddr);
16251 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
16252 gen_int_mode (0xbb41, HImode));
16253 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
16254 gen_lowpart (SImode, fnaddr));
16259 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
16260 gen_int_mode (0xbb49, HImode));
16261 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
16265 /* Load static chain using movabs to r10. */
16266 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
16267 gen_int_mode (0xba49, HImode));
16268 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
16271 /* Jump to the r11 */
16272 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
16273 gen_int_mode (0xff49, HImode));
16274 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
16275 gen_int_mode (0xe3, QImode));
16277 gcc_assert (offset <= TRAMPOLINE_SIZE);
16280 #ifdef ENABLE_EXECUTE_STACK
16281 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
16282 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
16286 /* Codes for all the SSE/MMX builtins.
 * Each enumerator identifies one target builtin.  It is the value stored in
 * the `code' field of the builtin_description tables, and def_builtin uses
 * it as the index into the ix86_builtins[] decl table.  The order of the
 * enumerators determines their values, so keep additions grouped with the
 * instruction-set family they belong to. */
16289 IX86_BUILTIN_ADDPS,
16290 IX86_BUILTIN_ADDSS,
16291 IX86_BUILTIN_DIVPS,
16292 IX86_BUILTIN_DIVSS,
16293 IX86_BUILTIN_MULPS,
16294 IX86_BUILTIN_MULSS,
16295 IX86_BUILTIN_SUBPS,
16296 IX86_BUILTIN_SUBSS,
16298 IX86_BUILTIN_CMPEQPS,
16299 IX86_BUILTIN_CMPLTPS,
16300 IX86_BUILTIN_CMPLEPS,
16301 IX86_BUILTIN_CMPGTPS,
16302 IX86_BUILTIN_CMPGEPS,
16303 IX86_BUILTIN_CMPNEQPS,
16304 IX86_BUILTIN_CMPNLTPS,
16305 IX86_BUILTIN_CMPNLEPS,
16306 IX86_BUILTIN_CMPNGTPS,
16307 IX86_BUILTIN_CMPNGEPS,
16308 IX86_BUILTIN_CMPORDPS,
16309 IX86_BUILTIN_CMPUNORDPS,
16310 IX86_BUILTIN_CMPEQSS,
16311 IX86_BUILTIN_CMPLTSS,
16312 IX86_BUILTIN_CMPLESS,
16313 IX86_BUILTIN_CMPNEQSS,
16314 IX86_BUILTIN_CMPNLTSS,
16315 IX86_BUILTIN_CMPNLESS,
16316 IX86_BUILTIN_CMPNGTSS,
16317 IX86_BUILTIN_CMPNGESS,
16318 IX86_BUILTIN_CMPORDSS,
16319 IX86_BUILTIN_CMPUNORDSS,
16321 IX86_BUILTIN_COMIEQSS,
16322 IX86_BUILTIN_COMILTSS,
16323 IX86_BUILTIN_COMILESS,
16324 IX86_BUILTIN_COMIGTSS,
16325 IX86_BUILTIN_COMIGESS,
16326 IX86_BUILTIN_COMINEQSS,
16327 IX86_BUILTIN_UCOMIEQSS,
16328 IX86_BUILTIN_UCOMILTSS,
16329 IX86_BUILTIN_UCOMILESS,
16330 IX86_BUILTIN_UCOMIGTSS,
16331 IX86_BUILTIN_UCOMIGESS,
16332 IX86_BUILTIN_UCOMINEQSS,
16334 IX86_BUILTIN_CVTPI2PS,
16335 IX86_BUILTIN_CVTPS2PI,
16336 IX86_BUILTIN_CVTSI2SS,
16337 IX86_BUILTIN_CVTSI642SS,
16338 IX86_BUILTIN_CVTSS2SI,
16339 IX86_BUILTIN_CVTSS2SI64,
16340 IX86_BUILTIN_CVTTPS2PI,
16341 IX86_BUILTIN_CVTTSS2SI,
16342 IX86_BUILTIN_CVTTSS2SI64,
16344 IX86_BUILTIN_MAXPS,
16345 IX86_BUILTIN_MAXSS,
16346 IX86_BUILTIN_MINPS,
16347 IX86_BUILTIN_MINSS,
16349 IX86_BUILTIN_LOADUPS,
16350 IX86_BUILTIN_STOREUPS,
16351 IX86_BUILTIN_MOVSS,
16353 IX86_BUILTIN_MOVHLPS,
16354 IX86_BUILTIN_MOVLHPS,
16355 IX86_BUILTIN_LOADHPS,
16356 IX86_BUILTIN_LOADLPS,
16357 IX86_BUILTIN_STOREHPS,
16358 IX86_BUILTIN_STORELPS,
16360 IX86_BUILTIN_MASKMOVQ,
16361 IX86_BUILTIN_MOVMSKPS,
16362 IX86_BUILTIN_PMOVMSKB,
16364 IX86_BUILTIN_MOVNTPS,
16365 IX86_BUILTIN_MOVNTQ,
16367 IX86_BUILTIN_LOADDQU,
16368 IX86_BUILTIN_STOREDQU,
16370 IX86_BUILTIN_PACKSSWB,
16371 IX86_BUILTIN_PACKSSDW,
16372 IX86_BUILTIN_PACKUSWB,
16374 IX86_BUILTIN_PADDB,
16375 IX86_BUILTIN_PADDW,
16376 IX86_BUILTIN_PADDD,
16377 IX86_BUILTIN_PADDQ,
16378 IX86_BUILTIN_PADDSB,
16379 IX86_BUILTIN_PADDSW,
16380 IX86_BUILTIN_PADDUSB,
16381 IX86_BUILTIN_PADDUSW,
16382 IX86_BUILTIN_PSUBB,
16383 IX86_BUILTIN_PSUBW,
16384 IX86_BUILTIN_PSUBD,
16385 IX86_BUILTIN_PSUBQ,
16386 IX86_BUILTIN_PSUBSB,
16387 IX86_BUILTIN_PSUBSW,
16388 IX86_BUILTIN_PSUBUSB,
16389 IX86_BUILTIN_PSUBUSW,
16392 IX86_BUILTIN_PANDN,
16396 IX86_BUILTIN_PAVGB,
16397 IX86_BUILTIN_PAVGW,
16399 IX86_BUILTIN_PCMPEQB,
16400 IX86_BUILTIN_PCMPEQW,
16401 IX86_BUILTIN_PCMPEQD,
16402 IX86_BUILTIN_PCMPGTB,
16403 IX86_BUILTIN_PCMPGTW,
16404 IX86_BUILTIN_PCMPGTD,
16406 IX86_BUILTIN_PMADDWD,
16408 IX86_BUILTIN_PMAXSW,
16409 IX86_BUILTIN_PMAXUB,
16410 IX86_BUILTIN_PMINSW,
16411 IX86_BUILTIN_PMINUB,
16413 IX86_BUILTIN_PMULHUW,
16414 IX86_BUILTIN_PMULHW,
16415 IX86_BUILTIN_PMULLW,
16417 IX86_BUILTIN_PSADBW,
16418 IX86_BUILTIN_PSHUFW,
16420 IX86_BUILTIN_PSLLW,
16421 IX86_BUILTIN_PSLLD,
16422 IX86_BUILTIN_PSLLQ,
16423 IX86_BUILTIN_PSRAW,
16424 IX86_BUILTIN_PSRAD,
16425 IX86_BUILTIN_PSRLW,
16426 IX86_BUILTIN_PSRLD,
16427 IX86_BUILTIN_PSRLQ,
16428 IX86_BUILTIN_PSLLWI,
16429 IX86_BUILTIN_PSLLDI,
16430 IX86_BUILTIN_PSLLQI,
16431 IX86_BUILTIN_PSRAWI,
16432 IX86_BUILTIN_PSRADI,
16433 IX86_BUILTIN_PSRLWI,
16434 IX86_BUILTIN_PSRLDI,
16435 IX86_BUILTIN_PSRLQI,
16437 IX86_BUILTIN_PUNPCKHBW,
16438 IX86_BUILTIN_PUNPCKHWD,
16439 IX86_BUILTIN_PUNPCKHDQ,
16440 IX86_BUILTIN_PUNPCKLBW,
16441 IX86_BUILTIN_PUNPCKLWD,
16442 IX86_BUILTIN_PUNPCKLDQ,
16444 IX86_BUILTIN_SHUFPS,
16446 IX86_BUILTIN_RCPPS,
16447 IX86_BUILTIN_RCPSS,
16448 IX86_BUILTIN_RSQRTPS,
16449 IX86_BUILTIN_RSQRTSS,
16450 IX86_BUILTIN_RSQRTF,
16451 IX86_BUILTIN_SQRTPS,
16452 IX86_BUILTIN_SQRTSS,
16454 IX86_BUILTIN_UNPCKHPS,
16455 IX86_BUILTIN_UNPCKLPS,
16457 IX86_BUILTIN_ANDPS,
16458 IX86_BUILTIN_ANDNPS,
16460 IX86_BUILTIN_XORPS,
16463 IX86_BUILTIN_LDMXCSR,
16464 IX86_BUILTIN_STMXCSR,
16465 IX86_BUILTIN_SFENCE,
16467 /* 3DNow! Original */
16468 IX86_BUILTIN_FEMMS,
16469 IX86_BUILTIN_PAVGUSB,
16470 IX86_BUILTIN_PF2ID,
16471 IX86_BUILTIN_PFACC,
16472 IX86_BUILTIN_PFADD,
16473 IX86_BUILTIN_PFCMPEQ,
16474 IX86_BUILTIN_PFCMPGE,
16475 IX86_BUILTIN_PFCMPGT,
16476 IX86_BUILTIN_PFMAX,
16477 IX86_BUILTIN_PFMIN,
16478 IX86_BUILTIN_PFMUL,
16479 IX86_BUILTIN_PFRCP,
16480 IX86_BUILTIN_PFRCPIT1,
16481 IX86_BUILTIN_PFRCPIT2,
16482 IX86_BUILTIN_PFRSQIT1,
16483 IX86_BUILTIN_PFRSQRT,
16484 IX86_BUILTIN_PFSUB,
16485 IX86_BUILTIN_PFSUBR,
16486 IX86_BUILTIN_PI2FD,
16487 IX86_BUILTIN_PMULHRW,
16489 /* 3DNow! Athlon Extensions */
16490 IX86_BUILTIN_PF2IW,
16491 IX86_BUILTIN_PFNACC,
16492 IX86_BUILTIN_PFPNACC,
16493 IX86_BUILTIN_PI2FW,
16494 IX86_BUILTIN_PSWAPDSI,
16495 IX86_BUILTIN_PSWAPDSF,
/* SSE2: e.g. ADDPD and the COMI*SD/UCOMI*SD codes are paired with
 * OPTION_MASK_ISA_SSE2 in the descriptor tables. */
16498 IX86_BUILTIN_ADDPD,
16499 IX86_BUILTIN_ADDSD,
16500 IX86_BUILTIN_DIVPD,
16501 IX86_BUILTIN_DIVSD,
16502 IX86_BUILTIN_MULPD,
16503 IX86_BUILTIN_MULSD,
16504 IX86_BUILTIN_SUBPD,
16505 IX86_BUILTIN_SUBSD,
16507 IX86_BUILTIN_CMPEQPD,
16508 IX86_BUILTIN_CMPLTPD,
16509 IX86_BUILTIN_CMPLEPD,
16510 IX86_BUILTIN_CMPGTPD,
16511 IX86_BUILTIN_CMPGEPD,
16512 IX86_BUILTIN_CMPNEQPD,
16513 IX86_BUILTIN_CMPNLTPD,
16514 IX86_BUILTIN_CMPNLEPD,
16515 IX86_BUILTIN_CMPNGTPD,
16516 IX86_BUILTIN_CMPNGEPD,
16517 IX86_BUILTIN_CMPORDPD,
16518 IX86_BUILTIN_CMPUNORDPD,
16519 IX86_BUILTIN_CMPEQSD,
16520 IX86_BUILTIN_CMPLTSD,
16521 IX86_BUILTIN_CMPLESD,
16522 IX86_BUILTIN_CMPNEQSD,
16523 IX86_BUILTIN_CMPNLTSD,
16524 IX86_BUILTIN_CMPNLESD,
16525 IX86_BUILTIN_CMPORDSD,
16526 IX86_BUILTIN_CMPUNORDSD,
16528 IX86_BUILTIN_COMIEQSD,
16529 IX86_BUILTIN_COMILTSD,
16530 IX86_BUILTIN_COMILESD,
16531 IX86_BUILTIN_COMIGTSD,
16532 IX86_BUILTIN_COMIGESD,
16533 IX86_BUILTIN_COMINEQSD,
16534 IX86_BUILTIN_UCOMIEQSD,
16535 IX86_BUILTIN_UCOMILTSD,
16536 IX86_BUILTIN_UCOMILESD,
16537 IX86_BUILTIN_UCOMIGTSD,
16538 IX86_BUILTIN_UCOMIGESD,
16539 IX86_BUILTIN_UCOMINEQSD,
16541 IX86_BUILTIN_MAXPD,
16542 IX86_BUILTIN_MAXSD,
16543 IX86_BUILTIN_MINPD,
16544 IX86_BUILTIN_MINSD,
16546 IX86_BUILTIN_ANDPD,
16547 IX86_BUILTIN_ANDNPD,
16549 IX86_BUILTIN_XORPD,
16551 IX86_BUILTIN_SQRTPD,
16552 IX86_BUILTIN_SQRTSD,
16554 IX86_BUILTIN_UNPCKHPD,
16555 IX86_BUILTIN_UNPCKLPD,
16557 IX86_BUILTIN_SHUFPD,
16559 IX86_BUILTIN_LOADUPD,
16560 IX86_BUILTIN_STOREUPD,
16561 IX86_BUILTIN_MOVSD,
16563 IX86_BUILTIN_LOADHPD,
16564 IX86_BUILTIN_LOADLPD,
16566 IX86_BUILTIN_CVTDQ2PD,
16567 IX86_BUILTIN_CVTDQ2PS,
16569 IX86_BUILTIN_CVTPD2DQ,
16570 IX86_BUILTIN_CVTPD2PI,
16571 IX86_BUILTIN_CVTPD2PS,
16572 IX86_BUILTIN_CVTTPD2DQ,
16573 IX86_BUILTIN_CVTTPD2PI,
16575 IX86_BUILTIN_CVTPI2PD,
16576 IX86_BUILTIN_CVTSI2SD,
16577 IX86_BUILTIN_CVTSI642SD,
16579 IX86_BUILTIN_CVTSD2SI,
16580 IX86_BUILTIN_CVTSD2SI64,
16581 IX86_BUILTIN_CVTSD2SS,
16582 IX86_BUILTIN_CVTSS2SD,
16583 IX86_BUILTIN_CVTTSD2SI,
16584 IX86_BUILTIN_CVTTSD2SI64,
16586 IX86_BUILTIN_CVTPS2DQ,
16587 IX86_BUILTIN_CVTPS2PD,
16588 IX86_BUILTIN_CVTTPS2DQ,
16590 IX86_BUILTIN_MOVNTI,
16591 IX86_BUILTIN_MOVNTPD,
16592 IX86_BUILTIN_MOVNTDQ,
16595 IX86_BUILTIN_MASKMOVDQU,
16596 IX86_BUILTIN_MOVMSKPD,
16597 IX86_BUILTIN_PMOVMSKB128,
16599 IX86_BUILTIN_PACKSSWB128,
16600 IX86_BUILTIN_PACKSSDW128,
16601 IX86_BUILTIN_PACKUSWB128,
16603 IX86_BUILTIN_PADDB128,
16604 IX86_BUILTIN_PADDW128,
16605 IX86_BUILTIN_PADDD128,
16606 IX86_BUILTIN_PADDQ128,
16607 IX86_BUILTIN_PADDSB128,
16608 IX86_BUILTIN_PADDSW128,
16609 IX86_BUILTIN_PADDUSB128,
16610 IX86_BUILTIN_PADDUSW128,
16611 IX86_BUILTIN_PSUBB128,
16612 IX86_BUILTIN_PSUBW128,
16613 IX86_BUILTIN_PSUBD128,
16614 IX86_BUILTIN_PSUBQ128,
16615 IX86_BUILTIN_PSUBSB128,
16616 IX86_BUILTIN_PSUBSW128,
16617 IX86_BUILTIN_PSUBUSB128,
16618 IX86_BUILTIN_PSUBUSW128,
16620 IX86_BUILTIN_PAND128,
16621 IX86_BUILTIN_PANDN128,
16622 IX86_BUILTIN_POR128,
16623 IX86_BUILTIN_PXOR128,
16625 IX86_BUILTIN_PAVGB128,
16626 IX86_BUILTIN_PAVGW128,
16628 IX86_BUILTIN_PCMPEQB128,
16629 IX86_BUILTIN_PCMPEQW128,
16630 IX86_BUILTIN_PCMPEQD128,
16631 IX86_BUILTIN_PCMPGTB128,
16632 IX86_BUILTIN_PCMPGTW128,
16633 IX86_BUILTIN_PCMPGTD128,
16635 IX86_BUILTIN_PMADDWD128,
16637 IX86_BUILTIN_PMAXSW128,
16638 IX86_BUILTIN_PMAXUB128,
16639 IX86_BUILTIN_PMINSW128,
16640 IX86_BUILTIN_PMINUB128,
16642 IX86_BUILTIN_PMULUDQ,
16643 IX86_BUILTIN_PMULUDQ128,
16644 IX86_BUILTIN_PMULHUW128,
16645 IX86_BUILTIN_PMULHW128,
16646 IX86_BUILTIN_PMULLW128,
16648 IX86_BUILTIN_PSADBW128,
16649 IX86_BUILTIN_PSHUFHW,
16650 IX86_BUILTIN_PSHUFLW,
16651 IX86_BUILTIN_PSHUFD,
16653 IX86_BUILTIN_PSLLDQI128,
16654 IX86_BUILTIN_PSLLWI128,
16655 IX86_BUILTIN_PSLLDI128,
16656 IX86_BUILTIN_PSLLQI128,
16657 IX86_BUILTIN_PSRAWI128,
16658 IX86_BUILTIN_PSRADI128,
16659 IX86_BUILTIN_PSRLDQI128,
16660 IX86_BUILTIN_PSRLWI128,
16661 IX86_BUILTIN_PSRLDI128,
16662 IX86_BUILTIN_PSRLQI128,
16664 IX86_BUILTIN_PSLLDQ128,
16665 IX86_BUILTIN_PSLLW128,
16666 IX86_BUILTIN_PSLLD128,
16667 IX86_BUILTIN_PSLLQ128,
16668 IX86_BUILTIN_PSRAW128,
16669 IX86_BUILTIN_PSRAD128,
16670 IX86_BUILTIN_PSRLW128,
16671 IX86_BUILTIN_PSRLD128,
16672 IX86_BUILTIN_PSRLQ128,
16674 IX86_BUILTIN_PUNPCKHBW128,
16675 IX86_BUILTIN_PUNPCKHWD128,
16676 IX86_BUILTIN_PUNPCKHDQ128,
16677 IX86_BUILTIN_PUNPCKHQDQ128,
16678 IX86_BUILTIN_PUNPCKLBW128,
16679 IX86_BUILTIN_PUNPCKLWD128,
16680 IX86_BUILTIN_PUNPCKLDQ128,
16681 IX86_BUILTIN_PUNPCKLQDQ128,
16683 IX86_BUILTIN_CLFLUSH,
16684 IX86_BUILTIN_MFENCE,
16685 IX86_BUILTIN_LFENCE,
16687 /* Prescott New Instructions. */
16688 IX86_BUILTIN_ADDSUBPS,
16689 IX86_BUILTIN_HADDPS,
16690 IX86_BUILTIN_HSUBPS,
16691 IX86_BUILTIN_MOVSHDUP,
16692 IX86_BUILTIN_MOVSLDUP,
16693 IX86_BUILTIN_ADDSUBPD,
16694 IX86_BUILTIN_HADDPD,
16695 IX86_BUILTIN_HSUBPD,
16696 IX86_BUILTIN_LDDQU,
16698 IX86_BUILTIN_MONITOR,
16699 IX86_BUILTIN_MWAIT,
/* NOTE(review): the next two groups look like the SSSE3 builtins (plain
 * forms first, then the matching *128 forms) -- confirm against their
 * descriptor table entries. */
16702 IX86_BUILTIN_PHADDW,
16703 IX86_BUILTIN_PHADDD,
16704 IX86_BUILTIN_PHADDSW,
16705 IX86_BUILTIN_PHSUBW,
16706 IX86_BUILTIN_PHSUBD,
16707 IX86_BUILTIN_PHSUBSW,
16708 IX86_BUILTIN_PMADDUBSW,
16709 IX86_BUILTIN_PMULHRSW,
16710 IX86_BUILTIN_PSHUFB,
16711 IX86_BUILTIN_PSIGNB,
16712 IX86_BUILTIN_PSIGNW,
16713 IX86_BUILTIN_PSIGND,
16714 IX86_BUILTIN_PALIGNR,
16715 IX86_BUILTIN_PABSB,
16716 IX86_BUILTIN_PABSW,
16717 IX86_BUILTIN_PABSD,
16719 IX86_BUILTIN_PHADDW128,
16720 IX86_BUILTIN_PHADDD128,
16721 IX86_BUILTIN_PHADDSW128,
16722 IX86_BUILTIN_PHSUBW128,
16723 IX86_BUILTIN_PHSUBD128,
16724 IX86_BUILTIN_PHSUBSW128,
16725 IX86_BUILTIN_PMADDUBSW128,
16726 IX86_BUILTIN_PMULHRSW128,
16727 IX86_BUILTIN_PSHUFB128,
16728 IX86_BUILTIN_PSIGNB128,
16729 IX86_BUILTIN_PSIGNW128,
16730 IX86_BUILTIN_PSIGND128,
16731 IX86_BUILTIN_PALIGNR128,
16732 IX86_BUILTIN_PABSB128,
16733 IX86_BUILTIN_PABSW128,
16734 IX86_BUILTIN_PABSD128,
16736 /* AMDFAM10 - SSE4A New Instructions. */
16737 IX86_BUILTIN_MOVNTSD,
16738 IX86_BUILTIN_MOVNTSS,
16739 IX86_BUILTIN_EXTRQI,
16740 IX86_BUILTIN_EXTRQ,
16741 IX86_BUILTIN_INSERTQI,
16742 IX86_BUILTIN_INSERTQ,
/* SSE4.1: e.g. BLENDPD, INSERTPS128, MPSADBW128 and the PTEST* codes are
 * paired with OPTION_MASK_ISA_SSE4_1 in the descriptor tables. */
16745 IX86_BUILTIN_BLENDPD,
16746 IX86_BUILTIN_BLENDPS,
16747 IX86_BUILTIN_BLENDVPD,
16748 IX86_BUILTIN_BLENDVPS,
16749 IX86_BUILTIN_PBLENDVB128,
16750 IX86_BUILTIN_PBLENDW128,
16755 IX86_BUILTIN_INSERTPS128,
16757 IX86_BUILTIN_MOVNTDQA,
16758 IX86_BUILTIN_MPSADBW128,
16759 IX86_BUILTIN_PACKUSDW128,
16760 IX86_BUILTIN_PCMPEQQ,
16761 IX86_BUILTIN_PHMINPOSUW128,
16763 IX86_BUILTIN_PMAXSB128,
16764 IX86_BUILTIN_PMAXSD128,
16765 IX86_BUILTIN_PMAXUD128,
16766 IX86_BUILTIN_PMAXUW128,
16768 IX86_BUILTIN_PMINSB128,
16769 IX86_BUILTIN_PMINSD128,
16770 IX86_BUILTIN_PMINUD128,
16771 IX86_BUILTIN_PMINUW128,
16773 IX86_BUILTIN_PMOVSXBW128,
16774 IX86_BUILTIN_PMOVSXBD128,
16775 IX86_BUILTIN_PMOVSXBQ128,
16776 IX86_BUILTIN_PMOVSXWD128,
16777 IX86_BUILTIN_PMOVSXWQ128,
16778 IX86_BUILTIN_PMOVSXDQ128,
16780 IX86_BUILTIN_PMOVZXBW128,
16781 IX86_BUILTIN_PMOVZXBD128,
16782 IX86_BUILTIN_PMOVZXBQ128,
16783 IX86_BUILTIN_PMOVZXWD128,
16784 IX86_BUILTIN_PMOVZXWQ128,
16785 IX86_BUILTIN_PMOVZXDQ128,
16787 IX86_BUILTIN_PMULDQ128,
16788 IX86_BUILTIN_PMULLD128,
16790 IX86_BUILTIN_ROUNDPD,
16791 IX86_BUILTIN_ROUNDPS,
16792 IX86_BUILTIN_ROUNDSD,
16793 IX86_BUILTIN_ROUNDSS,
16795 IX86_BUILTIN_PTESTZ,
16796 IX86_BUILTIN_PTESTC,
16797 IX86_BUILTIN_PTESTNZC,
16799 IX86_BUILTIN_VEC_INIT_V2SI,
16800 IX86_BUILTIN_VEC_INIT_V4HI,
16801 IX86_BUILTIN_VEC_INIT_V8QI,
16802 IX86_BUILTIN_VEC_EXT_V2DF,
16803 IX86_BUILTIN_VEC_EXT_V2DI,
16804 IX86_BUILTIN_VEC_EXT_V4SF,
16805 IX86_BUILTIN_VEC_EXT_V4SI,
16806 IX86_BUILTIN_VEC_EXT_V8HI,
16807 IX86_BUILTIN_VEC_EXT_V2SI,
16808 IX86_BUILTIN_VEC_EXT_V4HI,
16809 IX86_BUILTIN_VEC_EXT_V16QI,
16810 IX86_BUILTIN_VEC_SET_V2DI,
16811 IX86_BUILTIN_VEC_SET_V4SF,
16812 IX86_BUILTIN_VEC_SET_V4SI,
16813 IX86_BUILTIN_VEC_SET_V8HI,
16814 IX86_BUILTIN_VEC_SET_V4HI,
16815 IX86_BUILTIN_VEC_SET_V16QI,
16817 IX86_BUILTIN_VEC_PACK_SFIX,
/* SSE4.2: the CRC32* and PCMPESTR*/PCMPISTR* codes are paired with
 * OPTION_MASK_ISA_SSE4_2 in the descriptor tables. */
16820 IX86_BUILTIN_CRC32QI,
16821 IX86_BUILTIN_CRC32HI,
16822 IX86_BUILTIN_CRC32SI,
16823 IX86_BUILTIN_CRC32DI,
16825 IX86_BUILTIN_PCMPESTRI128,
16826 IX86_BUILTIN_PCMPESTRM128,
16827 IX86_BUILTIN_PCMPESTRA128,
16828 IX86_BUILTIN_PCMPESTRC128,
16829 IX86_BUILTIN_PCMPESTRO128,
16830 IX86_BUILTIN_PCMPESTRS128,
16831 IX86_BUILTIN_PCMPESTRZ128,
16832 IX86_BUILTIN_PCMPISTRI128,
16833 IX86_BUILTIN_PCMPISTRM128,
16834 IX86_BUILTIN_PCMPISTRA128,
16835 IX86_BUILTIN_PCMPISTRC128,
16836 IX86_BUILTIN_PCMPISTRO128,
16837 IX86_BUILTIN_PCMPISTRS128,
16838 IX86_BUILTIN_PCMPISTRZ128,
16840 IX86_BUILTIN_PCMPGTQ,
16842 /* TFmode support builtins. */
16844 IX86_BUILTIN_FABSQ,
16845 IX86_BUILTIN_COPYSIGNQ,
16850 /* Table for the ix86 builtin decls, indexed by enum ix86_builtins code and
 * sized by IX86_BUILTIN_MAX.  def_builtin stores the decl for builtin CODE
 * at ix86_builtins[CODE]. */
16851 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
16853 /* Add an ix86 target builtin function with CODE, NAME and TYPE. Do so only
16854 * if ix86_isa_flags has at least one bit in common with MASK and, when MASK
16855 * includes OPTION_MASK_ISA_64BIT, TARGET_64BIT holds as well. Stores the
 * function decl in the ix86_builtins array, at index CODE.
16856 * Returns the function decl or NULL_TREE, if the builtin was not added. */
16859 def_builtin (int mask, const char *name, tree type, enum ix86_builtins code)
/* Stays NULL_TREE unless the builtin is actually registered below. */
16861 tree decl = NULL_TREE;
/* First test: some ISA bit requested by MASK is enabled.  Second test: a
 * MASK carrying OPTION_MASK_ISA_64BIT additionally requires TARGET_64BIT. */
16863 if (mask & ix86_isa_flags
16864 && (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT))
/* Register the function under the BUILT_IN_MD class with CODE as its
 * function code. */
16866 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
16868 ix86_builtins[(int) code] = decl;
16874 /* Like def_builtin, but also marks the function decl "const" by setting
 * TREE_READONLY on it.  MASK, NAME, TYPE and CODE are handed to def_builtin
 * unchanged.  def_builtin yields NULL_TREE when it does not add the
 * builtin, so DECL is only a real decl when the builtin was registered. */
16877 def_builtin_const (int mask, const char *name, tree type,
16878 enum ix86_builtins code)
16880 tree decl = def_builtin (mask, name, type, code);
16882 TREE_READONLY (decl) = 1;
16886 /* Bits for builtin_description.flag. */
16888 /* Set when we don't support the comparison natively, and should
16889 swap_comparison in order to support it.  For example, the
 * __builtin_ia32_cmpgtps descriptor is written as LT with this bit set. */
16890 #define BUILTIN_DESC_SWAP_OPERANDS 1
/* One row of a builtin descriptor table (bdesc_comi, bdesc_ptest,
 * bdesc_crc32, ...). */
16892 struct builtin_description
16894 const unsigned int mask; /* OPTION_MASK_ISA_* bits for the builtin.  */
16895 const enum insn_code icode; /* CODE_FOR_* insn pattern for the builtin.  */
16896 const char *const name; /* Builtin function name; 0 in some rows.  */
16897 const enum ix86_builtins code; /* IX86_BUILTIN_* code of the builtin.  */
16898 const enum rtx_code comparison; /* rtx comparison code, or UNKNOWN.  */
/* Descriptors for the comi/ucomi comparison builtins.  Rows with
 * OPTION_MASK_ISA_SSE use the sse_comi/sse_ucomi patterns (the *SS codes);
 * rows with OPTION_MASK_ISA_SSE2 use sse2_comi/sse2_ucomi (the *SD codes).
 * Each row carries the rtx comparison code for its builtin; the last field
 * is 0 throughout. */
16902 static const struct builtin_description bdesc_comi[] =
16904 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
16905 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
16906 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
16907 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
16908 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
16909 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
16910 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
16911 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
16912 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
16913 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
16914 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
16915 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
16916 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
16917 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
16918 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
16919 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
16920 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
16921 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
16922 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
16923 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
16924 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
16925 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
16926 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
16927 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
/* Descriptors for the SSE4.1 ptest builtins.  All three rows share the
 * sse4_1_ptest pattern and differ only in the builtin and in the rtx
 * comparison code (EQ, LTU, GTU) attached to it. */
16930 static const struct builtin_description bdesc_ptest[] =
16933 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, 0 },
16934 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, 0 },
16935 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, 0 },
/* Descriptors for the SSE4.2 pcmpestr builtins.  Every row uses the
 * sse4_2_pcmpestr pattern with comparison UNKNOWN.  The last field is 0 for
 * the pcmpestri128 and pcmpestrm128 rows and a CC*mode value cast to int
 * (CCAmode, CCCmode, CCOmode, CCSmode, CCZmode) for the remaining rows. */
16938 static const struct builtin_description bdesc_pcmpestr[] =
16941 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
16942 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
16943 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
16944 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
16945 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
16946 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
16947 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
/* Descriptors for the SSE4.2 pcmpistr builtins, laid out like
 * bdesc_pcmpestr.  Every row uses the sse4_2_pcmpistr pattern with
 * comparison UNKNOWN.  The last field is 0 for the pcmpistri128 and
 * pcmpistrm128 rows and a CC*mode value cast to int for the remaining rows. */
16950 static const struct builtin_description bdesc_pcmpistr[] =
16953 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
16954 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
16955 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
16956 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
16957 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
16958 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
16959 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
/* Descriptors for the SSE4.2 crc32 builtins, one row per operand mode
 * (QI, HI, SI, DI).  The name field is 0 in every row.
 * OPTION_MASK_ISA_64BIT marks a builtin that additionally needs
 * TARGET_64BIT (see the mask test in def_builtin).  Only the DImode form
 * takes a 64-bit operand, so that bit belongs on the crc32di row and not on
 * the crc32qi row; this matches how the 64-bit cvtsi2ssq row is marked in
 * bdesc_2arg. */
16962 static const struct builtin_description bdesc_crc32[] =
16965 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32qi, 0, IX86_BUILTIN_CRC32QI, UNKNOWN, 0 },
16966 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32hi, 0, IX86_BUILTIN_CRC32HI, UNKNOWN, 0 },
16967 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32si, 0, IX86_BUILTIN_CRC32SI, UNKNOWN, 0 },
16968 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, 0, IX86_BUILTIN_CRC32DI, UNKNOWN, 0 },
16971 /* SSE builtins with 3 arguments and the last argument must be an immediate or xmm0.
 * Every row here is gated on OPTION_MASK_ISA_SSE4_1 and has comparison
 * UNKNOWN.  The roundsd and roundss rows have a 0 name, unlike the other
 * rows, which carry their "__builtin_ia32_*" name. */
16972 static const struct builtin_description bdesc_sse_3arg[] =
16975 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, 0 },
16976 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, 0 },
16977 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, 0 },
16978 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, 0 },
16979 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, 0 },
16980 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, 0 },
16981 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, 0 },
16982 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, 0 },
16983 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, 0 },
16984 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, 0 },
16985 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_roundsd, 0, IX86_BUILTIN_ROUNDSD, UNKNOWN, 0 },
16986 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_roundss, 0, IX86_BUILTIN_ROUNDSS, UNKNOWN, 0 },
16989 static const struct builtin_description bdesc_2arg[] =
16992 { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, 0 },
16993 { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, 0 },
16994 { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, 0 },
16995 { OPTION_MASK_ISA_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, 0 },
16996 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, 0 },
16997 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, 0 },
16998 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, 0 },
16999 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, 0 },
17001 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
17002 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
17003 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
17004 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, BUILTIN_DESC_SWAP_OPERANDS },
17005 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, BUILTIN_DESC_SWAP_OPERANDS },
17006 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
17007 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, 0 },
17008 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, 0 },
17009 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, 0 },
17010 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, BUILTIN_DESC_SWAP_OPERANDS },
17011 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, BUILTIN_DESC_SWAP_OPERANDS },
17012 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, 0 },
17013 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
17014 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
17015 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
17016 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
17017 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, 0 },
17018 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, 0 },
17019 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, 0 },
17020 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE, BUILTIN_DESC_SWAP_OPERANDS },
17021 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT, BUILTIN_DESC_SWAP_OPERANDS },
17022 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, 0 },
17024 { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, 0 },
17025 { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, 0 },
17026 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, 0 },
17027 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, 0 },
17029 { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, 0 },
17030 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, 0 },
17031 { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, 0 },
17032 { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, 0 },
17034 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, 0 },
17035 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, 0 },
17036 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, 0 },
17037 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, 0 },
17038 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, 0 },
17041 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, 0 },
17042 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, 0 },
17043 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, 0 },
17044 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, 0 },
17045 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, 0 },
17046 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, 0 },
17047 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, 0 },
17048 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, 0 },
17050 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, 0 },
17051 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, 0 },
17052 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, 0 },
17053 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, 0 },
17054 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, 0 },
17055 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, 0 },
17056 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, 0 },
17057 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, 0 },
17059 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, 0 },
17060 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, 0 },
17061 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, 0 },
17063 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, 0 },
17064 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_nandv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, 0 },
17065 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, 0 },
17066 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, 0 },
17068 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, 0 },
17069 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, 0 },
17071 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, 0 },
17072 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, 0 },
17073 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, 0 },
17074 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, 0 },
17075 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, 0 },
17076 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, 0 },
17078 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, 0 },
17079 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, 0 },
17080 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, 0 },
17081 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, 0 },
17083 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, 0 },
17084 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, 0 },
17085 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, 0 },
17086 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, 0 },
17087 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, 0 },
17088 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, 0 },
17091 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, UNKNOWN, 0 },
17092 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, UNKNOWN, 0 },
17093 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, UNKNOWN, 0 },
17095 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, UNKNOWN, 0 },
17096 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, UNKNOWN, 0 },
17097 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, UNKNOWN, 0 },
17099 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, UNKNOWN, 0 },
17100 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, UNKNOWN, 0 },
17101 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLD, UNKNOWN, 0 },
17102 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, UNKNOWN, 0 },
17103 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, UNKNOWN, 0 },
17104 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, UNKNOWN, 0 },
17106 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, UNKNOWN, 0 },
17107 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, UNKNOWN, 0 },
17108 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLD, UNKNOWN, 0 },
17109 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, UNKNOWN, 0 },
17110 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, UNKNOWN, 0 },
17111 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, UNKNOWN, 0 },
17113 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, UNKNOWN, 0 },
17114 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, UNKNOWN, 0 },
17115 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRAD, UNKNOWN, 0 },
17116 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRADI, UNKNOWN, 0 },
17118 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, UNKNOWN, 0 },
17119 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, UNKNOWN, 0 },
17122 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, 0 },
17123 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, 0 },
17124 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, 0 },
17125 { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, 0 },
17126 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, 0 },
17127 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, 0 },
17128 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, 0 },
17129 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, 0 },
17131 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
17132 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
17133 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
17134 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, BUILTIN_DESC_SWAP_OPERANDS },
17135 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, BUILTIN_DESC_SWAP_OPERANDS },
17136 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
17137 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, 0 },
17138 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, 0 },
17139 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, 0 },
17140 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, BUILTIN_DESC_SWAP_OPERANDS },
17141 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, BUILTIN_DESC_SWAP_OPERANDS },
17142 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, 0 },
17143 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
17144 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
17145 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
17146 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
17147 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, 0 },
17148 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, 0 },
17149 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, 0 },
17150 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, 0 },
17152 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, 0 },
17153 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, 0 },
17154 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, 0 },
17155 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, 0 },
17157 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, 0 },
17158 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, 0 },
17159 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, 0 },
17160 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, 0 },
17162 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, 0 },
17163 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, 0 },
17164 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, 0 },
17166 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, 0 },
17169 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, 0 },
17170 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, 0 },
17171 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, 0 },
17172 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, 0 },
17173 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, 0 },
17174 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, 0 },
17175 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, 0 },
17176 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, 0 },
17178 { OPTION_MASK_ISA_MMX, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, 0 },
17179 { OPTION_MASK_ISA_MMX, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, 0 },
17180 { OPTION_MASK_ISA_MMX, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, 0 },
17181 { OPTION_MASK_ISA_MMX, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, 0 },
17182 { OPTION_MASK_ISA_MMX, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, 0 },
17183 { OPTION_MASK_ISA_MMX, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, 0 },
17184 { OPTION_MASK_ISA_MMX, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, 0 },
17185 { OPTION_MASK_ISA_MMX, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, 0 },
17187 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, 0 },
17188 { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN, 0 },
17190 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, 0 },
17191 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, 0 },
17192 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, 0 },
17193 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, 0 },
17195 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, 0 },
17196 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, 0 },
17198 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, 0 },
17199 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, 0 },
17200 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, 0 },
17201 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, 0 },
17202 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, 0 },
17203 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, 0 },
17205 { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, 0 },
17206 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, 0 },
17207 { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, 0 },
17208 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, 0 },
17210 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, 0 },
17211 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, 0 },
17212 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, 0 },
17213 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, 0 },
17214 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, 0 },
17215 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, 0 },
17216 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, 0 },
17217 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, 0 },
17219 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, 0 },
17220 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, 0 },
17221 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, 0 },
17223 { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, 0 },
17224 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, UNKNOWN, 0 },
17226 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulsidi3, 0, IX86_BUILTIN_PMULUDQ, UNKNOWN, 0 },
17227 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv2siv2di3, 0, IX86_BUILTIN_PMULUDQ128, UNKNOWN, 0 },
17229 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, UNKNOWN, 0 },
17230 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, UNKNOWN, 0 },
17231 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, UNKNOWN, 0 },
17233 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, UNKNOWN, 0 },
17234 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, UNKNOWN, 0 },
17235 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, UNKNOWN, 0 },
17237 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, UNKNOWN, 0 },
17238 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, UNKNOWN, 0 },
17240 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, UNKNOWN, 0 },
17242 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, UNKNOWN, 0 },
17243 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, UNKNOWN, 0 },
17244 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, UNKNOWN, 0 },
17245 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, UNKNOWN, 0 },
17248 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, 0 },
17249 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, 0 },
17250 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, 0 },
17251 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, 0 },
17252 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, 0 },
17253 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, 0 },
17256 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, 0 },
17257 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, 0 },
17258 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, 0 },
17259 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, 0 },
17260 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, 0 },
17261 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, 0 },
17262 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, 0 },
17263 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, 0 },
17264 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, 0 },
17265 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, 0 },
17266 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, 0 },
17267 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, 0 },
17268 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubswv8hi3, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, 0 },
17269 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubswv4hi3, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, 0 },
17270 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, 0 },
17271 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, 0 },
17272 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, 0 },
17273 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, 0 },
17274 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, 0 },
17275 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, 0 },
17276 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, 0 },
17277 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, 0 },
17278 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, 0 },
17279 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, 0 },
17282 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, 0 },
17283 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, 0 },
17284 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, 0 },
17285 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, 0 },
17286 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, 0 },
17287 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, 0 },
17288 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, 0 },
17289 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, 0 },
17290 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, 0 },
17291 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, 0 },
17292 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, 0, IX86_BUILTIN_PMULDQ128, UNKNOWN, 0 },
17293 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, 0 },
17296 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, 0 },
/* Descriptor table for the one-argument MMX/SSE builtins.  Each
   initializer supplies six values, in order: an OPTION_MASK_ISA_* mask
   (some entries OR two masks together), a CODE_FOR_* insn code, the
   builtin's name string or 0 when no name is given here, its
   IX86_BUILTIN_* code, and two trailing values that are UNKNOWN and 0
   for every entry in this table.
   NOTE(review): entries whose name is 0 are presumably registered under
   an explicit name elsewhere in the file -- confirm against the code
   that walks this table.  */
17299 static const struct builtin_description bdesc_1arg[] =
/* Entries whose mask includes OPTION_MASK_ISA_SSE.  */
17301 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, UNKNOWN, 0 },
17302 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, UNKNOWN, 0 },
17304 { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, UNKNOWN, 0 },
17305 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, UNKNOWN, 0 },
17306 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, UNKNOWN, 0 },
17308 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, UNKNOWN, 0 },
17309 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, UNKNOWN, 0 },
17310 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, UNKNOWN, 0 },
17311 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, UNKNOWN, 0 },
17312 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, UNKNOWN, 0 },
17313 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, 0 },
/* Entries whose mask includes OPTION_MASK_ISA_SSE2.  */
17315 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, UNKNOWN, 0 },
17316 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, UNKNOWN, 0 },
17318 { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, UNKNOWN, 0 },
17320 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, UNKNOWN, 0 },
17321 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, UNKNOWN, 0 },
17323 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, UNKNOWN, 0 },
17324 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, UNKNOWN, 0 },
17325 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, UNKNOWN, 0 },
17326 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, 0 },
17327 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, UNKNOWN, 0 },
17329 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, UNKNOWN, 0 },
17331 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, UNKNOWN, 0 },
17332 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, UNKNOWN, 0 },
17333 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, UNKNOWN, 0 },
17334 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, 0 },
17336 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, UNKNOWN, 0 },
17337 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, UNKNOWN, 0 },
17338 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, 0 },
/* Entries whose mask is OPTION_MASK_ISA_SSE3; these carry a name string.  */
17341 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, 0 },
17342 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, 0 },
/* Entries whose mask is OPTION_MASK_ISA_SSSE3 (the abs* insn codes);
   names ending in "128" are paired with the V16QI/V8HI/V4SI codes, the
   others with the V8QI/V4HI/V2SI codes.  */
17345 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, 0 },
17346 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, 0 },
17347 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, 0 },
17348 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, 0 },
17349 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, 0 },
17350 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, 0 },
/* Entries whose mask is OPTION_MASK_ISA_SSE4_1: the extend* codes
   (PMOVSX*), the zero_extend* codes (PMOVZX*), then phminposuw.  */
17353 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv8qiv8hi2, 0, IX86_BUILTIN_PMOVSXBW128, UNKNOWN, 0 },
17354 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv4qiv4si2, 0, IX86_BUILTIN_PMOVSXBD128, UNKNOWN, 0 },
17355 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2qiv2di2, 0, IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, 0 },
17356 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv4hiv4si2, 0, IX86_BUILTIN_PMOVSXWD128, UNKNOWN, 0 },
17357 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2hiv2di2, 0, IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, 0 },
17358 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2siv2di2, 0, IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, 0 },
17359 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, 0, IX86_BUILTIN_PMOVZXBW128, UNKNOWN, 0 },
17360 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, 0, IX86_BUILTIN_PMOVZXBD128, UNKNOWN, 0 },
17361 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, 0, IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, 0 },
17362 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, 0, IX86_BUILTIN_PMOVZXWD128, UNKNOWN, 0 },
17363 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, 0, IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, 0 },
17364 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, 0, IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, 0 },
17365 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, 0 },
17367 /* Fake 1 arg builtins with a constant smaller than 8 bits as the 2nd arg. */
17368 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_roundpd, 0, IX86_BUILTIN_ROUNDPD, UNKNOWN, 0 },
17369 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_roundps, 0, IX86_BUILTIN_ROUNDPS, UNKNOWN, 0 },
17372 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
17373 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX builtins. */
17376 ix86_init_mmx_sse_builtins (void)
17378 const struct builtin_description * d;
17381 tree V16QI_type_node = build_vector_type_for_mode (char_type_node, V16QImode);
17382 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
17383 tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
17384 tree V2DI_type_node
17385 = build_vector_type_for_mode (long_long_integer_type_node, V2DImode);
17386 tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
17387 tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
17388 tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
17389 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
17390 tree V8QI_type_node = build_vector_type_for_mode (char_type_node, V8QImode);
17391 tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);
17393 tree pchar_type_node = build_pointer_type (char_type_node);
17394 tree pcchar_type_node = build_pointer_type (
17395 build_type_variant (char_type_node, 1, 0));
17396 tree pfloat_type_node = build_pointer_type (float_type_node);
17397 tree pcfloat_type_node = build_pointer_type (
17398 build_type_variant (float_type_node, 1, 0));
17399 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
17400 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
17401 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
17404 tree int_ftype_v4sf_v4sf
17405 = build_function_type_list (integer_type_node,
17406 V4SF_type_node, V4SF_type_node, NULL_TREE);
17407 tree v4si_ftype_v4sf_v4sf
17408 = build_function_type_list (V4SI_type_node,
17409 V4SF_type_node, V4SF_type_node, NULL_TREE);
17410 /* MMX/SSE/integer conversions. */
17411 tree int_ftype_v4sf
17412 = build_function_type_list (integer_type_node,
17413 V4SF_type_node, NULL_TREE);
17414 tree int64_ftype_v4sf
17415 = build_function_type_list (long_long_integer_type_node,
17416 V4SF_type_node, NULL_TREE);
17417 tree int_ftype_v8qi
17418 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
17419 tree v4sf_ftype_v4sf_int
17420 = build_function_type_list (V4SF_type_node,
17421 V4SF_type_node, integer_type_node, NULL_TREE);
17422 tree v4sf_ftype_v4sf_int64
17423 = build_function_type_list (V4SF_type_node,
17424 V4SF_type_node, long_long_integer_type_node,
17426 tree v4sf_ftype_v4sf_v2si
17427 = build_function_type_list (V4SF_type_node,
17428 V4SF_type_node, V2SI_type_node, NULL_TREE);
17430 /* Miscellaneous. */
17431 tree v8qi_ftype_v4hi_v4hi
17432 = build_function_type_list (V8QI_type_node,
17433 V4HI_type_node, V4HI_type_node, NULL_TREE);
17434 tree v4hi_ftype_v2si_v2si
17435 = build_function_type_list (V4HI_type_node,
17436 V2SI_type_node, V2SI_type_node, NULL_TREE);
17437 tree v4sf_ftype_v4sf_v4sf_int
17438 = build_function_type_list (V4SF_type_node,
17439 V4SF_type_node, V4SF_type_node,
17440 integer_type_node, NULL_TREE);
17441 tree v2si_ftype_v4hi_v4hi
17442 = build_function_type_list (V2SI_type_node,
17443 V4HI_type_node, V4HI_type_node, NULL_TREE);
17444 tree v4hi_ftype_v4hi_int
17445 = build_function_type_list (V4HI_type_node,
17446 V4HI_type_node, integer_type_node, NULL_TREE);
17447 tree v4hi_ftype_v4hi_di
17448 = build_function_type_list (V4HI_type_node,
17449 V4HI_type_node, long_long_unsigned_type_node,
17451 tree v2si_ftype_v2si_di
17452 = build_function_type_list (V2SI_type_node,
17453 V2SI_type_node, long_long_unsigned_type_node,
17455 tree void_ftype_void
17456 = build_function_type (void_type_node, void_list_node);
17457 tree void_ftype_unsigned
17458 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
17459 tree void_ftype_unsigned_unsigned
17460 = build_function_type_list (void_type_node, unsigned_type_node,
17461 unsigned_type_node, NULL_TREE);
17462 tree void_ftype_pcvoid_unsigned_unsigned
17463 = build_function_type_list (void_type_node, const_ptr_type_node,
17464 unsigned_type_node, unsigned_type_node,
17466 tree unsigned_ftype_void
17467 = build_function_type (unsigned_type_node, void_list_node);
17468 tree v2si_ftype_v4sf
17469 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
17470 /* Loads/stores. */
17471 tree void_ftype_v8qi_v8qi_pchar
17472 = build_function_type_list (void_type_node,
17473 V8QI_type_node, V8QI_type_node,
17474 pchar_type_node, NULL_TREE);
17475 tree v4sf_ftype_pcfloat
17476 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
17477 /* @@@ the type is bogus */
17478 tree v4sf_ftype_v4sf_pv2si
17479 = build_function_type_list (V4SF_type_node,
17480 V4SF_type_node, pv2si_type_node, NULL_TREE);
17481 tree void_ftype_pv2si_v4sf
17482 = build_function_type_list (void_type_node,
17483 pv2si_type_node, V4SF_type_node, NULL_TREE);
17484 tree void_ftype_pfloat_v4sf
17485 = build_function_type_list (void_type_node,
17486 pfloat_type_node, V4SF_type_node, NULL_TREE);
17487 tree void_ftype_pdi_di
17488 = build_function_type_list (void_type_node,
17489 pdi_type_node, long_long_unsigned_type_node,
17491 tree void_ftype_pv2di_v2di
17492 = build_function_type_list (void_type_node,
17493 pv2di_type_node, V2DI_type_node, NULL_TREE);
17494 /* Normal vector unops. */
17495 tree v4sf_ftype_v4sf
17496 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
17497 tree v16qi_ftype_v16qi
17498 = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE);
17499 tree v8hi_ftype_v8hi
17500 = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE);
17501 tree v4si_ftype_v4si
17502 = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
17503 tree v8qi_ftype_v8qi
17504 = build_function_type_list (V8QI_type_node, V8QI_type_node, NULL_TREE);
17505 tree v4hi_ftype_v4hi
17506 = build_function_type_list (V4HI_type_node, V4HI_type_node, NULL_TREE);
17508 /* Normal vector binops. */
17509 tree v4sf_ftype_v4sf_v4sf
17510 = build_function_type_list (V4SF_type_node,
17511 V4SF_type_node, V4SF_type_node, NULL_TREE);
17512 tree v8qi_ftype_v8qi_v8qi
17513 = build_function_type_list (V8QI_type_node,
17514 V8QI_type_node, V8QI_type_node, NULL_TREE);
17515 tree v4hi_ftype_v4hi_v4hi
17516 = build_function_type_list (V4HI_type_node,
17517 V4HI_type_node, V4HI_type_node, NULL_TREE);
17518 tree v2si_ftype_v2si_v2si
17519 = build_function_type_list (V2SI_type_node,
17520 V2SI_type_node, V2SI_type_node, NULL_TREE);
17521 tree di_ftype_di_di
17522 = build_function_type_list (long_long_unsigned_type_node,
17523 long_long_unsigned_type_node,
17524 long_long_unsigned_type_node, NULL_TREE);
17526 tree di_ftype_di_di_int
17527 = build_function_type_list (long_long_unsigned_type_node,
17528 long_long_unsigned_type_node,
17529 long_long_unsigned_type_node,
17530 integer_type_node, NULL_TREE);
17532 tree v2si_ftype_v2sf
17533 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
17534 tree v2sf_ftype_v2si
17535 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
17536 tree v2si_ftype_v2si
17537 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
17538 tree v2sf_ftype_v2sf
17539 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
17540 tree v2sf_ftype_v2sf_v2sf
17541 = build_function_type_list (V2SF_type_node,
17542 V2SF_type_node, V2SF_type_node, NULL_TREE);
17543 tree v2si_ftype_v2sf_v2sf
17544 = build_function_type_list (V2SI_type_node,
17545 V2SF_type_node, V2SF_type_node, NULL_TREE);
17546 tree pint_type_node = build_pointer_type (integer_type_node);
17547 tree pdouble_type_node = build_pointer_type (double_type_node);
17548 tree pcdouble_type_node = build_pointer_type (
17549 build_type_variant (double_type_node, 1, 0));
17550 tree int_ftype_v2df_v2df
17551 = build_function_type_list (integer_type_node,
17552 V2DF_type_node, V2DF_type_node, NULL_TREE);
17554 tree void_ftype_pcvoid
17555 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
17556 tree v4sf_ftype_v4si
17557 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
17558 tree v4si_ftype_v4sf
17559 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
17560 tree v2df_ftype_v4si
17561 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
17562 tree v4si_ftype_v2df
17563 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
17564 tree v4si_ftype_v2df_v2df
17565 = build_function_type_list (V4SI_type_node,
17566 V2DF_type_node, V2DF_type_node, NULL_TREE);
17567 tree v2si_ftype_v2df
17568 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
17569 tree v4sf_ftype_v2df
17570 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
17571 tree v2df_ftype_v2si
17572 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
17573 tree v2df_ftype_v4sf
17574 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
17575 tree int_ftype_v2df
17576 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
17577 tree int64_ftype_v2df
17578 = build_function_type_list (long_long_integer_type_node,
17579 V2DF_type_node, NULL_TREE);
17580 tree v2df_ftype_v2df_int
17581 = build_function_type_list (V2DF_type_node,
17582 V2DF_type_node, integer_type_node, NULL_TREE);
17583 tree v2df_ftype_v2df_int64
17584 = build_function_type_list (V2DF_type_node,
17585 V2DF_type_node, long_long_integer_type_node,
17587 tree v4sf_ftype_v4sf_v2df
17588 = build_function_type_list (V4SF_type_node,
17589 V4SF_type_node, V2DF_type_node, NULL_TREE);
17590 tree v2df_ftype_v2df_v4sf
17591 = build_function_type_list (V2DF_type_node,
17592 V2DF_type_node, V4SF_type_node, NULL_TREE);
17593 tree v2df_ftype_v2df_v2df_int
17594 = build_function_type_list (V2DF_type_node,
17595 V2DF_type_node, V2DF_type_node,
17598 tree v2df_ftype_v2df_pcdouble
17599 = build_function_type_list (V2DF_type_node,
17600 V2DF_type_node, pcdouble_type_node, NULL_TREE);
17601 tree void_ftype_pdouble_v2df
17602 = build_function_type_list (void_type_node,
17603 pdouble_type_node, V2DF_type_node, NULL_TREE);
17604 tree void_ftype_pint_int
17605 = build_function_type_list (void_type_node,
17606 pint_type_node, integer_type_node, NULL_TREE);
17607 tree void_ftype_v16qi_v16qi_pchar
17608 = build_function_type_list (void_type_node,
17609 V16QI_type_node, V16QI_type_node,
17610 pchar_type_node, NULL_TREE);
17611 tree v2df_ftype_pcdouble
17612 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
17613 tree v2df_ftype_v2df_v2df
17614 = build_function_type_list (V2DF_type_node,
17615 V2DF_type_node, V2DF_type_node, NULL_TREE);
17616 tree v16qi_ftype_v16qi_v16qi
17617 = build_function_type_list (V16QI_type_node,
17618 V16QI_type_node, V16QI_type_node, NULL_TREE);
17619 tree v8hi_ftype_v8hi_v8hi
17620 = build_function_type_list (V8HI_type_node,
17621 V8HI_type_node, V8HI_type_node, NULL_TREE);
17622 tree v4si_ftype_v4si_v4si
17623 = build_function_type_list (V4SI_type_node,
17624 V4SI_type_node, V4SI_type_node, NULL_TREE);
17625 tree v2di_ftype_v2di_v2di
17626 = build_function_type_list (V2DI_type_node,
17627 V2DI_type_node, V2DI_type_node, NULL_TREE);
17628 tree v2di_ftype_v2df_v2df
17629 = build_function_type_list (V2DI_type_node,
17630 V2DF_type_node, V2DF_type_node, NULL_TREE);
17631 tree v2df_ftype_v2df
17632 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
17633 tree v2di_ftype_v2di_int
17634 = build_function_type_list (V2DI_type_node,
17635 V2DI_type_node, integer_type_node, NULL_TREE);
17636 tree v2di_ftype_v2di_v2di_int
17637 = build_function_type_list (V2DI_type_node, V2DI_type_node,
17638 V2DI_type_node, integer_type_node, NULL_TREE);
17639 tree v4si_ftype_v4si_int
17640 = build_function_type_list (V4SI_type_node,
17641 V4SI_type_node, integer_type_node, NULL_TREE);
17642 tree v8hi_ftype_v8hi_int
17643 = build_function_type_list (V8HI_type_node,
17644 V8HI_type_node, integer_type_node, NULL_TREE);
17645 tree v4si_ftype_v8hi_v8hi
17646 = build_function_type_list (V4SI_type_node,
17647 V8HI_type_node, V8HI_type_node, NULL_TREE);
17648 tree di_ftype_v8qi_v8qi
17649 = build_function_type_list (long_long_unsigned_type_node,
17650 V8QI_type_node, V8QI_type_node, NULL_TREE);
17651 tree di_ftype_v2si_v2si
17652 = build_function_type_list (long_long_unsigned_type_node,
17653 V2SI_type_node, V2SI_type_node, NULL_TREE);
17654 tree v2di_ftype_v16qi_v16qi
17655 = build_function_type_list (V2DI_type_node,
17656 V16QI_type_node, V16QI_type_node, NULL_TREE);
17657 tree v2di_ftype_v4si_v4si
17658 = build_function_type_list (V2DI_type_node,
17659 V4SI_type_node, V4SI_type_node, NULL_TREE);
17660 tree int_ftype_v16qi
17661 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
17662 tree v16qi_ftype_pcchar
17663 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
17664 tree void_ftype_pchar_v16qi
17665 = build_function_type_list (void_type_node,
17666 pchar_type_node, V16QI_type_node, NULL_TREE);
17668 tree v2di_ftype_v2di_unsigned_unsigned
17669 = build_function_type_list (V2DI_type_node, V2DI_type_node,
17670 unsigned_type_node, unsigned_type_node,
17672 tree v2di_ftype_v2di_v2di_unsigned_unsigned
17673 = build_function_type_list (V2DI_type_node, V2DI_type_node, V2DI_type_node,
17674 unsigned_type_node, unsigned_type_node,
17676 tree v2di_ftype_v2di_v16qi
17677 = build_function_type_list (V2DI_type_node, V2DI_type_node, V16QI_type_node,
17679 tree v2df_ftype_v2df_v2df_v2df
17680 = build_function_type_list (V2DF_type_node,
17681 V2DF_type_node, V2DF_type_node,
17682 V2DF_type_node, NULL_TREE);
17683 tree v4sf_ftype_v4sf_v4sf_v4sf
17684 = build_function_type_list (V4SF_type_node,
17685 V4SF_type_node, V4SF_type_node,
17686 V4SF_type_node, NULL_TREE);
17687 tree v8hi_ftype_v16qi
17688 = build_function_type_list (V8HI_type_node, V16QI_type_node,
17690 tree v4si_ftype_v16qi
17691 = build_function_type_list (V4SI_type_node, V16QI_type_node,
17693 tree v2di_ftype_v16qi
17694 = build_function_type_list (V2DI_type_node, V16QI_type_node,
17696 tree v4si_ftype_v8hi
17697 = build_function_type_list (V4SI_type_node, V8HI_type_node,
17699 tree v2di_ftype_v8hi
17700 = build_function_type_list (V2DI_type_node, V8HI_type_node,
17702 tree v2di_ftype_v4si
17703 = build_function_type_list (V2DI_type_node, V4SI_type_node,
17705 tree v2di_ftype_pv2di
17706 = build_function_type_list (V2DI_type_node, pv2di_type_node,
17708 tree v16qi_ftype_v16qi_v16qi_int
17709 = build_function_type_list (V16QI_type_node, V16QI_type_node,
17710 V16QI_type_node, integer_type_node,
17712 tree v16qi_ftype_v16qi_v16qi_v16qi
17713 = build_function_type_list (V16QI_type_node, V16QI_type_node,
17714 V16QI_type_node, V16QI_type_node,
17716 tree v8hi_ftype_v8hi_v8hi_int
17717 = build_function_type_list (V8HI_type_node, V8HI_type_node,
17718 V8HI_type_node, integer_type_node,
17720 tree v4si_ftype_v4si_v4si_int
17721 = build_function_type_list (V4SI_type_node, V4SI_type_node,
17722 V4SI_type_node, integer_type_node,
17724 tree int_ftype_v2di_v2di
17725 = build_function_type_list (integer_type_node,
17726 V2DI_type_node, V2DI_type_node,
17728 tree int_ftype_v16qi_int_v16qi_int_int
17729 = build_function_type_list (integer_type_node,
17736 tree v16qi_ftype_v16qi_int_v16qi_int_int
17737 = build_function_type_list (V16QI_type_node,
17744 tree int_ftype_v16qi_v16qi_int
17745 = build_function_type_list (integer_type_node,
17752 /* The __float80 type. */
17753 if (TYPE_MODE (long_double_type_node) == XFmode)
17754 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
17758 /* The __float80 type. */
17759 tree float80_type_node = make_node (REAL_TYPE);
17761 TYPE_PRECISION (float80_type_node) = 80;
17762 layout_type (float80_type_node);
17763 (*lang_hooks.types.register_builtin_type) (float80_type_node,
17769 tree float128_type_node = make_node (REAL_TYPE);
17771 TYPE_PRECISION (float128_type_node) = 128;
17772 layout_type (float128_type_node);
17773 (*lang_hooks.types.register_builtin_type) (float128_type_node,
17776 /* TFmode support builtins. */
17777 ftype = build_function_type (float128_type_node,
17779 def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_infq", ftype, IX86_BUILTIN_INFQ);
17781 ftype = build_function_type_list (float128_type_node,
17782 float128_type_node,
17784 def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_fabsq", ftype, IX86_BUILTIN_FABSQ);
17786 ftype = build_function_type_list (float128_type_node,
17787 float128_type_node,
17788 float128_type_node,
17790 def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_copysignq", ftype, IX86_BUILTIN_COPYSIGNQ);
17793 /* Add all SSE builtins that are more or less simple operations on three operands. */
17795 for (i = 0, d = bdesc_sse_3arg;
17796 i < ARRAY_SIZE (bdesc_sse_3arg);
17799 /* Use one of the operands; the target can have a different mode for
17800 mask-generating compares. */
17801 enum machine_mode mode;
17806 mode = insn_data[d->icode].operand[1].mode;
17811 type = v16qi_ftype_v16qi_v16qi_int;
17814 type = v8hi_ftype_v8hi_v8hi_int;
17817 type = v4si_ftype_v4si_v4si_int;
17820 type = v2di_ftype_v2di_v2di_int;
17823 type = v2df_ftype_v2df_v2df_int;
17826 type = v4sf_ftype_v4sf_v4sf_int;
17829 gcc_unreachable ();
17832 /* Override for variable blends. */
17835 case CODE_FOR_sse4_1_blendvpd:
17836 type = v2df_ftype_v2df_v2df_v2df;
17838 case CODE_FOR_sse4_1_blendvps:
17839 type = v4sf_ftype_v4sf_v4sf_v4sf;
17841 case CODE_FOR_sse4_1_pblendvb:
17842 type = v16qi_ftype_v16qi_v16qi_v16qi;
17848 def_builtin (d->mask, d->name, type, d->code);
17851 /* Add all builtins that are more or less simple operations on two operands. */
17853 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
17855 /* Use one of the operands; the target can have a different mode for
17856 mask-generating compares. */
17857 enum machine_mode mode;
17862 mode = insn_data[d->icode].operand[1].mode;
17867 type = v16qi_ftype_v16qi_v16qi;
17870 type = v8hi_ftype_v8hi_v8hi;
17873 type = v4si_ftype_v4si_v4si;
17876 type = v2di_ftype_v2di_v2di;
17879 type = v2df_ftype_v2df_v2df;
17882 type = v4sf_ftype_v4sf_v4sf;
17885 type = v8qi_ftype_v8qi_v8qi;
17888 type = v4hi_ftype_v4hi_v4hi;
17891 type = v2si_ftype_v2si_v2si;
17894 type = di_ftype_di_di;
17898 gcc_unreachable ();
17901 /* Override for comparisons. */
17902 if (d->icode == CODE_FOR_sse_maskcmpv4sf3
17903 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3)
17904 type = v4si_ftype_v4sf_v4sf;
17906 if (d->icode == CODE_FOR_sse2_maskcmpv2df3
17907 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
17908 type = v2di_ftype_v2df_v2df;
17910 if (d->icode == CODE_FOR_vec_pack_sfix_v2df)
17911 type = v4si_ftype_v2df_v2df;
17913 def_builtin_const (d->mask, d->name, type, d->code);
17916 /* Add all builtins that are more or less simple operations on 1 operand. */
17917 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
17919 enum machine_mode mode;
17924 mode = insn_data[d->icode].operand[1].mode;
17929 type = v16qi_ftype_v16qi;
17932 type = v8hi_ftype_v8hi;
17935 type = v4si_ftype_v4si;
17938 type = v2df_ftype_v2df;
17941 type = v4sf_ftype_v4sf;
17944 type = v8qi_ftype_v8qi;
17947 type = v4hi_ftype_v4hi;
17950 type = v2si_ftype_v2si;
17957 def_builtin (d->mask, d->name, type, d->code);
17960 /* pcmpestr[im] insns. */
17961 for (i = 0, d = bdesc_pcmpestr;
17962 i < ARRAY_SIZE (bdesc_pcmpestr);
17965 if (d->code == IX86_BUILTIN_PCMPESTRM128)
17966 ftype = v16qi_ftype_v16qi_int_v16qi_int_int;
17968 ftype = int_ftype_v16qi_int_v16qi_int_int;
17969 def_builtin (d->mask, d->name, ftype, d->code);
17972 /* pcmpistr[im] insns. */
17973 for (i = 0, d = bdesc_pcmpistr;
17974 i < ARRAY_SIZE (bdesc_pcmpistr);
17977 if (d->code == IX86_BUILTIN_PCMPISTRM128)
17978 ftype = v16qi_ftype_v16qi_v16qi_int;
17980 ftype = int_ftype_v16qi_v16qi_int;
17981 def_builtin (d->mask, d->name, ftype, d->code);
17984 /* Add the remaining MMX insns with somewhat more complicated types. */
17985 def_builtin (OPTION_MASK_ISA_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
17986 def_builtin (OPTION_MASK_ISA_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
17987 def_builtin (OPTION_MASK_ISA_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
17988 def_builtin (OPTION_MASK_ISA_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
17990 def_builtin (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
17991 def_builtin (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
17992 def_builtin (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
17994 def_builtin (OPTION_MASK_ISA_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
17995 def_builtin (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
17997 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
17998 def_builtin (OPTION_MASK_ISA_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
18000 /* comi/ucomi insns. */
18001 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
18002 if (d->mask == OPTION_MASK_ISA_SSE2)
18003 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
18005 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
18008 for (i = 0, d = bdesc_ptest; i < ARRAY_SIZE (bdesc_ptest); i++, d++)
18009 def_builtin (d->mask, d->name, int_ftype_v2di_v2di, d->code);
18011 def_builtin (OPTION_MASK_ISA_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
18012 def_builtin (OPTION_MASK_ISA_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
18013 def_builtin (OPTION_MASK_ISA_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
18015 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
18016 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
18017 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
18018 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
18019 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
18020 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
18021 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
18022 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
18023 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
18024 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
18025 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);
18027 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
18029 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
18030 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
18032 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
18033 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
18034 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
18035 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
18037 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
18038 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
18039 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
18040 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
18042 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
18044 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
18046 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
18047 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
18048 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
18049 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
18050 ftype = build_function_type_list (float_type_node,
18053 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_rsqrtf", ftype, IX86_BUILTIN_RSQRTF);
18054 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
18055 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
18057 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
18059 /* Original 3DNow! */
18060 def_builtin (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
18061 def_builtin (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
18062 def_builtin (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
18063 def_builtin (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
18064 def_builtin (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
18065 def_builtin (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
18066 def_builtin (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
18067 def_builtin (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
18068 def_builtin (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
18069 def_builtin (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
18070 def_builtin (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
18071 def_builtin (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
18072 def_builtin (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
18073 def_builtin (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
18074 def_builtin (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
18075 def_builtin (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
18076 def_builtin (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
18077 def_builtin (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
18078 def_builtin (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
18079 def_builtin (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
18081 /* 3DNow! extension as used in the Athlon CPU. */
18082 def_builtin (OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
18083 def_builtin (OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
18084 def_builtin (OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
18085 def_builtin (OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
18086 def_builtin (OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
18087 def_builtin (OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
18090 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
18092 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
18093 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
18095 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADHPD);
18096 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADLPD);
18098 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
18099 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
18100 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
18101 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
18102 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
18104 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
18105 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
18106 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
18107 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
18109 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
18110 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
18112 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
18114 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
18115 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
18117 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
18118 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
18119 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
18120 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
18121 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
18123 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
18125 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
18126 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
18127 def_builtin_const (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
18128 def_builtin_const (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);
18130 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
18131 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
18132 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
18134 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
18135 def_builtin_const (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
18136 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
18137 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
18139 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
18140 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
18141 x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
18143 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
18144 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
18146 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pmuludq", di_ftype_v2si_v2si, IX86_BUILTIN_PMULUDQ);
18147 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pmuludq128", v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULUDQ128);
18149 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
18150 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
18151 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
18152 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
18153 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSLLW128);
18154 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSLLD128);
18155 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
18157 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
18158 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
18159 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
18160 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
18161 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSRLW128);
18162 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSRLD128);
18163 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
18165 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
18166 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
18167 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSRAW128);
18168 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSRAD128);
18170 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
18172 /* Prescott New Instructions. */
18173 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor", void_ftype_pcvoid_unsigned_unsigned, IX86_BUILTIN_MONITOR);
18174 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait", void_ftype_unsigned_unsigned, IX86_BUILTIN_MWAIT);
18175 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_lddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);
18178 def_builtin (OPTION_MASK_ISA_SSSE3, "__builtin_ia32_palignr128", v2di_ftype_v2di_v2di_int, IX86_BUILTIN_PALIGNR128);
18179 def_builtin (OPTION_MASK_ISA_SSSE3, "__builtin_ia32_palignr", di_ftype_di_di_int, IX86_BUILTIN_PALIGNR);
18182 def_builtin (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_movntdqa", v2di_ftype_pv2di, IX86_BUILTIN_MOVNTDQA);
18183 def_builtin (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovsxbw128", v8hi_ftype_v16qi, IX86_BUILTIN_PMOVSXBW128);
18184 def_builtin (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovsxbd128", v4si_ftype_v16qi, IX86_BUILTIN_PMOVSXBD128);
18185 def_builtin (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovsxbq128", v2di_ftype_v16qi, IX86_BUILTIN_PMOVSXBQ128);
18186 def_builtin (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovsxwd128", v4si_ftype_v8hi, IX86_BUILTIN_PMOVSXWD128);
18187 def_builtin (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovsxwq128", v2di_ftype_v8hi, IX86_BUILTIN_PMOVSXWQ128);
18188 def_builtin (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovsxdq128", v2di_ftype_v4si, IX86_BUILTIN_PMOVSXDQ128);
18189 def_builtin (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovzxbw128", v8hi_ftype_v16qi, IX86_BUILTIN_PMOVZXBW128);
18190 def_builtin (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovzxbd128", v4si_ftype_v16qi, IX86_BUILTIN_PMOVZXBD128);
18191 def_builtin (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovzxbq128", v2di_ftype_v16qi, IX86_BUILTIN_PMOVZXBQ128);
18192 def_builtin (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovzxwd128", v4si_ftype_v8hi, IX86_BUILTIN_PMOVZXWD128);
18193 def_builtin (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovzxwq128", v2di_ftype_v8hi, IX86_BUILTIN_PMOVZXWQ128);
18194 def_builtin (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovzxdq128", v2di_ftype_v4si, IX86_BUILTIN_PMOVZXDQ128);
18195 def_builtin (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmuldq128", v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULDQ128);
18196 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_roundpd", v2df_ftype_v2df_int, IX86_BUILTIN_ROUNDPD);
18197 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_roundps", v4sf_ftype_v4sf_int, IX86_BUILTIN_ROUNDPS);
18198 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_roundsd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_ROUNDSD);
18199 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_roundss", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_ROUNDSS);
18202 ftype = build_function_type_list (unsigned_type_node,
18203 unsigned_type_node,
18204 unsigned_char_type_node,
18206 def_builtin (OPTION_MASK_ISA_SSE4_2, "__builtin_ia32_crc32qi", ftype, IX86_BUILTIN_CRC32QI);
18207 ftype = build_function_type_list (unsigned_type_node,
18208 unsigned_type_node,
18209 short_unsigned_type_node,
18211 def_builtin (OPTION_MASK_ISA_SSE4_2, "__builtin_ia32_crc32hi", ftype, IX86_BUILTIN_CRC32HI);
18212 ftype = build_function_type_list (unsigned_type_node,
18213 unsigned_type_node,
18214 unsigned_type_node,
18216 def_builtin (OPTION_MASK_ISA_SSE4_2, "__builtin_ia32_crc32si", ftype, IX86_BUILTIN_CRC32SI);
18217 ftype = build_function_type_list (long_long_unsigned_type_node,
18218 long_long_unsigned_type_node,
18219 long_long_unsigned_type_node,
18221 def_builtin (OPTION_MASK_ISA_SSE4_2, "__builtin_ia32_crc32di", ftype, IX86_BUILTIN_CRC32DI);
18223 /* AMDFAM10 SSE4A New built-ins */
18224 def_builtin (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_movntsd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTSD);
18225 def_builtin (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_movntss", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTSS);
18226 def_builtin (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_extrqi", v2di_ftype_v2di_unsigned_unsigned, IX86_BUILTIN_EXTRQI);
18227 def_builtin (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_extrq", v2di_ftype_v2di_v16qi, IX86_BUILTIN_EXTRQ);
18228 def_builtin (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_insertqi", v2di_ftype_v2di_v2di_unsigned_unsigned, IX86_BUILTIN_INSERTQI);
18229 def_builtin (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_insertq", v2di_ftype_v2di_v2di, IX86_BUILTIN_INSERTQ);
18231 /* Access to the vec_init patterns. */
18232 ftype = build_function_type_list (V2SI_type_node, integer_type_node,
18233 integer_type_node, NULL_TREE);
18234 def_builtin (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si", ftype, IX86_BUILTIN_VEC_INIT_V2SI);
18236 ftype = build_function_type_list (V4HI_type_node, short_integer_type_node,
18237 short_integer_type_node,
18238 short_integer_type_node,
18239 short_integer_type_node, NULL_TREE);
18240 def_builtin (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi", ftype, IX86_BUILTIN_VEC_INIT_V4HI);
18242 ftype = build_function_type_list (V8QI_type_node, char_type_node,
18243 char_type_node, char_type_node,
18244 char_type_node, char_type_node,
18245 char_type_node, char_type_node,
18246 char_type_node, NULL_TREE);
18247 def_builtin (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi", ftype, IX86_BUILTIN_VEC_INIT_V8QI);
18249 /* Access to the vec_extract patterns. */
18250 ftype = build_function_type_list (double_type_node, V2DF_type_node,
18251 integer_type_node, NULL_TREE);
18252 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df", ftype, IX86_BUILTIN_VEC_EXT_V2DF);
18254 ftype = build_function_type_list (long_long_integer_type_node,
18255 V2DI_type_node, integer_type_node,
18257 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di", ftype, IX86_BUILTIN_VEC_EXT_V2DI);
18259 ftype = build_function_type_list (float_type_node, V4SF_type_node,
18260 integer_type_node, NULL_TREE);
18261 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf", ftype, IX86_BUILTIN_VEC_EXT_V4SF);
18263 ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
18264 integer_type_node, NULL_TREE);
18265 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si", ftype, IX86_BUILTIN_VEC_EXT_V4SI);
18267 ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
18268 integer_type_node, NULL_TREE);
18269 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi", ftype, IX86_BUILTIN_VEC_EXT_V8HI);
18271 ftype = build_function_type_list (intHI_type_node, V4HI_type_node,
18272 integer_type_node, NULL_TREE);
18273 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_vec_ext_v4hi", ftype, IX86_BUILTIN_VEC_EXT_V4HI);
18275 ftype = build_function_type_list (intSI_type_node, V2SI_type_node,
18276 integer_type_node, NULL_TREE);
18277 def_builtin (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si", ftype, IX86_BUILTIN_VEC_EXT_V2SI);
18279 ftype = build_function_type_list (intQI_type_node, V16QI_type_node,
18280 integer_type_node, NULL_TREE);
18281 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi", ftype, IX86_BUILTIN_VEC_EXT_V16QI);
18283 /* Access to the vec_set patterns. */
18284 ftype = build_function_type_list (V2DI_type_node, V2DI_type_node,
18286 integer_type_node, NULL_TREE);
18287 def_builtin (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT, "__builtin_ia32_vec_set_v2di", ftype, IX86_BUILTIN_VEC_SET_V2DI);
18289 ftype = build_function_type_list (V4SF_type_node, V4SF_type_node,
18291 integer_type_node, NULL_TREE);
18292 def_builtin (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf", ftype, IX86_BUILTIN_VEC_SET_V4SF);
18294 ftype = build_function_type_list (V4SI_type_node, V4SI_type_node,
18296 integer_type_node, NULL_TREE);
18297 def_builtin (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si", ftype, IX86_BUILTIN_VEC_SET_V4SI);
18299 ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
18301 integer_type_node, NULL_TREE);
18302 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi", ftype, IX86_BUILTIN_VEC_SET_V8HI);
18304 ftype = build_function_type_list (V4HI_type_node, V4HI_type_node,
18306 integer_type_node, NULL_TREE);
18307 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_vec_set_v4hi", ftype, IX86_BUILTIN_VEC_SET_V4HI);
18309 ftype = build_function_type_list (V16QI_type_node, V16QI_type_node,
18311 integer_type_node, NULL_TREE);
18312 def_builtin (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi", ftype, IX86_BUILTIN_VEC_SET_V16QI);
/* Set up the builtins by calling ix86_init_mmx_sse_builtins.
   NOTE(review): the embedded numbering jumps from 18316 to 18319, so the
   return type, the braces and possibly a guarding condition are not
   visible in this listing -- confirm against the full file.  */
18316 ix86_init_builtins (void)
18319 ix86_init_mmx_sse_builtins ();
18322 /* Errors in the source file can cause expand_expr to return const0_rtx
18323 where we expect a vector. To avoid crashing, use one of the vector
18324 clear instructions. */
/* X is an already expanded operand and MODE the mode the insn expects.
   When X is the scalar const0_rtx it is replaced by CONST0_RTX (MODE);
   any other X is left as it is.
   NOTE(review): the return statement is not visible in this listing;
   callers assign the result back to their operand, so presumably X is
   returned -- confirm.  */
18326 safe_vector_operand (rtx x, enum machine_mode mode)
18328 if (x == const0_rtx)
18329 x = CONST0_RTX (mode);
18333 /* Subroutine of ix86_expand_builtin to take care of SSE insns with
18334 4 operands. The third argument must be a constant smaller than 8
   bits or, for the blendvpd/blendvps/pblendvb patterns, a value that is
   copied into a register.  */
/* ICODE selects the insn pattern.  EXP is the CALL_EXPR whose first three
   arguments are expanded into insn operands 1-3; insn operand 0 is the
   result.
   NOTE(review): TARGET is used below, but its parameter declaration, the
   declaration of PAT and the return statements are not visible in this
   listing -- confirm against the full file.  */
18338 ix86_expand_sse_4_operands_builtin (enum insn_code icode, tree exp,
18342 tree arg0 = CALL_EXPR_ARG (exp, 0);
18343 tree arg1 = CALL_EXPR_ARG (exp, 1);
18344 tree arg2 = CALL_EXPR_ARG (exp, 2);
18345 rtx op0 = expand_normal (arg0);
18346 rtx op1 = expand_normal (arg1);
18347 rtx op2 = expand_normal (arg2);
/* Modes the pattern declares for the result and the three inputs.  */
18348 enum machine_mode tmode = insn_data[icode].operand[0].mode;
18349 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
18350 enum machine_mode mode2 = insn_data[icode].operand[2].mode;
18351 enum machine_mode mode3 = insn_data[icode].operand[3].mode;
/* A vector operand that expanded to const0_rtx is replaced by the zero
   constant of the expected vector mode.  */
18353 if (VECTOR_MODE_P (mode1))
18354 op0 = safe_vector_operand (op0, mode1);
18355 if (VECTOR_MODE_P (mode2))
18356 op1 = safe_vector_operand (op1, mode2);
18357 if (VECTOR_MODE_P (mode3))
18358 op2 = safe_vector_operand (op2, mode3);
/* NOTE(review): the opening of this condition is not visible here.  A
   fresh pseudo is allocated when TARGET has the wrong mode or is rejected
   by the operand 0 predicate.  */
18362 || GET_MODE (target) != tmode
18363 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18364 target = gen_reg_rtx (tmode);
/* Copy inputs into registers of the expected mode when the pattern's
   predicates reject them; when optimizing, the second input is also
   forced into a register.  */
18366 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
18367 op0 = copy_to_mode_reg (mode1, op0);
18368 if ((optimize && !register_operand (op1, mode2))
18369 || !(*insn_data[icode].operand[2].predicate) (op1, mode2))
18370 op1 = copy_to_mode_reg (mode2, op1);
/* The third input was rejected by its predicate.
   NOTE(review): the switch, default and brace lines are not visible; the
   case labels below appear to dispatch on ICODE.  */
18372 if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
/* Variable-blend patterns: load the value into a register instead of
   diagnosing it.  */
18375 case CODE_FOR_sse4_1_blendvpd:
18376 case CODE_FOR_sse4_1_blendvps:
18377 case CODE_FOR_sse4_1_pblendvb:
18378 op2 = copy_to_mode_reg (mode3, op2);
/* Scalar round patterns: diagnosed as needing a 4-bit immediate.  */
18381 case CODE_FOR_sse4_1_roundsd:
18382 case CODE_FOR_sse4_1_roundss:
18383 error ("the third argument must be a 4-bit immediate");
/* Presumably the default case: diagnosed as needing an 8-bit immediate.  */
18387 error ("the third argument must be an 8-bit immediate");
/* Build the insn from the prepared operands.  */
18391 pat = GEN_FCN (icode) (target, op0, op1, op2);
18398 /* Subroutine of ix86_expand_builtin to take care of crc32 insns. */
/* ICODE selects the crc32 pattern, EXP is the CALL_EXPR supplying the two
   arguments, and TARGET is a suggested place for the result.
   NOTE(review): the declaration of PAT, the start of the TARGET check and
   the return statements are not visible in this listing.  */
18401 ix86_expand_crc32 (enum insn_code icode, tree exp, rtx target)
18404 tree arg0 = CALL_EXPR_ARG (exp, 0);
18405 tree arg1 = CALL_EXPR_ARG (exp, 1);
18406 rtx op0 = expand_normal (arg0);
18407 rtx op1 = expand_normal (arg1);
18408 enum machine_mode tmode = insn_data[icode].operand[0].mode;
18409 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
18410 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
/* Use a fresh pseudo when TARGET has the wrong mode or is rejected by the
   operand 0 predicate.  */
18414 || GET_MODE (target) != tmode
18415 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18416 target = gen_reg_rtx (tmode);
18418 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
18419 op0 = copy_to_mode_reg (mode0, op0);
/* A rejected second input is copied to a register in its own mode and then
   viewed in MODE1 through a subreg at byte offset 0.  */
18420 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
18422 op1 = copy_to_reg (op1);
18423 op1 = simplify_gen_subreg (mode1, op1, GET_MODE (op1), 0);
18426 pat = GEN_FCN (icode) (target, op0, op1);
18433 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
/* ICODE selects the two-input pattern, EXP is the CALL_EXPR supplying the
   arguments, and TARGET is a suggested place for the result.
   NOTE(review): the declarations of PAT and XOPS, the stores into XOPS and
   the return statements are not visible in this listing.  */
18436 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
18439 tree arg0 = CALL_EXPR_ARG (exp, 0);
18440 tree arg1 = CALL_EXPR_ARG (exp, 1);
18441 rtx op0 = expand_normal (arg0);
18442 rtx op1 = expand_normal (arg1);
18443 enum machine_mode tmode = insn_data[icode].operand[0].mode;
18444 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
18445 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
/* Replace a const0_rtx that stands in for a vector by the zero constant of
   the expected mode.  */
18447 if (VECTOR_MODE_P (mode0))
18448 op0 = safe_vector_operand (op0, mode0);
18449 if (VECTOR_MODE_P (mode1))
18450 op1 = safe_vector_operand (op1, mode1);
/* When optimizing a new pseudo is always used; otherwise TARGET is reused
   only if it has mode TMODE and satisfies the operand 0 predicate.  */
18452 if (optimize || !target
18453 || GET_MODE (target) != tmode
18454 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18455 target = gen_reg_rtx (tmode);
/* An SImode value supplied where the pattern wants TImode is loaded into a
   V4SImode register with gen_sse2_loadd and then viewed as TImode.  */
18457 if (GET_MODE (op1) == SImode && mode1 == TImode)
18459 rtx x = gen_reg_rtx (V4SImode);
18460 emit_insn (gen_sse2_loadd (x, op1));
18461 op1 = gen_lowpart (TImode, x);
18464 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
18465 op0 = copy_to_mode_reg (mode0, op0);
18466 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
18467 op1 = copy_to_mode_reg (mode1, op1);
18469 /* ??? Using ix86_fixup_binary_operands is problematic when
18470 we've got mismatched modes. Fake it. */
/* With all three modes equal, ix86_fixup_binary_operands supplies the
   destination.  */
18476 if (tmode == mode0 && tmode == mode1)
18478 target = ix86_fixup_binary_operands (UNKNOWN, tmode, xops);
/* Otherwise, when optimizing or when ix86_binary_operator_ok rejects the
   operands, force both inputs into registers and use a new destination.  */
18482 else if (optimize || !ix86_binary_operator_ok (UNKNOWN, tmode, xops))
18484 op0 = force_reg (mode0, op0);
18485 op1 = force_reg (mode1, op1);
18486 target = gen_reg_rtx (tmode);
18489 pat = GEN_FCN (icode) (target, op0, op1);
18496 /* Subroutine of ix86_expand_builtin to take care of stores. */
/* ICODE selects the store pattern.  The first argument of EXP is used as
   an address and the second as the value to store.
   NOTE(review): the declaration of PAT, the emission of the insn and the
   return value are not visible in this listing.  */
18499 ix86_expand_store_builtin (enum insn_code icode, tree exp)
18502 tree arg0 = CALL_EXPR_ARG (exp, 0);
18503 tree arg1 = CALL_EXPR_ARG (exp, 1);
18504 rtx op0 = expand_normal (arg0);
18505 rtx op1 = expand_normal (arg1);
18506 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
18507 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
18509 if (VECTOR_MODE_P (mode1))
18510 op1 = safe_vector_operand (op1, mode1);
/* Turn the address into a MEM of the pattern's operand 0 mode, with the
   address itself held in a Pmode register; the stored value is always
   copied into a MODE1 register.  */
18512 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
18513 op1 = copy_to_mode_reg (mode1, op1);
18515 pat = GEN_FCN (icode) (op0, op1);
18521 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
/* ICODE selects the pattern, EXP is the CALL_EXPR supplying the argument(s)
   and TARGET is a suggested place for the result.
   NOTE(review): the declaration of PAT, the test of DO_LOAD, the switch on
   the pattern and the return statements are not visible in this listing.  */
18524 ix86_expand_unop_builtin (enum insn_code icode, tree exp,
18525 rtx target, int do_load)
18528 tree arg0 = CALL_EXPR_ARG (exp, 0);
18529 rtx op0 = expand_normal (arg0);
18530 enum machine_mode tmode = insn_data[icode].operand[0].mode;
18531 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
/* When optimizing a new pseudo is always used; otherwise TARGET is reused
   only if it has mode TMODE and satisfies the operand 0 predicate.  */
18533 if (optimize || !target
18534 || GET_MODE (target) != tmode
18535 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18536 target = gen_reg_rtx (tmode);
/* Treat the argument as an address and wrap it in a MEM of mode MODE0.
   NOTE(review): presumably this is the DO_LOAD branch and the statements
   that follow are the alternative -- the if/else lines are not visible.  */
18538 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
18541 if (VECTOR_MODE_P (mode0))
18542 op0 = safe_vector_operand (op0, mode0);
18544 if ((optimize && !register_operand (op0, mode0))
18545 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
18546 op0 = copy_to_mode_reg (mode0, op0);
/* The packed round patterns take a second call argument as insn operand 2;
   a value rejected by that operand's predicate is diagnosed.  */
18551 case CODE_FOR_sse4_1_roundpd:
18552 case CODE_FOR_sse4_1_roundps:
18554 tree arg1 = CALL_EXPR_ARG (exp, 1);
18555 rtx op1 = expand_normal (arg1);
18556 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
18558 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
18560 error ("the second argument must be a 4-bit immediate");
18563 pat = GEN_FCN (icode) (target, op0, op1);
/* Presumably the default case: a plain one-input pattern.  */
18567 pat = GEN_FCN (icode) (target, op0);
18577 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
18578 sqrtss, rsqrtss, rcpss. */
/* Although the builtin takes one argument, the pattern is generated with
   two inputs (OP0 and OP1).
   NOTE(review): the assignment that initialises OP1, the declaration of
   PAT and the return statements are not visible in this listing --
   confirm how OP1 is derived.  */
18581 ix86_expand_unop1_builtin (enum insn_code icode, tree exp, rtx target)
18584 tree arg0 = CALL_EXPR_ARG (exp, 0);
18585 rtx op1, op0 = expand_normal (arg0);
18586 enum machine_mode tmode = insn_data[icode].operand[0].mode;
18587 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
/* When optimizing a new pseudo is always used; otherwise TARGET is reused
   only if it has mode TMODE and satisfies the operand 0 predicate.  */
18589 if (optimize || !target
18590 || GET_MODE (target) != tmode
18591 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18592 target = gen_reg_rtx (tmode);
18594 if (VECTOR_MODE_P (mode0))
18595 op0 = safe_vector_operand (op0, mode0);
18597 if ((optimize && !register_operand (op0, mode0))
18598 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
18599 op0 = copy_to_mode_reg (mode0, op0);
/* The second input is checked against operand 2's predicate, but in
   MODE0, the mode of operand 1.  */
18602 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
18603 op1 = copy_to_mode_reg (mode0, op1);
18605 pat = GEN_FCN (icode) (target, op0, op1);
18612 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
/* D describes the builtin (pattern code, rtx comparison code and flags);
   EXP is the CALL_EXPR supplying the two values to compare.
   NOTE(review): the TARGET parameter, the declarations of OP2 and PAT and
   the return statements are not visible in this listing.  */
18615 ix86_expand_sse_compare (const struct builtin_description *d, tree exp,
18619 tree arg0 = CALL_EXPR_ARG (exp, 0);
18620 tree arg1 = CALL_EXPR_ARG (exp, 1);
18621 rtx op0 = expand_normal (arg0);
18622 rtx op1 = expand_normal (arg1);
18624 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
18625 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
18626 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
18627 enum rtx_code comparison = d->comparison;
18629 if (VECTOR_MODE_P (mode0))
18630 op0 = safe_vector_operand (op0, mode0);
18631 if (VECTOR_MODE_P (mode1))
18632 op1 = safe_vector_operand (op1, mode1);
18634 /* Swap operands if we have a comparison that isn't available in
   hardware.  NOTE(review): only the copy of OP1 into a new register is
   visible below; the remaining statements of the swap are missing from
   this listing.  */
18636 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
18638 rtx tmp = gen_reg_rtx (mode1);
18639 emit_move_insn (tmp, op1);
/* When optimizing a new pseudo is always used; otherwise TARGET is reused
   only if it has mode TMODE and satisfies the operand 0 predicate.  */
18644 if (optimize || !target
18645 || GET_MODE (target) != tmode
18646 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
18647 target = gen_reg_rtx (tmode);
/* When optimizing, both inputs are forced into registers; otherwise they
   are copied only if the pattern's predicates reject them.  */
18649 if ((optimize && !register_operand (op0, mode0))
18650 || ! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
18651 op0 = copy_to_mode_reg (mode0, op0);
18652 if ((optimize && !register_operand (op1, mode1))
18653 || ! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
18654 op1 = copy_to_mode_reg (mode1, op1);
/* The comparison itself is passed to the pattern as an extra operand: an
   rtx with code COMPARISON, mode MODE0 and the two inputs as operands.  */
18656 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
18657 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
18664 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
/* D describes the builtin (pattern code, rtx comparison code and flags);
   EXP is the CALL_EXPR supplying the two values to compare.  The value
   returned is the SImode register created below.
   NOTE(review): the TARGET parameter, the declaration of PAT, the emission
   of PAT and the operands of the final comparison are not visible in this
   listing.  */
18667 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
18671 tree arg0 = CALL_EXPR_ARG (exp, 0);
18672 tree arg1 = CALL_EXPR_ARG (exp, 1);
18673 rtx op0 = expand_normal (arg0);
18674 rtx op1 = expand_normal (arg1);
18675 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
18676 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
18677 enum rtx_code comparison = d->comparison;
18679 if (VECTOR_MODE_P (mode0))
18680 op0 = safe_vector_operand (op0, mode0);
18681 if (VECTOR_MODE_P (mode1))
18682 op1 = safe_vector_operand (op1, mode1);
18684 /* Swap operands if we have a comparison that isn't available in
   hardware.  NOTE(review): the statements performing the swap are
   missing from this listing.  */
18686 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
/* The result is built in a new SImode pseudo that is cleared first; the
   QImode subreg at byte 0 is what the comparison result is written to.  */
18693 target = gen_reg_rtx (SImode);
18694 emit_move_insn (target, const0_rtx);
18695 target = gen_rtx_SUBREG (QImode, target, 0);
/* This pattern has no result operand: its operands 0 and 1 are the two
   values being compared.  */
18697 if ((optimize && !register_operand (op0, mode0))
18698 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
18699 op0 = copy_to_mode_reg (mode0, op0);
18700 if ((optimize && !register_operand (op1, mode1))
18701 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
18702 op1 = copy_to_mode_reg (mode1, op1);
18704 pat = GEN_FCN (d->icode) (op0, op1);
/* Store the QImode outcome of COMPARISON into the low part of the result
   register only (STRICT_LOW_PART).  */
18708 emit_insn (gen_rtx_SET (VOIDmode,
18709 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
18710 gen_rtx_fmt_ee (comparison, QImode,
/* Hand back the full SImode register rather than the QImode subreg.  */
18714 return SUBREG_REG (target);
18717 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
/* D describes the builtin (pattern code and rtx comparison code); EXP is
   the CALL_EXPR supplying the two inputs.  The value returned is the
   SImode register created below.
   NOTE(review): the TARGET parameter, the declaration of PAT, the emission
   of PAT and the operands of the final comparison are not visible in this
   listing.  */
18720 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
18724 tree arg0 = CALL_EXPR_ARG (exp, 0);
18725 tree arg1 = CALL_EXPR_ARG (exp, 1);
18726 rtx op0 = expand_normal (arg0);
18727 rtx op1 = expand_normal (arg1);
18728 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
18729 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
18730 enum rtx_code comparison = d->comparison;
18732 if (VECTOR_MODE_P (mode0))
18733 op0 = safe_vector_operand (op0, mode0);
18734 if (VECTOR_MODE_P (mode1))
18735 op1 = safe_vector_operand (op1, mode1);
/* The result is built in a new SImode pseudo that is cleared first; the
   QImode subreg at byte 0 is what the comparison result is written to.  */
18737 target = gen_reg_rtx (SImode);
18738 emit_move_insn (target, const0_rtx);
18739 target = gen_rtx_SUBREG (QImode, target, 0);
/* This pattern has no result operand: its operands 0 and 1 are the two
   inputs.  */
18741 if ((optimize && !register_operand (op0, mode0))
18742 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
18743 op0 = copy_to_mode_reg (mode0, op0);
18744 if ((optimize && !register_operand (op1, mode1))
18745 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
18746 op1 = copy_to_mode_reg (mode1, op1);
18748 pat = GEN_FCN (d->icode) (op0, op1);
/* Store the QImode outcome of COMPARISON into the low part of the result
   register only (STRICT_LOW_PART).  */
18752 emit_insn (gen_rtx_SET (VOIDmode,
18753 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
18754 gen_rtx_fmt_ee (comparison, QImode,
/* Hand back the full SImode register rather than the QImode subreg.  */
18758 return SUBREG_REG (target);
18761 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
/* D describes the builtin; EXP is the CALL_EXPR with five arguments, which
   become insn operands 2-6.  Insn operands 0 and 1 are two results, of
   which at most one is the value the builtin hands back.
   NOTE(review): the declaration of PAT, the emission of PAT, the else and
   brace lines, and part of the final comparison are not visible in this
   listing.  */
18764 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
18765 tree exp, rtx target)
18768 tree arg0 = CALL_EXPR_ARG (exp, 0);
18769 tree arg1 = CALL_EXPR_ARG (exp, 1);
18770 tree arg2 = CALL_EXPR_ARG (exp, 2);
18771 tree arg3 = CALL_EXPR_ARG (exp, 3);
18772 tree arg4 = CALL_EXPR_ARG (exp, 4);
18773 rtx scratch0, scratch1;
18774 rtx op0 = expand_normal (arg0);
18775 rtx op1 = expand_normal (arg1);
18776 rtx op2 = expand_normal (arg2);
18777 rtx op3 = expand_normal (arg3);
18778 rtx op4 = expand_normal (arg4);
18779 enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
/* Modes of the two results (operands 0-1) and of the five inputs
   (operands 2-6) as declared by the pattern.  */
18781 tmode0 = insn_data[d->icode].operand[0].mode;
18782 tmode1 = insn_data[d->icode].operand[1].mode;
18783 modev2 = insn_data[d->icode].operand[2].mode;
18784 modei3 = insn_data[d->icode].operand[3].mode;
18785 modev4 = insn_data[d->icode].operand[4].mode;
18786 modei5 = insn_data[d->icode].operand[5].mode;
18787 modeimm = insn_data[d->icode].operand[6].mode;
18789 if (VECTOR_MODE_P (modev2))
18790 op0 = safe_vector_operand (op0, modev2);
18791 if (VECTOR_MODE_P (modev4))
18792 op2 = safe_vector_operand (op2, modev4);
/* Copy inputs into registers when the predicates reject them; when
   optimizing, the third argument is also forced into a register.  */
18794 if (! (*insn_data[d->icode].operand[2].predicate) (op0, modev2))
18795 op0 = copy_to_mode_reg (modev2, op0);
18796 if (! (*insn_data[d->icode].operand[3].predicate) (op1, modei3))
18797 op1 = copy_to_mode_reg (modei3, op1);
18798 if ((optimize && !register_operand (op2, modev4))
18799 || !(*insn_data[d->icode].operand[4].predicate) (op2, modev4))
18800 op2 = copy_to_mode_reg (modev4, op2);
18801 if (! (*insn_data[d->icode].operand[5].predicate) (op3, modei5))
18802 op3 = copy_to_mode_reg (modei5, op3);
/* The last argument is not copied to a register; a value rejected by the
   predicate is diagnosed.
   NOTE(review): the message reads "a 8-bit"; "an 8-bit" would match the
   wording used in ix86_expand_sse_4_operands_builtin.  */
18804 if (! (*insn_data[d->icode].operand[6].predicate) (op4, modeimm))
18806 error ("the fifth argument must be a 8-bit immediate");
/* PCMPESTRI: the builtin's value is insn operand 0; operand 1 goes to a
   scratch register.  */
18810 if (d->code == IX86_BUILTIN_PCMPESTRI128)
18812 if (optimize || !target
18813 || GET_MODE (target) != tmode0
18814 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode0))
18815 target = gen_reg_rtx (tmode0);
18817 scratch1 = gen_reg_rtx (tmode1);
18819 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
/* PCMPESTRM: the builtin's value is insn operand 1; operand 0 goes to a
   scratch register.  */
18821 else if (d->code == IX86_BUILTIN_PCMPESTRM128)
18823 if (optimize || !target
18824 || GET_MODE (target) != tmode1
18825 || ! (*insn_data[d->icode].operand[1].predicate) (target, tmode1))
18826 target = gen_reg_rtx (tmode1);
18828 scratch0 = gen_reg_rtx (tmode0);
18830 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
/* Presumably the remaining case: both insn results are scratch registers
   and D->flag must be nonzero.  */
18834 gcc_assert (d->flag);
18836 scratch0 = gen_reg_rtx (tmode0);
18837 scratch1 = gen_reg_rtx (tmode1);
18839 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
/* Build a QImode result in the low part of a cleared SImode pseudo from an
   EQ comparison on a hard register whose mode is taken from D->flag.
   NOTE(review): the condition selecting this path and the remaining
   operands of the comparison are not visible here.  */
18849 target = gen_reg_rtx (SImode);
18850 emit_move_insn (target, const0_rtx);
18851 target = gen_rtx_SUBREG (QImode, target, 0);
18854 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
18855 gen_rtx_fmt_ee (EQ, QImode,
18856 gen_rtx_REG ((enum machine_mode) d->flag,
/* Hand back the full SImode register rather than the QImode subreg.  */
18859 return SUBREG_REG (target);
18866 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
/* D describes the builtin; EXP is the CALL_EXPR with three arguments, which
   become insn operands 2-4.  Insn operands 0 and 1 are two results, of
   which at most one is the value the builtin hands back.
   NOTE(review): the declaration of PAT, the emission of PAT, the else and
   brace lines, and part of the final comparison are not visible in this
   listing.  */
18869 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
18870 tree exp, rtx target)
18873 tree arg0 = CALL_EXPR_ARG (exp, 0);
18874 tree arg1 = CALL_EXPR_ARG (exp, 1);
18875 tree arg2 = CALL_EXPR_ARG (exp, 2);
18876 rtx scratch0, scratch1;
18877 rtx op0 = expand_normal (arg0);
18878 rtx op1 = expand_normal (arg1);
18879 rtx op2 = expand_normal (arg2);
18880 enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;
/* Modes of the two results (operands 0-1) and of the three inputs
   (operands 2-4) as declared by the pattern.  */
18882 tmode0 = insn_data[d->icode].operand[0].mode;
18883 tmode1 = insn_data[d->icode].operand[1].mode;
18884 modev2 = insn_data[d->icode].operand[2].mode;
18885 modev3 = insn_data[d->icode].operand[3].mode;
18886 modeimm = insn_data[d->icode].operand[4].mode;
18888 if (VECTOR_MODE_P (modev2))
18889 op0 = safe_vector_operand (op0, modev2);
18890 if (VECTOR_MODE_P (modev3))
18891 op1 = safe_vector_operand (op1, modev3);
/* Copy inputs into registers when the predicates reject them; when
   optimizing, the second argument is also forced into a register.  */
18893 if (! (*insn_data[d->icode].operand[2].predicate) (op0, modev2))
18894 op0 = copy_to_mode_reg (modev2, op0);
18895 if ((optimize && !register_operand (op1, modev3))
18896 || !(*insn_data[d->icode].operand[3].predicate) (op1, modev3))
18897 op1 = copy_to_mode_reg (modev3, op1);
/* The last argument is not copied to a register; a value rejected by the
   predicate is diagnosed.
   NOTE(review): the message reads "a 8-bit"; "an 8-bit" would match the
   wording used in ix86_expand_sse_4_operands_builtin.  */
18899 if (! (*insn_data[d->icode].operand[4].predicate) (op2, modeimm))
18901 error ("the third argument must be a 8-bit immediate");
/* PCMPISTRI: the builtin's value is insn operand 0; operand 1 goes to a
   scratch register.  */
18905 if (d->code == IX86_BUILTIN_PCMPISTRI128)
18907 if (optimize || !target
18908 || GET_MODE (target) != tmode0
18909 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode0))
18910 target = gen_reg_rtx (tmode0);
18912 scratch1 = gen_reg_rtx (tmode1);
18914 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
/* PCMPISTRM: the builtin's value is insn operand 1; operand 0 goes to a
   scratch register.  */
18916 else if (d->code == IX86_BUILTIN_PCMPISTRM128)
18918 if (optimize || !target
18919 || GET_MODE (target) != tmode1
18920 || ! (*insn_data[d->icode].operand[1].predicate) (target, tmode1))
18921 target = gen_reg_rtx (tmode1);
18923 scratch0 = gen_reg_rtx (tmode0);
18925 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
/* Presumably the remaining case: both insn results are scratch registers
   and D->flag must be nonzero.  */
18929 gcc_assert (d->flag);
18931 scratch0 = gen_reg_rtx (tmode0);
18932 scratch1 = gen_reg_rtx (tmode1);
18934 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
/* Build a QImode result in the low part of a cleared SImode pseudo from an
   EQ comparison on a hard register whose mode is taken from D->flag.
   NOTE(review): the condition selecting this path and the remaining
   operands of the comparison are not visible here.  */
18944 target = gen_reg_rtx (SImode);
18945 emit_move_insn (target, const0_rtx);
18946 target = gen_rtx_SUBREG (QImode, target, 0);
18949 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
18950 gen_rtx_fmt_ee (EQ, QImode,
18951 gen_rtx_REG ((enum machine_mode) d->flag,
/* Hand back the full SImode register rather than the QImode subreg.  */
18954 return SUBREG_REG (target);
18960 /* Return the integer constant in ARG. Constrain it to be in the range
18961 of the subparts of VEC_TYPE; issue an error if not. */
/* The largest valid selector is TYPE_VECTOR_SUBPARTS (VEC_TYPE) - 1.
   NOTE(review): the return type and both return statements (after the
   error and on success) are not visible in this listing -- confirm what
   value is handed back when the selector is rejected.  */
18964 get_element_number (tree vec_type, tree arg)
18966 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
/* ELT is assigned inside the condition (comma expression) so that it is
   only read from ARG once host_integerp has accepted ARG.  */
18968 if (!host_integerp (arg, 1)
18969 || (elt = tree_low_cst (arg, 1), elt > max))
18971 error ("selector must be an integer constant in the range 0..%wi", max);
18978 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
18979 ix86_expand_vector_init. We DO have language-level syntax for this, in
18980 the form of (type){ init-list }. Except that since we can't place emms
18981 instructions from inside the compiler, we can't allow the use of MMX
18982 registers unless the user explicitly asks for it. So we do *not* define
18983 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
18984 we have builtins invoked by mmintrin.h that gives us license to emit
18985 these sorts of instructions. */
/* TYPE is the vector type to build, EXP the CALL_EXPR carrying one argument
   per vector element, and TARGET a suggested place for the result.
   NOTE(review): the return statement is not visible in this listing;
   presumably TARGET is returned -- confirm.  */
18988 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
18990 enum machine_mode tmode = TYPE_MODE (type);
18991 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
18992 int i, n_elt = GET_MODE_NUNITS (tmode);
18993 rtvec v = rtvec_alloc (n_elt);
/* The call must supply exactly one argument per element of the vector
   mode.  */
18995 gcc_assert (VECTOR_MODE_P (tmode));
18996 gcc_assert (call_expr_nargs (exp) == n_elt);
/* Expand each argument and keep its low part in the element mode.  */
18998 for (i = 0; i < n_elt; ++i)
19000 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
19001 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
19004 if (!target || !register_operand (target, tmode))
19005 target = gen_reg_rtx (tmode);
/* The collected elements are handed over as a PARALLEL of mode TMODE.  */
19007 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
19011 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
19012 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
19013 had a language-level syntax for referencing vector elements. */
/* EXP is the CALL_EXPR: argument 0 is the vector and argument 1 the element
   selector.  TARGET is a suggested place for the extracted element.
   NOTE(review): the declarations of ARG0, ARG1, OP0 and ELT and the return
   statement are not visible in this listing.  */
19016 ix86_expand_vec_ext_builtin (tree exp, rtx target)
19018 enum machine_mode tmode, mode0;
19023 arg0 = CALL_EXPR_ARG (exp, 0);
19024 arg1 = CALL_EXPR_ARG (exp, 1);
19026 op0 = expand_normal (arg0);
/* The selector is validated against the vector type of argument 0.  */
19027 elt = get_element_number (TREE_TYPE (arg0), arg1);
/* TMODE is the mode of the element type, MODE0 that of the vector type.  */
19029 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
19030 mode0 = TYPE_MODE (TREE_TYPE (arg0));
19031 gcc_assert (VECTOR_MODE_P (mode0));
19033 op0 = force_reg (mode0, op0);
/* When optimizing a new pseudo is always used; otherwise TARGET is reused
   only if it is a register operand of mode TMODE.  */
19035 if (optimize || !target || !register_operand (target, tmode))
19036 target = gen_reg_rtx (tmode);
19038 ix86_expand_vector_extract (true, target, op0, elt);
19043 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
19044 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
19045 a language-level syntax for referencing vector elements. */
/* EXP is the CALL_EXPR: argument 0 is the source vector, argument 1 the new
   element value and argument 2 the element selector.
   NOTE(review): the declaration of ELT and the return statement are not
   visible in this listing; the comment before the copy below says the copy
   is what gets returned.  */
19048 ix86_expand_vec_set_builtin (tree exp)
19050 enum machine_mode tmode, mode1;
19051 tree arg0, arg1, arg2;
19053 rtx op0, op1, target;
19055 arg0 = CALL_EXPR_ARG (exp, 0);
19056 arg1 = CALL_EXPR_ARG (exp, 1);
19057 arg2 = CALL_EXPR_ARG (exp, 2);
/* TMODE is the mode of the vector type, MODE1 that of its element type.  */
19059 tmode = TYPE_MODE (TREE_TYPE (arg0));
19060 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
19061 gcc_assert (VECTOR_MODE_P (tmode));
19063 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
19064 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
19065 elt = get_element_number (TREE_TYPE (arg0), arg2);
/* Convert the new value to the element mode when it was expanded in some
   other mode; a VOIDmode value is left alone.  The final TRUE argument
   requests an unsigned conversion.  */
19067 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
19068 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
19070 op0 = force_reg (tmode, op0);
19071 op1 = force_reg (mode1, op1);
19073 /* OP0 is the source of these builtin functions and shouldn't be
19074 modified. Create a copy, use it and return it as target. */
19075 target = gen_reg_rtx (tmode);
19076 emit_move_insn (target, op0);
19077 ix86_expand_vector_set (true, target, op1, elt);
19082 /* Expand an expression EXP that calls a built-in function,
19083 with result going to TARGET if that's convenient
19084 (and in mode MODE if that's convenient).
19085 SUBTARGET may be used as the target for computing one of EXP's operands.
19086 IGNORE is nonzero if the value is to be ignored. */
19089 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
19090 enum machine_mode mode ATTRIBUTE_UNUSED,
19091 int ignore ATTRIBUTE_UNUSED)
19093 const struct builtin_description *d;
19095 enum insn_code icode;
19096 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
19097 tree arg0, arg1, arg2, arg3;
19098 rtx op0, op1, op2, op3, pat;
19099 enum machine_mode tmode, mode0, mode1, mode2, mode3, mode4;
19100 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
19104 case IX86_BUILTIN_EMMS:
19105 emit_insn (gen_mmx_emms ());
19108 case IX86_BUILTIN_SFENCE:
19109 emit_insn (gen_sse_sfence ());
19112 case IX86_BUILTIN_MASKMOVQ:
19113 case IX86_BUILTIN_MASKMOVDQU:
19114 icode = (fcode == IX86_BUILTIN_MASKMOVQ
19115 ? CODE_FOR_mmx_maskmovq
19116 : CODE_FOR_sse2_maskmovdqu);
19117 /* Note the arg order is different from the operand order. */
19118 arg1 = CALL_EXPR_ARG (exp, 0);
19119 arg2 = CALL_EXPR_ARG (exp, 1);
19120 arg0 = CALL_EXPR_ARG (exp, 2);
19121 op0 = expand_normal (arg0);
19122 op1 = expand_normal (arg1);
19123 op2 = expand_normal (arg2);
19124 mode0 = insn_data[icode].operand[0].mode;
19125 mode1 = insn_data[icode].operand[1].mode;
19126 mode2 = insn_data[icode].operand[2].mode;
19128 op0 = force_reg (Pmode, op0);
19129 op0 = gen_rtx_MEM (mode1, op0);
19131 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
19132 op0 = copy_to_mode_reg (mode0, op0);
19133 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
19134 op1 = copy_to_mode_reg (mode1, op1);
19135 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
19136 op2 = copy_to_mode_reg (mode2, op2);
19137 pat = GEN_FCN (icode) (op0, op1, op2);
19143 case IX86_BUILTIN_RSQRTF:
19144 return ix86_expand_unop1_builtin (CODE_FOR_rsqrtsf2, exp, target);
19146 case IX86_BUILTIN_SQRTSS:
19147 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmsqrtv4sf2, exp, target);
19148 case IX86_BUILTIN_RSQRTSS:
19149 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrsqrtv4sf2, exp, target);
19150 case IX86_BUILTIN_RCPSS:
19151 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrcpv4sf2, exp, target);
19153 case IX86_BUILTIN_LOADUPS:
19154 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, exp, target, 1);
19156 case IX86_BUILTIN_STOREUPS:
19157 return ix86_expand_store_builtin (CODE_FOR_sse_movups, exp);
19159 case IX86_BUILTIN_LOADHPS:
19160 case IX86_BUILTIN_LOADLPS:
19161 case IX86_BUILTIN_LOADHPD:
19162 case IX86_BUILTIN_LOADLPD:
19163 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_loadhps
19164 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_loadlps
19165 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_loadhpd
19166 : CODE_FOR_sse2_loadlpd);
19167 arg0 = CALL_EXPR_ARG (exp, 0);
19168 arg1 = CALL_EXPR_ARG (exp, 1);
19169 op0 = expand_normal (arg0);
19170 op1 = expand_normal (arg1);
19171 tmode = insn_data[icode].operand[0].mode;
19172 mode0 = insn_data[icode].operand[1].mode;
19173 mode1 = insn_data[icode].operand[2].mode;
19175 op0 = force_reg (mode0, op0);
19176 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
19177 if (optimize || target == 0
19178 || GET_MODE (target) != tmode
19179 || !register_operand (target, tmode))
19180 target = gen_reg_rtx (tmode);
19181 pat = GEN_FCN (icode) (target, op0, op1);
19187 case IX86_BUILTIN_STOREHPS:
19188 case IX86_BUILTIN_STORELPS:
19189 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_storehps
19190 : CODE_FOR_sse_storelps);
19191 arg0 = CALL_EXPR_ARG (exp, 0);
19192 arg1 = CALL_EXPR_ARG (exp, 1);
19193 op0 = expand_normal (arg0);
19194 op1 = expand_normal (arg1);
19195 mode0 = insn_data[icode].operand[0].mode;
19196 mode1 = insn_data[icode].operand[1].mode;
19198 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
19199 op1 = force_reg (mode1, op1);
19201 pat = GEN_FCN (icode) (op0, op1);
19207 case IX86_BUILTIN_MOVNTPS:
19208 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, exp);
19209 case IX86_BUILTIN_MOVNTQ:
19210 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, exp);
19212 case IX86_BUILTIN_LDMXCSR:
19213 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
19214 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
19215 emit_move_insn (target, op0);
19216 emit_insn (gen_sse_ldmxcsr (target));
19219 case IX86_BUILTIN_STMXCSR:
19220 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
19221 emit_insn (gen_sse_stmxcsr (target));
19222 return copy_to_mode_reg (SImode, target);
19224 case IX86_BUILTIN_SHUFPS:
19225 case IX86_BUILTIN_SHUFPD:
19226 icode = (fcode == IX86_BUILTIN_SHUFPS
19227 ? CODE_FOR_sse_shufps
19228 : CODE_FOR_sse2_shufpd);
19229 arg0 = CALL_EXPR_ARG (exp, 0);
19230 arg1 = CALL_EXPR_ARG (exp, 1);
19231 arg2 = CALL_EXPR_ARG (exp, 2);
19232 op0 = expand_normal (arg0);
19233 op1 = expand_normal (arg1);
19234 op2 = expand_normal (arg2);
19235 tmode = insn_data[icode].operand[0].mode;
19236 mode0 = insn_data[icode].operand[1].mode;
19237 mode1 = insn_data[icode].operand[2].mode;
19238 mode2 = insn_data[icode].operand[3].mode;
19240 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
19241 op0 = copy_to_mode_reg (mode0, op0);
19242 if ((optimize && !register_operand (op1, mode1))
19243 || !(*insn_data[icode].operand[2].predicate) (op1, mode1))
19244 op1 = copy_to_mode_reg (mode1, op1);
19245 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
19247 /* @@@ better error message */
19248 error ("mask must be an immediate");
19249 return gen_reg_rtx (tmode);
19251 if (optimize || target == 0
19252 || GET_MODE (target) != tmode
19253 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19254 target = gen_reg_rtx (tmode);
19255 pat = GEN_FCN (icode) (target, op0, op1, op2);
19261 case IX86_BUILTIN_PSHUFW:
19262 case IX86_BUILTIN_PSHUFD:
19263 case IX86_BUILTIN_PSHUFHW:
19264 case IX86_BUILTIN_PSHUFLW:
19265 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
19266 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
19267 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
19268 : CODE_FOR_mmx_pshufw);
19269 arg0 = CALL_EXPR_ARG (exp, 0);
19270 arg1 = CALL_EXPR_ARG (exp, 1);
19271 op0 = expand_normal (arg0);
19272 op1 = expand_normal (arg1);
19273 tmode = insn_data[icode].operand[0].mode;
19274 mode1 = insn_data[icode].operand[1].mode;
19275 mode2 = insn_data[icode].operand[2].mode;
19277 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
19278 op0 = copy_to_mode_reg (mode1, op0);
19279 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
19281 /* @@@ better error message */
19282 error ("mask must be an immediate");
19286 || GET_MODE (target) != tmode
19287 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19288 target = gen_reg_rtx (tmode);
19289 pat = GEN_FCN (icode) (target, op0, op1);
19295 case IX86_BUILTIN_PSLLWI128:
19296 icode = CODE_FOR_ashlv8hi3;
19298 case IX86_BUILTIN_PSLLDI128:
19299 icode = CODE_FOR_ashlv4si3;
19301 case IX86_BUILTIN_PSLLQI128:
19302 icode = CODE_FOR_ashlv2di3;
19304 case IX86_BUILTIN_PSRAWI128:
19305 icode = CODE_FOR_ashrv8hi3;
19307 case IX86_BUILTIN_PSRADI128:
19308 icode = CODE_FOR_ashrv4si3;
19310 case IX86_BUILTIN_PSRLWI128:
19311 icode = CODE_FOR_lshrv8hi3;
19313 case IX86_BUILTIN_PSRLDI128:
19314 icode = CODE_FOR_lshrv4si3;
19316 case IX86_BUILTIN_PSRLQI128:
19317 icode = CODE_FOR_lshrv2di3;
19320 arg0 = CALL_EXPR_ARG (exp, 0);
19321 arg1 = CALL_EXPR_ARG (exp, 1);
19322 op0 = expand_normal (arg0);
19323 op1 = expand_normal (arg1);
19325 if (!CONST_INT_P (op1))
19327 error ("shift must be an immediate");
19330 if (INTVAL (op1) < 0 || INTVAL (op1) > 255)
19331 op1 = GEN_INT (255);
19333 tmode = insn_data[icode].operand[0].mode;
19334 mode1 = insn_data[icode].operand[1].mode;
19335 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
19336 op0 = copy_to_reg (op0);
19338 target = gen_reg_rtx (tmode);
19339 pat = GEN_FCN (icode) (target, op0, op1);
19345 case IX86_BUILTIN_PSLLW128:
19346 icode = CODE_FOR_ashlv8hi3;
19348 case IX86_BUILTIN_PSLLD128:
19349 icode = CODE_FOR_ashlv4si3;
19351 case IX86_BUILTIN_PSLLQ128:
19352 icode = CODE_FOR_ashlv2di3;
19354 case IX86_BUILTIN_PSRAW128:
19355 icode = CODE_FOR_ashrv8hi3;
19357 case IX86_BUILTIN_PSRAD128:
19358 icode = CODE_FOR_ashrv4si3;
19360 case IX86_BUILTIN_PSRLW128:
19361 icode = CODE_FOR_lshrv8hi3;
19363 case IX86_BUILTIN_PSRLD128:
19364 icode = CODE_FOR_lshrv4si3;
19366 case IX86_BUILTIN_PSRLQ128:
19367 icode = CODE_FOR_lshrv2di3;
19370 arg0 = CALL_EXPR_ARG (exp, 0);
19371 arg1 = CALL_EXPR_ARG (exp, 1);
19372 op0 = expand_normal (arg0);
19373 op1 = expand_normal (arg1);
19375 tmode = insn_data[icode].operand[0].mode;
19376 mode1 = insn_data[icode].operand[1].mode;
19378 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
19379 op0 = copy_to_reg (op0);
19381 op1 = simplify_gen_subreg (TImode, op1, GET_MODE (op1), 0);
19382 if (! (*insn_data[icode].operand[2].predicate) (op1, TImode))
19383 op1 = copy_to_reg (op1);
19385 target = gen_reg_rtx (tmode);
19386 pat = GEN_FCN (icode) (target, op0, op1);
19392 case IX86_BUILTIN_PSLLDQI128:
19393 case IX86_BUILTIN_PSRLDQI128:
19394 icode = (fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
19395 : CODE_FOR_sse2_lshrti3);
19396 arg0 = CALL_EXPR_ARG (exp, 0);
19397 arg1 = CALL_EXPR_ARG (exp, 1);
19398 op0 = expand_normal (arg0);
19399 op1 = expand_normal (arg1);
19400 tmode = insn_data[icode].operand[0].mode;
19401 mode1 = insn_data[icode].operand[1].mode;
19402 mode2 = insn_data[icode].operand[2].mode;
19404 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
19406 op0 = copy_to_reg (op0);
19407 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
19409 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
19411 error ("shift must be an immediate");
19414 target = gen_reg_rtx (V2DImode);
19415 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0),
19422 case IX86_BUILTIN_FEMMS:
19423 emit_insn (gen_mmx_femms ());
19426 case IX86_BUILTIN_PAVGUSB:
19427 return ix86_expand_binop_builtin (CODE_FOR_mmx_uavgv8qi3, exp, target);
19429 case IX86_BUILTIN_PF2ID:
19430 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2id, exp, target, 0);
19432 case IX86_BUILTIN_PFACC:
19433 return ix86_expand_binop_builtin (CODE_FOR_mmx_haddv2sf3, exp, target);
19435 case IX86_BUILTIN_PFADD:
19436 return ix86_expand_binop_builtin (CODE_FOR_mmx_addv2sf3, exp, target);
19438 case IX86_BUILTIN_PFCMPEQ:
19439 return ix86_expand_binop_builtin (CODE_FOR_mmx_eqv2sf3, exp, target);
19441 case IX86_BUILTIN_PFCMPGE:
19442 return ix86_expand_binop_builtin (CODE_FOR_mmx_gev2sf3, exp, target);
19444 case IX86_BUILTIN_PFCMPGT:
19445 return ix86_expand_binop_builtin (CODE_FOR_mmx_gtv2sf3, exp, target);
19447 case IX86_BUILTIN_PFMAX:
19448 return ix86_expand_binop_builtin (CODE_FOR_mmx_smaxv2sf3, exp, target);
19450 case IX86_BUILTIN_PFMIN:
19451 return ix86_expand_binop_builtin (CODE_FOR_mmx_sminv2sf3, exp, target);
19453 case IX86_BUILTIN_PFMUL:
19454 return ix86_expand_binop_builtin (CODE_FOR_mmx_mulv2sf3, exp, target);
19456 case IX86_BUILTIN_PFRCP:
19457 return ix86_expand_unop_builtin (CODE_FOR_mmx_rcpv2sf2, exp, target, 0);
19459 case IX86_BUILTIN_PFRCPIT1:
19460 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit1v2sf3, exp, target);
19462 case IX86_BUILTIN_PFRCPIT2:
19463 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit2v2sf3, exp, target);
19465 case IX86_BUILTIN_PFRSQIT1:
19466 return ix86_expand_binop_builtin (CODE_FOR_mmx_rsqit1v2sf3, exp, target);
19468 case IX86_BUILTIN_PFRSQRT:
19469 return ix86_expand_unop_builtin (CODE_FOR_mmx_rsqrtv2sf2, exp, target, 0);
19471 case IX86_BUILTIN_PFSUB:
19472 return ix86_expand_binop_builtin (CODE_FOR_mmx_subv2sf3, exp, target);
19474 case IX86_BUILTIN_PFSUBR:
19475 return ix86_expand_binop_builtin (CODE_FOR_mmx_subrv2sf3, exp, target);
19477 case IX86_BUILTIN_PI2FD:
19478 return ix86_expand_unop_builtin (CODE_FOR_mmx_floatv2si2, exp, target, 0);
19480 case IX86_BUILTIN_PMULHRW:
19481 return ix86_expand_binop_builtin (CODE_FOR_mmx_pmulhrwv4hi3, exp, target);
19483 case IX86_BUILTIN_PF2IW:
19484 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2iw, exp, target, 0);
19486 case IX86_BUILTIN_PFNACC:
19487 return ix86_expand_binop_builtin (CODE_FOR_mmx_hsubv2sf3, exp, target);
19489 case IX86_BUILTIN_PFPNACC:
19490 return ix86_expand_binop_builtin (CODE_FOR_mmx_addsubv2sf3, exp, target);
19492 case IX86_BUILTIN_PI2FW:
19493 return ix86_expand_unop_builtin (CODE_FOR_mmx_pi2fw, exp, target, 0);
19495 case IX86_BUILTIN_PSWAPDSI:
19496 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2si2, exp, target, 0);
19498 case IX86_BUILTIN_PSWAPDSF:
19499 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2sf2, exp, target, 0);
19501 case IX86_BUILTIN_SQRTSD:
19502 return ix86_expand_unop1_builtin (CODE_FOR_sse2_vmsqrtv2df2, exp, target);
19503 case IX86_BUILTIN_LOADUPD:
19504 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, exp, target, 1);
19505 case IX86_BUILTIN_STOREUPD:
19506 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, exp);
19508 case IX86_BUILTIN_MFENCE:
19509 emit_insn (gen_sse2_mfence ());
19511 case IX86_BUILTIN_LFENCE:
19512 emit_insn (gen_sse2_lfence ());
19515 case IX86_BUILTIN_CLFLUSH:
19516 arg0 = CALL_EXPR_ARG (exp, 0);
19517 op0 = expand_normal (arg0);
19518 icode = CODE_FOR_sse2_clflush;
19519 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
19520 op0 = copy_to_mode_reg (Pmode, op0);
19522 emit_insn (gen_sse2_clflush (op0));
19525 case IX86_BUILTIN_MOVNTPD:
19526 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, exp);
19527 case IX86_BUILTIN_MOVNTDQ:
19528 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, exp);
19529 case IX86_BUILTIN_MOVNTI:
19530 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, exp);
19532 case IX86_BUILTIN_LOADDQU:
19533 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, exp, target, 1);
19534 case IX86_BUILTIN_STOREDQU:
19535 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, exp);
19537 case IX86_BUILTIN_MONITOR:
19538 arg0 = CALL_EXPR_ARG (exp, 0);
19539 arg1 = CALL_EXPR_ARG (exp, 1);
19540 arg2 = CALL_EXPR_ARG (exp, 2);
19541 op0 = expand_normal (arg0);
19542 op1 = expand_normal (arg1);
19543 op2 = expand_normal (arg2);
19545 op0 = copy_to_mode_reg (Pmode, op0);
19547 op1 = copy_to_mode_reg (SImode, op1);
19549 op2 = copy_to_mode_reg (SImode, op2);
19551 emit_insn (gen_sse3_monitor (op0, op1, op2));
19553 emit_insn (gen_sse3_monitor64 (op0, op1, op2));
19556 case IX86_BUILTIN_MWAIT:
19557 arg0 = CALL_EXPR_ARG (exp, 0);
19558 arg1 = CALL_EXPR_ARG (exp, 1);
19559 op0 = expand_normal (arg0);
19560 op1 = expand_normal (arg1);
19562 op0 = copy_to_mode_reg (SImode, op0);
19564 op1 = copy_to_mode_reg (SImode, op1);
19565 emit_insn (gen_sse3_mwait (op0, op1));
19568 case IX86_BUILTIN_LDDQU:
19569 return ix86_expand_unop_builtin (CODE_FOR_sse3_lddqu, exp,
19572 case IX86_BUILTIN_PALIGNR:
19573 case IX86_BUILTIN_PALIGNR128:
19574 if (fcode == IX86_BUILTIN_PALIGNR)
19576 icode = CODE_FOR_ssse3_palignrdi;
19581 icode = CODE_FOR_ssse3_palignrti;
19584 arg0 = CALL_EXPR_ARG (exp, 0);
19585 arg1 = CALL_EXPR_ARG (exp, 1);
19586 arg2 = CALL_EXPR_ARG (exp, 2);
19587 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, EXPAND_NORMAL);
19588 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, EXPAND_NORMAL);
19589 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, EXPAND_NORMAL);
19590 tmode = insn_data[icode].operand[0].mode;
19591 mode1 = insn_data[icode].operand[1].mode;
19592 mode2 = insn_data[icode].operand[2].mode;
19593 mode3 = insn_data[icode].operand[3].mode;
19595 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
19597 op0 = copy_to_reg (op0);
19598 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
19600 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
19602 op1 = copy_to_reg (op1);
19603 op1 = simplify_gen_subreg (mode2, op1, GET_MODE (op1), 0);
19605 if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
19607 error ("shift must be an immediate");
19610 target = gen_reg_rtx (mode);
19611 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, mode, 0),
19618 case IX86_BUILTIN_MOVNTDQA:
19619 return ix86_expand_unop_builtin (CODE_FOR_sse4_1_movntdqa, exp,
19622 case IX86_BUILTIN_MOVNTSD:
19623 return ix86_expand_store_builtin (CODE_FOR_sse4a_vmmovntv2df, exp);
19625 case IX86_BUILTIN_MOVNTSS:
19626 return ix86_expand_store_builtin (CODE_FOR_sse4a_vmmovntv4sf, exp);
19628 case IX86_BUILTIN_INSERTQ:
19629 case IX86_BUILTIN_EXTRQ:
19630 icode = (fcode == IX86_BUILTIN_EXTRQ
19631 ? CODE_FOR_sse4a_extrq
19632 : CODE_FOR_sse4a_insertq);
19633 arg0 = CALL_EXPR_ARG (exp, 0);
19634 arg1 = CALL_EXPR_ARG (exp, 1);
19635 op0 = expand_normal (arg0);
19636 op1 = expand_normal (arg1);
19637 tmode = insn_data[icode].operand[0].mode;
19638 mode1 = insn_data[icode].operand[1].mode;
19639 mode2 = insn_data[icode].operand[2].mode;
19640 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
19641 op0 = copy_to_mode_reg (mode1, op0);
19642 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
19643 op1 = copy_to_mode_reg (mode2, op1);
19644 if (optimize || target == 0
19645 || GET_MODE (target) != tmode
19646 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19647 target = gen_reg_rtx (tmode);
19648 pat = GEN_FCN (icode) (target, op0, op1);
19654 case IX86_BUILTIN_EXTRQI:
19655 icode = CODE_FOR_sse4a_extrqi;
19656 arg0 = CALL_EXPR_ARG (exp, 0);
19657 arg1 = CALL_EXPR_ARG (exp, 1);
19658 arg2 = CALL_EXPR_ARG (exp, 2);
19659 op0 = expand_normal (arg0);
19660 op1 = expand_normal (arg1);
19661 op2 = expand_normal (arg2);
19662 tmode = insn_data[icode].operand[0].mode;
19663 mode1 = insn_data[icode].operand[1].mode;
19664 mode2 = insn_data[icode].operand[2].mode;
19665 mode3 = insn_data[icode].operand[3].mode;
19666 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
19667 op0 = copy_to_mode_reg (mode1, op0);
19668 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
19670 error ("index mask must be an immediate");
19671 return gen_reg_rtx (tmode);
19673 if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
19675 error ("length mask must be an immediate");
19676 return gen_reg_rtx (tmode);
19678 if (optimize || target == 0
19679 || GET_MODE (target) != tmode
19680 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19681 target = gen_reg_rtx (tmode);
19682 pat = GEN_FCN (icode) (target, op0, op1, op2);
19688 case IX86_BUILTIN_INSERTQI:
19689 icode = CODE_FOR_sse4a_insertqi;
19690 arg0 = CALL_EXPR_ARG (exp, 0);
19691 arg1 = CALL_EXPR_ARG (exp, 1);
19692 arg2 = CALL_EXPR_ARG (exp, 2);
19693 arg3 = CALL_EXPR_ARG (exp, 3);
19694 op0 = expand_normal (arg0);
19695 op1 = expand_normal (arg1);
19696 op2 = expand_normal (arg2);
19697 op3 = expand_normal (arg3);
19698 tmode = insn_data[icode].operand[0].mode;
19699 mode1 = insn_data[icode].operand[1].mode;
19700 mode2 = insn_data[icode].operand[2].mode;
19701 mode3 = insn_data[icode].operand[3].mode;
19702 mode4 = insn_data[icode].operand[4].mode;
19704 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
19705 op0 = copy_to_mode_reg (mode1, op0);
19707 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
19708 op1 = copy_to_mode_reg (mode2, op1);
19710 if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
19712 error ("index mask must be an immediate");
19713 return gen_reg_rtx (tmode);
19715 if (! (*insn_data[icode].operand[4].predicate) (op3, mode4))
19717 error ("length mask must be an immediate");
19718 return gen_reg_rtx (tmode);
19720 if (optimize || target == 0
19721 || GET_MODE (target) != tmode
19722 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19723 target = gen_reg_rtx (tmode);
19724 pat = GEN_FCN (icode) (target, op0, op1, op2, op3);
19730 case IX86_BUILTIN_VEC_INIT_V2SI:
19731 case IX86_BUILTIN_VEC_INIT_V4HI:
19732 case IX86_BUILTIN_VEC_INIT_V8QI:
19733 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
19735 case IX86_BUILTIN_VEC_EXT_V2DF:
19736 case IX86_BUILTIN_VEC_EXT_V2DI:
19737 case IX86_BUILTIN_VEC_EXT_V4SF:
19738 case IX86_BUILTIN_VEC_EXT_V4SI:
19739 case IX86_BUILTIN_VEC_EXT_V8HI:
19740 case IX86_BUILTIN_VEC_EXT_V2SI:
19741 case IX86_BUILTIN_VEC_EXT_V4HI:
19742 case IX86_BUILTIN_VEC_EXT_V16QI:
19743 return ix86_expand_vec_ext_builtin (exp, target);
19745 case IX86_BUILTIN_VEC_SET_V2DI:
19746 case IX86_BUILTIN_VEC_SET_V4SF:
19747 case IX86_BUILTIN_VEC_SET_V4SI:
19748 case IX86_BUILTIN_VEC_SET_V8HI:
19749 case IX86_BUILTIN_VEC_SET_V4HI:
19750 case IX86_BUILTIN_VEC_SET_V16QI:
19751 return ix86_expand_vec_set_builtin (exp);
19753 case IX86_BUILTIN_INFQ:
19755 REAL_VALUE_TYPE inf;
19759 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
19761 tmp = validize_mem (force_const_mem (mode, tmp));
19764 target = gen_reg_rtx (mode);
19766 emit_move_insn (target, tmp);
19770 case IX86_BUILTIN_FABSQ:
19771 return ix86_expand_unop_builtin (CODE_FOR_abstf2, exp, target, 0);
19773 case IX86_BUILTIN_COPYSIGNQ:
19774 return ix86_expand_binop_builtin (CODE_FOR_copysigntf3, exp, target);
19780 for (i = 0, d = bdesc_sse_3arg;
19781 i < ARRAY_SIZE (bdesc_sse_3arg);
19783 if (d->code == fcode)
19784 return ix86_expand_sse_4_operands_builtin (d->icode, exp,
19787 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
19788 if (d->code == fcode)
19790 /* Compares are treated specially. */
19791 if (d->icode == CODE_FOR_sse_maskcmpv4sf3
19792 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3
19793 || d->icode == CODE_FOR_sse2_maskcmpv2df3
19794 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
19795 return ix86_expand_sse_compare (d, exp, target);
19797 return ix86_expand_binop_builtin (d->icode, exp, target);
19800 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
19801 if (d->code == fcode)
19802 return ix86_expand_unop_builtin (d->icode, exp, target, 0);
19804 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
19805 if (d->code == fcode)
19806 return ix86_expand_sse_comi (d, exp, target);
19808 for (i = 0, d = bdesc_ptest; i < ARRAY_SIZE (bdesc_ptest); i++, d++)
19809 if (d->code == fcode)
19810 return ix86_expand_sse_ptest (d, exp, target);
19812 for (i = 0, d = bdesc_crc32; i < ARRAY_SIZE (bdesc_crc32); i++, d++)
19813 if (d->code == fcode)
19814 return ix86_expand_crc32 (d->icode, exp, target);
19816 for (i = 0, d = bdesc_pcmpestr;
19817 i < ARRAY_SIZE (bdesc_pcmpestr);
19819 if (d->code == fcode)
19820 return ix86_expand_sse_pcmpestr (d, exp, target);
19822 for (i = 0, d = bdesc_pcmpistr;
19823 i < ARRAY_SIZE (bdesc_pcmpistr);
19825 if (d->code == fcode)
19826 return ix86_expand_sse_pcmpistr (d, exp, target);
19828 gcc_unreachable ();
19831 /* Returns a function decl for a vectorized version of the builtin function
19832 with builtin function code FN and the result vector type TYPE, or NULL_TREE
19833 if it is not available. */
/* TYPE in the comment above corresponds to TYPE_OUT (the result vector
   type); TYPE_IN is the argument vector type.  A decl from ix86_builtins[]
   is returned only when the element modes and the numbers of subparts of
   both vector types match what the candidate builtin handles.
   NOTE(review): this copy omits several lines of the function (the end of
   the parameter list, the declarations of out_n/in_n, the switch header
   and the statements run when a test fails); the NULL_TREE fall-back is
   taken from the comment above -- confirm against the complete source.  */
19836 ix86_builtin_vectorized_function (unsigned int fn, tree type_out,
19839 enum machine_mode in_mode, out_mode;
/* Only vector result and argument types are considered.  */
19842 if (TREE_CODE (type_out) != VECTOR_TYPE
19843 || TREE_CODE (type_in) != VECTOR_TYPE)
/* Element mode and number of subparts of the result and argument types.  */
19846 out_mode = TYPE_MODE (TREE_TYPE (type_out));
19847 out_n = TYPE_VECTOR_SUBPARTS (type_out);
19848 in_mode = TYPE_MODE (TREE_TYPE (type_in));
19849 in_n = TYPE_VECTOR_SUBPARTS (type_in);
/* sqrt: two DFmode elements in, two DFmode elements out.  */
19853 case BUILT_IN_SQRT:
19854 if (out_mode == DFmode && out_n == 2
19855 && in_mode == DFmode && in_n == 2)
19856 return ix86_builtins[IX86_BUILTIN_SQRTPD];
/* sqrtf: four SFmode elements in, four SFmode elements out.  */
19859 case BUILT_IN_SQRTF:
19860 if (out_mode == SFmode && out_n == 4
19861 && in_mode == SFmode && in_n == 4)
19862 return ix86_builtins[IX86_BUILTIN_SQRTPS];
/* lrint: the argument type has two DFmode elements while the result type
   has four SImode elements.  */
19865 case BUILT_IN_LRINT:
19866 if (out_mode == SImode && out_n == 4
19867 && in_mode == DFmode && in_n == 2)
19868 return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX];
/* lrintf: four SFmode elements in, four SImode elements out.  */
19871 case BUILT_IN_LRINTF:
19872 if (out_mode == SImode && out_n == 4
19873 && in_mode == SFmode && in_n == 4)
19874 return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
19884 /* Returns a decl of a function that implements conversion of the
19885 input vector of type TYPE, or NULL_TREE if it is not available. */
/* CODE selects the kind of conversion; FIX_TRUNC_EXPR is the only case
   label visible in this copy.  The builtin is then chosen from the
   TYPE_MODE of TYPE.
   NOTE(review): the outer switch header, the other case labels (including
   the one guarding the CVTDQ2PS return, presumably FLOAT_EXPR), the vector
   mode labels of the inner switches and the fall-back returns are on lines
   not shown here -- confirm against the complete source.  */
19888 ix86_vectorize_builtin_conversion (unsigned int code, tree type)
/* Only vector types are handled.  */
19890 if (TREE_CODE (type) != VECTOR_TYPE)
19896 switch (TYPE_MODE (type))
19899 return ix86_builtins[IX86_BUILTIN_CVTDQ2PS];
/* Truncating conversion: picks the CVTTPS2DQ builtin for the vector mode
   matched by the inner switch.  */
19904 case FIX_TRUNC_EXPR:
19905 switch (TYPE_MODE (type))
19908 return ix86_builtins[IX86_BUILTIN_CVTTPS2DQ];
19918 /* Returns a code for a target-specific builtin that implements
19919 reciprocal of the function, or NULL_TREE if not available. */
/* FN is the code of the builtin to replace.  SQRT is not used by this
   implementation (it is marked ATTRIBUTE_UNUSED).  The visible returns
   hand back entries of ix86_builtins[].
   NOTE(review): the test on MD_FN is on a line not shown in this copy;
   judging from the two section comments below it presumably selects
   between the machine dependent and the normal builtin switch.  The
   switch headers and fall-back returns are likewise not visible --
   confirm against the complete source.  */
19922 ix86_builtin_reciprocal (unsigned int fn, bool md_fn,
19923 bool sqrt ATTRIBUTE_UNUSED)
/* The replacements below are only considered with SSE math and
   TARGET_RECIP enabled, when not optimizing for size, and with
   finite-math-only, no trapping math and unsafe math optimizations all in
   effect.  NOTE(review): the statement executed when this test fails is
   not visible here (presumably it returns NULL_TREE).  */
19925 if (! (TARGET_SSE_MATH && TARGET_RECIP && !optimize_size
19926 && flag_finite_math_only && !flag_trapping_math
19927 && flag_unsafe_math_optimizations))
19931 /* Machine dependent builtins. */
19934 /* Vectorized version of sqrt to rsqrt conversion. */
19935 case IX86_BUILTIN_SQRTPS:
19936 return ix86_builtins[IX86_BUILTIN_RSQRTPS];
19942 /* Normal builtins. */
19945 /* Sqrt to rsqrt conversion. */
19946 case BUILT_IN_SQRTF:
19947 return ix86_builtins[IX86_BUILTIN_RSQRTF];
19954 /* Store OPERAND to the memory after reload is completed. This means
19955 that we can't easily use assign_stack_local. */
/* MODE is the mode given to the MEM rtx built in RESULT; OPERAND is the
   value to store.  Three strategies are visible below: a slot in the red
   zone, a DImode push on 64-bit targets, and SImode pushes otherwise.
   NOTE(review): this copy omits lines of the function (the declaration of
   RESULT, the switch/case labels on MODE, the emit_insn calls wrapping the
   gen_rtx_SET expressions, the source operands of those SETs and the final
   return, presumably of RESULT) -- confirm against the complete source.  */
19957 ix86_force_to_memory (enum machine_mode mode, rtx operand)
/* This routine must only be used once reload has finished.  */
19961 gcc_assert (reload_completed);
/* Red zone available: address a slot RED_ZONE_SIZE bytes below a base
   register and move OPERAND there.  NOTE(review): the base operand of the
   PLUS is on a line not shown (presumably stack_pointer_rtx).  */
19962 if (TARGET_RED_ZONE)
19964 result = gen_rtx_MEM (mode,
19965 gen_rtx_PLUS (Pmode,
19967 GEN_INT (-RED_ZONE_SIZE)));
19968 emit_move_insn (result, operand);
/* 64-bit target without a red zone: OPERAND is taken as a DImode lowpart
   and stored through a pre-decrement of the stack pointer; RESULT then
   addresses the new top of stack.
   NOTE(review): !TARGET_RED_ZONE is always true at this point because the
   preceding branch already handled TARGET_RED_ZONE.  */
19970 else if (!TARGET_RED_ZONE && TARGET_64BIT)
19976 operand = gen_lowpart (DImode, operand);
19980 gen_rtx_SET (VOIDmode,
19981 gen_rtx_MEM (DImode,
19982 gen_rtx_PRE_DEC (DImode,
19983 stack_pointer_rtx)),
19987 gcc_unreachable ();
19989 result = gen_rtx_MEM (mode, stack_pointer_rtx);
/* Remaining path: OPERAND is split with split_di into two parts, each
   stored with its own SImode pre-decrement store.  NOTE(review): the
   branch and case labels that select this path are not visible here.  */
19998 split_di (&operand, 1, operands, operands + 1);
20000 gen_rtx_SET (VOIDmode,
20001 gen_rtx_MEM (SImode,
20002 gen_rtx_PRE_DEC (Pmode,
20003 stack_pointer_rtx)),
20006 gen_rtx_SET (VOIDmode,
20007 gen_rtx_MEM (SImode,
20008 gen_rtx_PRE_DEC (Pmode,
20009 stack_pointer_rtx)),
20014 /* Store HImodes as SImodes. */
20015 operand = gen_lowpart (SImode, operand);
/* The MEM takes the (possibly widened) mode of OPERAND.  */
20019 gen_rtx_SET (VOIDmode,
20020 gen_rtx_MEM (GET_MODE (operand),
20021 gen_rtx_PRE_DEC (SImode,
20022 stack_pointer_rtx)),
20026 gcc_unreachable ();
/* After the push(es) the value lives at the stack pointer.  */
20028 result = gen_rtx_MEM (mode, stack_pointer_rtx);
20033 /* Free operand from the memory. */
/* MODE is the mode of the value whose stack slot is released.  Work is
   only done when there is no red zone; in that case the stack pointer is
   set to itself plus an amount.
   NOTE(review): the declaration and the values of that amount are on lines
   not shown in this copy (presumably 8 when MODE is DImode or the target
   is 64-bit, and 4 otherwise) -- confirm against the complete source.  */
20035 ix86_free_from_memory (enum machine_mode mode)
20037 if (!TARGET_RED_ZONE)
/* DImode values and all values on 64-bit targets share one size.  */
20041 if (mode == DImode || TARGET_64BIT)
20045 /* Use LEA to deallocate stack space. In peephole2 it will be converted
20046 to pop or add instruction if registers are available. */
20047 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
20048 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
20053 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
20054 QImode must go into class Q_REGS.
20055 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
20056 movdf to do mem-to-mem moves through integer regs. */
/* X is the rtx being reloaded and REGCLASS the class proposed for it
   (called CLASS in the comments below).  The visible returns yield
   REGCLASS itself, a narrower class, or NO_REGS.
   NOTE(review): this copy omits many lines of the function, among them
   most of the return statements that follow the tests below and parts of
   two multi-line conditions; the comments attached to each test describe
   the intent, but the exact results should be confirmed against the
   complete source.  */
20058 ix86_preferred_reload_class (rtx x, enum reg_class regclass)
20060 enum machine_mode mode = GET_MODE (x);
20062 /* We're only allowed to return a subclass of CLASS. Many of the
20063 following checks fail for NO_REGS, so eliminate that early. */
20064 if (regclass == NO_REGS)
20067 /* All classes can load zeros. */
20068 if (x == CONST0_RTX (mode))
20071 /* Force constants into memory if we are loading a (nonzero) constant into
20072 an MMX or SSE register. This is because there are no MMX/SSE instructions
20073 to load from a constant. */
20075 && (MAYBE_MMX_CLASS_P (regclass) || MAYBE_SSE_CLASS_P (regclass)))
20078 /* Prefer SSE regs only, if we can use them for math. */
20079 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
20080 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
20082 /* Floating-point constants need more complex checks. */
20083 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
20085 /* General regs can load everything. */
20086 if (reg_class_subset_p (regclass, GENERAL_REGS))
20089 /* Floats can load 0 and 1 plus some others. Note that we eliminated
20090 zero above. We only want to wind up preferring 80387 registers if
20091 we plan on doing computation with them. */
20093 && standard_80387_constant_p (x))
20095 /* Limit class to non-sse. */
20096 if (regclass == FLOAT_SSE_REGS)
20098 if (regclass == FP_TOP_SSE_REGS)
20100 if (regclass == FP_SECOND_SSE_REGS)
20101 return FP_SECOND_REG;
20102 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
/* NOTE(review): the comment that follows has lost its last line and its
   closing delimiter in this copy; the PLUS test after it restricts the
   result to subclasses of GENERAL_REGS.  */
20109 /* Generally when we see PLUS here, it's the function invariant
20110 (plus soft-fp const_int). Which can only be computed into general
20112 if (GET_CODE (x) == PLUS)
20113 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
20115 /* QImode constants are easy to load, but non-constant QImode data
20116 must go into Q_REGS. */
20117 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
20119 if (reg_class_subset_p (regclass, Q_REGS))
20121 if (reg_class_subset_p (Q_REGS, regclass))
20129 /* Discourage putting floating-point values in SSE registers unless
20130 SSE math is being used, and likewise for the 387 registers. */
/* X is the rtx being reloaded and REGCLASS the class proposed for the
   output reload (called CLASS in the comment below).  For SSE float modes
   under TARGET_SSE_MATH the result is SSE_REGS or NO_REGS; for x87 float
   modes it is a 387 class or NO_REGS.
   NOTE(review): the return for FP_TOP_SSE_REGS and the final return for
   all other modes are on lines not shown in this copy (presumably
   FP_TOP_REG and REGCLASS) -- confirm against the complete source.  */
20132 ix86_preferred_output_reload_class (rtx x, enum reg_class regclass)
20134 enum machine_mode mode = GET_MODE (x);
20136 /* Restrict the output reload class to the register bank that we are doing
20137 math on. If we would like not to return a subset of CLASS, reject this
20138 alternative: if reload cannot do this, it will still use its choice. */
/* NOTE(review): MODE was already initialized from GET_MODE (x) in its
   declaration, so this assignment is redundant.  */
20139 mode = GET_MODE (x);
20140 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
20141 return MAYBE_SSE_CLASS_P (regclass) ? SSE_REGS : NO_REGS;
/* x87 modes: map the mixed FP/SSE classes to their 387-only parts and
   reject classes that are not purely floating point.  */
20143 if (X87_FLOAT_MODE_P (mode))
20145 if (regclass == FP_TOP_SSE_REGS)
20147 else if (regclass == FP_SECOND_SSE_REGS)
20148 return FP_SECOND_REG;
20150 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
20156 /* If we are copying between general and FP registers, we need a memory
20157 location. The same is true for SSE and MMX registers.
20159 The macro can't work reliably when one of the CLASSES is class containing
20160 registers from multiple units (SSE, MMX, integer). We avoid this by never
20161 combining those units in single alternative in the machine description.
20162 Ensure that this constraint holds to avoid unexpected surprises.
20164 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
20165 enforce these sanity checks. */
/* CLASS1 and CLASS2 are the two register classes of the copy and MODE is
   the mode of the value moved.
   NOTE(review): the return statements that follow each test below are on
   lines not shown in this copy; from the comments each matching test
   presumably reports that secondary memory is needed, with a final
   negative answer otherwise -- confirm against the complete source.  */
20168 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
20169 enum machine_mode mode, int strict)
/* A class for which MAYBE_x_CLASS_P and x_CLASS_P disagree mixes
   registers of several units; that is only tolerated when STRICT is
   zero.  */
20171 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
20172 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
20173 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
20174 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
20175 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
20176 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
20178 gcc_assert (!strict);
/* Exactly one side is an x87 class.  */
20182 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
20185 /* ??? This is a lie. We do have moves between mmx/general, and for
20186 mmx/sse2. But by saying we need secondary memory we discourage the
20187 register allocator from using the mmx registers unless needed. */
20188 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
/* Exactly one side is an SSE class; the tests below refine the answer.  */
20191 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
20193 /* SSE1 doesn't have any direct moves from other classes. */
20197 /* If the target says that inter-unit moves are more expensive
20198 than moving through memory, then don't generate them. */
20199 if (!TARGET_INTER_UNIT_MOVES)
20202 /* Between SSE and general, we have moves no larger than word size. */
20203 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
20210 /* Return true if the registers in CLASS cannot represent the change from
20211 modes FROM to TO. */
/* CLASS in the comment above is the REGCLASS parameter.
   NOTE(review): the return statements that follow each test below (and
   any statements between the parameter list and the first test) are on
   lines not shown in this copy; per the comments each matching test
   presumably forbids the mode change -- confirm against the complete
   source.  */
20214 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
20215 enum reg_class regclass)
20220 /* x87 registers can't do subreg at all, as all values are reformatted
20221 to extended precision. */
20222 if (MAYBE_FLOAT_CLASS_P (regclass))
/* The two size checks below apply to classes that may contain SSE or MMX
   registers.  */
20225 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
20227 /* Vector registers do not support QI or HImode loads. If we don't
20228 disallow a change to these modes, reload will assume it's ok to
20229 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
20230 the vec_dupv4hi pattern. */
20231 if (GET_MODE_SIZE (from) < 4)
20234 /* Vector registers do not support subreg with nonzero offsets, which
20235 are otherwise valid for integer registers. Since we can't see
20236 whether we have a nonzero offset from here, prohibit all
20237 nonparadoxical subregs changing size. */
20238 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
20245 /* Return the cost of moving data from a register in class CLASS1 to
20246 one in class CLASS2.
20248 It is not required that the cost always equal 2 when FROM is the same as TO;
20249 on some machines it is expensive to move between registers if they are not
20250 general registers. */
/* FROM and TO in the comment above correspond to CLASS1 and CLASS2; MODE
   is the mode of the value moved.
   NOTE(review): the declaration and initial value of COST, the extra
   amounts added after the two tests in the secondary-memory path, that
   path's return and the final default return are on lines not shown in
   this copy -- confirm against the complete source.  */
20253 ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
20254 enum reg_class class2)
20256 /* In case we require secondary memory, compute cost of the store followed
20257 by load. In order to avoid bad register allocation choices, we need
20258 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
/* The query is made with STRICT == 0, so the sanity assertion inside
   ix86_secondary_memory_needed is not triggered from here.  */
20260 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
/* For each class, charge the larger of the two MEMORY_MOVE_COST values
   obtained with a last argument of 0 and of 1.  */
20264 cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
20265 MEMORY_MOVE_COST (mode, class1, 1));
20266 cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
20267 MEMORY_MOVE_COST (mode, class2, 1));
20269 /* In case of copying from general_purpose_register we may emit multiple
20270 stores followed by single load causing memory size mismatch stall.
20271 Count this as arbitrarily high cost of 20. */
20272 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
20275 /* In the case of FP/MMX moves, the registers actually overlap, and we
20276 have to switch modes in order to treat them differently. */
20277 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
20278 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
20284 /* Moves between SSE/MMX and integer unit are expensive. */
20285 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
20286 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
20288 /* ??? By keeping returned value relatively high, we limit the number
20289 of moves between integer and MMX/SSE registers for all targets.
20290 Additionally, high value prevents problem with ix86_modes_tieable_p(),
20291 where integer modes in MMX/SSE registers are not tieable
20292 because of missing QImode and HImode moves to, from or between
20293 MMX/SSE registers. */
20294 return MAX (ix86_cost->mmxsse_to_integer, 8);
/* Moves within one unit: the cost is chosen from CLASS1 alone.  */
20296 if (MAYBE_FLOAT_CLASS_P (class1))
20297 return ix86_cost->fp_move;
20298 if (MAYBE_SSE_CLASS_P (class1))
20299 return ix86_cost->sse_move;
20300 if (MAYBE_MMX_CLASS_P (class1))
20301 return ix86_cost->mmx_move;
20305 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
/* The register is classified in turn as flags, x87, SSE or MMX; the code
   after the MMX test handles the remaining registers, which the comments
   below describe as the general purpose registers.
   NOTE(review): several return statements are on lines not shown in this
   copy (after the MODE_CC/MODE_RANDOM/MODE_PARTIAL_INT test, after the two
   QImode sub-tests, after each of the trailing else-if tests, and the
   final return) -- confirm their values against the complete source.  */
20308 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
20310 /* Flags and only flags can only hold CCmode values. */
20311 if (CC_REGNO_P (regno))
20312 return GET_MODE_CLASS (mode) == MODE_CC;
/* For every other register, modes of class MODE_CC, MODE_RANDOM and
   MODE_PARTIAL_INT are singled out first.  */
20313 if (GET_MODE_CLASS (mode) == MODE_CC
20314 || GET_MODE_CLASS (mode) == MODE_RANDOM
20315 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
20317 if (FP_REGNO_P (regno))
20318 return VALID_FP_MODE_P (mode);
/* NOTE(review): the first comment inside the SSE branch below has lost
   its last line and closing delimiter in this copy.  */
20319 if (SSE_REGNO_P (regno))
20321 /* We implement the move patterns for all vector modes into and
20322 out of SSE registers, even when no operation instructions
20324 return (VALID_SSE_REG_MODE (mode)
20325 || VALID_SSE2_REG_MODE (mode)
20326 || VALID_MMX_REG_MODE (mode)
20327 || VALID_MMX_REG_MODE_3DNOW (mode));
20329 if (MMX_REGNO_P (regno))
20331 /* We implement the move patterns for 3DNOW modes even in MMX mode,
20332 so if the register is available at all, then we can move data of
20333 the given mode into or out of it. */
20334 return (VALID_MMX_REG_MODE (mode)
20335 || VALID_MMX_REG_MODE_3DNOW (mode));
20338 if (mode == QImode)
20340 /* Take care for QImode values - they can be in non-QI regs,
20341 but then they do cause partial register stalls. */
20342 if (regno < 4 || TARGET_64BIT)
20344 if (!TARGET_PARTIAL_REG_STALL)
/* Otherwise QImode is accepted here only during or after reload.  */
20346 return reload_in_progress || reload_completed;
20348 /* We handle both integer and floats in the general purpose registers. */
20349 else if (VALID_INT_MODE_P (mode))
20351 else if (VALID_FP_MODE_P (mode))
20353 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
20354 on to use that value in smaller contexts, this can easily force a
20355 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
20356 supporting DImode, allow it. */
20357 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
20363 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
20364 tieable integer mode. */
/* Two of the answers depend on the target: one group of modes is tieable
   on 64-bit targets or when partial register stalls are not a concern,
   another only on 64-bit targets.
   NOTE(review): the switch on MODE, its case labels and the remaining
   returns are on lines not shown in this copy, so which modes map to
   which answer cannot be read from here -- confirm against the complete
   source.  */
20367 ix86_tieable_integer_mode_p (enum machine_mode mode)
20376 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
20379 return TARGET_64BIT;
20386 /* Return true if MODE1 is accessible in a register that can hold MODE2
20387 without copying. That is, all register classes that can hold MODE2
20388 can also hold MODE1. */
/* The tests run in order: identical modes, two tieable integer modes,
   then rules keyed on MODE2 (XFmode, DFmode, 16-byte SSE modes, 8-byte MMX
   modes).
   NOTE(review): the returns after the first two tests and the final
   fall-back return are on lines not shown in this copy (presumably true,
   true and false respectively) -- confirm against the complete source.  */
20391 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
20393 if (mode1 == mode2)
/* Both modes must pass ix86_tieable_integer_mode_p.  */
20396 if (ix86_tieable_integer_mode_p (mode1)
20397 && ix86_tieable_integer_mode_p (mode2))
20400 /* MODE2 being XFmode implies fp stack or general regs, which means we
20401 can tie any smaller floating point modes to it. Note that we do not
20402 tie this with TFmode. */
20403 if (mode2 == XFmode)
20404 return mode1 == SFmode || mode1 == DFmode;
20406 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
20407 that we can tie it with SFmode. */
20408 if (mode2 == DFmode)
20409 return mode1 == SFmode;
20411 /* If MODE2 is only appropriate for an SSE register, then tie with
20412 any other mode acceptable to SSE registers. */
/* Acceptability is probed with the first SSE register.  */
20413 if (GET_MODE_SIZE (mode2) == 16
20414 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
20415 return (GET_MODE_SIZE (mode1) == 16
20416 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
20418 /* If MODE2 is appropriate for an MMX register, then tie
20419 with any other mode acceptable to MMX registers. */
/* Acceptability is probed with the first MMX register.  */
20420 if (GET_MODE_SIZE (mode2) == 8
20421 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
20422 return (GET_MODE_SIZE (mode1) == 8
20423 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
/* ix86_memory_move_cost (MODE, REGCLASS, IN): cost of moving a value of
   mode MODE between memory and a register of class REGCLASS.  A nonzero
   IN selects the load cost table and zero the store cost table.  x87, SSE
   and MMX classes use the fp_, sse_ and mmx_ tables of ix86_cost with an
   index computed from the mode; all other classes use the integer tables,
   selected by GET_MODE_SIZE (MODE).  For one of the integer sizes the cost
   also depends on whether REGCLASS satisfies Q_CLASS_P: the load uses
   movzbl_load and the store adds 4 when it does not.
   NOTE(review): the header comment below is truncated in this copy (two
   sentences are cut short and the closing delimiter is missing), and the
   lines computing INDEX, the case labels of the switches and the
   statement after the TFmode test are not shown -- confirm against the
   complete source.  */
20428 /* Return the cost of moving data of mode M between a
20429 register and memory. A value of 2 is the default; this cost is
20430 relative to those in `REGISTER_MOVE_COST'.
20432 If moving between registers and memory is more expensive than
20433 between two registers, you should define this macro to express the
20436 Model also increased moving costs of QImode registers in non
20440 ix86_memory_move_cost (enum machine_mode mode, enum reg_class regclass, int in)
20442 if (FLOAT_CLASS_P (regclass))
20459 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
20461 if (SSE_CLASS_P (regclass))
20464 switch (GET_MODE_SIZE (mode))
20478 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
20480 if (MMX_CLASS_P (regclass))
20483 switch (GET_MODE_SIZE (mode))
20494 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
20496 switch (GET_MODE_SIZE (mode))
20500 return (Q_CLASS_P (regclass) ? ix86_cost->int_load[0]
20501 : ix86_cost->movzbl_load);
20503 return (Q_CLASS_P (regclass) ? ix86_cost->int_store[0]
20504 : ix86_cost->int_store[0] + 4);
20507 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
20509 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
20510 if (mode == TFmode)
/* The per-move cost is multiplied by the mode size in bytes divided by
   UNITS_PER_WORD, rounded up.  */
20512 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
20513 * (((int) GET_MODE_SIZE (mode)
20514 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
20518 /* Compute a (partial) cost for rtx X. Return true if the complete
20519 cost has been computed, and false if subexpressions should be
20520 scanned. In either case, *TOTAL contains the cost result.
   OUTER_CODE_I is the enclosing rtx code passed as an int; it is
   converted to enum rtx_code and forwarded to rtx_cost whenever the
   cost of an operand is added in. */
20523 ix86_rtx_costs (rtx x, int code, int outer_code_i, int *total)
20525 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
20526 enum machine_mode mode = GET_MODE (x);
20534 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
20536 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
/* Symbolic constants under PIC. The LABEL_REF test must compare the
   rtx code itself; negating GET_CODE first would compare 0 or 1
   against LABEL_REF. */
20538 else if (flag_pic && SYMBOLIC_CONST (x)
20540 || (GET_CODE (x) != LABEL_REF
20541 && (GET_CODE (x) != SYMBOL_REF
20542 || !SYMBOL_REF_LOCAL_P (x)))))
20549 if (mode == VOIDmode)
20552 switch (standard_80387_constant_p (x))
20557 default: /* Other constants */
20562 /* Start with (MEM (SYMBOL_REF)), since that's where
20563 it'll probably end up. Add a penalty for size. */
20564 *total = (COSTS_N_INSNS (1)
20565 + (flag_pic != 0 && !TARGET_64BIT)
20566 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
20572 /* Zero extension is often completely free on x86_64, so make
20573 it as cheap as possible. */
20574 if (TARGET_64BIT && mode == DImode
20575 && GET_MODE (XEXP (x, 0)) == SImode)
20577 else if (TARGET_ZERO_EXTEND_WITH_AND)
20578 *total = ix86_cost->add;
20580 *total = ix86_cost->movzx;
20584 *total = ix86_cost->movsx;
20588 if (CONST_INT_P (XEXP (x, 1))
20589 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
20591 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
20594 *total = ix86_cost->add;
20597 if ((value == 2 || value == 3)
20598 && ix86_cost->lea <= ix86_cost->shift_const)
20600 *total = ix86_cost->lea;
20610 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
20612 if (CONST_INT_P (XEXP (x, 1)))
20614 if (INTVAL (XEXP (x, 1)) > 32)
20615 *total = ix86_cost->shift_const + COSTS_N_INSNS (2);
20617 *total = ix86_cost->shift_const * 2;
20621 if (GET_CODE (XEXP (x, 1)) == AND)
20622 *total = ix86_cost->shift_var * 2;
20624 *total = ix86_cost->shift_var * 6 + COSTS_N_INSNS (2);
20629 if (CONST_INT_P (XEXP (x, 1)))
20630 *total = ix86_cost->shift_const;
20632 *total = ix86_cost->shift_var;
20637 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
20639 /* ??? SSE scalar cost should be used here. */
20640 *total = ix86_cost->fmul;
20643 else if (X87_FLOAT_MODE_P (mode))
20645 *total = ix86_cost->fmul;
20648 else if (FLOAT_MODE_P (mode))
20650 /* ??? SSE vector cost should be used here. */
20651 *total = ix86_cost->fmul;
20656 rtx op0 = XEXP (x, 0);
20657 rtx op1 = XEXP (x, 1);
20659 if (CONST_INT_P (XEXP (x, 1)))
20661 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
/* Each iteration clears the lowest set bit of VALUE. */
20662 for (nbits = 0; value != 0; value &= value - 1)
20666 /* This is arbitrary. */
20669 /* Compute costs correctly for widening multiplication. OP0 must
   itself be the extension, because its operand is dereferenced right
   away in the size check; OP1 is only examined afterwards. */
20670 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
20671 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
20672 == GET_MODE_SIZE (mode))
20674 int is_mulwiden = 0;
20675 enum machine_mode inner_mode = GET_MODE (op0);
20677 if (GET_CODE (op0) == GET_CODE (op1))
20678 is_mulwiden = 1, op1 = XEXP (op1, 0);
20679 else if (CONST_INT_P (op1))
20681 if (GET_CODE (op0) == SIGN_EXTEND)
20682 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
20685 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
20689 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
20692 *total = (ix86_cost->mult_init[MODE_INDEX (mode)]
20693 + nbits * ix86_cost->mult_bit
20694 + rtx_cost (op0, outer_code) + rtx_cost (op1, outer_code));
20703 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
20704 /* ??? SSE cost should be used here. */
20705 *total = ix86_cost->fdiv;
20706 else if (X87_FLOAT_MODE_P (mode))
20707 *total = ix86_cost->fdiv;
20708 else if (FLOAT_MODE_P (mode))
20709 /* ??? SSE vector cost should be used here. */
20710 *total = ix86_cost->fdiv;
20712 *total = ix86_cost->divide[MODE_INDEX (mode)];
20716 if (GET_MODE_CLASS (mode) == MODE_INT
20717 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
/* The three shapes below are costed as a single lea plus the cost of
   their operands. */
20719 if (GET_CODE (XEXP (x, 0)) == PLUS
20720 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
20721 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
20722 && CONSTANT_P (XEXP (x, 1)))
20724 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
20725 if (val == 2 || val == 4 || val == 8)
20727 *total = ix86_cost->lea;
20728 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
20729 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
20731 *total += rtx_cost (XEXP (x, 1), outer_code);
20735 else if (GET_CODE (XEXP (x, 0)) == MULT
20736 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
20738 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
20739 if (val == 2 || val == 4 || val == 8)
20741 *total = ix86_cost->lea;
20742 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
20743 *total += rtx_cost (XEXP (x, 1), outer_code);
20747 else if (GET_CODE (XEXP (x, 0)) == PLUS)
20749 *total = ix86_cost->lea;
20750 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
20751 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
20752 *total += rtx_cost (XEXP (x, 1), outer_code);
20759 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
20761 /* ??? SSE cost should be used here. */
20762 *total = ix86_cost->fadd;
20765 else if (X87_FLOAT_MODE_P (mode))
20767 *total = ix86_cost->fadd;
20770 else if (FLOAT_MODE_P (mode))
20772 /* ??? SSE vector cost should be used here. */
20773 *total = ix86_cost->fadd;
20781 if (!TARGET_64BIT && mode == DImode)
/* An operand whose mode is not DImode has its cost doubled by the
   shift. */
20783 *total = (ix86_cost->add * 2
20784 + (rtx_cost (XEXP (x, 0), outer_code)
20785 << (GET_MODE (XEXP (x, 0)) != DImode))
20786 + (rtx_cost (XEXP (x, 1), outer_code)
20787 << (GET_MODE (XEXP (x, 1)) != DImode)));
20793 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
20795 /* ??? SSE cost should be used here. */
20796 *total = ix86_cost->fchs;
20799 else if (X87_FLOAT_MODE_P (mode))
20801 *total = ix86_cost->fchs;
20804 else if (FLOAT_MODE_P (mode))
20806 /* ??? SSE vector cost should be used here. */
20807 *total = ix86_cost->fchs;
20813 if (!TARGET_64BIT && mode == DImode)
20814 *total = ix86_cost->add * 2;
20816 *total = ix86_cost->add;
20820 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
20821 && XEXP (XEXP (x, 0), 1) == const1_rtx
20822 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
20823 && XEXP (x, 1) == const0_rtx)
20825 /* This kind of construct is implemented using test[bwl].
20826 Treat it as if we had an AND. */
20827 *total = (ix86_cost->add
20828 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code)
20829 + rtx_cost (const1_rtx, outer_code));
20835 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
20840 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
20841 /* ??? SSE cost should be used here. */
20842 *total = ix86_cost->fabs;
20843 else if (X87_FLOAT_MODE_P (mode))
20844 *total = ix86_cost->fabs;
20845 else if (FLOAT_MODE_P (mode))
20846 /* ??? SSE vector cost should be used here. */
20847 *total = ix86_cost->fabs;
20851 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
20852 /* ??? SSE cost should be used here. */
20853 *total = ix86_cost->fsqrt;
20854 else if (X87_FLOAT_MODE_P (mode))
20855 *total = ix86_cost->fsqrt;
20856 else if (FLOAT_MODE_P (mode))
20857 /* ??? SSE vector cost should be used here. */
20858 *total = ix86_cost->fsqrt;
20862 if (XINT (x, 1) == UNSPEC_TP)
/* Counter that gives every stub written by machopic_output_stub its own
   label number. */
20873 static int current_machopic_label_num;
20875 /* Given a symbol name and its associated stub, write out the
20876 definition of the stub. FILE is the output stream, SYMB the symbol
   name (its name encoding is stripped first) and STUB the stub's label.
   The output consists of the stub itself, its binder, and a lazy pointer
   entry that initially refers to the binder. */
20879 machopic_output_stub (FILE *file, const char *symb, const char *stub)
20881 unsigned int length;
20882 char *binder_name, *symbol_name, lazy_ptr_name[32];
20883 int label = ++current_machopic_label_num;
20885 /* For 64-bit we shouldn't get here. */
20886 gcc_assert (!TARGET_64BIT);
20888 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
20889 symb = (*targetm.strip_name_encoding) (symb);
20891 length = strlen (stub);
20892 binder_name = alloca (length + 32);
20893 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
20895 length = strlen (symb);
20896 symbol_name = alloca (length + 32);
20897 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
20899 sprintf (lazy_ptr_name, "L%d$lz", label);
20902 switch_to_section (darwin_sections[machopic_picsymbol_stub_section]);
20904 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
20906 fprintf (file, "%s:\n", stub);
20907 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
20911 fprintf (file, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label, label);
20912 fprintf (file, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
20913 fprintf (file, "\tjmp\t*%%edx\n");
20916 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
20918 fprintf (file, "%s:\n", binder_name);
20922 fprintf (file, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
20923 fprintf (file, "\tpushl\t%%eax\n");
20926 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
20928 fprintf (file, "\tjmp\tdyld_stub_binding_helper\n");
20930 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
20931 fprintf (file, "%s:\n", lazy_ptr_name);
20932 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
20933 fprintf (file, "\t.long %s\n", binder_name);
/* End-of-file processing for Darwin on x86; delegates to
   darwin_file_end. */
20937 darwin_x86_file_end (void)
20939 darwin_file_end ();
20942 #endif /* TARGET_MACHO */
20944 /* Order the registers for register allocator. Fills reg_alloc_order
   with the general purpose registers first, then the x87 and SSE
   registers in an order that depends on TARGET_SSE_MATH, then the MMX
   registers; any remaining entries are set to 0. */
20947 x86_order_regs_for_local_alloc (void)
20952 /* First allocate the local (call-used) general purpose registers. */
20953 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
20954 if (GENERAL_REGNO_P (i) && call_used_regs[i])
20955 reg_alloc_order [pos++] = i;
20957 /* Global (not call-used) general purpose registers. */
20958 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
20959 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
20960 reg_alloc_order [pos++] = i;
20962 /* x87 registers come first in case we are doing FP math
20964 if (!TARGET_SSE_MATH)
20965 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
20966 reg_alloc_order [pos++] = i;
20968 /* SSE registers. */
20969 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
20970 reg_alloc_order [pos++] = i;
20971 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
20972 reg_alloc_order [pos++] = i;
20974 /* x87 registers. */
20975 if (TARGET_SSE_MATH)
20976 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
20977 reg_alloc_order [pos++] = i;
20979 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
20980 reg_alloc_order [pos++] = i;
20982 /* Initialize the rest of array as we do not allocate some registers
20984 while (pos < FIRST_PSEUDO_REGISTER)
20985 reg_alloc_order [pos++] = 0;
20988 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
20989 struct attribute_spec.handler. The attribute is rejected with a
   warning (and *NO_ADD_ATTRS set) when it is not applied to a record or
   union type, or when the type already carries the opposite
   attribute. */
20991 ix86_handle_struct_attribute (tree *node, tree name,
20992 tree args ATTRIBUTE_UNUSED,
20993 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
20996 if (DECL_P (*node))
20998 if (TREE_CODE (*node) == TYPE_DECL)
20999 type = &TREE_TYPE (*node);
21004 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
21005 || TREE_CODE (*type) == UNION_TYPE)))
21007 warning (OPT_Wattributes, "%qs attribute ignored",
21008 IDENTIFIER_POINTER (name));
21009 *no_add_attrs = true;
21012 else if ((is_attribute_p ("ms_struct", name)
21013 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
21014 || ((is_attribute_p ("gcc_struct", name)
21015 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
21017 warning (OPT_Wattributes, "%qs incompatible attribute ignored",
21018 IDENTIFIER_POINTER (name));
21019 *no_add_attrs = true;
/* Return nonzero if RECORD_TYPE should use the MS bitfield layout:
   either TARGET_MS_BITFIELD_LAYOUT is in effect and the type is not
   marked "gcc_struct", or the type is explicitly marked "ms_struct". */
21026 ix86_ms_bitfield_layout_p (tree record_type)
21028 return (TARGET_MS_BITFIELD_LAYOUT &&
21029 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
21030 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
21033 /* Returns an expression indicating where the `this' parameter is
21034 located on entry to the FUNCTION. The result is a REG or a
   stack-pointer-relative MEM. When aggregate_value_p is nonzero for the
   function's return type, `this' is taken from the second parameter
   register (DImode case) or from offset 8 rather than 4 from the stack
   pointer -- presumably because a hidden return-slot pointer comes
   first; confirm against aggregate_value_p. */
21037 x86_this_parameter (tree function)
21039 tree type = TREE_TYPE (function);
21040 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
21044 const int *parm_regs;
21046 if (TARGET_64BIT_MS_ABI)
21047 parm_regs = x86_64_ms_abi_int_parameter_registers;
21049 parm_regs = x86_64_int_parameter_registers;
21050 return gen_rtx_REG (DImode, parm_regs[aggr]);
21053 if (ix86_function_regparm (type, function) > 0
21054 && !type_has_variadic_args_p (type))
21057 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
21059 return gen_rtx_REG (SImode, regno);
21062 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, aggr ? 8 : 4));
21065 /* Determine whether x86_output_mi_thunk can succeed. THUNK and DELTA
   are unused; the visible checks look at FUNCTION's regparm count, at
   the need for a GOT reference, and (per the comments) at
   VCALL_OFFSET. */
21068 x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED,
21069 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
21070 HOST_WIDE_INT vcall_offset, tree function)
21072 /* 64-bit can handle anything. */
21076 /* For 32-bit, everything's fine if we have one free register, i.e.
   FUNCTION uses fewer than three register parameters. */
21077 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
21080 /* Need a free register for vcall_offset. */
21084 /* Need a free register for GOT references. */
21085 if (flag_pic && !(*targetm.binds_local_p) (function))
21088 /* Otherwise ok. */
21092 /* Output the assembler code for a thunk function. THUNK_DECL is the
21093 declaration for the thunk function itself, FUNCTION is the decl for
21094 the target function. DELTA is an immediate constant offset to be
21095 added to THIS. If VCALL_OFFSET is nonzero, the word at
21096 *(*this + vcall_offset) should be added to THIS. After the
   adjustments the thunk jumps to FUNCTION. */
21099 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
21100 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
21101 HOST_WIDE_INT vcall_offset, tree function)
21104 rtx this_param = x86_this_parameter (function);
21107 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
21108 pull it in now and let DELTA benefit. */
21109 if (REG_P (this_param))
21110 this_reg = this_param;
21111 else if (vcall_offset)
21113 /* Put the this parameter into %eax. */
21114 xops[0] = this_param;
21115 xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
21116 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
21119 this_reg = NULL_RTX;
21121 /* Adjust the this parameter by a fixed constant. */
21124 xops[0] = GEN_INT (delta);
21125 xops[1] = this_reg ? this_reg : this_param;
/* A DELTA that is not a valid x86_64_general_operand goes through
   %r10 first. */
21128 if (!x86_64_general_operand (xops[0], DImode))
21130 tmp = gen_rtx_REG (DImode, R10_REG);
21132 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
21134 xops[1] = this_param;
21136 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
21139 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
21142 /* Adjust the this parameter by a value stored in the vtable. */
21146 tmp = gen_rtx_REG (DImode, R10_REG);
21149 int tmp_regno = 2 /* ECX */;
21150 if (lookup_attribute ("fastcall",
21151 TYPE_ATTRIBUTES (TREE_TYPE (function))))
21152 tmp_regno = 0 /* EAX */;
21153 tmp = gen_rtx_REG (SImode, tmp_regno);
21156 xops[0] = gen_rtx_MEM (Pmode, this_reg);
21159 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
21161 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
21163 /* Adjust the this parameter. */
21164 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
/* If TMP + VCALL_OFFSET is not a valid memory operand, load the
   offset into %r11 and address TMP + %r11 instead. */
21165 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
21167 rtx tmp2 = gen_rtx_REG (DImode, R11_REG);
21168 xops[0] = GEN_INT (vcall_offset);
21170 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
21171 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
21173 xops[1] = this_reg;
21175 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
21177 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
21180 /* If necessary, drop THIS back to its stack slot. */
21181 if (this_reg && this_reg != this_param)
21183 xops[0] = this_reg;
21184 xops[1] = this_param;
21185 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
21188 xops[0] = XEXP (DECL_RTL (function), 0);
21191 if (!flag_pic || (*targetm.binds_local_p) (function))
21192 output_asm_insn ("jmp\t%P0", xops);
21193 /* All thunks should be in the same object as their target,
21194 and thus binds_local_p should be true. */
21195 else if (TARGET_64BIT_MS_ABI)
21196 gcc_unreachable ();
21199 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
21200 tmp = gen_rtx_CONST (Pmode, tmp);
21201 tmp = gen_rtx_MEM (QImode, tmp);
21203 output_asm_insn ("jmp\t%A0", xops);
21208 if (!flag_pic || (*targetm.binds_local_p) (function))
21209 output_asm_insn ("jmp\t%P0", xops);
21214 rtx sym_ref = XEXP (DECL_RTL (function), 0);
21215 tmp = (gen_rtx_SYMBOL_REF
21217 machopic_indirection_name (sym_ref, /*stub_p=*/true)));
21218 tmp = gen_rtx_MEM (QImode, tmp);
21220 output_asm_insn ("jmp\t%0", xops);
21223 #endif /* TARGET_MACHO */
21225 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
21226 output_set_got (tmp, NULL_RTX);
21229 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
21230 output_asm_insn ("jmp\t{*}%1", xops);
/* Start-of-file output. After the default_file_start and
   darwin_file_start calls, write to asm_out_file the .version directive
   if X86_FILE_START_VERSION_DIRECTIVE, the __fltused global if
   X86_FILE_START_FLTUSED, and .intel_syntax when the Intel assembler
   dialect is selected. */
21236 x86_file_start (void)
21238 default_file_start ();
21240 darwin_file_start ();
21242 if (X86_FILE_START_VERSION_DIRECTIVE)
21243 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
21244 if (X86_FILE_START_FLTUSED)
21245 fputs ("\t.global\t__fltused\n", asm_out_file);
21246 if (ix86_asm_dialect == ASM_INTEL)
21247 fputs ("\t.intel_syntax\n", asm_out_file);
/* Return the alignment to use for FIELD, given the alignment COMPUTED
   for it so far. TARGET_64BIT and TARGET_ALIGN_DOUBLE are tested up
   front. Otherwise the mode of the field's type (the element type for
   an array) is inspected: for DFmode, DCmode and the integer and
   complex integer mode classes the result is capped at 32. */
21251 x86_field_alignment (tree field, int computed)
21253 enum machine_mode mode;
21254 tree type = TREE_TYPE (field);
21256 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
21258 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
21259 ? get_inner_array_type (type) : type);
21260 if (mode == DFmode || mode == DCmode
21261 || GET_MODE_CLASS (mode) == MODE_INT
21262 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
21263 return MIN (32, computed);
21267 /* Output assembler code to FILE to increment profiler label # LABELNO
21268 for profiling a function entry. Three sequences appear below: a
   %rip-relative one, a GOT-based one addressed through %ebx, and an
   absolute one. Each loads the address of the counter label only when
   NO_PROFILE_COUNTERS is not defined, and then calls MCOUNT_NAME. */
21270 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
21274 #ifndef NO_PROFILE_COUNTERS
21275 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
21278 if (!TARGET_64BIT_MS_ABI && flag_pic)
21279 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
21281 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
21285 #ifndef NO_PROFILE_COUNTERS
21286 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
21287 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
21289 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
21293 #ifndef NO_PROFILE_COUNTERS
21294 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
21295 PROFILE_COUNT_REGISTER);
21297 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
21301 /* We don't have exact information about the insn sizes, but we may assume
21302 quite safely that we are informed about all 1 byte insns and memory
21303 address sizes. This is enough to eliminate unnecessary padding in
21307 min_insn_size (rtx insn)
21311 if (!INSN_P (insn) || !active_insn_p (insn))
21314 /* Discard alignments we've emitted and jump table data. */
21315 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
21316 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
21319 && (GET_CODE (PATTERN (insn)) == ADDR_VEC
21320 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
21323 /* Important case - calls are always 5 bytes.
21324 It is common to have many calls in a row. */
21326 && symbolic_reference_mentioned_p (PATTERN (insn))
21327 && !SIBLING_CALL_P (insn))
21329 if (get_attr_length (insn) <= 1)
21332 /* For normal instructions we may rely on the sizes of addresses
21333 and the presence of symbol to require 4 bytes of encoding.
21334 This is not the case for jumps where references are PC relative. */
21335 if (!JUMP_P (insn))
21337 l = get_attr_length_address (insn);
21338 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
21347 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
21351 ix86_avoid_jump_misspredicts (void)
21353 rtx insn, start = get_insns ();
21354 int nbytes = 0, njumps = 0;
21357 /* Look for all minimal intervals of instructions containing 4 jumps.
21358 The intervals are bounded by START and INSN. NBYTES is the total
21359 size of instructions in the interval including INSN and not including
21360 START. When the NBYTES is smaller than 16 bytes, it is possible
21361 that the end of START and INSN ends up in the same 16byte page.
21363 The smallest offset in the page INSN can start is the case where START
21364 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
21365 We add p2align to 16byte window with maxskip 15 - NBYTES + sizeof (INSN).
21367 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
21370 nbytes += min_insn_size (insn);
21372 fprintf(dump_file, "Insn %i estimated to %i bytes\n",
21373 INSN_UID (insn), min_insn_size (insn));
21375 && GET_CODE (PATTERN (insn)) != ADDR_VEC
21376 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
21384 start = NEXT_INSN (start);
21385 if ((JUMP_P (start)
21386 && GET_CODE (PATTERN (start)) != ADDR_VEC
21387 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
21389 njumps--, isjump = 1;
21392 nbytes -= min_insn_size (start);
21394 gcc_assert (njumps >= 0);
21396 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
21397 INSN_UID (start), INSN_UID (insn), nbytes);
21399 if (njumps == 3 && isjump && nbytes < 16)
21401 int padsize = 15 - nbytes + min_insn_size (insn);
21404 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
21405 INSN_UID (insn), padsize);
21406 emit_insn_before (gen_align (GEN_INT (padsize)), insn);
21411 /* AMD Athlon works faster
21412 when RET is not the destination of a conditional jump or directly preceded
21413 by another jump instruction. We avoid the penalty by emitting
21414 gen_return_internal_long before the RET instructions in such cases.
   Only returns that end a maybe-hot predecessor block of the exit block
   are considered. */
21416 ix86_pad_returns (void)
21421 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
21423 basic_block bb = e->src;
21424 rtx ret = BB_END (bb);
21426 bool replace = false;
21428 if (!JUMP_P (ret) || GET_CODE (PATTERN (ret)) != RETURN
21429 || !maybe_hot_bb_p (bb))
21431 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
21432 if (active_insn_p (prev) || LABEL_P (prev))
21434 if (prev && LABEL_P (prev))
21439 FOR_EACH_EDGE (e, ei, bb->preds)
21440 if (EDGE_FREQUENCY (e) && e->src->index >= 0
21441 && !(e->flags & EDGE_FALLTHRU))
21446 prev = prev_active_insn (ret);
21448 && ((JUMP_P (prev) && any_condjump_p (prev))
21451 /* Empty functions get branch mispredict even when the jump destination
21452 is not visible to us. */
21453 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
21458 emit_insn_before (gen_return_internal_long (), ret);
21464 /* Implement machine specific optimizations. We implement padding of returns
21465 for K8 CPUs and a pass to avoid 4 jumps in a single 16 byte window.
   Each pass runs only when its target flag is set and we are optimizing,
   but not optimizing for size. */
21469 if (TARGET_PAD_RETURNS && optimize && !optimize_size)
21470 ix86_pad_returns ();
21471 if (TARGET_FOUR_JUMP_LIMIT && optimize && !optimize_size)
21472 ix86_avoid_jump_misspredicts ();
21475 /* Return nonzero when INSN uses a QImode register that must be represented
   via REX prefix (any register operand numbered 4 or above).
21478 x86_extended_QIreg_mentioned_p (rtx insn)
21481 extract_insn_cached (insn);
21482 for (i = 0; i < recog_data.n_operands; i++)
21483 if (REG_P (recog_data.operand[i])
21484 && REGNO (recog_data.operand[i]) >= 4)
21489 /* Return nonzero when P points to a register encoded via REX prefix,
   i.e. a REX integer register or a REX SSE register. DATA is unused.
21490 Called via for_each_rtx. */
21492 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
21494 unsigned int regno;
21497 regno = REGNO (*p);
21498 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
21501 /* Return true when the pattern of INSN mentions a register that must be
   encoded using REX; the pattern is walked with for_each_rtx and
   extended_reg_mentioned_1.
21504 x86_extended_reg_mentioned_p (rtx insn)
21506 return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
21509 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
21510 optabs would emit if we didn't have TFmode patterns. OPERANDS[1] is
   the integer input; its mode must be SImode or DImode. */
21513 x86_emit_floatuns (rtx operands[2])
21515 rtx neglab, donelab, i0, i1, f0, in, out;
21516 enum machine_mode mode, inmode;
21518 inmode = GET_MODE (operands[1]);
21519 gcc_assert (inmode == SImode || inmode == DImode);
21522 in = force_reg (inmode, operands[1]);
21523 mode = GET_MODE (out);
21524 neglab = gen_label_rtx ();
21525 donelab = gen_label_rtx ();
21526 f0 = gen_reg_rtx (mode);
/* Inputs that are negative when read as signed take the NEGLAB path;
   everything else is converted directly. */
21528 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
21530 expand_float (out, in, 0);
21532 emit_jump_insn (gen_jump (donelab));
21535 emit_label (neglab);
/* Halve the input while ORing the shifted-out low bit back in, convert
   the halved value, then double the result. */
21537 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
21539 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
21541 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
21543 expand_float (f0, i0, 0);
21545 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
21547 emit_label (donelab);
21550 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
21551 with all elements equal to VAL. Return true if successful. */
21554 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
21555 rtx target, rtx val)
21557 enum machine_mode smode, wsmode, wvmode;
21572 val = force_reg (GET_MODE_INNER (mode), val);
21573 x = gen_rtx_VEC_DUPLICATE (mode, val);
21574 emit_insn (gen_rtx_SET (VOIDmode, target, x));
21580 if (TARGET_SSE || TARGET_3DNOW_A)
21582 val = gen_lowpart (SImode, val);
21583 x = gen_rtx_TRUNCATE (HImode, val);
21584 x = gen_rtx_VEC_DUPLICATE (mode, x);
21585 emit_insn (gen_rtx_SET (VOIDmode, target, x));
21607 /* Extend HImode to SImode using a paradoxical SUBREG. */
21608 tmp1 = gen_reg_rtx (SImode);
21609 emit_move_insn (tmp1, gen_lowpart (SImode, val));
21610 /* Insert the SImode value as low element of V4SImode vector. */
21611 tmp2 = gen_reg_rtx (V4SImode);
21612 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
21613 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
21614 CONST0_RTX (V4SImode),
21616 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
21617 /* Cast the V4SImode vector back to a V8HImode vector. */
21618 tmp1 = gen_reg_rtx (V8HImode);
21619 emit_move_insn (tmp1, gen_lowpart (V8HImode, tmp2));
21620 /* Duplicate the low short through the whole low SImode word. */
21621 emit_insn (gen_sse2_punpcklwd (tmp1, tmp1, tmp1));
21622 /* Cast the V8HImode vector back to a V4SImode vector. */
21623 tmp2 = gen_reg_rtx (V4SImode);
21624 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
21625 /* Replicate the low element of the V4SImode vector. */
21626 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
21627 /* Cast the V4SImode back to V8HImode, and store in target. */
21628 emit_move_insn (target, gen_lowpart (V8HImode, tmp2));
21639 /* Extend QImode to SImode using a paradoxical SUBREG. */
21640 tmp1 = gen_reg_rtx (SImode);
21641 emit_move_insn (tmp1, gen_lowpart (SImode, val));
21642 /* Insert the SImode value as low element of V4SImode vector. */
21643 tmp2 = gen_reg_rtx (V4SImode);
21644 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
21645 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
21646 CONST0_RTX (V4SImode),
21648 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
21649 /* Cast the V4SImode vector back to a V16QImode vector. */
21650 tmp1 = gen_reg_rtx (V16QImode);
21651 emit_move_insn (tmp1, gen_lowpart (V16QImode, tmp2));
21652 /* Duplicate the low byte through the whole low SImode word; this
   takes two unpack steps. */
21653 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
21654 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
21655 /* Cast the V16QImode vector back to a V4SImode vector. */
21656 tmp2 = gen_reg_rtx (V4SImode);
21657 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
21658 /* Replicate the low element of the V4SImode vector. */
21659 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
21660 /* Cast the V4SImode back to V16QImode, and store in target. */
21661 emit_move_insn (target, gen_lowpart (V16QImode, tmp2));
21669 /* Replicate the value once into the next wider mode and recurse:
   VAL is zero-extended to WSMODE and ORed with a copy of itself shifted
   left by the bit size of SMODE. */
21670 val = convert_modes (wsmode, smode, val, true);
21671 x = expand_simple_binop (wsmode, ASHIFT, val,
21672 GEN_INT (GET_MODE_BITSIZE (smode)),
21673 NULL_RTX, 1, OPTAB_LIB_WIDEN);
21674 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
21676 x = gen_reg_rtx (wvmode);
21677 if (!ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val))
21678 gcc_unreachable ();
21679 emit_move_insn (target, gen_lowpart (mode, x));
21687 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
21688 of mode MODE whose ONE_VAR element is VAR, and other elements are zero.
   Return true
21692 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
21693 rtx target, rtx var, int one_var)
21695 enum machine_mode vsimode;
21711 var = force_reg (GET_MODE_INNER (mode), var);
21712 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
21713 emit_insn (gen_rtx_SET (VOIDmode, target, x));
21718 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
21719 new_target = gen_reg_rtx (mode);
21721 new_target = target;
21722 var = force_reg (GET_MODE_INNER (mode), var);
21723 x = gen_rtx_VEC_DUPLICATE (mode, var);
21724 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
21725 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
21728 /* We need to shuffle the value to the correct position, so
21729 create a new pseudo to store the intermediate result. */
21731 /* With SSE2, we can use the integer shuffle insns. */
21732 if (mode != V4SFmode && TARGET_SSE2)
21734 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
21736 GEN_INT (one_var == 1 ? 0 : 1),
21737 GEN_INT (one_var == 2 ? 0 : 1),
21738 GEN_INT (one_var == 3 ? 0 : 1)));
21739 if (target != new_target)
21740 emit_move_insn (target, new_target);
21744 /* Otherwise convert the intermediate result to V4SFmode and
21745 use the SSE1 shuffle instructions. */
21746 if (mode != V4SFmode)
21748 tmp = gen_reg_rtx (V4SFmode);
21749 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
21754 emit_insn (gen_sse_shufps_1 (tmp, tmp, tmp,
21756 GEN_INT (one_var == 1 ? 0 : 1),
21757 GEN_INT (one_var == 2 ? 0+4 : 1+4),
21758 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
21760 if (mode != V4SFmode)
21761 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
21762 else if (tmp != target)
21763 emit_move_insn (target, tmp);
21765 else if (target != new_target)
21766 emit_move_insn (target, new_target);
21771 vsimode = V4SImode;
21777 vsimode = V2SImode;
21783 /* Zero extend the variable element to SImode and recurse on the
   corresponding SImode vector mode; the result is then copied to TARGET
   as a lowpart in MODE. */
21784 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
21786 x = gen_reg_rtx (vsimode);
21787 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
21789 gcc_unreachable ();
21791 emit_move_insn (target, gen_lowpart (mode, x));
21799 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
21800 consisting of the values in VALS. It is known that all elements
21801 except ONE_VAR are constants. Return true if successful. */
21804 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
21805 rtx target, rtx vals, int one_var)
21807 rtx var = XVECEXP (vals, 0, one_var);
21808 enum machine_mode wmode;
/* CONST_VEC is a copy of VALS in which the variable element has been
   replaced by zero, turned into a CONST_VECTOR. */
21811 const_vec = copy_rtx (vals);
21812 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
21813 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
21821 /* For the two element vectors, it's just as easy to use
21822 the general case. */
21838 /* There's no way to set one QImode entry easily. Combine
21839 the variable value with its adjacent constant value, and
21840 promote to an HImode set. The adjacent element is the one at
   index ONE_VAR ^ 1. */
21841 x = XVECEXP (vals, 0, one_var ^ 1);
21844 var = convert_modes (HImode, QImode, var, true);
21845 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
21846 NULL_RTX, 1, OPTAB_LIB_WIDEN);
21847 x = GEN_INT (INTVAL (x) & 0xff);
21851 var = convert_modes (HImode, QImode, var, true);
21852 x = gen_int_mode (INTVAL (x) << 8, HImode);
21854 if (x != const0_rtx)
21855 var = expand_simple_binop (HImode, IOR, var, x, var,
21856 1, OPTAB_LIB_WIDEN);
21858 x = gen_reg_rtx (wmode);
21859 emit_move_insn (x, gen_lowpart (wmode, const_vec));
21860 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
21862 emit_move_insn (target, gen_lowpart (mode, x));
21869 emit_move_insn (target, const_vec);
21870 ix86_expand_vector_set (mmx_ok, target, var, one_var);
21874 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
21875 all values variable, and none identical. */
21878 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
21879 rtx target, rtx vals)
/* HALF_MODE starts out as the element mode, which is the operand mode of
   the VEC_CONCAT used for two-element vectors; the four-element cases
   below replace it with a two-element vector mode.  */
21881 enum machine_mode half_mode = GET_MODE_INNER (mode);
21882 rtx op0 = NULL, op1 = NULL;
21883 bool use_vec_concat = false;
21889 if (!mmx_ok && !TARGET_SSE)
21895 /* For the two element vectors, we always implement VEC_CONCAT. */
21896 op0 = XVECEXP (vals, 0, 0);
21897 op1 = XVECEXP (vals, 0, 1);
21898 use_vec_concat = true;
21902 half_mode = V2SFmode;
21905 half_mode = V2SImode;
21911 /* For V4SF and V4SI, we implement a concat of two V2 vectors.
21912 Recurse to load the two halves. */
21914 op0 = gen_reg_rtx (half_mode);
21915 v = gen_rtvec (2, XVECEXP (vals, 0, 0), XVECEXP (vals, 0, 1));
21916 ix86_expand_vector_init (false, op0, gen_rtx_PARALLEL (half_mode, v));
21918 op1 = gen_reg_rtx (half_mode);
21919 v = gen_rtvec (2, XVECEXP (vals, 0, 2), XVECEXP (vals, 0, 3));
21920 ix86_expand_vector_init (false, op1, gen_rtx_PARALLEL (half_mode, v));
21922 use_vec_concat = true;
21933 gcc_unreachable ();
21936 if (use_vec_concat)
/* Both VEC_CONCAT operands are forced into registers of HALF_MODE.  */
21938 if (!register_operand (op0, half_mode))
21939 op0 = force_reg (half_mode, op0);
21940 if (!register_operand (op1, half_mode))
21941 op1 = force_reg (half_mode, op1);
21943 emit_insn (gen_rtx_SET (VOIDmode, target,
21944 gen_rtx_VEC_CONCAT (mode, op0, op1)));
/* Otherwise pack the elements into word_mode integers with shifts and
   ORs, then move those words into the vector.  */
21948 int i, j, n_elts, n_words, n_elt_per_word;
21949 enum machine_mode inner_mode;
21950 rtx words[4], shift;
21952 inner_mode = GET_MODE_INNER (mode);
21953 n_elts = GET_MODE_NUNITS (mode);
21954 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
21955 n_elt_per_word = n_elts / n_words;
21956 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
21958 for (i = 0; i < n_words; ++i)
21960 rtx word = NULL_RTX;
21962 for (j = 0; j < n_elt_per_word; ++j)
/* Visit the elements of word I from the highest index down, so that each
   shift-left/IOR step leaves the lowest-indexed element in the least
   significant bits.  */
21964 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
21965 elt = convert_modes (word_mode, inner_mode, elt, true);
21971 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
21972 word, 1, OPTAB_LIB_WIDEN);
21973 word = expand_simple_binop (word_mode, IOR, word, elt,
21974 word, 1, OPTAB_LIB_WIDEN);
/* One word: the packed word reinterpreted in MODE is the whole vector.  */
21982 emit_move_insn (target, gen_lowpart (mode, words[0]));
21983 else if (n_words == 2)
/* Two words: assemble in a temporary by writing its low and high
   word-sized parts separately.  NOTE(review): the CLOBBER presumably
   marks TMP as fully (re)defined ahead of the two partial stores --
   confirm.  */
21985 rtx tmp = gen_reg_rtx (mode);
21986 emit_insn (gen_rtx_CLOBBER (VOIDmode, tmp));
21987 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
21988 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
21989 emit_move_insn (target, tmp);
21991 else if (n_words == 4)
/* Four words: treat the packed words as a V4SImode initializer, recurse,
   and reinterpret the result in MODE.  */
21993 rtx tmp = gen_reg_rtx (V4SImode);
21994 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
21995 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
21996 emit_move_insn (target, gen_lowpart (mode, tmp));
21999 gcc_unreachable ();
22003 /* Initialize vector TARGET via VALS. Suppress the use of MMX
22004 instructions unless MMX_OK is true. */
22007 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
22009 enum machine_mode mode = GET_MODE (target);
22010 enum machine_mode inner_mode = GET_MODE_INNER (mode);
22011 int n_elts = GET_MODE_NUNITS (mode);
22012 int n_var = 0, one_var = -1;
22013 bool all_same = true, all_const_zero = true;
22017 for (i = 0; i < n_elts; ++i)
22019 x = XVECEXP (vals, 0, i);
22020 if (!CONSTANT_P (x))
22021 n_var++, one_var = i;
22022 else if (x != CONST0_RTX (inner_mode))
22023 all_const_zero = false;
22024 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
22028 /* Constants are best loaded from the constant pool. */
22031 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
22035 /* If all values are identical, broadcast the value. */
22037 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
22038 XVECEXP (vals, 0, 0)))
22041 /* Values where only one field is non-constant are best loaded from
22042 the pool and overwritten via move later. */
22046 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
22047 XVECEXP (vals, 0, one_var),
22051 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
22055 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
/* Store VAL into element ELT of vector TARGET, updating TARGET in place.
   MMX_OK is tested when deciding whether the vec_merge form may be used
   for one of the modes below (see the use_vec_merge assignments).  */
22059 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
22061 enum machine_mode mode = GET_MODE (target);
22062 enum machine_mode inner_mode = GET_MODE_INNER (mode);
22063 bool use_vec_merge = false;
22072 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
22073 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
22075 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
22077 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
22078 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
22084 use_vec_merge = TARGET_SSE4_1;
22092 /* For the two element vectors, we implement a VEC_CONCAT with
22093 the extraction of the other element. */
22095 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
22096 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
22099 op0 = val, op1 = tmp;
22101 op0 = tmp, op1 = val;
22103 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
22104 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
22109 use_vec_merge = TARGET_SSE4_1;
22116 use_vec_merge = true;
22120 /* tmp = target = A B C D */
22121 tmp = copy_to_reg (target);
22122 /* target = A A B B */
22123 emit_insn (gen_sse_unpcklps (target, target, target));
22124 /* target = X A B B */
22125 ix86_expand_vector_set (false, target, val, 0);
22126 /* target = A X C D */
22127 emit_insn (gen_sse_shufps_1 (target, target, tmp,
22128 GEN_INT (1), GEN_INT (0),
22129 GEN_INT (2+4), GEN_INT (3+4)));
22133 /* tmp = target = A B C D */
22134 tmp = copy_to_reg (target);
22135 /* tmp = X B C D */
22136 ix86_expand_vector_set (false, tmp, val, 0);
22137 /* target = A B X D */
22138 emit_insn (gen_sse_shufps_1 (target, target, tmp,
22139 GEN_INT (0), GEN_INT (1),
22140 GEN_INT (0+4), GEN_INT (3+4)));
22144 /* tmp = target = A B C D */
22145 tmp = copy_to_reg (target);
22146 /* tmp = X B C D */
22147 ix86_expand_vector_set (false, tmp, val, 0);
22148 /* target = A B C X */
22149 emit_insn (gen_sse_shufps_1 (target, target, tmp,
22150 GEN_INT (0), GEN_INT (1),
22151 GEN_INT (2+4), GEN_INT (0+4)));
22155 gcc_unreachable ();
22160 use_vec_merge = TARGET_SSE4_1;
22164 /* Element 0 handled by vec_merge below. */
22167 use_vec_merge = true;
22173 /* With SSE2, use integer shuffles to swap element 0 and ELT,
22174 store into element 0, then shuffle them back. */
/* ORDER is the identity permutation with positions 0 and ELT exchanged;
   applying the same permutation a second time undoes the swap.  */
22178 order[0] = GEN_INT (elt);
22179 order[1] = const1_rtx;
22180 order[2] = const2_rtx;
22181 order[3] = GEN_INT (3);
22182 order[elt] = const0_rtx;
22184 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
22185 order[1], order[2], order[3]));
22187 ix86_expand_vector_set (false, target, val, 0);
22189 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
22190 order[1], order[2], order[3]));
22194 /* For SSE1, we have to reuse the V4SF code. */
22195 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
22196 gen_lowpart (SFmode, val), elt);
22201 use_vec_merge = TARGET_SSE2;
22204 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
22208 use_vec_merge = TARGET_SSE4_1;
/* vec_merge form: broadcast VAL to a full vector and merge it into
   TARGET under the single-bit mask 1 << ELT.  */
22218 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
22219 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
22220 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
/* Memory form: spill TARGET to a stack temporary, overwrite the element
   at byte offset ELT * element size, and reload the whole vector.  */
22224 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
22226 emit_move_insn (mem, target);
22228 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
22229 emit_move_insn (tmp, val);
22231 emit_move_insn (target, mem);
/* Extract element ELT of vector VEC into TARGET.  MMX_OK is tested when
   deciding whether the direct vec_select form may be used for one of the
   modes below (see the use_vec_extr assignments).  */
22236 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
22238 enum machine_mode mode = GET_MODE (vec);
22239 enum machine_mode inner_mode = GET_MODE_INNER (mode);
22240 bool use_vec_extr = false;
22253 use_vec_extr = true;
22257 use_vec_extr = TARGET_SSE4_1;
/* All four shuffle selectors name ELT, so every lane of TMP receives the
   wanted element.  */
22269 tmp = gen_reg_rtx (mode);
22270 emit_insn (gen_sse_shufps_1 (tmp, vec, vec,
22271 GEN_INT (elt), GEN_INT (elt),
22272 GEN_INT (elt+4), GEN_INT (elt+4)));
22276 tmp = gen_reg_rtx (mode);
22277 emit_insn (gen_sse_unpckhps (tmp, vec, vec));
22281 gcc_unreachable ();
22284 use_vec_extr = true;
22289 use_vec_extr = TARGET_SSE4_1;
/* Integer counterpart of the shuffle above: every lane of TMP receives
   element ELT.  */
22303 tmp = gen_reg_rtx (mode);
22304 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
22305 GEN_INT (elt), GEN_INT (elt),
22306 GEN_INT (elt), GEN_INT (elt)));
22310 tmp = gen_reg_rtx (mode);
22311 emit_insn (gen_sse2_punpckhdq (tmp, vec, vec));
22315 gcc_unreachable ();
22318 use_vec_extr = true;
22323 /* For SSE1, we have to reuse the V4SF code. */
22324 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
22325 gen_lowpart (V4SFmode, vec), elt);
22331 use_vec_extr = TARGET_SSE2;
22334 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
22338 use_vec_extr = TARGET_SSE4_1;
22342 /* ??? Could extract the appropriate HImode element and shift. */
/* Direct form: a VEC_SELECT of the single lane ELT.  */
22349 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
22350 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
22352 /* Let the rtl optimizers know about the zero extension performed. */
22353 if (inner_mode == QImode || inner_mode == HImode)
22355 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
22356 target = gen_lowpart (SImode, target);
22359 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
/* Memory form: spill VEC to a stack temporary and load the element from
   byte offset ELT * element size.  */
22363 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
22365 emit_move_insn (mem, vec);
22367 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
22368 emit_move_insn (target, tmp);
22372 /* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
22373 pattern to reduce; DEST is the destination; IN is the input vector. */
22376 ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
22378 rtx tmp1, tmp2, tmp3;
22380 tmp1 = gen_reg_rtx (V4SFmode);
22381 tmp2 = gen_reg_rtx (V4SFmode);
22382 tmp3 = gen_reg_rtx (V4SFmode);
/* Step 1: tmp2 = FN (movhlps (in, in), in).  NOTE(review): movhlps
   presumably brings the upper two elements of IN down to the lower two
   lanes of TMP1, so lanes 0 and 1 of TMP2 each combine two input
   elements -- confirm against the sse_movhlps pattern.  */
22384 emit_insn (gen_sse_movhlps (tmp1, in, in));
22385 emit_insn (fn (tmp2, tmp1, in));
/* Step 2: all four selectors pick element 1 of TMP2, so TMP3 holds that
   element in every lane; FN then combines it with TMP2 into DEST.  */
22387 emit_insn (gen_sse_shufps_1 (tmp3, tmp2, tmp2,
22388 GEN_INT (1), GEN_INT (1),
22389 GEN_INT (1+4), GEN_INT (1+4)));
22390 emit_insn (fn (dest, tmp2, tmp3));
22393 /* Target hook for scalar_mode_supported_p. */
22395 ix86_scalar_mode_supported_p (enum machine_mode mode)
22397 if (DECIMAL_FLOAT_MODE_P (mode))
22399 else if (mode == TFmode)
22400 return TARGET_64BIT;
22402 return default_scalar_mode_supported_p (mode);
22405 /* Implements target hook vector_mode_supported_p. */
22407 ix86_vector_mode_supported_p (enum machine_mode mode)
22409 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
22411 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
22413 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
22415 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
22420 /* Target hook for c_mode_for_suffix. */
22421 static enum machine_mode
22422 ix86_c_mode_for_suffix (char suffix)
22424 if (TARGET_64BIT && suffix == 'q')
22426 if (TARGET_MMX && suffix == 'w')
22432 /* Worker function for TARGET_MD_ASM_CLOBBERS.
22434 We do this in the new i386 backend to maintain source compatibility
22435 with the old cc0-based compiler. */
22438 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
22439 tree inputs ATTRIBUTE_UNUSED,
22442 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
22444 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
22449 /* Implements target vector targetm.asm.encode_section_info. This
22450 is not used by netware. */
22452 static void ATTRIBUTE_UNUSED
22453 ix86_encode_section_info (tree decl, rtx rtl, int first)
22455 default_encode_section_info (decl, rtl, first);
22457 if (TREE_CODE (decl) == VAR_DECL
22458 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
22459 && ix86_in_large_data_p (decl))
22460 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
22463 /* Worker function for REVERSE_CONDITION. */
22466 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
22468 return (mode != CCFPmode && mode != CCFPUmode
22469 ? reverse_condition (code)
22470 : reverse_condition_maybe_unordered (code));
22473 /* Output code to perform an x87 FP register move, from OPERANDS[1]
22477 output_387_reg_move (rtx insn, rtx *operands)
22479 if (REG_P (operands[0]))
22481 if (REG_P (operands[1])
22482 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
22484 if (REGNO (operands[0]) == FIRST_STACK_REG)
22485 return output_387_ffreep (operands, 0);
22486 return "fstp\t%y0";
22488 if (STACK_TOP_P (operands[0]))
22489 return "fld%z1\t%y1";
22492 else if (MEM_P (operands[0]))
22494 gcc_assert (REG_P (operands[1]));
22495 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
22496 return "fstp%z0\t%y0";
22499 /* There is no non-popping store to memory for XFmode.
22500 So if we need one, follow the store with a load. */
22501 if (GET_MODE (operands[0]) == XFmode)
22502 return "fstp%z0\t%y0\n\tfld%z0\t%y0";
22504 return "fst%z0\t%y0";
22511 /* Output code to perform a conditional jump to LABEL, if C2 flag in
22512 FP status register is set. */
22515 ix86_emit_fp_unordered_jump (rtx label)
22517 rtx reg = gen_reg_rtx (HImode);
/* Fetch the FP status word into the HImode pseudo REG.  */
22520 emit_insn (gen_x86_fnstsw_1 (reg));
/* SAHF path, taken when SAHF is available and either preferred for this
   tuning or we optimize for size: branch on the UNORDERED condition of
   the flags register.  */
22522 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_size))
22524 emit_insn (gen_x86_sahf_1 (reg));
22526 temp = gen_rtx_REG (CCmode, FLAGS_REG);
22527 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
/* Otherwise test REG against the mask 0x04 and branch when the result is
   nonzero.  NOTE(review): 0x04 is presumably the C2 bit as it appears in
   the high byte of the status word -- confirm against
   testqi_ext_ccno_0.  */
22531 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
22533 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
22534 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
22537 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
22538 gen_rtx_LABEL_REF (VOIDmode, label),
22540 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
22542 emit_jump_insn (temp);
/* Predict the jump as taken with 10% probability.  */
22543 predict_jump (REG_BR_PROB_BASE * 10 / 100);
22546 /* Output code to perform a log1p XFmode calculation. */
22548 void ix86_emit_i387_log1p (rtx op0, rtx op1)
22550 rtx label1 = gen_label_rtx ();
22551 rtx label2 = gen_label_rtx ();
22553 rtx tmp = gen_reg_rtx (XFmode);
22554 rtx tmp2 = gen_reg_rtx (XFmode);
22556 emit_insn (gen_absxf2 (tmp, op1));
22557 emit_insn (gen_cmpxf (tmp,
22558 CONST_DOUBLE_FROM_REAL_VALUE (
22559 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
22561 emit_jump_insn (gen_bge (label1));
22563 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
22564 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
22565 emit_jump (label2);
22567 emit_label (label1);
22568 emit_move_insn (tmp, CONST1_RTX (XFmode));
22569 emit_insn (gen_addxf3 (tmp, op1, tmp));
22570 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
22571 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
22573 emit_label (label2);
22576 /* Output code to perform a Newton-Raphson approximation of a single precision
22577 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
22579 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode)
22581 rtx x0, x1, e0, e1, two;
22583 x0 = gen_reg_rtx (mode);
22584 e0 = gen_reg_rtx (mode);
22585 e1 = gen_reg_rtx (mode);
22586 x1 = gen_reg_rtx (mode);
/* The constant 2.0 is built as a scalar SFmode value and, for vector
   modes, expanded to a constant vector before being forced into a
   register of MODE.  */
22588 two = CONST_DOUBLE_FROM_REAL_VALUE (dconst2, SFmode);
22590 if (VECTOR_MODE_P (mode))
22591 two = ix86_build_const_vector (SFmode, true, two);
22593 two = force_reg (mode, two);
22595 /* a / b = a * rcp(b) * (2.0 - b * rcp(b)) */
22597 /* x0 = 1./b estimate */
22598 emit_insn (gen_rtx_SET (VOIDmode, x0,
22599 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
/* e0 = x0 * b */
22602 emit_insn (gen_rtx_SET (VOIDmode, e0,
22603 gen_rtx_MULT (mode, x0, b)));
/* e1 = 2.0 - e0 */
22605 emit_insn (gen_rtx_SET (VOIDmode, e1,
22606 gen_rtx_MINUS (mode, two, e0)));
/* x1 = x0 * e1, the refined reciprocal */
22608 emit_insn (gen_rtx_SET (VOIDmode, x1,
22609 gen_rtx_MULT (mode, x0, e1)));
/* res = a * x1 */
22611 emit_insn (gen_rtx_SET (VOIDmode, res,
22612 gen_rtx_MULT (mode, a, x1)));
22615 /* Output code to perform a Newton-Raphson approximation of a
22616 single precision floating point [reciprocal] square root. */
22618 void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
22621 rtx x0, e0, e1, e2, e3, three, half, zero, mask;
22623 x0 = gen_reg_rtx (mode);
22624 e0 = gen_reg_rtx (mode);
22625 e1 = gen_reg_rtx (mode);
22626 e2 = gen_reg_rtx (mode);
22627 e3 = gen_reg_rtx (mode);
/* The constants 3.0 and 0.5 are built as scalar SFmode values and, for
   vector modes, expanded to constant vectors before being forced into
   registers of MODE.  */
22629 three = CONST_DOUBLE_FROM_REAL_VALUE (dconst3, SFmode);
22630 half = CONST_DOUBLE_FROM_REAL_VALUE (dconsthalf, SFmode);
22632 mask = gen_reg_rtx (mode);
22634 if (VECTOR_MODE_P (mode))
22636 three = ix86_build_const_vector (SFmode, true, three);
22637 half = ix86_build_const_vector (SFmode, true, half);
22640 three = force_reg (mode, three);
22641 half = force_reg (mode, half);
22643 zero = force_reg (mode, CONST0_RTX(mode));
22645 /* sqrt(a) = 0.5 * a * rsqrtss(a) * (3.0 - a * rsqrtss(a) * rsqrtss(a))
22646 1.0 / sqrt(a) = 0.5 * rsqrtss(a) * (3.0 - a * rsqrtss(a) * rsqrtss(a)) */
22648 /* Compare a to zero. */
22649 emit_insn (gen_rtx_SET (VOIDmode, mask,
22650 gen_rtx_NE (mode, a, zero)));
22652 /* x0 = 1./sqrt(a) estimate */
22653 emit_insn (gen_rtx_SET (VOIDmode, x0,
22654 gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
22656 /* Filter out infinity. */
/* x0 &= mask: the estimate is cleared wherever the compare against zero
   above did not hold.  Vector modes perform the AND on the V4SFmode
   view of both registers.  */
22657 if (VECTOR_MODE_P (mode))
22658 emit_insn (gen_rtx_SET (VOIDmode, gen_lowpart (V4SFmode, x0),
22660 gen_lowpart (V4SFmode, x0),
22661 gen_lowpart (V4SFmode, mask))));
22663 emit_insn (gen_rtx_SET (VOIDmode, x0,
22664 gen_rtx_AND (mode, x0, mask)));
/* e0 = x0 * a */
22667 emit_insn (gen_rtx_SET (VOIDmode, e0,
22668 gen_rtx_MULT (mode, x0, a)));
/* e1 = e0 * x0 */
22670 emit_insn (gen_rtx_SET (VOIDmode, e1,
22671 gen_rtx_MULT (mode, e0, x0)));
/* e2 = 3.0 - e1 */
22673 emit_insn (gen_rtx_SET (VOIDmode, e2,
22674 gen_rtx_MINUS (mode, three, e1)));
/* e3 = 0.5 * x0 -- the leading factor of the 1.0 / sqrt(a) formula
   above.  */
22677 emit_insn (gen_rtx_SET (VOIDmode, e3,
22678 gen_rtx_MULT (mode, half, x0)));
/* e3 = 0.5 * e0 -- the leading factor of the sqrt(a) formula above,
   since e0 = a * x0.  */
22681 emit_insn (gen_rtx_SET (VOIDmode, e3,
22682 gen_rtx_MULT (mode, half, e0)));
22683 /* ret = e2 * e3 */
22684 emit_insn (gen_rtx_SET (VOIDmode, res,
22685 gen_rtx_MULT (mode, e2, e3)));
22688 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
22690 static void ATTRIBUTE_UNUSED
22691 i386_solaris_elf_named_section (const char *name, unsigned int flags,
22694 /* With Binutils 2.15, the "@unwind" marker must be specified on
22695 every occurrence of the ".eh_frame" section, not just the first
22698 && strcmp (name, ".eh_frame") == 0)
22700 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
22701 flags & SECTION_WRITE ? "aw" : "a");
22704 default_elf_asm_named_section (name, flags, decl);
22707 /* Return the mangling of TYPE if it is an extended fundamental type. */
22709 static const char *
22710 ix86_mangle_fundamental_type (tree type)
22712 switch (TYPE_MODE (type))
22715 /* __float128 is "g". */
22718 /* "long double" or __float80 is "e". */
22725 /* For 32-bit code we can save PIC register setup by using
22726 __stack_chk_fail_local hidden function instead of calling
22727 __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC
22728 register, so it is better to call __stack_chk_fail directly. */
22731 ix86_stack_protect_fail (void)
22733 return TARGET_64BIT
22734 ? default_external_stack_protect_fail ()
22735 : default_hidden_stack_protect_fail ();
22738 /* Select a format to encode pointers in exception handling data. CODE
22739 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
22740 true if the symbol may be affected by dynamic relocations.
22742 ??? All x86 object file formats are capable of representing this.
22743 After all, the relocation needed is the same as for the call insn.
22744 Whether or not a particular assembler allows us to enter such, I
22745 guess we'll have to see. */
22747 asm_preferred_eh_data_format (int code, int global)
22751 int type = DW_EH_PE_sdata8;
22753 || ix86_cmodel == CM_SMALL_PIC
22754 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
22755 type = DW_EH_PE_sdata4;
22756 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
22758 if (ix86_cmodel == CM_SMALL
22759 || (ix86_cmodel == CM_MEDIUM && code))
22760 return DW_EH_PE_udata4;
22761 return DW_EH_PE_absptr;
22764 /* Expand copysign from SIGN to the positive value ABS_VALUE
22765 storing in RESULT. If MASK is non-null, it shall be a mask to mask out
22768 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
22770 enum machine_mode mode = GET_MODE (sign);
22771 rtx sgn = gen_reg_rtx (mode);
22772 if (mask == NULL_RTX)
22774 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), false);
22775 if (!VECTOR_MODE_P (mode))
22777 /* We need to generate a scalar mode mask in this case. */
22778 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
22779 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
22780 mask = gen_reg_rtx (mode);
22781 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
22785 mask = gen_rtx_NOT (mode, mask);
22786 emit_insn (gen_rtx_SET (VOIDmode, sgn,
22787 gen_rtx_AND (mode, mask, sign)));
22788 emit_insn (gen_rtx_SET (VOIDmode, result,
22789 gen_rtx_IOR (mode, abs_value, sgn)));
22792 /* Expand fabs (OP0) and return a new rtx that holds the result. The
22793 mask for masking out the sign-bit is stored in *SMASK, if that is
22796 ix86_expand_sse_fabs (rtx op0, rtx *smask)
22798 enum machine_mode mode = GET_MODE (op0);
22801 xa = gen_reg_rtx (mode);
22802 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), true);
22803 if (!VECTOR_MODE_P (mode))
22805 /* We need to generate a scalar mode mask in this case. */
22806 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
22807 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
22808 mask = gen_reg_rtx (mode);
22809 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
22811 emit_insn (gen_rtx_SET (VOIDmode, xa,
22812 gen_rtx_AND (mode, op0, mask)));
22820 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
22821 swapping the operands if SWAP_OPERANDS is true. The expanded
22822 code is a forward jump to a newly created label in case the
22823 comparison is true. The generated label rtx is returned. */
22825 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
22826 bool swap_operands)
22837 label = gen_label_rtx ();
22838 tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
22839 emit_insn (gen_rtx_SET (VOIDmode, tmp,
22840 gen_rtx_COMPARE (CCFPUmode, op0, op1)));
22841 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
22842 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
22843 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
22844 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
22845 JUMP_LABEL (tmp) = label;
22850 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
22851 using comparison code CODE. Operands are swapped for the comparison if
22852 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
22854 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
22855 bool swap_operands)
22857 enum machine_mode mode = GET_MODE (op0);
22858 rtx mask = gen_reg_rtx (mode);
22867 if (mode == DFmode)
22868 emit_insn (gen_sse2_maskcmpdf3 (mask, op0, op1,
22869 gen_rtx_fmt_ee (code, mode, op0, op1)));
22871 emit_insn (gen_sse_maskcmpsf3 (mask, op0, op1,
22872 gen_rtx_fmt_ee (code, mode, op0, op1)));
22877 /* Generate and return a rtx of mode MODE for 2**n where n is the number
22878 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
22880 ix86_gen_TWO52 (enum machine_mode mode)
22882 REAL_VALUE_TYPE TWO52r;
22885 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
22886 TWO52 = const_double_from_real_value (TWO52r, mode);
22887 TWO52 = force_reg (mode, TWO52);
22892 /* Expand SSE sequence for computing lround from OP1 storing
22895 ix86_expand_lround (rtx op0, rtx op1)
22897 /* C code for the stuff we're doing below:
22898 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
22901 enum machine_mode mode = GET_MODE (op1);
22902 const struct real_format *fmt;
22903 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
22906 /* load nextafter (0.5, 0.0) */
22907 fmt = REAL_MODE_FORMAT (mode);
22908 real_2expN (&half_minus_pred_half, -(fmt->p) - 1);
/* pred_half = 0.5 - half_minus_pred_half */
22909 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
22911 /* adj = copysign (nextafter (0.5, 0.0), op1) */
22912 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
22913 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
22915 /* adj = op1 + adj */
22916 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
22918 /* op0 = (imode)adj */
22919 expand_fix (op0, adj, 0);
22922 /* Expand SSE2 sequence for computing lfloor or lceil from OPERAND1 storing
22925 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
22927 /* C code for the stuff we're doing below (for do_floor):
22929 xi -= (double)xi > op1 ? 1 : 0;
22932 enum machine_mode fmode = GET_MODE (op1);
22933 enum machine_mode imode = GET_MODE (op0);
22934 rtx ireg, freg, label, tmp;
22936 /* reg = (long)op1 */
22937 ireg = gen_reg_rtx (imode);
22938 expand_fix (ireg, op1, 0);
22940 /* freg = (double)reg */
22941 freg = gen_reg_rtx (fmode);
22942 expand_float (freg, ireg, 0);
22944 /* do_floor: ireg = (freg > op1) ? ireg - 1 : ireg.
   Otherwise the compare operands are swapped and 1 is added instead.
   The jump to LABEL skips the adjustment when the UNLE compare holds.  */
22945 label = ix86_expand_sse_compare_and_jump (UNLE,
22946 freg, op1, !do_floor);
22947 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
22948 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
22949 emit_move_insn (ireg, tmp);
22951 emit_label (label);
22952 LABEL_NUSES (label) = 1;
22954 emit_move_insn (op0, ireg);
22957 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
22958 result in OPERAND0. */
22960 ix86_expand_rint (rtx operand0, rtx operand1)
22962 /* C code for the stuff we're doing below:
22963 xa = fabs (operand1);
22964 if (!isless (xa, 2**52))
22966 xa = xa + 2**52 - 2**52;
22967 return copysign (xa, operand1);
22969 enum machine_mode mode = GET_MODE (operand0);
22970 rtx res, xa, label, TWO52, mask;
22972 res = gen_reg_rtx (mode);
22973 emit_move_insn (res, operand1);
22975 /* xa = abs (operand1) */
22976 xa = ix86_expand_sse_fabs (res, &mask);
22978 /* if (!isless (xa, TWO52)) goto label; */
22979 TWO52 = ix86_gen_TWO52 (mode);
22980 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
22982 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
22983 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
22985 ix86_sse_copysign_to_positive (res, xa, res, mask);
22987 emit_label (label);
22988 LABEL_NUSES (label) = 1;
22990 emit_move_insn (operand0, res);
22993 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
22996 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
22998 /* C code for the stuff we expand below.
22999 double xa = fabs (x), x2;
23000 if (!isless (xa, TWO52))
23002 xa = xa + TWO52 - TWO52;
23003 x2 = copysign (xa, x);
23012 enum machine_mode mode = GET_MODE (operand0);
23013 rtx xa, TWO52, tmp, label, one, res, mask;
23015 TWO52 = ix86_gen_TWO52 (mode);
23017 /* Temporary for holding the result, initialized to the input
23018 operand to ease control flow. */
23019 res = gen_reg_rtx (mode);
23020 emit_move_insn (res, operand1);
23022 /* xa = abs (operand1) */
23023 xa = ix86_expand_sse_fabs (res, &mask);
23025 /* if (!isless (xa, TWO52)) goto label; */
23026 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
23028 /* xa = xa + TWO52 - TWO52; */
23029 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
23030 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
23032 /* xa = copysign (xa, operand1) */
23033 ix86_sse_copysign_to_positive (xa, xa, res, mask);
23035 /* generate 1.0 or -1.0 */
23036 one = force_reg (mode,
23037 const_double_from_real_value (do_floor
23038 ? dconst1 : dconstm1, mode));
23040 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
23041 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
23042 emit_insn (gen_rtx_SET (VOIDmode, tmp,
23043 gen_rtx_AND (mode, one, tmp)));
23044 /* We always need to subtract here to preserve signed zero. */
23045 tmp = expand_simple_binop (mode, MINUS,
23046 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
23047 emit_move_insn (res, tmp);
23049 emit_label (label);
23050 LABEL_NUSES (label) = 1;
23052 emit_move_insn (operand0, res);
23055 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
23058 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
23060 /* C code for the stuff we expand below.
23061 double xa = fabs (x), x2;
23062 if (!isless (xa, TWO52))
23064 x2 = (double)(long)x;
23071 if (HONOR_SIGNED_ZEROS (mode))
23072 return copysign (x2, x);
23075 enum machine_mode mode = GET_MODE (operand0);
23076 rtx xa, xi, TWO52, tmp, label, one, res, mask;
23078 TWO52 = ix86_gen_TWO52 (mode);
23080 /* Temporary for holding the result, initialized to the input
23081 operand to ease control flow. */
23082 res = gen_reg_rtx (mode);
23083 emit_move_insn (res, operand1);
23085 /* xa = abs (operand1) */
23086 xa = ix86_expand_sse_fabs (res, &mask);
23088 /* if (!isless (xa, TWO52)) goto label; */
23089 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
23091 /* xa = (double)(long)x */
23092 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
23093 expand_fix (xi, res, 0);
23094 expand_float (xa, xi, 0);
23097 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
23099 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
23100 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
23101 emit_insn (gen_rtx_SET (VOIDmode, tmp,
23102 gen_rtx_AND (mode, one, tmp)));
23103 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
23104 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
23105 emit_move_insn (res, tmp);
23107 if (HONOR_SIGNED_ZEROS (mode))
23108 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
23110 emit_label (label);
23111 LABEL_NUSES (label) = 1;
23113 emit_move_insn (operand0, res);
23116 /* Expand SSE sequence for computing round from OPERAND1 storing
23117 into OPERAND0. Sequence that works without relying on DImode truncation
23118 via cvttsd2siq that is only available on 64bit targets. */
23120 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
23122 /* C code for the stuff we expand below.
23123 double xa = fabs (x), xa2, x2;
23124 if (!isless (xa, TWO52))
23126 Using the absolute value and copying back sign makes
23127 -0.0 -> -0.0 correct.
23128 xa2 = xa + TWO52 - TWO52;
23133 else if (dxa > 0.5)
23135 x2 = copysign (xa2, x);
23138 enum machine_mode mode = GET_MODE (operand0);
23139 rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;
23141 TWO52 = ix86_gen_TWO52 (mode);
23143 /* Temporary for holding the result, initialized to the input
23144 operand to ease control flow. */
23145 res = gen_reg_rtx (mode);
23146 emit_move_insn (res, operand1);
23148 /* xa = abs (operand1) */
23149 xa = ix86_expand_sse_fabs (res, &mask);
23151 /* if (!isless (xa, TWO52)) goto label; */
23152 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
23154 /* xa2 = xa + TWO52 - TWO52; */
23155 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
23156 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
23158 /* dxa = xa2 - xa; */
23159 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
23161 /* generate 0.5, 1.0 (= 0.5 + 0.5) and -0.5 (= 0.5 - 1.0) */
23162 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
23163 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
23164 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
/* NOTE(review): the pseudo allocated here is never used; TMP is
   reassigned by the compare-mask call right below.  */
23168 tmp = gen_reg_rtx (mode);
23169 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
23170 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
23171 emit_insn (gen_rtx_SET (VOIDmode, tmp,
23172 gen_rtx_AND (mode, one, tmp)));
23173 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
23174 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
23175 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
23176 emit_insn (gen_rtx_SET (VOIDmode, tmp,
23177 gen_rtx_AND (mode, one, tmp)));
23178 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
23180 /* res = copysign (xa2, operand1) */
23181 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
23183 emit_label (label);
23184 LABEL_NUSES (label) = 1;
23186 emit_move_insn (operand0, res);
/* ix86_expand_trunc (OPERAND0, OPERAND1): emit RTL that computes trunc
   (OPERAND1) into OPERAND0 through an integer conversion and back, as
   the pseudo-code below shows.
   NOTE(review): this listing has lost short lines (the end of the next
   comment, the return type, the braces and the pseudo-code comment
   terminator); restore them from the pristine file before compiling.  */
23189 /* Expand SSE sequence for computing trunc from OPERAND1 storing
23192 ix86_expand_trunc (rtx operand0, rtx operand1)
23194 /* C code for SSE variant we expand below.
23195 double xa = fabs (x), x2;
23196 if (!isless (xa, TWO52))
23198 x2 = (double)(long)x;
23199 if (HONOR_SIGNED_ZEROS (mode))
23200 return copysign (x2, x);
23203 enum machine_mode mode = GET_MODE (operand0);
23204 rtx xa, xi, TWO52, label, res, mask;
23206 TWO52 = ix86_gen_TWO52 (mode);
23208 /* Temporary for holding the result, initialized to the input
23209 operand to ease control flow. */
23210 res = gen_reg_rtx (mode);
23211 emit_move_insn (res, operand1);
23213 /* xa = abs (operand1) */
23214 xa = ix86_expand_sse_fabs (res, &mask);
23216 /* if (!isless (xa, TWO52)) goto label;
   The comparison is the unordered variant, so a NaN input branches as
   well.  On that path res still holds operand1, which is then stored to
   operand0 unchanged.  */
23217 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
23219 /* x = (double)(long)x
   The integer temporary is DImode for a DFmode input and SImode
   otherwise.  */
23220 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
23221 expand_fix (xi, res, 0);
23222 expand_float (res, xi, 0);
/* Copy the sign of the input back onto the result, but only when signed
   zeros have to be honored for this mode.  */
23224 if (HONOR_SIGNED_ZEROS (mode))
23225 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
/* Join point of the early exit and the computed path; the label is
   given a use count of one.  */
23227 emit_label (label);
23228 LABEL_NUSES (label) = 1;
23230 emit_move_insn (operand0, res);
/* ix86_expand_truncdf_32 (OPERAND0, OPERAND1): emit RTL that computes
   trunc (OPERAND1) into OPERAND0 without an integer conversion: the
   magnitude is rounded with the TWO52 add and subtract, corrected
   downwards by 1.0 where needed, and the sign is copied back.
   NOTE(review): this listing has lost short lines (the end of the next
   comment, the return type, the braces and parts of the pseudo-code
   comment); restore them from the pristine file before compiling.  */
23233 /* Expand SSE sequence for computing trunc from OPERAND1 storing
23236 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
23238 enum machine_mode mode = GET_MODE (operand0);
23239 rtx xa, mask, TWO52, label, one, res, smask, tmp;
23241 /* C code for SSE variant we expand below.
23242 double xa = fabs (x), x2;
23243 if (!isless (xa, TWO52))
23245 xa2 = xa + TWO52 - TWO52;
23249 x2 = copysign (xa2, x);
23253 TWO52 = ix86_gen_TWO52 (mode);
23255 /* Temporary for holding the result, initialized to the input
23256 operand to ease control flow. */
23257 res = gen_reg_rtx (mode);
23258 emit_move_insn (res, operand1);
23260 /* xa = abs (operand1)
   The sign mask is kept in smask; the plain name mask is used further
   down for the comparison result.  */
23261 xa = ix86_expand_sse_fabs (res, &smask);
23263 /* if (!isless (xa, TWO52)) goto label;
   The comparison is the unordered variant, so a NaN input branches as
   well.  On that path res still holds operand1, which is then stored to
   operand0 unchanged.  */
23264 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
23266 /* res = xa + TWO52 - TWO52;
   Presumably this rounds xa to an integral value in the current
   rounding mode, which is why a compensation step follows -- confirm.  */
23267 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
23268 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
23269 emit_move_insn (res, tmp);
23272 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
23274 /* Compensate: res = xa2 - (res > xa ? 1 : 0)
   i.e. step back by 1.0 when the rounded value ended up above xa.  The
   comparison result is ANDed with 1.0 to obtain the amount to
   subtract.  */
23275 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
23276 emit_insn (gen_rtx_SET (VOIDmode, mask,
23277 gen_rtx_AND (mode, mask, one)));
23278 tmp = expand_simple_binop (mode, MINUS,
23279 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
23280 emit_move_insn (res, tmp);
23282 /* res = copysign (res, operand1)
   Unlike ix86_expand_trunc this is done unconditionally, using the sign
   mask saved in smask.  */
23283 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
/* Join point of the early exit and the computed path; the label is
   given a use count of one.  */
23285 emit_label (label);
23286 LABEL_NUSES (label) = 1;
23288 emit_move_insn (operand0, res);
/* ix86_expand_round (OPERAND0, OPERAND1): emit RTL that computes round
   (OPERAND1) into OPERAND0 by adding nextafter (0.5, 0.0) to the
   magnitude, converting to an integer and back, and copying the sign
   of the input onto the result.
   NOTE(review): this listing has lost short lines (the end of the next
   comment, the return type, the braces and parts of the pseudo-code
   comment); restore them from the pristine file before compiling.  */
23291 /* Expand SSE sequence for computing round from OPERAND1 storing
23294 ix86_expand_round (rtx operand0, rtx operand1)
23296 /* C code for the stuff we're doing below:
23297 double xa = fabs (x);
23298 if (!isless (xa, TWO52))
23300 xa = (double)(long)(xa + nextafter (0.5, 0.0));
23301 return copysign (xa, x);
23303 enum machine_mode mode = GET_MODE (operand0);
23304 rtx res, TWO52, xa, label, xi, half, mask;
23305 const struct real_format *fmt;
23306 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
23308 /* Temporary for holding the result, initialized to the input
23309 operand to ease control flow. */
23310 res = gen_reg_rtx (mode);
23311 emit_move_insn (res, operand1);
/* xa = abs (operand1); if (!isless (xa, TWO52)) goto label.  The
   comparison is the unordered variant, so a NaN input branches as well.
   On that path res still holds operand1, which is then stored to
   operand0 unchanged.  */
23313 TWO52 = ix86_gen_TWO52 (mode);
23314 xa = ix86_expand_sse_fabs (res, &mask);
23315 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
23317 /* load nextafter (0.5, 0.0)
   computed as pred_half = 0.5 - half_minus_pred_half, where
   half_minus_pred_half is produced by real_2expN with exponent
   -(fmt->p) - 1 (presumably 2**(-p-1), p being the precision of the
   mode's format -- confirm in real.h).  */
23318 fmt = REAL_MODE_FORMAT (mode);
23319 real_2expN (&half_minus_pred_half, -(fmt->p) - 1);
23320 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
23322 /* xa = xa + nextafter (0.5, 0.0): the register named half is loaded
   from pred_half, not from exactly 0.5.  */
23323 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
23324 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
23326 /* xa = (double)(int64_t)xa
   The integer temporary is DImode only for a DFmode input; it is SImode
   otherwise.  */
23327 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
23328 expand_fix (xi, xa, 0);
23329 expand_float (xa, xi, 0);
23331 /* res = copysign (xa, operand1) */
23332 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
/* Join point of the early exit and the computed path; the label is
   given a use count of one.  */
23334 emit_label (label);
23335 LABEL_NUSES (label) = 1;
23337 emit_move_insn (operand0, res);
23341 /* Table of valid machine attributes.
   The calling-convention entries all share ix86_handle_cconv_attribute;
   the DLL and struct-layout entries use their own handlers.
   NOTE(review): this listing has lost short lines (the braces around
   the initializer and the #endif lines closing the two conditional
   sections); restore them from the pristine file before compiling.  */
23342 static const struct attribute_spec ix86_attribute_table[] =
23344 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
23345 /* Stdcall attribute says callee is responsible for popping arguments
23346 if they are not variable. */
23347 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
23348 /* Fastcall attribute says callee is responsible for popping arguments
23349 if they are not variable. */
23350 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
23351 /* Cdecl attribute says the callee is a normal C declaration */
23352 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute },
23353 /* Regparm attribute specifies how many integer arguments are to be
23354 passed in registers.  It is the only entry in this table with
   min_len = max_len = 1; every other entry has 0, 0. */
23355 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute },
23356 /* Sseregparm attribute says we are using x86_64 calling conventions
23357 for FP arguments. */
23358 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
23359 /* force_align_arg_pointer says this function realigns the stack at entry.
   The attribute name is not a literal here: it is taken from the
   ix86_force_align_arg_pointer_string object through a cast. */
23360 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
23361 false, true, true, ix86_handle_cconv_attribute },
/* DLL-related attributes, present only when
   TARGET_DLLIMPORT_DECL_ATTRIBUTES is nonzero.  "shared" is the only
   entry in the table with decl_req set.  */
23362 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
23363 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
23364 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
23365 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
/* Struct layout selection attributes.  */
23367 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
23368 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
/* Extra entries supplied by the subtarget, if it defines any.  */
23369 #ifdef SUBTARGET_ATTRIBUTE_TABLE
23370 SUBTARGET_ATTRIBUTE_TABLE,
/* Final entry with a NULL name; presumably the end-of-table marker.  */
23372 { NULL, 0, 0, false, false, false, NULL }
23375 /* Initialize the GCC target structure.
   Each hook macro below is first undefined and then redefined to the
   i386 implementation; targetm is initialized from TARGET_INITIALIZER
   at the end of this block, which presumably picks these definitions
   up (see target-def.h).
   NOTE(review): this listing has lost short lines, including #else and
   #endif lines and some #if guards, so several conditional sections
   below look unterminated; restore them from the pristine file before
   compiling. */
23376 #undef TARGET_ATTRIBUTE_TABLE
23377 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
23378 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
23379 # undef TARGET_MERGE_DECL_ATTRIBUTES
23380 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
23383 #undef TARGET_COMP_TYPE_ATTRIBUTES
23384 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
23386 #undef TARGET_INIT_BUILTINS
23387 #define TARGET_INIT_BUILTINS ix86_init_builtins
23388 #undef TARGET_EXPAND_BUILTIN
23389 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
23391 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
23392 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
23393 ix86_builtin_vectorized_function
23395 #undef TARGET_VECTORIZE_BUILTIN_CONVERSION
23396 #define TARGET_VECTORIZE_BUILTIN_CONVERSION ix86_vectorize_builtin_conversion
23398 #undef TARGET_BUILTIN_RECIPROCAL
23399 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
23401 #undef TARGET_ASM_FUNCTION_EPILOGUE
23402 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
/* NOTE(review): both definitions of TARGET_ENCODE_SECTION_INFO are
   visible below with no #else or #endif between them; the subtarget
   variant is presumably the #else branch.  */
23404 #undef TARGET_ENCODE_SECTION_INFO
23405 #ifndef SUBTARGET_ENCODE_SECTION_INFO
23406 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
23408 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
23411 #undef TARGET_ASM_OPEN_PAREN
23412 #define TARGET_ASM_OPEN_PAREN ""
23413 #undef TARGET_ASM_CLOSE_PAREN
23414 #define TARGET_ASM_CLOSE_PAREN ""
23416 #undef TARGET_ASM_ALIGNED_HI_OP
23417 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
23418 #undef TARGET_ASM_ALIGNED_SI_OP
23419 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
23421 #undef TARGET_ASM_ALIGNED_DI_OP
23422 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
/* The unaligned data directives simply reuse the aligned ones.  */
23425 #undef TARGET_ASM_UNALIGNED_HI_OP
23426 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
23427 #undef TARGET_ASM_UNALIGNED_SI_OP
23428 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
23429 #undef TARGET_ASM_UNALIGNED_DI_OP
23430 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
23432 #undef TARGET_SCHED_ADJUST_COST
23433 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
23434 #undef TARGET_SCHED_ISSUE_RATE
23435 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
23436 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
23437 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
23438 ia32_multipass_dfa_lookahead
23440 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
23441 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
23444 #undef TARGET_HAVE_TLS
23445 #define TARGET_HAVE_TLS true
23447 #undef TARGET_CANNOT_FORCE_CONST_MEM
23448 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
23449 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
23450 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_rtx_true
23452 #undef TARGET_DELEGITIMIZE_ADDRESS
23453 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
23455 #undef TARGET_MS_BITFIELD_LAYOUT_P
23456 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
/* NOTE(review): TARGET_BINDS_LOCAL_P is set twice below, first to the
   darwin variant with no guard visible in this listing and then to the
   PE variant under TARGET_DLLIMPORT_DECL_ATTRIBUTES; the guard for the
   first definition and both #endif lines appear to be missing.  */
23459 #undef TARGET_BINDS_LOCAL_P
23460 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
23462 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
23463 #undef TARGET_BINDS_LOCAL_P
23464 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
23467 #undef TARGET_ASM_OUTPUT_MI_THUNK
23468 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
23469 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
23470 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
23472 #undef TARGET_ASM_FILE_START
23473 #define TARGET_ASM_FILE_START x86_file_start
/* NOTE(review): the continuation lines of TARGET_DEFAULT_TARGET_FLAGS
   start with '|' and end with an unmatched ')', so the line holding the
   opening parenthesis and the first operand appears to be missing from
   this listing.  */
23475 #undef TARGET_DEFAULT_TARGET_FLAGS
23476 #define TARGET_DEFAULT_TARGET_FLAGS \
23478 | TARGET_SUBTARGET_DEFAULT \
23479 | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
23481 #undef TARGET_HANDLE_OPTION
23482 #define TARGET_HANDLE_OPTION ix86_handle_option
23484 #undef TARGET_RTX_COSTS
23485 #define TARGET_RTX_COSTS ix86_rtx_costs
23486 #undef TARGET_ADDRESS_COST
23487 #define TARGET_ADDRESS_COST ix86_address_cost
23489 #undef TARGET_FIXED_CONDITION_CODE_REGS
23490 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
23491 #undef TARGET_CC_MODES_COMPATIBLE
23492 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
23494 #undef TARGET_MACHINE_DEPENDENT_REORG
23495 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
23497 #undef TARGET_BUILD_BUILTIN_VA_LIST
23498 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
23500 #undef TARGET_MD_ASM_CLOBBERS
23501 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
23503 #undef TARGET_PROMOTE_PROTOTYPES
23504 #define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
23505 #undef TARGET_STRUCT_VALUE_RTX
23506 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
23507 #undef TARGET_SETUP_INCOMING_VARARGS
23508 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
23509 #undef TARGET_MUST_PASS_IN_STACK
23510 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
23511 #undef TARGET_PASS_BY_REFERENCE
23512 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
23513 #undef TARGET_INTERNAL_ARG_POINTER
23514 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
23515 #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
23516 #define TARGET_DWARF_HANDLE_FRAME_UNSPEC ix86_dwarf_handle_frame_unspec
23517 #undef TARGET_STRICT_ARGUMENT_NAMING
23518 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
23520 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
23521 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
23523 #undef TARGET_SCALAR_MODE_SUPPORTED_P
23524 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
23526 #undef TARGET_VECTOR_MODE_SUPPORTED_P
23527 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
23529 #undef TARGET_C_MODE_FOR_SUFFIX
23530 #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
23533 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
23534 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
/* The subtarget may supply its own attribute-insertion hook.  */
23537 #ifdef SUBTARGET_INSERT_ATTRIBUTES
23538 #undef TARGET_INSERT_ATTRIBUTES
23539 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
23542 #undef TARGET_MANGLE_FUNDAMENTAL_TYPE
23543 #define TARGET_MANGLE_FUNDAMENTAL_TYPE ix86_mangle_fundamental_type
23545 #undef TARGET_STACK_PROTECT_FAIL
23546 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
23548 #undef TARGET_FUNCTION_VALUE
23549 #define TARGET_FUNCTION_VALUE ix86_function_value
/* Define the target hook vector from the macro set configured above.  */
23551 struct gcc_target targetm = TARGET_INITIALIZER;
/* Presumably the gengtype-generated garbage-collector tables for this
   file -- confirm against the build rules.  */
23553 #include "gt-i386.h"